
Add prometheus deployment to autojoin cluster (#1057)
* Rename deploy script
* Make deployment steps generic
* Move to per-cluster deployment
* Add cluster name as template parameter
* Add autojoin cluster parameters
* Update build steps for multiple clusters and projects
* Add Helm config for autojoin cluster with public IPs
* Add byos to autojoin cluster prometheus config
* Remove byos config from prometheus-federation
* Add per cluster config for autojoin
* Make deployments conditional
stephen-soltesz authored Sep 9, 2024
1 parent 5e0b575 commit b95f2bb
Showing 36 changed files with 771 additions and 59 deletions.
12 changes: 6 additions & 6 deletions apply-data-pipeline.sh → apply-cluster.sh
@@ -16,12 +16,12 @@ source config.sh

 # Replace the template variables.
 sed -e 's|{{CLUSTER}}|'${CLUSTER}'|g' \
-  config/cluster/prometheus/prometheus.yml.template > \
-  config/cluster/prometheus/prometheus.yml
+  config/${CLUSTER}/prometheus/prometheus.yml.template > \
+  config/${CLUSTER}/prometheus/prometheus.yml
 
 # Prometheus config map.
 kubectl create configmap prometheus-cluster-config \
-  --from-file=config/cluster/prometheus \
+  --from-file=config/${CLUSTER}/prometheus \
   --dry-run="client" -o json | kubectl apply -f -
 
 kubectl create secret generic prometheus-auth \
@@ -32,7 +32,7 @@ kubectl create secret generic prometheus-auth \
 sed -i -e 's|{{OAUTH_PROXY_CLIENT_ID}}|'${!OAUTH_PROXY_CLIENT_ID}'|g' \
   -e 's|{{OAUTH_PROXY_CLIENT_SECRET}}|'${!OAUTH_PROXY_CLIENT_SECRET}'|g' \
   -e 's|{{OAUTH_PROXY_COOKIE_SECRET}}|'${!OAUTH_PROXY_COOKIE_SECRET}'|g' \
-  k8s/data-pipeline/deployments/oauth2-proxy.yml
+  k8s/${CLUSTER}/deployments/oauth2-proxy.yml
 
 # Additional k8s resources installed via Helm
 #
@@ -41,7 +41,7 @@ kubectl create namespace ingress-nginx --dry-run="client" -o json | kubectl apply -f -
 ./linux-amd64/helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \
   --namespace ingress-nginx \
   --version ${K8S_INGRESS_NGINX_VERSION} \
-  --values helm/data-pipeline/ingress-nginx/${PROJECT}.yml
+  --values helm/${CLUSTER}/ingress-nginx/${PROJECT}.yml


# Install cert-manager.
@@ -59,7 +59,7 @@ kubectl create namespace ingress-nginx --dry-run="client" -o json | kubectl apply -f -
   --set installCRDs=true \
   --set ingressShim.defaultIssuerKind=ClusterIssuer \
   --set ingressShim.defaultIssuerName=letsencrypt
 
 # Check for per-project template variables.
 if [[ ! -f "k8s/${CLUSTER}/${PROJECT}.yml" ]] ; then
   echo "No template variables found for k8s/${CLUSTER}/${PROJECT}.yml"
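Since apply-cluster.sh is parameterized only by the CLUSTER and PROJECT variables sourced from config.sh, the same script now serves every per-cluster deployment. A hedged sketch of a manual invocation follows; the variable values and zone are illustrative, and in practice cloudbuild.yaml drives the script, as shown below.

# Illustrative only: values and zone are assumptions, not part of this diff.
export PROJECT=mlab-sandbox
export CLUSTER=autojoin
gcloud container clusters get-credentials ${CLUSTER} --project ${PROJECT} --zone us-central1-a
./apply-cluster.sh

Note that expansions like ${!OAUTH_PROXY_CLIENT_ID} use bash indirect expansion: OAUTH_PROXY_CLIENT_ID is expected to hold the name of another (presumably per-project) variable, whose value is what gets substituted into the template.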
54 changes: 32 additions & 22 deletions cloudbuild.yaml
@@ -149,25 +149,35 @@ steps:
 # Check all JSON files, mostly (likely only) provisioned Grafana dashboards
 find . -type f -name '*.json' | xargs jsonlint-php -q
-# Check alert and recording rules
-promtool check rules ./config/federation/prometheus/alerts.yml
-promtool check rules ./config/federation/prometheus/rules.yml
-export CLUSTER=prometheus-federation
-# Get cluster credentials for the prometheus-federation cluster
-gcloud container clusters get-credentials $$CLUSTER --project $$PROJECT --zone $$(get_cluster_zone $$CLUSTER)
-# Apply various things in the prometheus-federation cluster
-./apply-global-prometheus.sh
-./apply-grafana-dashboards.sh
-./deploy-prometheus-targets.sh $$PROJECT
-export CLUSTER=data-pipeline
-# Get cluster credentials for the data-pipeline cluster
-gcloud container clusters get-credentials $$CLUSTER --project $$PROJECT --zone $$(get_cluster_zone $$CLUSTER)
-./apply-data-pipeline.sh
-# Deploy the IPv6 monitoring BBE configs to the IPv6 Linode.
-./deploy_bbe_config.sh $$PROJECT LINODE_PRIVATE_KEY_ipv6_monitoring
+# TODO(soltesz): Separate configuration steps so we can use cbif conditions.
+if [[ $$PROJECT = "mlab-sandbox" || $$PROJECT = "mlab-staging" || $$PROJECT = "mlab-oti" ]] ; then
+  # Check alert and recording rules
+  promtool check rules ./config/federation/prometheus/alerts.yml
+  promtool check rules ./config/federation/prometheus/rules.yml
+  export CLUSTER=prometheus-federation
+  # Get cluster credentials for the prometheus-federation cluster
+  gcloud container clusters get-credentials $$CLUSTER --project $$PROJECT --zone $$(get_cluster_zone $$CLUSTER)
+  # Apply various things in the prometheus-federation cluster
+  ./apply-global-prometheus.sh
+  ./apply-grafana-dashboards.sh
+  ./deploy-prometheus-targets.sh $$PROJECT
+  # Get cluster credentials for the data-pipeline cluster
+  export CLUSTER=data-pipeline
+  gcloud container clusters get-credentials $$CLUSTER --project $$PROJECT --zone $$(get_cluster_zone $$CLUSTER)
+  ./apply-cluster.sh
+  # Deploy the IPv6 monitoring BBE configs to the IPv6 Linode.
+  ./deploy_bbe_config.sh $$PROJECT LINODE_PRIVATE_KEY_ipv6_monitoring
+fi
+# TODO(soltesz): Separate configuration steps so we can use cbif conditions.
+if [[ $$PROJECT = "mlab-sandbox" || $$PROJECT = "mlab-staging" || $$PROJECT = "mlab-autojoin" ]] ; then
+  export CLUSTER=autojoin
+  # Get cluster credentials for the autojoin cluster
+  gcloud container clusters get-credentials $$CLUSTER --project $$PROJECT --zone $$(get_cluster_zone $$CLUSTER)
+  ./apply-cluster.sh
+fi
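Both branches rely on a get_cluster_zone helper that is defined elsewhere in the build environment, not in this diff. A minimal sketch of such a helper, assuming it simply asks gcloud for the cluster's location in the current project (the real definition may differ):

# Hypothetical helper; the actual implementation is not part of this commit.
get_cluster_zone() {
  local cluster=$1
  gcloud container clusters list --project "${PROJECT}" \
    --filter="name=${cluster}" --format="value(location)"
}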
268 changes: 268 additions & 0 deletions config/autojoin/prometheus/prometheus.yml.template
@@ -0,0 +1,268 @@
# M-Lab Prometheus configuration.

global:
  scrape_interval: 60s      # Set the scrape interval to every 60 seconds.
  evaluation_interval: 60s  # Evaluate rules every 60 seconds.
  # scrape_timeout is set to the global default (10s).

  # These labels are attached to any time series or alert sent to external
  # systems (federation, remote storage, Alertmanager).
  # TODO(soltesz): use this when M-Lab adds federation or alertmanager.
  external_labels:
    cluster: {{CLUSTER}}


# Load rules once and periodically evaluate them according to the global
# 'evaluation_interval'.
rule_files:
  # - /etc/prometheus/rules.yml

# Scrape configurations.
#
# Each job name defines monitoring targets (or a method for discovering
# targets).
#
# The M-Lab Prometheus configuration uses three config types:
# * automatically discovered services via kubernetes (kubernetes_sd_config)
# * automatically discovered services via file (file_sd_config)
# * static targets (static_config)
#
# Kubernetes targets are discovered automatically by querying the kubernetes
# master API. The configuration for this is simplest when Prometheus runs in
# the same cluster as the kubernetes master being monitored. In particular,
# the master CA certificates and an authentication token are mounted
# automatically in every container's filesystem for easy access.
#
# Discovery of legacy targets occurs by reading a configuration file. This
# configuration file can be updated out of band after start and Prometheus will
# periodically re-read the contents, adding new targets or removing old ones.
#
# Static targets cannot change after Prometheus starts, which makes them the
# least flexible. Because of this, only well-known, long-lived, or singleton
# targets that need special relabeling rules should be static.
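#
# As a hedged illustration only (this entry is not part of M-Lab's config),
# a minimal static-target job would look like:
#
#   - job_name: 'example-static'
#     static_configs:
#       - targets: ['localhost:9090']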
scrape_configs:

# Kubernetes configurations were inspired by:
# https://github.com/prometheus/prometheus/blob/main/documentation/examples
#
# The four kubernetes scrape configs correspond to specific cluster
# components.
# * master API
# * cluster nodes
# * pods
# * service endpoints
#
# The separation allows each component to use different authentication
# configs, or apply different relabeling rules.

# Scrape config for kubernetes master API server.
#
# The kubernetes API is exposed as an "endpoint". Since kubernetes may have
# many endpoints, this configuration restricts the targets monitored to the
# default/kubernetes service. The relabeling rules ignore other endpoints.
  - job_name: 'kubernetes-apiservers'
    kubernetes_sd_configs:
      - role: endpoints

    # The kubernetes API requires authentication and uses a privately signed
    # certificate. The tls_config specifies the private CA cert and an
    # auth token. Kubernetes automatically mounts these files in the container
    # filesystem.
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    # The source_labels are concatenated with ';'. The regex matches a single
    # value for the default kubernetes service endpoint. If there are
    # multiple API servers, all will match this pattern.
    relabel_configs:
      - source_labels: [__meta_kubernetes_namespace,
                        __meta_kubernetes_service_name,
                        __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https


# Scrape config for kubernetes nodes.
#
# A kubernetes cluster consists of one or more nodes. Each reports metrics
# related to the whole machine.
  - job_name: 'kubernetes-nodes'
    kubernetes_sd_configs:
      - role: node

    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt

      # Nodes are discovered and scraped using the kubernetes internal network
      # IP. Unfortunately, the certificates do not validate on requests:
      #
      #   "x509: cannot validate certificate for 10.0.4.126 because it doesn't
      #   contain any IP SANs"
      #
      # This is a known issue without a likely solution for private APIs:
      # https://github.com/prometheus/prometheus/issues/1822
      #
      # Since these IPs are internal to the kubernetes virtual network, it
      # should be safe to skip certificate verification.
      insecure_skip_verify: true
    # TODO(soltesz): if we skip_verify, do we still need the bearer token?
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    # Copy node labels from kubernetes to labels on the Prometheus metrics.
    # TODO(soltesz): There are many labels. Some look unnecessary. Restrict
    # pattern to match helpful labels.
    relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      # Node /metrics in v1.6+ are accessible via a proxy through the
      # kubernetes api server. So, we must update the target and metric path.
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics
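      # For example, metrics for a hypothetical node named "example-node"
      # would be fetched from:
      #   https://kubernetes.default.svc:443/api/v1/nodes/example-node/proxy/metrics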


# Scrape config for kubernetes pods.
#
# Kubernetes pods are scraped when they have an annotation:
# `prometheus.io/scrape=true`.
#
# Only containers that include an explicit containerPort declaration are
# scraped. For example:
#
# ports:
# - containerPort: 9090
#
# Configuration expects the default HTTP protocol scheme.
# Configuration expects the default path of /metrics on targets.
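#
# As a hypothetical manifest fragment (not part of this config), a pod that
# opts in to scraping would declare:
#
#   metadata:
#     annotations:
#       prometheus.io/scrape: "true"
#   spec:
#     containers:
#       - ports:
#           - containerPort: 9090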
  - job_name: 'kubernetes-pods'
    kubernetes_sd_configs:
      - role: pod

    relabel_configs:
      # For inventory, record whether a pod is ready. This helps distinguish
      # between: missing from inventory, not ready and failing, ready but
      # failing, and ready and working.
      - source_labels: [__meta_kubernetes_pod_ready]
        action: replace
        target_label: ready

      # Check for the prometheus.io/scrape=true annotation.
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true

      # Only keep containers that have a declared container port.
      - source_labels: [__meta_kubernetes_pod_container_port_number]
        action: keep
        regex: (\d+)

      # Copy all pod labels from kubernetes to the Prometheus metrics.
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)

      # Add the kubernetes namespace as a Prometheus label.
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: namespace

      # Extract the "<cluster>-<node-pool>" name from the GKE node name.
      - source_labels: [__meta_kubernetes_pod_node_name]
        action: replace
        regex: gke-(.*)(-[^-]+){2}
        replacement: $1
        target_label: nodepool
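      # As a hypothetical illustration (not a real M-Lab node name), a node
      # named gke-autojoin-default-pool-8f6e1c2a-abcd would yield
      # nodepool="autojoin-default-pool".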

      # Identify the deployment name for replica set or daemon set. Pods
      # created by deployments or daemon sets are processed here. The
      # following two rules recognize these two cases.
      #
      # 1: For DaemonSet, remove the trailing 5-character pod name hash.
      #    e.g. node-exporter-ltxgz
      - source_labels: [__meta_kubernetes_pod_controller_kind, __meta_kubernetes_pod_name]
        action: replace
        regex: DaemonSet;(.*)(-[^-]{5})
        replacement: $1
        target_label: deployment

      # 2: For ReplicaSet, remove the trailing 10-digit ReplicaSet hash and
      #    5-character pod name hash. For a daemon set, which lacks the
      #    trailing ReplicaSet hash, the regex will not match and deployment
      #    remains unchanged.
      #    e.g. prometheus-server-3165440997-ppf9w
      - source_labels: [__meta_kubernetes_pod_controller_kind, __meta_kubernetes_pod_name]
        action: replace
        regex: ReplicaSet;(.*)(-[^-]+)(-[^-]{5})
        replacement: $1
        target_label: deployment

      # TODO(soltesz): evaluate and remove from config if the pod name is not
      # helpful in practice.
      #
      # Add the kubernetes pod name.
      #- source_labels: [__meta_kubernetes_pod_name]
      #  action: replace
      #  target_label: pod

      # Add the kubernetes pod container name.
      - source_labels: [__meta_kubernetes_pod_container_name]
        action: replace
        target_label: container


# Scrape config for kubernetes service endpoints.
#
# Service endpoints are scraped when they have an annotation:
# `prometheus.io/scrape=true`.
#
# Port 80 is scraped by default. To use a different port, use the annotation:
# `prometheus.io/port=9090`.
#
# Configuration expects the default HTTP protocol scheme.
# Configuration expects the default path of /metrics on targets.
  - job_name: 'kubernetes-service-endpoints'
    kubernetes_sd_configs:
      - role: endpoints

    relabel_configs:
      # Check for the prometheus.io/scrape=true annotation.
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      # Check for the prometheus.io/port=<port> annotation.
      - source_labels: [__address__,
                        __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        # A google/re2 regex, matching addresses with or without default ports.
        # NB: this will not work with IPv6 addresses. But, at the moment,
        # kubernetes uses IPv4 addresses for the internal network and GCE
        # does not support IPv6.
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
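        # For example (values are hypothetical): the concatenated source
        # labels "10.0.0.5:80;9090" become the new target address
        # "10.0.0.5:9090".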
      # Copy all service labels from kubernetes to the Prometheus metrics.
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      # Add the kubernetes namespace as a Prometheus label.
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      # Add the kubernetes service name as a Prometheus label.
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name


# Scrape byos-nodes every minute.
  - job_name: 'byos-nodes'
    scrape_timeout: 40s
    file_sd_configs:
      - files:
          - /byos-nodes/*.json
        # Attempt to re-read files every five minutes.
        refresh_interval: 5m
    scheme: http
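The /byos-nodes/*.json files consumed above follow Prometheus's standard file_sd JSON format. A hedged sketch of one such file (the target address, port, and label are illustrative; the real files are generated outside this commit):

[
  {
    "targets": ["203.0.113.10:9990"],
    "labels": {"deployment": "byos"}
  }
]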
10 changes: 0 additions & 10 deletions config/federation/prometheus/prometheus.yml.template
@@ -801,13 +801,3 @@ scrape_configs:
         regex: .*
         target_label: __address__
         replacement: switch-monitoring-service.default.svc.cluster.local:8080
-
-  # Scrape byos-nodes every minute.
-  - job_name: 'byos-nodes'
-    scrape_timeout: 40s
-    file_sd_configs:
-      - files:
-          - /byos-nodes/*.json
-        # Attempt to re-read files every five minutes.
-        refresh_interval: 5m
-    scheme: http
5 changes: 5 additions & 0 deletions helm/autojoin/ingress-nginx/mlab-autojoin.yml
@@ -0,0 +1,5 @@
controller:
  service:
    loadBalancerIP: 34.30.73.176
  ingressClassResource:
    default: true
5 changes: 5 additions & 0 deletions helm/autojoin/ingress-nginx/mlab-sandbox.yml
@@ -0,0 +1,5 @@
controller:
  service:
    loadBalancerIP: 34.30.138.62
  ingressClassResource:
    default: true
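Tying the pieces together: with CLUSTER=autojoin and PROJECT=mlab-autojoin, the helm step in apply-cluster.sh resolves to the following invocation, which picks up the loadBalancerIP above. This command is reconstructed from the diff rather than copied from build output:

./linux-amd64/helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \
  --namespace ingress-nginx \
  --version ${K8S_INGRESS_NGINX_VERSION} \
  --values helm/autojoin/ingress-nginx/mlab-autojoin.yml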
