fix: updates the out-of-the-box helm chart monitors to work better
fullykubed committed May 19, 2024
1 parent 0c02d90 commit 9060913
Showing 8 changed files with 383 additions and 13 deletions.
6 changes: 0 additions & 6 deletions packages/infrastructure/kube_cilium/main.tf
@@ -259,12 +259,6 @@ resource "helm_release" "cilium" {
{
key = "node.kubernetes.io/not-ready"
operator = "Exists"
effect = "NoExecute"
},
{
key = "node.kubernetes.io/not-ready"
operator = "Exists"
effect = "NoSchedule"
},
{
key = "node.kubernetes.io/unreachable"
291 changes: 287 additions & 4 deletions packages/infrastructure/kube_monitoring/main.tf
@@ -30,6 +30,46 @@ locals {
name = "monitoring"
namespace = module.namespace.namespace

default_tracked_labels = [
"panfactum.com/environment",
"panfactum.com/module",
"panfactum.com/region",
"panfactum.com/root-module",
"panfactum.com/stack-commit",
"panfactum.com/stack-version"
]
labels_to_track = tolist(toset(concat(local.default_tracked_labels, var.additional_tracked_resource_labels)))

default_tracked_resources = [
"certificatesigningrequests",
"configmaps",
"cronjobs",
"daemonsets",
"deployments",
"endpoints",
"horizontalpodautoscalers",
"ingresses",
"jobs",
"leases",
"limitranges",
"mutatingwebhookconfigurations",
"namespaces",
"networkpolicies",
"nodes",
"persistentvolumeclaims",
"persistentvolumes",
"poddisruptionbudgets",
"pods",
"replicasets",
"resourcequotas",
"secrets",
"services",
"statefulsets",
"storageclasses",
"validatingwebhookconfigurations"
]
resources_to_track = tolist(toset(concat(local.default_tracked_resources, var.additional_tracked_resources)))
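
The two *_to_track locals merge the defaults with the caller-supplied additional_tracked_resource_labels / additional_tracked_resources variables, and the tolist(toset(...)) wrapper drops any duplicates if a caller repeats a default. A minimal sketch of that pattern, with a hypothetical additional-labels value:

locals {
  default_tracked_labels = ["panfactum.com/environment", "panfactum.com/module"]

  # hypothetical: var.additional_tracked_resource_labels = ["team", "panfactum.com/module"]
  labels_to_track = tolist(toset(concat(
    local.default_tracked_labels,
    ["team", "panfactum.com/module"],
  )))
  # => ["panfactum.com/environment", "panfactum.com/module", "team"]
  #    (the duplicate is removed; element order follows Terraform's set ordering, not input order)
}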

default_resources = {
requests = {
memory = "100Mi"
@@ -99,6 +139,10 @@ locals {
id = random_id.thanos_query_frontend.hex
}

alertmanager_match = {
id = random_id.alertmanager.hex
}

thanos_store_gateway_index_config = {
type = "REDIS"
config = {
@@ -206,6 +250,11 @@ resource "random_id" "thanos_bucket_web" {
prefix = "thanos-bucket-web-"
}

resource "random_id" "alertmanager" {
byte_length = 8
prefix = "alertmanager-"
}

module "kube_labels_operator" {
source = "../kube_labels"

@@ -398,6 +447,22 @@ module "kube_labels_thanos_query_frontend" {
extra_tags = merge(var.extra_tags, local.thanos_query_frontend_match)
}

module "kube_labels_alertmanager" {
source = "../kube_labels"

# generate: common_vars_no_extra_tags.snippet.txt
pf_stack_version = var.pf_stack_version
pf_stack_commit = var.pf_stack_commit
environment = var.environment
region = var.region
pf_root_module = var.pf_root_module
pf_module = var.pf_module
is_local = var.is_local
# end-generate

extra_tags = merge(var.extra_tags, local.alertmanager_match)
}

module "constants_operator" {
source = "../constants"

@@ -614,6 +679,24 @@ module "constants_thanos_query_frontend" {
extra_tags = merge(var.extra_tags, local.thanos_query_frontend_match)
}

module "constants_alertmanager" {
source = "../constants"

matching_labels = local.alertmanager_match

# generate: common_vars_no_extra_tags.snippet.txt
pf_stack_version = var.pf_stack_version
pf_stack_commit = var.pf_stack_commit
environment = var.environment
region = var.region
pf_root_module = var.pf_root_module
pf_module = var.pf_module
is_local = var.is_local
# end-generate

extra_tags = merge(var.extra_tags, local.alertmanager_match)
}


/***************************************
* Namespace
@@ -836,6 +919,18 @@ resource "helm_release" "prometheus_stack" {
enabled = true
}

defaultRules = {
create = true
rules = {
etcd = var.monitoring_etcd_enabled
kubeSchedulerAlerting = false // Not exposed in EKS
kubeSchedulerRecording = false // Not exposed in EKS
kubernetesSystem = false // Not exposed in EKS
kubeControllerManager = false // Not exposed in EKS
kubeProxy = false // We do not use kube-proxy
}
}
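
These rule groups are turned off because the alerts and recording rules they generate can never be satisfied here: on EKS the scheduler, controller manager, and related control-plane internals are managed by AWS and are not exposed for scraping, and kube-proxy is not deployed in this stack (Cilium typically provides the equivalent data path), so leaving the chart defaults enabled would mostly produce alerts that can never clear.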

//////////////////////////////////////////////////////////
// Prometheus Operator
//////////////////////////////////////////////////////////
@@ -942,12 +1037,119 @@ resource "helm_release" "prometheus_stack" {
image = local.default_k8s_image
customLabels = module.kube_labels_kube_state_metrics.kube_labels
extraArgs = [
"--metric-labels-allowlist=pods=[*]"
"--metric-labels-allowlist=*=[${join(",", local.labels_to_track)}]"
]
updateStrategy = "Recreate"
tolerations = module.constants_kube_state_metrics.burstable_node_toleration_helm
resources = local.default_resources

collectors = local.resources_to_track

prometheus = {
monitor = {
metricRelabelings = concat(
// Removes the panfactum.com/ prefix
[for label in [
"label_panfactum_com_environment",
"label_panfactum_com_region",
"label_panfactum_com_stack_version",
"label_panfactum_com_stack_commit",
"label_panfactum_com_module",
"label_panfactum_com_root_module"
] : {
sourceLabels = ["__name__", label],
regex = "(.*_labels);(.+)"
targetLabel = "label_${trimprefix(label, "label_panfactum_com_")}"
replacement = "$2"
action = "replace"
}
],
[
{
regex = ".*panfactum_com.*"
action = "labeldrop"
},

// This addresses a bug in a previous version of the stack
// where the access mode array contained duplicate entries
// for postgres deployments. This causes duplicate samples
// to be sent to prometheus which triggers alerts.
{
action = "drop"
regex = "kube_persistentvolumeclaim_access_mode"
sourceLabels = ["__name__"]
},
]
)
}
}
}
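
Taken together, the updated --metric-labels-allowlist (now applied to every resource kind, but restricted to the tracked Panfactum labels) and the relabeling rules above turn the long prefixed label names into short ones. An illustrative before/after for a single kube-state-metrics series (the pod name and values are made up):

# as scraped from kube-state-metrics
kube_pod_labels{pod="example-pod", label_panfactum_com_environment="production", label_panfactum_com_module="kube_monitoring"} 1

# after the replace rules copy the values and the labeldrop removes the originals
kube_pod_labels{pod="example-pod", label_environment="production", label_module="kube_monitoring"} 1

The replace rules only match metrics whose names end in _labels and whose source label is non-empty, so other kube-state-metrics series pass through unchanged; the final labeldrop then strips every remaining label containing panfactum_com.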

resources = local.default_resources
//////////////////////////////////////////////////////////
// etcd
//////////////////////////////////////////////////////////
kubeEtcd = {
enabled = var.monitoring_etcd_enabled
}

//////////////////////////////////////////////////////////
// Kubernetes API server monitoring
//////////////////////////////////////////////////////////
kubeApiServer = {
enabled = true
serviceMonitor = {
metricRelabelings = [
{
action = "drop"
regex = "apiserver_request_duration_seconds_.*" # Use apiserver_request_sli_duration_seconds_ instead
sourceLabels = ["__name__"]
},
# These aren't really important to track and they use a lot of space
{
action = "drop"
regex = "apiserver_request_body_size_.*"
sourceLabels = ["__name__"]
},
{
action = "drop"
regex = "apiserver_response_body_size_.*"
sourceLabels = ["__name__"]
},
{
action = "drop"
regex = "kubernetes_feature_enabled"
sourceLabels = ["__name__"]
}
]
}
}

//////////////////////////////////////////////////////////
// Kubernetes Scheduler
//////////////////////////////////////////////////////////
kubeScheduler = {
enabled = false // not exposed in EKS
}

//////////////////////////////////////////////////////////
// kube-proxy
//////////////////////////////////////////////////////////
kubeProxy = {
enabled = false // we do not use kube-proxy
}

//////////////////////////////////////////////////////////
// Kubernetes Controller Manager
//////////////////////////////////////////////////////////
kubeControllerManager = {
enabled = false // not exposed in EKS
}

//////////////////////////////////////////////////////////
// coreDNS
//////////////////////////////////////////////////////////
coreDns = {
enabled = false // we monitor this in our own module
}

//////////////////////////////////////////////////////////
@@ -987,7 +1189,7 @@ resource "helm_release" "prometheus_stack" {
logLevel = var.prometheus_log_level
logFormat = "json"
scrapeInterval = "${var.prometheus_default_scrape_interval_seconds}s"
retention = "6h" // This should be 3x the block window (2h) and then data will get shipped to s3 by thanos
retention = "1h" // This is only for local retention (before data is shipped to s3 by thanos)
disableCompaction = true

storageSpec = {
@@ -1011,6 +1213,7 @@
logLevel = var.prometheus_log_level
logFormat = "json"
resources = local.default_resources
blockSize = "30m"
objectStorageConfig = {
secret = {
type = "s3"
@@ -1025,6 +1228,47 @@
}
}
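
The retention and blockSize changes work as a pair: Prometheus now cuts 30-minute TSDB blocks, the Thanos sidecar uploads each completed block to S3, and local storage only has to hold data long enough for that upload to happen, so an hour of local retention replaces the previous six. disableCompaction stays set so Prometheus never compacts blocks locally, which would otherwise conflict with compaction being handled in object storage.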

//////////////////////////////////////////////////////////
// Alert Manager
//////////////////////////////////////////////////////////
alertmanager = {
enabled = true
service = {
labels = module.kube_labels_alertmanager.kube_labels
}
alertmanagerSpec = {
podMetadata = {
labels = module.kube_labels_alertmanager.kube_labels
}
image = local.default_image
logLevel = var.alertmanager_log_level
logFormat = "json"

storage = {
volumeClaimTemplate = {
spec = {
storageClassName = var.alertmanager_storage_class_name
resources = {
requests = {
storage = "${var.alertmanager_local_storage_initial_size_gb}Gi"
}
}
annotations = {
"velero.io/exclude-from-backup" = "true"
}
}
}
}

replicas = 2
resources = local.default_resources
affinity = module.constants_alertmanager.pod_anti_affinity_instance_type_helm
tolerations = module.constants_alertmanager.burstable_node_toleration_helm
topologySpreadConstraints = module.constants_alertmanager.topology_spread_zone_strict
priorityClassName = module.constants_alertmanager.cluster_important_priority_class_name
}
}

//////////////////////////////////////////////////////////
// Grafana
//////////////////////////////////////////////////////////
@@ -1655,7 +1899,6 @@ resource "kubernetes_manifest" "pdb_thanos_query_frontend" {
}

resource "kubernetes_manifest" "pdb_thanos_query" {
count = var.thanos_bucket_web_enable ? 1 : 0
manifest = {
apiVersion = "policy/v1"
kind = "PodDisruptionBudget"
Expand All @@ -1674,6 +1917,25 @@ resource "kubernetes_manifest" "pdb_thanos_query" {
depends_on = [helm_release.thanos]
}

resource "kubernetes_manifest" "alertmanager" {
manifest = {
apiVersion = "policy/v1"
kind = "PodDisruptionBudget"
metadata = {
name = "alertmanager"
namespace = local.namespace
labels = module.kube_labels_alertmanager.kube_labels
}
spec = {
selector = {
matchLabels = local.alertmanager_match
}
maxUnavailable = 1
}
}
depends_on = [helm_release.prometheus_stack]
}
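
With the two Alertmanager replicas configured above spread across zones, this PodDisruptionBudget (maxUnavailable = 1) means voluntary disruptions such as node drains evict at most one replica at a time, so alert routing stays available through rollouts and node churn.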

/***************************************
* Autoscaling
***************************************/
@@ -1935,6 +2197,27 @@ resource "kubernetes_manifest" "vpa_thanos_query" {
depends_on = [helm_release.thanos]
}

resource "kubernetes_manifest" "vpa_alertmanager" {
count = var.vpa_enabled ? 1 : 0
manifest = {
apiVersion = "autoscaling.k8s.io/v1"
kind = "VerticalPodAutoscaler"
metadata = {
name = "alertmanager"
namespace = local.namespace
labels = module.kube_labels_alertmanager.kube_labels
}
spec = {
targetRef = {
apiVersion = "monitoring.coreos.com/v1"
kind = "Alertmanager"
name = "monitoring"
}
}
}
depends_on = [helm_release.prometheus_stack]
}


/***************************************
* SSO Login for Grafana
(The diff for the remaining 6 changed files is not shown here.)
