diff --git a/packages/infrastructure/kube_cilium/main.tf b/packages/infrastructure/kube_cilium/main.tf index abe21ea5..de3907e5 100644 --- a/packages/infrastructure/kube_cilium/main.tf +++ b/packages/infrastructure/kube_cilium/main.tf @@ -259,12 +259,6 @@ resource "helm_release" "cilium" { { key = "node.kubernetes.io/not-ready" operator = "Exists" - effect = "NoExecute" - }, - { - key = "node.kubernetes.io/not-ready" - operator = "Exists" - effect = "NoSchedule" }, { key = "node.kubernetes.io/unreachable" diff --git a/packages/infrastructure/kube_monitoring/main.tf b/packages/infrastructure/kube_monitoring/main.tf index c46f4d77..a1795412 100644 --- a/packages/infrastructure/kube_monitoring/main.tf +++ b/packages/infrastructure/kube_monitoring/main.tf @@ -30,6 +30,46 @@ locals { name = "monitoring" namespace = module.namespace.namespace + default_tracked_labels = [ + "panfactum.com/environment", + "panfactum.com/module", + "panfactum.com/region", + "panfactum.com/root-module", + "panfactum.com/stack-commit", + "panfactum.com/stack-version" + ] + labels_to_track = tolist(toset(concat(local.default_tracked_labels, var.additional_tracked_resource_labels))) + + default_tracked_resources = [ + "certificatesigningrequests", + "configmaps", + "cronjobs", + "daemonsets", + "deployments", + "endpoints", + "horizontalpodautoscalers", + "ingresses", + "jobs", + "leases", + "limitranges", + "mutatingwebhookconfigurations", + "namespaces", + "networkpolicies", + "nodes", + "persistentvolumeclaims", + "persistentvolumes", + "poddisruptionbudgets", + "pods", + "replicasets", + "resourcequotas", + "secrets", + "services", + "statefulsets", + "storageclasses", + "validatingwebhookconfigurations" + ] + resources_to_track = tolist(toset(concat(local.default_tracked_resources, var.additional_tracked_resources))) + default_resources = { requests = { memory = "100Mi" @@ -99,6 +139,10 @@ locals { id = random_id.thanos_query_frontend.hex } + alertmanager_match = { + id = 
random_id.alertmanager.hex + } + thanos_store_gateway_index_config = { type = "REDIS" config = { @@ -206,6 +250,11 @@ resource "random_id" "thanos_bucket_web" { prefix = "thanos-bucket-web-" } +resource "random_id" "alertmanager" { + byte_length = 8 + prefix = "alertmanager-" +} + module "kube_labels_operator" { source = "../kube_labels" @@ -398,6 +447,22 @@ module "kube_labels_thanos_query_frontend" { extra_tags = merge(var.extra_tags, local.thanos_query_frontend_match) } +module "kube_labels_alertmanager" { + source = "../kube_labels" + + # generate: common_vars_no_extra_tags.snippet.txt + pf_stack_version = var.pf_stack_version + pf_stack_commit = var.pf_stack_commit + environment = var.environment + region = var.region + pf_root_module = var.pf_root_module + pf_module = var.pf_module + is_local = var.is_local + # end-generate + + extra_tags = merge(var.extra_tags, local.alertmanager_match) +} + module "constants_operator" { source = "../constants" @@ -614,6 +679,24 @@ module "constants_thanos_query_frontend" { extra_tags = merge(var.extra_tags, local.thanos_query_frontend_match) } +module "constants_alertmanager" { + source = "../constants" + + matching_labels = local.alertmanager_match + + # generate: common_vars_no_extra_tags.snippet.txt + pf_stack_version = var.pf_stack_version + pf_stack_commit = var.pf_stack_commit + environment = var.environment + region = var.region + pf_root_module = var.pf_root_module + pf_module = var.pf_module + is_local = var.is_local + # end-generate + + extra_tags = merge(var.extra_tags, local.alertmanager_match) +} + /*************************************** * Namespace @@ -836,6 +919,18 @@ resource "helm_release" "prometheus_stack" { enabled = true } + defaultRules = { + create = true + rules = { + etcd = var.monitoring_etcd_enabled + kubeSchedulerAlerting = false // Not exposed in EKS + kubeSchedulerRecording = false // Not exposed in EKS + kubernetesSystem = false // Not exposed in EKS + kubeControllerManager = false // Not 
exposed in EKS + kubeProxy = false // We do not use kube-proxy + } + } + ////////////////////////////////////////////////////////// // Prometheus Operator ////////////////////////////////////////////////////////// @@ -942,12 +1037,119 @@ resource "helm_release" "prometheus_stack" { image = local.default_k8s_image customLabels = module.kube_labels_kube_state_metrics.kube_labels extraArgs = [ - "--metric-labels-allowlist=pods=[*]" + "--metric-labels-allowlist=*=[${join(",", local.labels_to_track)}]" ] updateStrategy = "Recreate" tolerations = module.constants_kube_state_metrics.burstable_node_toleration_helm + resources = local.default_resources + + collectors = local.resources_to_track + + prometheus = { + monitor = { + metricRelabelings = concat( + // Removes the panfactum.com/ prefix + [for label in [ + "label_panfactum_com_environment", + "label_panfactum_com_region", + "label_panfactum_com_stack_version", + "label_panfactum_com_stack_commit", + "label_panfactum_com_module", + "label_panfactum_com_root_module" + ] : { + sourceLabels = ["__name__", label], + regex = "(.*_labels);(.+)" + targetLabel = "label_${trimprefix(label, "label_panfactum_com_")}" + replacement = "$2" + action = "replace" + } + ], + [ + { + regex = ".*panfactum_com.*" + action = "labeldrop" + }, + + // This addresses a bug in a previous version of the stack + // where the access mode array contained duplicate entries + // for postgres deployments. This causes duplicate samples + // to be sent to prometheus which triggers alerts. 
+ { + action = "drop" + regex = "kube_persistentvolumeclaim_access_mode" + sourceLabels = ["__name__"] + }, + ] + ) + } + } + } - resources = local.default_resources + ////////////////////////////////////////////////////////// + // etcd + ////////////////////////////////////////////////////////// + kubeEtcd = { + enabled = var.monitoring_etcd_enabled + } + + ////////////////////////////////////////////////////////// + // Kubernetes API server monitoring + ////////////////////////////////////////////////////////// + kubeApiServer = { + enabled = true + serviceMonitor = { + metricRelabelings = [ + { + action = "drop" + regex = "apiserver_request_duration_seconds_.*" # Use apiserver_request_sli_duration_seconds_ instead + sourceLabels = ["__name__"] + }, + # These aren't really important to track and they use a lot of space + { + action = "drop" + regex = "apiserver_request_body_size_.*" + sourceLabels = ["__name__"] + }, + { + action = "drop" + regex = "apiserver_response_body_size_.*" + sourceLabels = ["__name__"] + }, + { + action = "drop" + regex = "kubernetes_feature_enabled" + sourceLabels = ["__name__"] + } + ] + } + } + + ////////////////////////////////////////////////////////// + // Kubernetes Scheduler + ////////////////////////////////////////////////////////// + kubeScheduler = { + enabled = false // not exposed in EKS + } + + ////////////////////////////////////////////////////////// + // kube-proxy + ////////////////////////////////////////////////////////// + kubeProxy = { + enabled = false // we do not use kube-proxy + } + + ////////////////////////////////////////////////////////// + // Kubernetes Controller Manager + ////////////////////////////////////////////////////////// + kubeControllerManager = { + enabled = false // not exposed in EKS + } + + ////////////////////////////////////////////////////////// + // coreDNS + ////////////////////////////////////////////////////////// + coreDns = { + enabled = false // we monitor this in our own module } 
////////////////////////////////////////////////////////// @@ -987,7 +1189,7 @@ resource "helm_release" "prometheus_stack" { logLevel = var.prometheus_log_level logFormat = "json" scrapeInterval = "${var.prometheus_default_scrape_interval_seconds}s" - retention = "6h" // This should be 3x the block window (2h) and then data will get shipped to s3 by thanos + retention = "1h" // This is only for local retention (before data is shipped to s3 by thanos) disableCompaction = true storageSpec = { @@ -1011,6 +1213,7 @@ resource "helm_release" "prometheus_stack" { logLevel = var.prometheus_log_level logFormat = "json" resources = local.default_resources + blockSize = "30m" objectStorageConfig = { secret = { type = "s3" @@ -1025,6 +1228,47 @@ resource "helm_release" "prometheus_stack" { } } + ////////////////////////////////////////////////////////// + // Alert Manager + ////////////////////////////////////////////////////////// + alertmanager = { + enabled = true + service = { + labels = module.kube_labels_alertmanager.kube_labels + } + alertmanagerSpec = { + podMetadata = { + labels = module.kube_labels_alertmanager.kube_labels + } + image = local.default_image + logLevel = var.alertmanager_log_level + logFormat = "json" + + storage = { + volumeClaimTemplate = { + spec = { + storageClassName = var.alertmanager_storage_class_name + resources = { + requests = { + storage = "${var.alertmanager_local_storage_initial_size_gb}Gi" + } + } + annotations = { + "velero.io/exclude-from-backup" = "true" + } + } + } + } + + replicas = 2 + resources = local.default_resources + affinity = module.constants_alertmanager.pod_anti_affinity_instance_type_helm + tolerations = module.constants_alertmanager.burstable_node_toleration_helm + topologySpreadConstraints = module.constants_alertmanager.topology_spread_zone_strict + priorityClassName = module.constants_alertmanager.cluster_important_priority_class_name + } + } + ////////////////////////////////////////////////////////// // Grafana 
////////////////////////////////////////////////////////// @@ -1655,7 +1899,6 @@ resource "kubernetes_manifest" "pdb_thanos_query_frontend" { } resource "kubernetes_manifest" "pdb_thanos_query" { - count = var.thanos_bucket_web_enable ? 1 : 0 manifest = { apiVersion = "policy/v1" kind = "PodDisruptionBudget" @@ -1674,6 +1917,25 @@ resource "kubernetes_manifest" "pdb_thanos_query" { depends_on = [helm_release.thanos] } +resource "kubernetes_manifest" "alertmanager" { + manifest = { + apiVersion = "policy/v1" + kind = "PodDisruptionBudget" + metadata = { + name = "alertmanager" + namespace = local.namespace + labels = module.kube_labels_alertmanager.kube_labels + } + spec = { + selector = { + matchLabels = local.alertmanager_match + } + maxUnavailable = 1 + } + } + depends_on = [helm_release.prometheus_stack] +} + /*************************************** * Autoscaling ***************************************/ @@ -1935,6 +2197,27 @@ resource "kubernetes_manifest" "vpa_thanos_query" { depends_on = [helm_release.thanos] } +resource "kubernetes_manifest" "vpa_alertmanager" { + count = var.vpa_enabled ? 
1 : 0 + manifest = { + apiVersion = "autoscaling.k8s.io/v1" + kind = "VerticalPodAutoscaler" + metadata = { + name = "alertmanager" + namespace = local.namespace + labels = module.kube_labels_alertmanager.kube_labels + } + spec = { + targetRef = { + apiVersion = "monitoring.coreos.com/v1" + kind = "Alertmanager" + name = "monitoring" + } + } + } + depends_on = [helm_release.prometheus_stack] +} + /*************************************** * SSO Login for Grafana diff --git a/packages/infrastructure/kube_monitoring/vars.tf b/packages/infrastructure/kube_monitoring/vars.tf index 5d9e5eef..6026d4b8 100644 --- a/packages/infrastructure/kube_monitoring/vars.tf +++ b/packages/infrastructure/kube_monitoring/vars.tf @@ -70,6 +70,16 @@ variable "prometheus_log_level" { } } +variable "alertmanager_log_level" { + description = "The log level for the alertmanager pods" + type = string + default = "info" + validation { + condition = contains(["info", "error", "warn", "debug"], var.alertmanager_log_level) + error_message = "Invalid alertmanager_log_level provided." + } +} + variable "thanos_log_level" { description = "The log level for the thanos pods" type = string @@ -138,4 +148,34 @@ variable "thanos_bucket_web_enable" { description = "Whether to enable the web dashboard for the Thanos bucket analyzer which can show debugging information about your metrics data" type = bool default = true +} + +variable "alertmanager_storage_class_name" { + description = "The storage class to use for local alertmanager storage" + type = string + default = "ebs-standard" +} + +variable "alertmanager_local_storage_initial_size_gb" { + description = "Number of GB to use for the local alertmanager storage (before autoscaled)" + type = number + default = 2 +} + +variable "monitoring_etcd_enabled" { + description = "Whether to monitor the Kubernetes API server's etcd instances. Only enable for debugging purposes as it contains a huge amount of metrics." 
+ type = bool + default = false +} + +variable "additional_tracked_resource_labels" { + description = "Kubernetes resource labels to include in metric labels" + type = list(string) + default = [] +} + +variable "additional_tracked_resources" { + description = "Additional Kubernetes resources to track in kube-state-metrics" + type = list(string) + default = [] } \ No newline at end of file diff --git a/packages/infrastructure/kube_pg_cluster/main.tf b/packages/infrastructure/kube_pg_cluster/main.tf index baff5a52..59abc0e5 100644 --- a/packages/infrastructure/kube_pg_cluster/main.tf +++ b/packages/infrastructure/kube_pg_cluster/main.tf @@ -434,7 +434,6 @@ resource "kubernetes_manifest" "postgres_cluster" { storage = { pvcTemplate = { - accessModes = ["ReadWriteOnce"] resources = { requests = { storage = "${var.pg_storage_gb}Gi" diff --git a/packages/reference/environments/production/us-east-2/kube_monitoring/module.yaml b/packages/reference/environments/production/us-east-2/kube_monitoring/module.yaml index bcccce6f..c4dc5497 100644 --- a/packages/reference/environments/production/us-east-2/kube_monitoring/module.yaml +++ b/packages/reference/environments/production/us-east-2/kube_monitoring/module.yaml @@ -3,4 +3,5 @@ providers: - helm - kubernetes - random - - aws \ No newline at end of file + - aws + - vault \ No newline at end of file diff --git a/packages/reference/environments/production/us-east-2/pf_website/version.yaml b/packages/reference/environments/production/us-east-2/pf_website/version.yaml index 4a931009..b927c59f 100644 --- a/packages/reference/environments/production/us-east-2/pf_website/version.yaml +++ b/packages/reference/environments/production/us-east-2/pf_website/version.yaml @@ -1 +1 @@ -version: alpha.87 +version: alpha.88 diff --git a/packages/website/src/app/changelog/edge/page.mdx b/packages/website/src/app/changelog/edge/page.mdx index abbfd660..6c05ebcd 100644 --- a/packages/website/src/app/changelog/edge/page.mdx +++ 
b/packages/website/src/app/changelog/edge/page.mdx @@ -69,6 +69,11 @@ provisions). * Updates many controller deployments to use the [Recreate](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#recreate-deployment) deployment strategy to improve timing and efficiency of applying Panfactum upgrades. +### Fixes + +* PVCs for postgres instances were inadvertently created with duplicate entries in `accessModes`. This has been fixed, +but the fix will not retroactively adjust existing PVCs as they are immutable. + ## edge.24-05-15 ### Breaking Changes diff --git a/packages/website/src/app/docs/main/reference/infrastructure-modules/kubernetes/kube_monitoring/page.mdx b/packages/website/src/app/docs/main/reference/infrastructure-modules/kubernetes/kube_monitoring/page.mdx index bccd15d2..f1a2a8bd 100644 --- a/packages/website/src/app/docs/main/reference/infrastructure-modules/kubernetes/kube_monitoring/page.mdx +++ b/packages/website/src/app/docs/main/reference/infrastructure-modules/kubernetes/kube_monitoring/page.mdx @@ -48,6 +48,46 @@ Type: `string` The following input variables are optional (have default values): +### [additional\_tracked\_resource\_labels](#input_additional_tracked_resource_labels) + +Description: Kubernetes resource labels to include in metric labels + +Type: `list(string)` + +Default: `[]` + +### [additional\_tracked\_resources](#input_additional_tracked_resources) + +Description: Additional Kubernetes resources to track in kube-state-metrics + +Type: `list(string)` + +Default: `[]` + +### [alertmanager\_local\_storage\_initial\_size\_gb](#input_alertmanager_local_storage_initial_size_gb) + +Description: Number of GB to use for the local alertmanager storage (before autoscaled) + +Type: `number` + +Default: `2` + +### [alertmanager\_log\_level](#input_alertmanager_log_level) + +Description: The log level for the alertmanager pods + +Type: `string` + +Default: `"info"` + +### 
[alertmanager\_storage\_class\_name](#input_alertmanager_storage_class_name) + +Description: The storage class to use for local alertmanager storage + +Type: `string` + +Default: `"ebs-standard"` + ### [aws\_iam\_ip\_allow\_list](#input_aws_iam_ip_allow_list) Description: A list of IPs that can use the service account token to authenticate with AWS API @@ -96,6 +136,14 @@ Type: `number` Default: `15` +### [monitoring\_etcd\_enabled](#input_monitoring_etcd_enabled) + +Description: Whether to monitor the Kubernetes API server's etcd instances. Only enable for debugging purposes as it contains a huge amount of metrics. + +Type: `bool` + +Default: `false` + ### [prometheus\_default\_scrape\_interval\_seconds](#input_prometheus_default_scrape_interval_seconds) Description: The default interval between prometheus scrapes (in seconds)