From 062cb9c2a486d1e2f291f2c8b79ea1a084bfa799 Mon Sep 17 00:00:00 2001 From: fullykubed Date: Sat, 19 Oct 2024 14:59:05 -0400 Subject: [PATCH] fix: misc bugfixes --- .../infrastructure/kube_aws_ebs_csi/main.tf | 38 ++++++++++++++ .../infrastructure/kube_ingress_nginx/main.tf | 8 +++ .../infrastructure/kube_ingress_nginx/vars.tf | 2 +- .../infrastructure/kube_karpenter/main.tf | 2 +- .../infrastructure/kube_karpenter/vars.tf | 2 +- .../kube_karpenter_node_pools/main.tf | 18 +++---- packages/reference/flake.lock | 2 +- .../website/src/app/changelog/edge/page.mdx | 50 +++++++++++++------ .../kubernetes/kube_ingress_nginx/page.mdx | 2 +- .../direct/kubernetes/kube_karpenter/page.mdx | 2 +- 10 files changed, 93 insertions(+), 33 deletions(-) diff --git a/packages/infrastructure/kube_aws_ebs_csi/main.tf b/packages/infrastructure/kube_aws_ebs_csi/main.tf index 32a4d39c..141370aa 100644 --- a/packages/infrastructure/kube_aws_ebs_csi/main.tf +++ b/packages/infrastructure/kube_aws_ebs_csi/main.tf @@ -266,6 +266,32 @@ resource "helm_release" "ebs_csi_driver" { ], module.util_controller.tolerations ) + volumes = [ + { + name = "selinuxfs" + hostPath = { + path = "/sys/fs/selinux" + type = "Directory" + } + }, + { + name = "selinux-config" + hostPath = { + path = "/etc/selinux/config" + type = "FileOrCreate" + } + } + ] + volumeMounts = [ + { + name = "selinuxfs" + mountPath = "/sys/fs/selinux" + }, + { + name = "selinux-config" + mountPath = "/etc/selinux/config" + }, + ] } }) ] @@ -306,6 +332,10 @@ resource "kubernetes_storage_class" "standard" { tagSpecification_2 : "panfactum.com/storageclass=ebs-standard" allowAutoIOPSPerGBIncrease = true } + + mount_options = [ + "context=\"system_u:object_r:local_t:s0\"" + ] } resource "kubernetes_storage_class" "standard_retained" { @@ -326,6 +356,10 @@ resource "kubernetes_storage_class" "standard_retained" { tagSpecification_2 : "panfactum.com/storageclass=ebs-standard-retained" allowAutoIOPSPerGBIncrease = true } + + mount_options = 
[ + "context=\"system_u:object_r:local_t:s0\"" + ] } @@ -358,6 +392,10 @@ resource "kubernetes_storage_class" "extra" { ext4BigAlloc = each.value.big_alloc ext4ClusterSize = each.value.cluster_size } : k => v if v != null } + + mount_options = [ + "context=\"system_u:object_r:local_t:s0\"" + ] } /*************************************** diff --git a/packages/infrastructure/kube_ingress_nginx/main.tf b/packages/infrastructure/kube_ingress_nginx/main.tf index 6aa7a304..5a23b070 100644 --- a/packages/infrastructure/kube_ingress_nginx/main.tf +++ b/packages/infrastructure/kube_ingress_nginx/main.tf @@ -461,6 +461,14 @@ resource "kubectl_manifest" "vpa_nginx" { labels = module.util.labels } spec = { + resourcePolicy = { + containerPolicies = [{ + containerName = "controller" + minAllowed = { + memory = "150Mi" + } + }] + } targetRef = { apiVersion = "apps/v1" kind = "Deployment" diff --git a/packages/infrastructure/kube_ingress_nginx/vars.tf b/packages/infrastructure/kube_ingress_nginx/vars.tf index f295a606..229cee65 100644 --- a/packages/infrastructure/kube_ingress_nginx/vars.tf +++ b/packages/infrastructure/kube_ingress_nginx/vars.tf @@ -1,7 +1,7 @@ variable "nginx_ingress_helm_version" { description = "The version of the nginx-ingress helm chart to deploy" type = string - default = "4.10.0" + default = "4.11.3" } variable "max_replicas" { diff --git a/packages/infrastructure/kube_karpenter/main.tf b/packages/infrastructure/kube_karpenter/main.tf index 7e73438d..0e09971f 100644 --- a/packages/infrastructure/kube_karpenter/main.tf +++ b/packages/infrastructure/kube_karpenter/main.tf @@ -629,7 +629,7 @@ resource "helm_release" "karpenter" { // during its sizing calculations by bumping vmMemoryOverheadPercent significantly. // This ensures that new nodes can run the dynamically sized daemonsets // as well as the other pods. 
- vmMemoryOverheadPercent = "0.3" + vmMemoryOverheadPercent = "0.2" } logConfig = { diff --git a/packages/infrastructure/kube_karpenter/vars.tf b/packages/infrastructure/kube_karpenter/vars.tf index 1d255cfe..1ec6bd01 100644 --- a/packages/infrastructure/kube_karpenter/vars.tf +++ b/packages/infrastructure/kube_karpenter/vars.tf @@ -35,7 +35,7 @@ variable "pull_through_cache_enabled" { variable "log_level" { description = "The log level for the karpenter pods" type = string - default = "warn" + default = "info" validation { condition = contains(["info", "error", "warn", "debug"], var.log_level) error_message = "Invalid log_level provided." diff --git a/packages/infrastructure/kube_karpenter_node_pools/main.tf b/packages/infrastructure/kube_karpenter_node_pools/main.tf index c0bc481e..1299a1a9 100644 --- a/packages/infrastructure/kube_karpenter_node_pools/main.tf +++ b/packages/infrastructure/kube_karpenter_node_pools/main.tf @@ -60,10 +60,7 @@ locals { "r7a", "r6g", "r6i", - "r6a", - "x8g", - "x2gd", - "x2iedn" + "r6a" ] // Blacklisted sizes @@ -411,7 +408,7 @@ resource "kubectl_manifest" "burstable_node_pool" { } disruption = local.disruption_policy - weight = 10 + weight = 25 } }) server_side_apply = true @@ -480,7 +477,7 @@ resource "kubectl_manifest" "burstable_arm_node_pool" { } disruption = local.disruption_policy - weight = 10 + weight = 30 } }) server_side_apply = true @@ -546,7 +543,7 @@ resource "kubectl_manifest" "spot_node_pool" { } disruption = local.disruption_policy - weight = 10 + weight = 15 } }) server_side_apply = true @@ -615,7 +612,7 @@ resource "kubectl_manifest" "spot_arm_node_pool" { } disruption = local.disruption_policy - weight = 10 + weight = 20 } }) server_side_apply = true @@ -681,8 +678,7 @@ resource "kubectl_manifest" "on_demand_arm_node_pool" { } disruption = local.disruption_policy - // This should have the lowest preference - weight = 1 + weight = 10 } }) server_side_apply = true @@ -748,7 +744,7 @@ resource "kubectl_manifest" 
"on_demand_node_pool" { disruption = local.disruption_policy // This should have the lowest preference - weight = 1 + weight = 5 } }) server_side_apply = true diff --git a/packages/reference/flake.lock b/packages/reference/flake.lock index 958b9a3b..933c8ce3 100644 --- a/packages/reference/flake.lock +++ b/packages/reference/flake.lock @@ -131,7 +131,7 @@ }, "locked": { "lastModified": 0, - "narHash": "sha256-0ffumZzWUBl02ZH4jvJ1VoCcdnBWL6LlaBh9cgQV0wc=", + "narHash": "sha256-JyYNeJ+WZC6YuGCr8JXQOpztxv9BkRHkWefD7Lto5CA=", "path": "../..", "type": "path" }, diff --git a/packages/website/src/app/changelog/edge/page.mdx b/packages/website/src/app/changelog/edge/page.mdx index b1b7c22a..4aa0f434 100644 --- a/packages/website/src/app/changelog/edge/page.mdx +++ b/packages/website/src/app/changelog/edge/page.mdx @@ -15,6 +15,24 @@ Learn more [here](/docs/edge/guides/versioning/releases).* * Prevents Karpenter from scheduling instances on bare metal instances which we have observed issues with. +* Upgrades [kube\_ingress\_nginx](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_ingress_nginx) so that it can run on nodes with a large number of CPU cores. + +* EBS-backed PVs with many large files took a long time to mount due to [this issue](https://github.com/bottlerocket-os/bottlerocket/issues/3151) + with Bottlerocket OS (our underlying node OS). We have added the recommended remediation and now PVs should mount + nearly instantly. Note that this fix will **not** apply to existing PVs, only new ones. 
+ + To apply the fix to existing PVs, you will need to manually add the following mount option to their manifests: + + ```yaml + apiVersion: v1 + kind: PersistentVolume + metadata: + name: XXXX + spec: + mountOptions: + - context="system_u:object_r:local_t:s0" + ``` + ## edge.24-10-18 ### Breaking Changes @@ -82,39 +100,39 @@ Learn more [here](/docs/edge/guides/versioning/releases).* see [our documentation.](/docs/main/guides/development-shell/customizing) * `pf-get-version-hash` has been renamed to `pf-get-commit-hash` to better reflect what it does (get a commit hash given an arbitrary repo and [git ref](https://git-scm.com/book/ms/v2/Git-Internals-Git-References)). -In addition, it has been updated to take named rather than positioned arguments in order to align with other Panfactum scripts. -Finally, we have fixed several bugs in the script to make it more resilient to various inputs. + In addition, it has been updated to take named rather than positional arguments in order to align with other Panfactum scripts. + Finally, we have fixed several bugs in the script to make it more resilient to various inputs. * Removes `pgadmin4` from the devShell as it significantly increased build times and was not useful to all users. Users -should have an option to pick their favorite DB clients rather than us be prescriptive. + should have an option to pick their favorite DB clients rather than us being prescriptive. ### Changes -* Upgrades [kube_cilium](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_cilium) to v1.16.3. This change -brings [new Cilium features](https://isovalent.com/blog/post/cilium-1-16), reduces the per-node memory usage by 75MB, -and reduces the amount of errors that users can encounter during the bootstrapping guide. +* Upgrades [kube\_cilium](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_cilium) to v1.16.3. 
This change + brings [new Cilium features](https://isovalent.com/blog/post/cilium-1-16), reduces the per-node memory usage by 75MB, + and reduces the number of errors that users can encounter during the bootstrapping guide. -* Upgrades [kube_aws_ebs_csi](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_aws_ebs_csi) to v1.36 in order -to support Karpenter v1 disruption taints and improve node shutdown performance. +* Upgrades [kube\_aws\_ebs\_csi](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_aws_ebs_csi) to v1.36 in order + to support Karpenter v1 disruption taints and improve node shutdown performance. -* Updates [wf_dockerfile_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build) to support 10 concurrent image builds per module -rather than just one. +* Updates [wf\_dockerfile\_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build) to support 10 concurrent image builds per module + rather than just one. ### Added -* Adds `cdn_mode_enabled` boolean to the [kube_vault](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_vault) & [kube_authentik](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_authentik) module to enable CDN for Vault. -* Adds `image_tag_prefix` string to the [wf_dockerfile_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build) +* Adds `cdn_mode_enabled` boolean to the [kube\_vault](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_vault) & [kube\_authentik](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_authentik) modules to enable CDN for Vault. +* Adds `image_tag_prefix` string to the [wf\_dockerfile\_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build) ### Fixed -* Fixed a handful of scheduling constraint bugs that resulting in less than optimal resource utilization. 
These improvements -should result in a significant improvement to resource utilization in tiny clusters and a minor improvement in larger clusters. +* Fixed a handful of scheduling constraint bugs that resulted in less-than-optimal resource utilization. These improvements + should result in a significant improvement to resource utilization in tiny clusters and a minor improvement in larger clusters. * Fixed an issue where `pf_stack_version` could not be a commit hash. It can now be any valid [git ref](https://git-scm.com/book/ms/v2/Git-Internals-Git-References). * Fixed an issue where `pf-wf-git-checkout` would fail when given a branch name as a git ref. This impact both -[wf_tf_deploy](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_tf_deploy) -and [wf_dockerfile_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build). + [wf\_tf\_deploy](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_tf_deploy) + and [wf\_dockerfile\_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build). 
## edge.24-10-15 diff --git a/packages/website/src/app/docs/main/reference/infrastructure-modules/direct/kubernetes/kube_ingress_nginx/page.mdx b/packages/website/src/app/docs/main/reference/infrastructure-modules/direct/kubernetes/kube_ingress_nginx/page.mdx index d936bd11..8cb6e79e 100644 --- a/packages/website/src/app/docs/main/reference/infrastructure-modules/direct/kubernetes/kube_ingress_nginx/page.mdx +++ b/packages/website/src/app/docs/main/reference/infrastructure-modules/direct/kubernetes/kube_ingress_nginx/page.mdx @@ -92,7 +92,7 @@ Description: The version of the nginx-ingress helm chart to deploy Type: `string` -Default: `"4.10.0"` +Default: `"4.11.3"` ### panfactum\_scheduler\_enabled diff --git a/packages/website/src/app/docs/main/reference/infrastructure-modules/direct/kubernetes/kube_karpenter/page.mdx b/packages/website/src/app/docs/main/reference/infrastructure-modules/direct/kubernetes/kube_karpenter/page.mdx index 665d8e50..e088380d 100644 --- a/packages/website/src/app/docs/main/reference/infrastructure-modules/direct/kubernetes/kube_karpenter/page.mdx +++ b/packages/website/src/app/docs/main/reference/infrastructure-modules/direct/kubernetes/kube_karpenter/page.mdx @@ -66,7 +66,7 @@ Description: The log level for the karpenter pods Type: `string` -Default: `"warn"` +Default: `"info"` ### monitoring\_enabled