Skip to content

Commit

Permalink
fix: misc bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
fullykubed committed Oct 19, 2024
1 parent 4f31586 commit 062cb9c
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 33 deletions.
38 changes: 38 additions & 0 deletions packages/infrastructure/kube_aws_ebs_csi/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,32 @@ resource "helm_release" "ebs_csi_driver" {
],
module.util_controller.tolerations
)
volumes = [
{
name = "selinuxfs"
hostPath = {
path = "/sys/fs/selinux"
type = "Directory"
}
},
{
name = "selinux-config"
hostPath = {
path = "/etc/selinux/config"
type = "FileOrCreate"
}
}
]
volumeMounts = [
{
name = "selinuxfs"
mountPath = "/sys/fs/selinux"
},
{
name = "selinux-config"
mountPath = "/etc/selinux/config"
},
]
}
})
]
Expand Down Expand Up @@ -306,6 +332,10 @@ resource "kubernetes_storage_class" "standard" {
tagSpecification_2 : "panfactum.com/storageclass=ebs-standard"
allowAutoIOPSPerGBIncrease = true
}

mount_options = [
"context=\"system_u:object_r:local_t:s0\""
]
}

resource "kubernetes_storage_class" "standard_retained" {
Expand All @@ -326,6 +356,10 @@ resource "kubernetes_storage_class" "standard_retained" {
tagSpecification_2 : "panfactum.com/storageclass=ebs-standard-retained"
allowAutoIOPSPerGBIncrease = true
}

mount_options = [
"context=\"system_u:object_r:local_t:s0\""
]
}


Expand Down Expand Up @@ -358,6 +392,10 @@ resource "kubernetes_storage_class" "extra" {
ext4BigAlloc = each.value.big_alloc
ext4ClusterSize = each.value.cluster_size
} : k => v if v != null }

mount_options = [
"context=\"system_u:object_r:local_t:s0\""
]
}

/***************************************
Expand Down
8 changes: 8 additions & 0 deletions packages/infrastructure/kube_ingress_nginx/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,14 @@ resource "kubectl_manifest" "vpa_nginx" {
labels = module.util.labels
}
spec = {
resourcePolicy = {
containerPolicies = [{
containerName = "controller"
minAllowed = {
memory = "150Mi"
}
}]
}
targetRef = {
apiVersion = "apps/v1"
kind = "Deployment"
Expand Down
2 changes: 1 addition & 1 deletion packages/infrastructure/kube_ingress_nginx/vars.tf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
variable "nginx_ingress_helm_version" {
description = "The version of the nginx-ingress helm chart to deploy"
type = string
default = "4.10.0"
default = "4.11.3"
}

variable "max_replicas" {
Expand Down
2 changes: 1 addition & 1 deletion packages/infrastructure/kube_karpenter/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ resource "helm_release" "karpenter" {
// during its sizing calculations by bumping vmMemoryOverheadPercent significantly.
// This ensures that new nodes can run the dynamically sized daemonsets
// as well as the other pods.
vmMemoryOverheadPercent = "0.3"
vmMemoryOverheadPercent = "0.2"
}

logConfig = {
Expand Down
2 changes: 1 addition & 1 deletion packages/infrastructure/kube_karpenter/vars.tf
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ variable "pull_through_cache_enabled" {
variable "log_level" {
description = "The log level for the karpenter pods"
type = string
default = "warn"
default = "info"
validation {
condition = contains(["info", "error", "warn", "debug"], var.log_level)
error_message = "Invalid log_level provided."
Expand Down
18 changes: 7 additions & 11 deletions packages/infrastructure/kube_karpenter_node_pools/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,7 @@ locals {
"r7a",
"r6g",
"r6i",
"r6a",
"x8g",
"x2gd",
"x2iedn"
"r6a"
]

// Blacklisted sizes
Expand Down Expand Up @@ -411,7 +408,7 @@ resource "kubectl_manifest" "burstable_node_pool" {
}
disruption = local.disruption_policy

weight = 10
weight = 25
}
})
server_side_apply = true
Expand Down Expand Up @@ -480,7 +477,7 @@ resource "kubectl_manifest" "burstable_arm_node_pool" {
}
disruption = local.disruption_policy

weight = 10
weight = 30
}
})
server_side_apply = true
Expand Down Expand Up @@ -546,7 +543,7 @@ resource "kubectl_manifest" "spot_node_pool" {
}
disruption = local.disruption_policy

weight = 10
weight = 15
}
})
server_side_apply = true
Expand Down Expand Up @@ -615,7 +612,7 @@ resource "kubectl_manifest" "spot_arm_node_pool" {
}
disruption = local.disruption_policy

weight = 10
weight = 20
}
})
server_side_apply = true
Expand Down Expand Up @@ -681,8 +678,7 @@ resource "kubectl_manifest" "on_demand_arm_node_pool" {
}
disruption = local.disruption_policy

// This should have the lowest preference
weight = 1
weight = 10
}
})
server_side_apply = true
Expand Down Expand Up @@ -748,7 +744,7 @@ resource "kubectl_manifest" "on_demand_node_pool" {
disruption = local.disruption_policy

// This should have the lowest preference
weight = 1
weight = 5
}
})
server_side_apply = true
Expand Down
2 changes: 1 addition & 1 deletion packages/reference/flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 34 additions & 16 deletions packages/website/src/app/changelog/edge/page.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,24 @@ Learn more [here](/docs/edge/guides/versioning/releases).*

* Prevents Karpenter from scheduling instances on bare metal instances which we have observed issues with.

* Upgrades [kube\_ingress\_nginx](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_ingress_nginx) so that it can run on nodes with a large number of CPU cores.

* EBS-backed PVs with many large files took a long time to mount due to [this issue](https://github.com/bottlerocket-os/bottlerocket/issues/3151)
with Bottlerocket OS (our underlying node OS). We have added the recommended remediation and now PVs should mount
nearly instantly. Note that this fix will **not** apply to existing PVs, only new ones.

To apply the fix to existing PVs, you will need to manually add the following mount option to their manifests:

```yaml
apiVersion: v1
kind: PersistentVolume
metadata:
name: XXXX
spec:
mountOptions:
- context="system_u:object_r:local_t:s0"
```
## edge.24-10-18
### Breaking Changes
Expand Down Expand Up @@ -82,39 +100,39 @@ Learn more [here](/docs/edge/guides/versioning/releases).*
see [our documentation.](/docs/main/guides/development-shell/customizing)

* `pf-get-version-hash` has been renamed to `pf-get-commit-hash` to better reflect what it does (get a commit hash given an arbitrary repo and [git ref](https://git-scm.com/book/ms/v2/Git-Internals-Git-References)).
In addition, it has been updated to take named rather than positioned arguments in order to align with other Panfactum scripts.
Finally, we have fixed several bugs in the script to make it more resilient to various inputs.
In addition, it has been updated to take named rather than positional arguments in order to align with other Panfactum scripts.
Finally, we have fixed several bugs in the script to make it more resilient to various inputs.

* Removes `pgadmin4` from the devShell as it significantly increased build times and was not useful to all users. Users
should have an option to pick their favorite DB clients rather than us be prescriptive.
should have an option to pick their favorite DB clients rather than us being prescriptive.

### Changes

* Upgrades [kube_cilium](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_cilium) to v1.16.3. This change
brings [new Cilium features](https://isovalent.com/blog/post/cilium-1-16), reduces the per-node memory usage by 75MB,
and reduces the amount of errors that users can encounter during the bootstrapping guide.
* Upgrades [kube\_cilium](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_cilium) to v1.16.3. This change
brings [new Cilium features](https://isovalent.com/blog/post/cilium-1-16), reduces the per-node memory usage by 75MB,
and reduces the number of errors that users can encounter during the bootstrapping guide.

* Upgrades [kube_aws_ebs_csi](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_aws_ebs_csi) to v1.36 in order
to support Karpenter v1 disruption taints and improve node shutdown performance.
* Upgrades [kube\_aws\_ebs\_csi](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_aws_ebs_csi) to v1.36 in order
to support Karpenter v1 disruption taints and improve node shutdown performance.

* Updates [wf_dockerfile_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build) to support 10 concurrent image builds per module
rather than just one.
* Updates [wf\_dockerfile\_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build) to support 10 concurrent image builds per module
rather than just one.

### Added

* Adds `cdn_mode_enabled` boolean to the [kube_vault](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_vault) & [kube_authentik](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_authentik) module to enable CDN for Vault.
* Adds `image_tag_prefix` string to the [wf_dockerfile_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build)
* Adds `cdn_mode_enabled` boolean to the [kube\_vault](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_vault) & [kube\_authentik](/docs/edge/reference/infrastructure-modules/direct/kubernetes/kube_authentik) modules to enable CDN for Vault and Authentik.
* Adds `image_tag_prefix` string to the [wf\_dockerfile\_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build) module.

### Fixed

* Fixed a handful of scheduling constraint bugs that resulting in less than optimal resource utilization. These improvements
should result in a significant improvement to resource utilization in tiny clusters and a minor improvement in larger clusters.
* Fixed a handful of scheduling constraint bugs that resulted in less-than-optimal resource utilization. These improvements
should result in a significant improvement to resource utilization in tiny clusters and a minor improvement in larger clusters.

* Fixed an issue where `pf_stack_version` could not be a commit hash. It can now be any valid [git ref](https://git-scm.com/book/ms/v2/Git-Internals-Git-References).

* Fixed an issue where `pf-wf-git-checkout` would fail when given a branch name as a git ref. This impacted both
[wf_tf_deploy](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_tf_deploy)
and [wf_dockerfile_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build).
[wf\_tf\_deploy](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_tf_deploy)
and [wf\_dockerfile\_build](/docs/edge/reference/infrastructure-modules/submodule/workflow/wf_dockerfile_build).

## edge.24-10-15

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ Description: The version of the nginx-ingress helm chart to deploy

Type: `string`

Default: `"4.10.0"`
Default: `"4.11.3"`

### panfactum\_scheduler\_enabled

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ Description: The log level for the karpenter pods

Type: `string`

Default: `"warn"`
Default: `"info"`

### monitoring\_enabled

Expand Down

0 comments on commit 062cb9c

Please sign in to comment.