diff --git a/README.md b/README.md
index 424d52c..3f701df 100644
--- a/README.md
+++ b/README.md
@@ -10,11 +10,15 @@ Those include:
 - external secrets
 - metrics to cloudwatch
 
-## Upgrading module major version:
-  - from 2.x.x to 3.x.x version needs some manual actions as we upgraded underlying eks module from 18.x.x to 20.x.x,
+## Upgrading guide:
+  - upgrading from <2.19.0 to >=2.19.0 needs some manual actions as we upgraded the underlying eks module from 18.x.x to 20.x.x,
    here you can find needed actions/changes docs and ready scripts which can be used:
+   docs:
    https://github.com/terraform-aws-modules/terraform-aws-eks/blob/master/docs/UPGRADE-19.0.md
    https://github.com/terraform-aws-modules/terraform-aws-eks/blob/master/docs/UPGRADE-20.0.md
+   params:
+   The node group create\_launch\_template=false and launch\_template\_name="" pair of params has been replaced with use\_custom\_launch\_template=false
+   scripts:
 ```sh
 # commands to move some states, run before applying the `terraform apply` for new version
 terraform state mv "module..module.eks-cluster[0].module.eks-cluster.kubernetes_config_map_v1_data.aws_auth[0]" "module..module.eks-cluster[0].module.aws_auth_config_map.kubernetes_config_map_v1_data.aws_auth[0]"
@@ -199,11 +203,11 @@ worker_groups = {
 }
 ```
 
-# karpenter enabled
-# NOTES:
-# - enabling karpenter automatically disables cluster auto-scaler
-# - then enabling karpenter on existing old cluster there is possibility to see cycle-dependency error, to overcome this you need at first to apply main eks module change (`terraform apply --target "module..module.eks-cluster"`) and then rest of cluster-autoloader destroy and karpenter install onse
-# - when destroying cluster which have karpenter enabled there is possibility of failure on karpenter resource removal, you need to run destruction one more time to get it complete
+## karpenter enabled
+### NOTES:
+### - enabling karpenter automatically disables the cluster autoscaler
+### - when enabling karpenter on an existing old cluster you may hit a cycle-dependency error; to overcome it, first apply the main eks module change (`terraform apply --target "module..module.eks-cluster"`) and then apply the rest (the cluster-autoscaler removal and the karpenter install)
+### - when destroying a cluster which has karpenter enabled the destroy may fail on karpenter resource removal; run the destroy one more time to complete it
 ```terraform
 module "eks" {
   source = "dasmeta/eks/aws"
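To make the `params` change above concrete, here is a minimal before/after sketch of a node group definition; the `demo` group name is illustrative and the `worker_groups` shape follows the README example:

```terraform
# before (<2.19.0): disabling the custom launch template took a pair of params
worker_groups = {
  demo = {
    create_launch_template = false
    launch_template_name   = ""
  }
}

# after (>=2.19.0): the pair is replaced by a single param
worker_groups = {
  demo = {
    use_custom_launch_template = false
  }
}
```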
"module..module.eks-cluster[0].module.eks-cluster.kubernetes_config_map_v1_data.aws_auth[0]" "module..module.eks-cluster[0].module.aws_auth_config_map.kubernetes_config_map_v1_data.aws_auth[0]" @@ -202,11 +206,11 @@ * } * ``` * - * # karpenter enabled - * # NOTES: - * # - enabling karpenter automatically disables cluster auto-scaler - * # - then enabling karpenter on existing old cluster there is possibility to see cycle-dependency error, to overcome this you need at first to apply main eks module change (`terraform apply --target "module..module.eks-cluster"`) and then rest of cluster-autoloader destroy and karpenter install onse - * # - when destroying cluster which have karpenter enabled there is possibility of failure on karpenter resource removal, you need to run destruction one more time to get it complete + * ## karpenter enabled + * ### NOTES: + * ### - enabling karpenter automatically disables cluster auto-scaler + * ### - then enabling karpenter on existing old cluster there is possibility to see cycle-dependency error, to overcome this you need at first to apply main eks module change (`terraform apply --target "module..module.eks-cluster"`) and then rest of cluster-autoloader destroy and karpenter install onse + * ### - when destroying cluster which have karpenter enabled there is possibility of failure on karpenter resource removal, you need to run destruction one more time to get it complete * ```terraform * module "eks" { * source = "dasmeta/eks/aws" diff --git a/modules/aws-load-balancer-controller/iam-policy.json b/modules/aws-load-balancer-controller/iam-policy.json index 344ffb3..da7d3b5 100644 --- a/modules/aws-load-balancer-controller/iam-policy.json +++ b/modules/aws-load-balancer-controller/iam-policy.json @@ -27,7 +27,8 @@ "elasticloadbalancing:DescribeTargetGroupAttributes", "elasticloadbalancing:DescribeTargetHealth", "elasticloadbalancing:DescribeTags", - "elasticloadbalancing:AddTags" + "elasticloadbalancing:AddTags", + "elasticloadbalancing:DescribeListenerAttributes" ], "Resource": "*" }, diff --git a/modules/karpenter/README.md b/modules/karpenter/README.md index 625c8ff..01e0bb6 100644 --- a/modules/karpenter/README.md +++ b/modules/karpenter/README.md @@ -57,6 +57,7 @@ module "karpenter" { | Name | Source | Version | |------|--------|---------| +| [karpenter\_custom\_default\_configs\_merged](#module\_karpenter\_custom\_default\_configs\_merged) | cloudposse/config/yaml//modules/deepmerge | 1.0.2 | | [this](#module\_this) | terraform-aws-modules/eks/aws//modules/karpenter | 20.30.1 | ## Resources @@ -86,7 +87,7 @@ module "karpenter" { | [oidc\_provider\_arn](#input\_oidc\_provider\_arn) | EKC oidc provider arn in format 'arn:aws:iam:::oidc-provider/oidc.eks..amazonaws.com/id/'. | `string` | n/a | yes | | [resource\_chart\_version](#input\_resource\_chart\_version) | The dasmeta karpenter-resources chart version | `string` | `"0.1.0"` | no | | [resource\_configs](#input\_resource\_configs) | Configurations to pass and override default ones for karpenter-resources chart. Check the helm chart available configs here: https://github.com/dasmeta/helm/tree/karpenter-resources-0.1.0/charts/karpenter-resources | `any` | `{}` | no | -| [resource\_configs\_defaults](#input\_resource\_configs\_defaults) | Configurations to pass and override default ones for karpenter-resources chart. Check the helm chart available configs here: https://github.com/dasmeta/helm/tree/karpenter-resources-0.1.0/charts/karpenter-resources |
diff --git a/modules/karpenter/README.md b/modules/karpenter/README.md
index 625c8ff..01e0bb6 100644
--- a/modules/karpenter/README.md
+++ b/modules/karpenter/README.md
@@ -57,6 +57,7 @@ module "karpenter" {
 
 | Name | Source | Version |
 |------|--------|---------|
+| [karpenter\_custom\_default\_configs\_merged](#module\_karpenter\_custom\_default\_configs\_merged) | cloudposse/config/yaml//modules/deepmerge | 1.0.2 |
 | [this](#module\_this) | terraform-aws-modules/eks/aws//modules/karpenter | 20.30.1 |
 
 ## Resources
 
@@ -86,7 +87,7 @@ module "karpenter" {
 | [oidc\_provider\_arn](#input\_oidc\_provider\_arn) | EKC oidc provider arn in format 'arn:aws:iam:::oidc-provider/oidc.eks..amazonaws.com/id/'. | `string` | n/a | yes |
 | [resource\_chart\_version](#input\_resource\_chart\_version) | The dasmeta karpenter-resources chart version | `string` | `"0.1.0"` | no |
 | [resource\_configs](#input\_resource\_configs) | Configurations to pass and override default ones for karpenter-resources chart. Check the helm chart available configs here: https://github.com/dasmeta/helm/tree/karpenter-resources-0.1.0/charts/karpenter-resources | `any` | `{}` | no |
-| [resource\_configs\_defaults](#input\_resource\_configs\_defaults) | Configurations to pass and override default ones for karpenter-resources chart. Check the helm chart available configs here: https://github.com/dasmeta/helm/tree/karpenter-resources-0.1.0/charts/karpenter-resources |
object({
nodeClass = optional(any, {
amiFamily = "AL2" # Amazon Linux 2
detailedMonitoring = true
metadataOptions = {
httpEndpoint = "enabled"
httpProtocolIPv6 = "disabled"
httpPutResponseHopLimit = 2 # This is changed to disable IMDS access from containers not on the host network
httpTokens = "required"
}
})
nodeClassRef = optional(any, {
group = "karpenter.k8s.aws"
kind = "EC2NodeClass"
name = "default"
}),
requirements = optional(any, [
{
key = "karpenter.k8s.aws/instance-cpu"
operator = "Lt"
values = ["9"] # <=8 core cpu nodes
},
{
key = "karpenter.k8s.aws/instance-memory"
operator = "Lt"
values = ["33000"] # <=32 Gb memory nodes
},
{
key = "karpenter.k8s.aws/instance-memory"
operator = "Gt"
values = ["1000"] # >1Gb Gb memory nodes
},
{
key = "karpenter.k8s.aws/instance-generation"
operator = "Gt"
values = ["2"] # generation of ec2 instances >2 (like t3a.medium) are more performance and effectiveness
},
{
key = "kubernetes.io/arch"
operator = "In"
values = ["amd64"] # amd64 linux is main platform arch we will use
},
{
key = "karpenter.sh/capacity-type"
operator = "In"
values = ["spot", "on-demand"] # both spot and on-demand nodes, it will look at first available spot and if no then on-demand
}
])
disruption = optional(any, {
consolidationPolicy = "WhenEmptyOrUnderutilized"
consolidateAfter = "1m"
}),
limits = optional(any, {
cpu = 10
})
})
| `{}` | no |
+| [resource\_configs\_defaults](#input\_resource\_configs\_defaults) | Configurations to pass and override default ones for karpenter-resources chart. Check the helm chart available configs here: https://github.com/dasmeta/helm/tree/karpenter-resources-0.1.0/charts/karpenter-resources |
object({
object({
nodeClass = optional(any, {
amiFamily = "AL2" # Amazon Linux 2
detailedMonitoring = true
metadataOptions = {
httpEndpoint = "enabled"
httpProtocolIPv6 = "disabled"
httpPutResponseHopLimit = 2 # This is changed to disable IMDS access from containers not on the host network
httpTokens = "required"
}
blockDeviceMappings = [
{
deviceName = "/dev/xvda"
ebs = {
volumeSize = "100Gi"
volumeType = "gp3"
encrypted = true
}
}
]
})
nodeClassRef = optional(any, {
group = "karpenter.k8s.aws"
kind = "EC2NodeClass"
name = "default"
}),
requirements = optional(any, [
{
key = "karpenter.k8s.aws/instance-cpu"
operator = "Lt"
values = ["9"] # <=8 core cpu nodes
},
{
key = "karpenter.k8s.aws/instance-memory"
operator = "Lt"
values = ["33000"] # <=32 Gb memory nodes
},
{
key = "karpenter.k8s.aws/instance-memory"
operator = "Gt"
values = ["1000"] # >1Gb Gb memory nodes
},
{
key = "karpenter.k8s.aws/instance-generation"
operator = "Gt"
values = ["2"] # generation of ec2 instances >2 (like t3a.medium) are more performance and effectiveness
},
{
key = "kubernetes.io/arch"
operator = "In"
values = ["amd64"] # amd64 linux is main platform arch we will use
},
{
key = "karpenter.sh/capacity-type"
operator = "In"
values = ["spot", "on-demand"] # both spot and on-demand nodes, it will look at first available spot and if no then on-demand
}
])
disruption = optional(any, {
consolidationPolicy = "WhenEmptyOrUnderutilized"
consolidateAfter = "1m"
}),
limits = optional(any, {
cpu = 10
})
})
| `{}` | no |
 | [subnet\_ids](#input\_subnet\_ids) | VPC subnet ids used for default Ec2NodeClass as subnet selector. | `list(string)` | n/a | yes |
 | [wait](#input\_wait) | Whether use helm deploy with --wait flag | `bool` | `true` | no |
 
diff --git a/modules/karpenter/locals.tf b/modules/karpenter/locals.tf
index 0ddf7b6..28f7c28 100644
--- a/modules/karpenter/locals.tf
+++ b/modules/karpenter/locals.tf
@@ -10,8 +10,9 @@ locals {
     amiSelectorTerms = [
       { id = data.aws_instance.ec2_from_eks_node_pool.ami }
     ]
-    detailedMonitoring = var.resource_configs_defaults.nodeClass.detailedMonitoring
-    metadataOptions = var.resource_configs_defaults.nodeClass.metadataOptions
+    detailedMonitoring  = var.resource_configs_defaults.nodeClass.detailedMonitoring
+    metadataOptions     = var.resource_configs_defaults.nodeClass.metadataOptions
+    blockDeviceMappings = var.resource_configs_defaults.nodeClass.blockDeviceMappings
   }
 
   nodePoolDefaultNodeClassRef = var.resource_configs_defaults.nodeClassRef
@@ -28,7 +29,7 @@ locals {
       })
     })
     disruption = merge(var.resource_configs_defaults.disruption, try(value.disruption, {}))
-    limits     = merge(var.resource_configs_defaults.limits, try(value.limit, {}))
+    limits     = merge(var.resource_configs_defaults.limits, try(value.limits, {}))
   }
   ) }
 }
diff --git a/modules/karpenter/main.tf b/modules/karpenter/main.tf
index 3c39fac..0bc2ebe 100644
--- a/modules/karpenter/main.tf
+++ b/modules/karpenter/main.tf
@@ -69,32 +69,7 @@ resource "helm_release" "this" {
   atomic = var.atomic
   wait   = var.wait
 
-  values = [jsonencode(merge({
-    serviceAccount = {
-      name = module.this.service_account
-      annotations = {
-        "eks.amazonaws.com/role-arn" = module.this.iam_role_arn
-      }
-    }
-    settings = {
-      clusterName       = var.cluster_name
-      clusterEndpoint   = var.cluster_endpoint
-      interruptionQueue = module.this.queue_name
-      featureGates = {
-        spotToSpotConsolidation = true
-      }
-    }
-    resources = {
-      requests = {
-        cpu    = "100m"
-        memory = "256Mi"
-      }
-      limits = {
-        cpu    = "100m"
-        memory = "256Mi"
-      }
-    }
-  }, var.configs))]
+  values = [jsonencode(module.karpenter_custom_default_configs_merged.merged)]
 }
 
 # allows to create karpenter crd resources such as NodeClasses, NodePools
@@ -120,3 +95,35 @@ resource "helm_release" "karpenter_nodes" {
 
   depends_on = [helm_release.this]
 }
+
+module "karpenter_custom_default_configs_merged" {
+  source  = "cloudposse/config/yaml//modules/deepmerge"
+  version = "1.0.2"
+
+  maps = [
+    {
+      serviceAccount = {
+        name = module.this.service_account
+        annotations = {
+          "eks.amazonaws.com/role-arn" = module.this.iam_role_arn
+        }
+      }
+      settings = {
+        clusterName       = var.cluster_name
+        clusterEndpoint   = var.cluster_endpoint
+        interruptionQueue = module.this.queue_name
+      }
+      resources = {
+        requests = {
+          cpu    = "100m"
+          memory = "256Mi"
+        }
+        limits = {
+          cpu    = "100m"
+          memory = "256Mi"
+        }
+      }
+    },
+    var.configs
+  ]
+}
diff --git a/modules/karpenter/variables.tf b/modules/karpenter/variables.tf
index 0748c62..cf43f8d 100644
--- a/modules/karpenter/variables.tf
+++ b/modules/karpenter/variables.tf
@@ -101,6 +101,16 @@ variable "resource_configs_defaults" {
       httpPutResponseHopLimit = 2 # This is changed to disable IMDS access from containers not on the host network
       httpTokens              = "required"
     }
+    blockDeviceMappings = [
+      {
+        deviceName = "/dev/xvda"
+        ebs = {
+          volumeSize = "100Gi"
+          volumeType = "gp3"
+          encrypted  = true
+        }
+      }
+    ]
   })
   nodeClassRef = optional(any, {
     group = "karpenter.k8s.aws"
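The new `blockDeviceMappings` default above bakes a 100Gi encrypted gp3 root volume into the default EC2NodeClass. A consumer needing a different volume can override it through `resource_configs_defaults`; a minimal sketch (the 200Gi size is illustrative, and since `nodeClass` is `any`-typed the object is replaced as a whole rather than deep-merged, so the shipped defaults that should survive are restated):

```terraform
module "karpenter" {
  source = "dasmeta/eks/aws//modules/karpenter"

  # ...required inputs (cluster_name, cluster_endpoint, oidc_provider_arn, subnet_ids, ...)

  resource_configs_defaults = {
    nodeClass = {
      amiFamily          = "AL2"
      detailedMonitoring = true
      metadataOptions = {
        httpEndpoint            = "enabled"
        httpProtocolIPv6        = "disabled"
        httpPutResponseHopLimit = 2
        httpTokens              = "required"
      }
      blockDeviceMappings = [
        {
          deviceName = "/dev/xvda"
          ebs = {
            volumeSize = "200Gi" # illustrative bump of the new 100Gi default
            volumeType = "gp3"
            encrypted  = true
          }
        }
      ]
    }
  }
}
```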