diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..1b4f757c --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,9 @@ +{ + "cSpell.words": [ + "Autoscaler", + "Kubernetes", + "Rebalance", + "Rebalancing", + "Tolerations" + ] +} \ No newline at end of file diff --git a/content/running_spark_apps_with_emr_on_spot_instances/cloud9-awscli.md b/content/running_spark_apps_with_emr_on_spot_instances/cloud9-awscli.md index 4893acdc..80810716 100644 --- a/content/running_spark_apps_with_emr_on_spot_instances/cloud9-awscli.md +++ b/content/running_spark_apps_with_emr_on_spot_instances/cloud9-awscli.md @@ -32,4 +32,4 @@ aws --version ```bash aws ec2 create-key-pair --key-name emr-workshop-key-pair --query "KeyMaterial" --output text > emr-workshop-key-pair.pem chmod 400 emr-workshop-key-pair.pem -``` \ No newline at end of file +``` diff --git a/content/using_ec2_spot_instances_with_eks/introduction/_index.md b/content/using_ec2_spot_instances_with_eks/005_introduction/_index.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/_index.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/_index.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/architecture/_index.md b/content/using_ec2_spot_instances_with_eks/005_introduction/architecture/_index.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/architecture/_index.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/architecture/_index.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/architecture/architecture_control.md b/content/using_ec2_spot_instances_with_eks/005_introduction/architecture/architecture_control.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/architecture/architecture_control.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/architecture/architecture_control.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/architecture/architecture_control_and_data_overview.md b/content/using_ec2_spot_instances_with_eks/005_introduction/architecture/architecture_control_and_data_overview.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/architecture/architecture_control_and_data_overview.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/architecture/architecture_control_and_data_overview.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/architecture/architecture_worker.md b/content/using_ec2_spot_instances_with_eks/005_introduction/architecture/architecture_worker.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/architecture/architecture_worker.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/architecture/architecture_worker.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/architecture/cluster_setup_options.md b/content/using_ec2_spot_instances_with_eks/005_introduction/architecture/cluster_setup_options.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/architecture/cluster_setup_options.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/architecture/cluster_setup_options.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/basics/_index.md 
b/content/using_ec2_spot_instances_with_eks/005_introduction/basics/_index.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/basics/_index.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/basics/_index.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/basics/concepts_nodes.md b/content/using_ec2_spot_instances_with_eks/005_introduction/basics/concepts_nodes.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/basics/concepts_nodes.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/basics/concepts_nodes.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/basics/concepts_objects.md b/content/using_ec2_spot_instances_with_eks/005_introduction/basics/concepts_objects.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/basics/concepts_objects.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/basics/concepts_objects.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/basics/concepts_objects_details_1.md b/content/using_ec2_spot_instances_with_eks/005_introduction/basics/concepts_objects_details_1.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/basics/concepts_objects_details_1.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/basics/concepts_objects_details_1.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/basics/concepts_objects_details_2.md b/content/using_ec2_spot_instances_with_eks/005_introduction/basics/concepts_objects_details_2.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/basics/concepts_objects_details_2.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/basics/concepts_objects_details_2.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/basics/what_is_k8s.md b/content/using_ec2_spot_instances_with_eks/005_introduction/basics/what_is_k8s.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/basics/what_is_k8s.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/basics/what_is_k8s.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/eks/_index.md b/content/using_ec2_spot_instances_with_eks/005_introduction/eks/_index.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/eks/_index.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/eks/_index.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/eks/eks_control_plane.md b/content/using_ec2_spot_instances_with_eks/005_introduction/eks/eks_control_plane.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/eks/eks_control_plane.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/eks/eks_control_plane.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/eks/eks_customers.md b/content/using_ec2_spot_instances_with_eks/005_introduction/eks/eks_customers.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/eks/eks_customers.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/eks/eks_customers.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/eks/eks_high_architecture.md 
b/content/using_ec2_spot_instances_with_eks/005_introduction/eks/eks_high_architecture.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/eks/eks_high_architecture.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/eks/eks_high_architecture.md diff --git a/content/using_ec2_spot_instances_with_eks/introduction/eks/eks_high_level.md b/content/using_ec2_spot_instances_with_eks/005_introduction/eks/eks_high_level.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/introduction/eks/eks_high_level.md rename to content/using_ec2_spot_instances_with_eks/005_introduction/eks/eks_high_level.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/_index.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/_index.md similarity index 52% rename from content/using_ec2_spot_instances_with_eks/prerequisites/_index.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/_index.md index c6bae9b7..b4c21b19 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/_index.md +++ b/content/using_ec2_spot_instances_with_eks/010_prerequisites/_index.md @@ -1,5 +1,5 @@ --- -title: "Start the workshop..." +title: "Start the workshop" chapter: true weight: 10 --- @@ -7,7 +7,7 @@ weight: 10 # Getting Started To start the workshop, follow one of the following depending on whether you are... -* ...[running the workshop on your own (in your own account)]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/self_paced.md" >}}), or -* ...[attending an AWS hosted event (using AWS provided hashes)]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/aws_event.md" >}}) +* ...[running the workshop on your own (in your own account)]({{< ref "/using_ec2_spot_instances_with_eks/010_prerequisites/self_paced.md" >}}), or +* ...[attending an AWS hosted event (using AWS provided hashes)]({{< ref "/using_ec2_spot_instances_with_eks/010_prerequisites/aws_event.md" >}}) -Once you have completed with either setup, continue with **[Create a Workspace]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/workspace.md" >}})** +Once you have completed with either setup, continue with **[Create a Workspace]({{< ref "/using_ec2_spot_instances_with_eks/010_prerequisites/workspace.md" >}})** diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/ap-southeast-1.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/ap-southeast-1.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/ap-southeast-1.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/ap-southeast-1.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/at_an_aws_updateiam.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/at_an_aws_updateiam.md similarity index 74% rename from content/using_ec2_spot_instances_with_eks/prerequisites/at_an_aws_updateiam.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/at_an_aws_updateiam.md index 3c6808cf..469debd0 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/at_an_aws_updateiam.md +++ b/content/using_ec2_spot_instances_with_eks/010_prerequisites/at_an_aws_updateiam.md @@ -7,7 +7,8 @@ hidden: true ## Attach the IAM role to your Workspace -1. Follow this [deep link to find your Cloud9 EC2 instance](https://console.aws.amazon.com/ec2/v2/home?#Instances:tag:Name=aws-cloud9-eksworkshop;sort=desc:launchTime). +1. 
Click the grey circle button (in top right corner) and select **Manage EC2 Instance**. +![cloud9Role](/images/using_ec2_spot_instances_with_eks/prerequisites/cloud9-role.png) 1. Select the instance, then choose **Actions / Security / Modify IAM role** ![c9instancerole](/images/using_ec2_spot_instances_with_eks/prerequisites/c9instancerole.png) 1. Choose **TeamRoleInstance** from the **IAM role** drop down, and select **Save** diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/at_an_aws_validaterole.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/at_an_aws_validaterole.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/at_an_aws_validaterole.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/at_an_aws_validaterole.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/attach_workspaceiam.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/attach_workspaceiam.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/attach_workspaceiam.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/attach_workspaceiam.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/aws_event.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/aws_event.md similarity index 90% rename from content/using_ec2_spot_instances_with_eks/prerequisites/aws_event.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/aws_event.md index 9d592d81..6d96bb50 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/aws_event.md +++ b/content/using_ec2_spot_instances_with_eks/010_prerequisites/aws_event.md @@ -9,7 +9,7 @@ weight: 20 {{% notice warning %}} Only complete this section if you are at an AWS hosted event (such as re:Invent, Kubecon, Immersion Day, or any other event hosted by an AWS employee). If you -are running the workshop on your own, go to: [Start the workshop on your own]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/self_paced.md" >}}). +are running the workshop on your own, go to: [Start the workshop on your own]({{< ref "/using_ec2_spot_instances_with_eks/010_prerequisites/self_paced.md" >}}). {{% /notice %}} ### Login to the AWS Workshop Portal @@ -42,8 +42,6 @@ We have deployed the below resources required to get started with the workshop u + An EKS cluster with the name `eksworkshop-eksctl` and a [EKS managed node group](https://docs.aws.amazon.com/eks/latest/userguide/managed-node-groups.html) with 2 on-demand instances. 
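If you want to confirm that these pre-provisioned resources are in place before moving on, a quick check along the following lines can be run from the Cloud9 terminal (a sketch; it assumes eksctl is already installed in the workspace and the AWS CLI default region is set):

```
# List the EKS cluster pre-created for the event
eksctl get cluster

# Show the managed node group backing the cluster and its capacity
eksctl get nodegroup --cluster eksworkshop-eksctl
```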
-{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/eksctl/create_eks_cluster_eksctl_command.md" %}} - #### Use your resources @@ -65,9 +63,9 @@ you will notice additional Cloudformation stacks were also deployed which is the - Click on the url against `Cloud9IDE` from the outputs -{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/prerequisites/workspace_at_launch.md" %}} +{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/010_prerequisites/workspace_at_launch.md" %}} -{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/prerequisites/update_workspace_settings.md" %}} +{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/010_prerequisites/update_workspace_settings.md" %}} ### Validate the IAM role {#validate_iam} @@ -78,10 +76,10 @@ aws sts get-caller-identity ``` -{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/prerequisites/at_an_aws_validaterole.md" %}} +{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/010_prerequisites/at_an_aws_validaterole.md" %}} {{% notice note %}} -Since we have already setup the prerequisites, **you can head straight to [Test the Cluster]({{< relref "/using_ec2_spot_instances_with_eks/eksctl/test.md" >}})** +Since we have already setup the prerequisites, **you can head straight to [Test the Cluster]({{< relref "/using_ec2_spot_instances_with_eks/020_eksctl/test.md" >}})** {{% /notice %}} {{% /expand%}} diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/awscli.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/awscli.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/awscli.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/awscli.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/clone.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/clone.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/clone.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/clone.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/eu-west-1.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/eu-west-1.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/eu-west-1.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/eu-west-1.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/k8stools.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/k8stools.md similarity index 97% rename from content/using_ec2_spot_instances_with_eks/prerequisites/k8stools.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/k8stools.md index 741b57eb..3caa708c 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/k8stools.md +++ b/content/using_ec2_spot_instances_with_eks/010_prerequisites/k8stools.md @@ -16,7 +16,7 @@ for the download links.](https://docs.aws.amazon.com/eks/latest/userguide/gettin #### Install kubectl ``` -export KUBECTL_VERSION=v1.20.0 +export KUBECTL_VERSION=v1.21.2 sudo curl --silent --location -o /usr/local/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl sudo chmod +x /usr/local/bin/kubectl ``` diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/on_your_own_updateiam.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/on_your_own_updateiam.md similarity index 87% 
rename from content/using_ec2_spot_instances_with_eks/prerequisites/on_your_own_updateiam.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/on_your_own_updateiam.md index 5afbf2a7..8617eb9a 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/on_your_own_updateiam.md +++ b/content/using_ec2_spot_instances_with_eks/010_prerequisites/on_your_own_updateiam.md @@ -16,7 +16,8 @@ hidden: true ## Attach the IAM role to your Workspace -1. Follow this [deep link to find your Cloud9 EC2 instance](https://console.aws.amazon.com/ec2/v2/home?#Instances:tag:Name=aws-cloud9-eksworkshop;sort=desc:launchTime). +1. Click the grey circle button (in top right corner) and select **Manage EC2 Instance**. +![cloud9Role](/images/using_ec2_spot_instances_with_eks/prerequisites/cloud9-role.png) 1. Select the instance, then choose **Actions / Security / Modify IAM role** ![c9instancerole](/images/using_ec2_spot_instances_with_eks/prerequisites/c9instancerole.png) 1. Choose **eksworkshop-admin** from the **IAM role** drop down, and select **Save** diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/on_your_own_validaterole.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/on_your_own_validaterole.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/on_your_own_validaterole.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/on_your_own_validaterole.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/prerequisites.files/eks-spot-workshop-quickstart-cnf.yml b/content/using_ec2_spot_instances_with_eks/010_prerequisites/prerequisites.files/eks-spot-workshop-quickstart-cnf.yml similarity index 95% rename from content/using_ec2_spot_instances_with_eks/prerequisites/prerequisites.files/eks-spot-workshop-quickstart-cnf.yml rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/prerequisites.files/eks-spot-workshop-quickstart-cnf.yml index a94368a0..a103d5f1 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/prerequisites.files/eks-spot-workshop-quickstart-cnf.yml +++ b/content/using_ec2_spot_instances_with_eks/010_prerequisites/prerequisites.files/eks-spot-workshop-quickstart-cnf.yml @@ -28,22 +28,22 @@ Parameters: C9KubectlVersion: Description: Cloud9 instance kubectl version Type: String - Default: v1.18.12 + Default: v1.21.2 ConstraintDescription: Must be a valid kubectl version C9KubectlVersionTEST: Description: Cloud9 instance kubectl version Type: String - Default: v1.18.12 + Default: v1.21.2 ConstraintDescription: Must be a valid kubectl version C9EKSctlVersion: Description: Cloud9 instance eksctl version Type: String - Default: 0.31.0 + Default: v0.68.0 ConstraintDescription: Must be a valid eksctl version EKSClusterVersion: Description: EKS Cluster Vesion Type: String - Default: 1.18 + Default: 1.21 ConstraintDescription: Must be a valid eks version EKSClusterName: Description: EKS Cluster Name @@ -277,7 +277,10 @@ Resources: - sudo mv -v /tmp/eksctl /usr/local/bin - sudo -H -u ec2-user /usr/local/bin/eksctl version - echo '=== Create EKS Cluster ===' - - !Sub 'sudo -H -u ec2-user /usr/local/bin/eksctl create cluster --version=${EKSClusterVersion} --name=${EKSClusterName} --node-private-networking --managed --nodes=2 --alb-ingress-access --region=${AWS::Region} --node-labels="lifecycle=OnDemand,intent=control-apps" --asg-access' + - wget -P /home/ec2-user/environment -O eksworkshop.yaml 
"https://raw.githubusercontent.com/awslabs/ec2-spot-workshops/master/content/using_ec2_spot_instances_with_eks/010_prerequisites/prerequisites.files/eksworkshop.yaml" + - sed 's/{AWS_REGION}/${AWS::Region}/' /home/ec2-user/environment/eksworkshop.yaml + - sed 's/{EKS_VERSION}/${EKSClusterVersion}/' /home/ec2-user/environment/eksworkshop.yaml + - !Sub 'sudo -H -u ec2-user /usr/local/bin/eksctl create cluster -f /home/ec2-user/environment/eksworkshop.yaml' - sudo -H -u ec2-user /usr/local/bin/kubectl get nodes C9BootstrapAssociation: diff --git a/content/using_ec2_spot_instances_with_eks/010_prerequisites/prerequisites.files/eksworkshop.yaml b/content/using_ec2_spot_instances_with_eks/010_prerequisites/prerequisites.files/eksworkshop.yaml new file mode 100644 index 00000000..e118cb9b --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/010_prerequisites/prerequisites.files/eksworkshop.yaml @@ -0,0 +1,35 @@ +--- +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: eksworkshop-eksctl + region: ${AWS_REGION} + version: ${EKS_VERSION} # example "1.21" + +managedNodeGroups: +- amiFamily: AmazonLinux2 + instanceType: m5.large + name: mng-od-m5large + desiredCapacity: 2 + maxSize: 3 + minSize: 0 + labels: + alpha.eksctl.io/cluster-name: eksworkshop-eksctl + alpha.eksctl.io/nodegroup-name: mng-od-m5large + intent: control-apps + tags: + alpha.eksctl.io/nodegroup-name: mng-od-m5large + alpha.eksctl.io/nodegroup-type: managed + k8s.io/cluster-autoscaler/node-template/label/intent: control-apps + ssh: + enableSsm: true + iam: + withAddonPolicies: + autoScaler: true + privateNetworking: true + +# To enable all of the control plane logs, uncomment below: +# cloudWatch: +# clusterLogging: +# enableTypes: ["*"] \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/self_paced.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/self_paced.md similarity index 91% rename from content/using_ec2_spot_instances_with_eks/prerequisites/self_paced.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/self_paced.md index d1616205..1862d908 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/self_paced.md +++ b/content/using_ec2_spot_instances_with_eks/010_prerequisites/self_paced.md @@ -5,7 +5,7 @@ weight: 10 --- {{% notice warning %}} -Only complete this section if you are running the workshop on your own. If you are at an AWS hosted event (such as re:Invent, Kubecon, Immersion Day, etc), go to [Start the workshop at an AWS event]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/aws_event.md" >}}). +Only complete this section if you are running the workshop on your own. If you are at an AWS hosted event (such as re:Invent, Kubecon, Immersion Day, etc), go to [Start the workshop at an AWS event]({{< ref "/using_ec2_spot_instances_with_eks/010_prerequisites/aws_event.md" >}}). 
{{% /notice %}} ### Running the workshop on your own @@ -34,4 +34,4 @@ as an IAM user with administrator access to the AWS account: ![Login URL](/images/using_ec2_spot_instances_with_eks/prerequisites/iam-4-save-url.png) -Once you have completed the step above, **you can head straight to [Create a Workspace]({{< ref "/using_ec2_spot_instances_with_eks/prerequisites/workspace.md" >}})** \ No newline at end of file +Once you have completed the step above, **you can head straight to [Create a Workspace]({{< ref "/using_ec2_spot_instances_with_eks/010_prerequisites/workspace.md" >}})** \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/sshkey.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/sshkey.md similarity index 98% rename from content/using_ec2_spot_instances_with_eks/prerequisites/sshkey.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/sshkey.md index dedeb8c6..2878c344 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/sshkey.md +++ b/content/using_ec2_spot_instances_with_eks/010_prerequisites/sshkey.md @@ -10,7 +10,7 @@ Starting from here, when you see command to be entered such as below, you will e Please run this command to generate SSH Key in Cloud9. This key will be used on the worker node instances to allow ssh access if necessary. -```bash +``` ssh-keygen ``` @@ -20,6 +20,6 @@ Press `enter` 3 times to take the default choices Upload the public key to your EC2 region: -```bash +``` aws ec2 import-key-pair --key-name "eksworkshop" --public-key-material fileb://~/.ssh/id_rsa.pub ``` diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/update_workspace_settings.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/update_workspace_settings.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/update_workspace_settings.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/update_workspace_settings.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/update_workspaceiam.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/update_workspaceiam.md similarity index 86% rename from content/using_ec2_spot_instances_with_eks/prerequisites/update_workspaceiam.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/update_workspaceiam.md index ac0c0834..11fbdceb 100644 --- a/content/using_ec2_spot_instances_with_eks/prerequisites/update_workspaceiam.md +++ b/content/using_ec2_spot_instances_with_eks/010_prerequisites/update_workspaceiam.md @@ -4,7 +4,7 @@ chapter: false weight: 60 --- -{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/prerequisites/update_workspace_settings.md" %}} +{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/010_prerequisites/update_workspace_settings.md" %}} We should configure our aws cli with our current region as default: ``` @@ -17,5 +17,5 @@ aws configure set default.region ${AWS_REGION} aws configure get default.region ``` -{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/prerequisites/validate_workspace_role.md" %}} +{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/010_prerequisites/validate_workspace_role.md" %}} diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/us-east-1.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/us-east-1.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/us-east-1.md 
rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/us-east-1.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/us-east-2.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/us-east-2.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/us-east-2.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/us-east-2.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/us-west-2.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/us-west-2.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/us-west-2.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/us-west-2.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/validate_workspace_role.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/validate_workspace_role.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/validate_workspace_role.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/validate_workspace_role.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/workspace.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/workspace.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/workspace.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/workspace.md diff --git a/content/using_ec2_spot_instances_with_eks/prerequisites/workspace_at_launch.md b/content/using_ec2_spot_instances_with_eks/010_prerequisites/workspace_at_launch.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/prerequisites/workspace_at_launch.md rename to content/using_ec2_spot_instances_with_eks/010_prerequisites/workspace_at_launch.md diff --git a/content/using_ec2_spot_instances_with_eks/eksctl/_index.md b/content/using_ec2_spot_instances_with_eks/020_eksctl/_index.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/eksctl/_index.md rename to content/using_ec2_spot_instances_with_eks/020_eksctl/_index.md diff --git a/content/using_ec2_spot_instances_with_eks/020_eksctl/create_eks_cluster_eksctl_command.md b/content/using_ec2_spot_instances_with_eks/020_eksctl/create_eks_cluster_eksctl_command.md new file mode 100644 index 00000000..1605c02d --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/020_eksctl/create_eks_cluster_eksctl_command.md @@ -0,0 +1,60 @@ +--- +title: "Create EKS cluster Command" +chapter: false +disableToc: true +hidden: true +--- + + +Create an eksctl deployment file (eksworkshop.yaml) to create an EKS cluster: + +``` +cat << EOF > eksworkshop.yaml +--- +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: eksworkshop-eksctl + region: ${AWS_REGION} + version: "1.21" + +managedNodeGroups: +- amiFamily: AmazonLinux2 + instanceType: m5.large + name: mng-od-m5large + desiredCapacity: 2 + maxSize: 3 + minSize: 0 + labels: + alpha.eksctl.io/cluster-name: eksworkshop-eksctl + alpha.eksctl.io/nodegroup-name: mng-od-m5large + intent: control-apps + tags: + alpha.eksctl.io/nodegroup-name: mng-od-m5large + alpha.eksctl.io/nodegroup-type: managed + k8s.io/cluster-autoscaler/node-template/label/intent: control-apps + iam: + withAddonPolicies: + autoScaler: true + privateNetworking: true + +# To enable all of the control plane logs, uncomment below: +# cloudWatch: +# clusterLogging: +# enableTypes: 
["*"] + +EOF +``` + +Next, use the file you created as the input for the eksctl cluster creation. + +``` +eksctl create cluster -f eksworkshop.yaml +``` + +{{% notice info %}} +Launching EKS and all the dependencies will take approximately 15 minutes. +{{% /notice %}} \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop-kubeflow.yml.template b/content/using_ec2_spot_instances_with_eks/020_eksctl/launcheks.files/eksworkshop-kubeflow.yml.template similarity index 100% rename from content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop-kubeflow.yml.template rename to content/using_ec2_spot_instances_with_eks/020_eksctl/launcheks.files/eksworkshop-kubeflow.yml.template diff --git a/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop.yml.template b/content/using_ec2_spot_instances_with_eks/020_eksctl/launcheks.files/eksworkshop.yml.template similarity index 100% rename from content/using_ec2_spot_instances_with_eks/eksctl/launcheks.files/eksworkshop.yml.template rename to content/using_ec2_spot_instances_with_eks/020_eksctl/launcheks.files/eksworkshop.yml.template diff --git a/content/using_ec2_spot_instances_with_eks/020_eksctl/launcheks.md b/content/using_ec2_spot_instances_with_eks/020_eksctl/launcheks.md new file mode 100644 index 00000000..50280711 --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/020_eksctl/launcheks.md @@ -0,0 +1,39 @@ +--- +title: "Launch EKS" +date: 2018-08-07T13:34:24-07:00 +weight: 20 +--- + + +{{% notice warning %}} +**DO NOT PROCEED** with this step unless you have [validated the IAM role]({{< relref "../010_prerequisites/update_workspaceiam.md#validate_iam" >}}) in use by the Cloud9 IDE. You will not be able to run the necessary kubectl commands in the later modules unless the EKS cluster is built using the IAM role. +{{% /notice %}} + +#### Challenge: +**How do I check the IAM role on the workspace?** + +{{%expand "Expand here to see the solution" %}} + +{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/010_prerequisites/validate_workspace_role.md" %}} + +If you do not see the correct role, please go back and **[validate the IAM role]({{< relref "../010_prerequisites/update_workspaceiam.md" >}})** for troubleshooting. + +If you do see the correct role, proceed to the next step to create an EKS cluster. +{{% /expand %}} + + +### Create an EKS cluster + +{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/020_eksctl/create_eks_cluster_eksctl_command.md" %}} + +The `eksctl create cluster` command allows you to create the cluster and managed nodegroups in sequence. There are a few things to note in the configuration that we just used to create the cluster and a managed nodegroup. + + * Nodegroup configurations are set under the **managedNodeGroups** section, which indicates that the node group is managed by EKS. + * Nodegroup instance type is **m5.large** with **minSize** set to 0, **maxSize** to 3 and **desiredCapacity** to 2. This nodegroup has capacity type set to On-Demand Instances by default. + * Notice that we add 3 node labels: + + * **alpha.eksctl.io/cluster-name**, to indicate the nodes belong to the **eksworkshop-eksctl** cluster. + * **alpha.eksctl.io/nodegroup-name**, to indicate the nodes belong to the **mng-od-m5large** nodegroup.
+ * **intent**, to allow you to deploy control applications on nodes that have been labeled with the value **control-apps** + + * Amazon EKS adds an additional Kubernetes label **eks.amazonaws.com/capacityType: ON_DEMAND**, to all On-Demand Instances in your managed node group. You can use this label to schedule stateful applications on On-Demand nodes. \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/eksctl/prerequisites.md b/content/using_ec2_spot_instances_with_eks/020_eksctl/prerequisites.md similarity index 93% rename from content/using_ec2_spot_instances_with_eks/eksctl/prerequisites.md rename to content/using_ec2_spot_instances_with_eks/020_eksctl/prerequisites.md index 12d82789..c7f3acb9 100644 --- a/content/using_ec2_spot_instances_with_eks/eksctl/prerequisites.md +++ b/content/using_ec2_spot_instances_with_eks/020_eksctl/prerequisites.md @@ -6,7 +6,7 @@ weight: 10 For this module, we need to download the [eksctl](https://eksctl.io/) binary: ``` -export EKSCTL_VERSION=0.51.0 +export EKSCTL_VERSION=v0.68.0 curl --silent --location "https://github.com/weaveworks/eksctl/releases/download/${EKSCTL_VERSION}/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp sudo mv -v /tmp/eksctl /usr/local/bin diff --git a/content/using_ec2_spot_instances_with_eks/020_eksctl/test.md b/content/using_ec2_spot_instances_with_eks/020_eksctl/test.md new file mode 100644 index 00000000..c3398e39 --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/020_eksctl/test.md @@ -0,0 +1,82 @@ +--- +title: "Test the Cluster" +date: 2018-08-07T13:36:57-07:00 +weight: 30 +--- +#### Test the cluster: +Confirm your nodes. If you see 2 nodes, then you have authenticated correctly: + +``` +kubectl get nodes +``` + +Export the Managed Group Worker Role Name for use throughout the workshop. + +{{% notice tip %}} +Some of the optional exercises may require you to add extra IAM policies to the managed group role +for the nodes to get access to services like CloudWatch, AppMesh, and X-Ray. You can always come back to this section or use the environment variable `$ROLE_NAME` to refer to the role. +{{% /notice %}} + +``` +NODE_GROUP_NAME=$(eksctl get nodegroup --cluster eksworkshop-eksctl -o json | jq -r '.[].Name') +ROLE_NAME=$(aws eks describe-nodegroup --cluster-name eksworkshop-eksctl --nodegroup-name $NODE_GROUP_NAME | jq -r '.nodegroup["nodeRole"]' | cut -f2 -d/) +echo "export ROLE_NAME=${ROLE_NAME}" >> ~/.bash_profile +``` + +#### Congratulations! + +You now have a fully working Amazon EKS Cluster that is ready to use! + +{{% notice tip %}} +Explore the Elastic Kubernetes Service (EKS) section in the AWS Console and the properties of the newly created EKS cluster. +{{% /notice %}} + +{{% notice warning %}} +You might see **Error loading Namespaces** while exploring the cluster on the AWS Console. It could be because the console user role doesn't have the necessary permissions on the EKS cluster's RBAC configuration in the control plane. Please expand and follow the instructions below to add the necessary permissions. +{{% /notice %}} + +{{%expand "Click to reveal detailed instructions" %}} + +### Console Credentials + +The EKS console allows you to see not only the configuration aspects of your cluster, but also to view Kubernetes cluster objects such as Deployments, Pods, and Nodes. For this type of access, the console IAM User or Role needs to be granted permission within the cluster. + +By default, the credentials used to create the cluster are automatically granted these permissions.
Following along in the workshop, you've created a cluster using temporary IAM credentials from within Cloud9. This means that you'll need to add your AWS Console credentials to the cluster. + +#### Import your EKS Console credentials to your new cluster: + +IAM Users and Roles are bound to an EKS Kubernetes cluster via a ConfigMap named `aws-auth`. We can use `eksctl` to do this with one command. + +You'll need to determine the correct credential to add for your AWS Console access. If you know this already, you can skip ahead to the `eksctl create iamidentitymapping` step below. + +If you've built your cluster from Cloud9 as part of this tutorial, invoke the following within your environment to determine your IAM Role or User ARN. + +``` +c9builder=$(aws cloud9 describe-environment-memberships --environment-id=$C9_PID | jq -r '.memberships[].userArn') +if echo ${c9builder} | grep -q user; then + rolearn=${c9builder} + echo Role ARN: ${rolearn} +elif echo ${c9builder} | grep -q assumed-role; then + assumedrolename=$(echo ${c9builder} | awk -F/ '{print $(NF-1)}') + rolearn=$(aws iam get-role --role-name ${assumedrolename} --query Role.Arn --output text) + echo Role ARN: ${rolearn} +fi +``` + +With your ARN in hand, you can issue the command to create the identity mapping within the cluster. + +``` +eksctl create iamidentitymapping --cluster eksworkshop-eksctl --arn ${rolearn} --group system:masters --username admin +``` + +Note that permissions can be restricted and granular but as this is a workshop cluster, you're adding your console credentials as administrator. + +Now you can verify your entry in the AWS auth map within the console. + +``` +kubectl describe configmap -n kube-system aws-auth +``` + +Now you're all set to move on. For more information, check out the [EKS documentation](https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html) on this topic. + +{{% /expand%}} diff --git a/content/using_ec2_spot_instances_with_eks/030_k8s_tools/_index.md b/content/using_ec2_spot_instances_with_eks/030_k8s_tools/_index.md new file mode 100644 index 00000000..0d816140 --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/030_k8s_tools/_index.md @@ -0,0 +1,9 @@ +--- +title: "Install Kubernetes Tools" +chapter: true +weight: 30 +--- + +# Install Kubernetes tools + +In this chapter we will install Kubernetes tools; Helm (package manager), Metric Server (resource usage data aggregator) and Kube-Ops-View (cluster visualization tool). 
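Once the tools in this chapter are installed, a short sanity check along these lines confirms they are responding (a sketch; it assumes Helm and Metrics Server have been deployed as described in the following sections):

```
# The Helm CLI reports its version once installed
helm version --short

# Metrics Server is aggregating node metrics (it can take a minute or two after deployment)
kubectl top nodes
```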
diff --git a/content/using_ec2_spot_instances_with_eks/helm_root/deploy_metric_server.md b/content/using_ec2_spot_instances_with_eks/030_k8s_tools/deploy_metric_server.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/helm_root/deploy_metric_server.md rename to content/using_ec2_spot_instances_with_eks/030_k8s_tools/deploy_metric_server.md diff --git a/content/using_ec2_spot_instances_with_eks/helm_root/helm_deploy.md b/content/using_ec2_spot_instances_with_eks/030_k8s_tools/helm_deploy.md similarity index 56% rename from content/using_ec2_spot_instances_with_eks/helm_root/helm_deploy.md rename to content/using_ec2_spot_instances_with_eks/030_k8s_tools/helm_deploy.md index 9281d637..132fe9fe 100644 --- a/content/using_ec2_spot_instances_with_eks/helm_root/helm_deploy.md +++ b/content/using_ec2_spot_instances_with_eks/030_k8s_tools/helm_deploy.md @@ -4,18 +4,33 @@ date: 2018-08-07T08:30:11-07:00 weight: 10 --- +## Kubernetes Helm + +[Helm](https://helm.sh/) is a package manager for Kubernetes that packages multiple Kubernetes resources into a single logical deployment unit called **Chart**. + +Helm is a tool that streamlines installing and managing Kubernetes applications. Think of it like apt/yum/homebrew for Kubernetes. We will use Helm during the workshop to install other components out from the list of available charts. + +Helm helps you to: + +- Achieve a simple (one command) and repeatable deployment +- Manage application dependency, using specific versions of other application and services +- Manage multiple deployment configurations: test, staging, production and others +- Execute post/pre deployment jobs during application deployment +- Update/rollback and test application deployments + + ## Install the Helm CLI Before we can get started configuring Helm, we'll need to first install the command line tools that you will interact with. To do this, run the following: -```sh +``` curl -sSL https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash ``` We can verify the version -```sh +``` helm version --short ``` @@ -25,20 +40,20 @@ Homebrew on macOS. Download the `stable` repository so we have something to start with: -```sh +``` helm repo add stable https://charts.helm.sh/stable/ helm repo update ``` Once this is installed, we will be able to list the charts you can install: -```sh +``` helm search repo stable ``` Finally, let's configure Bash completion for the `helm` command: -```sh +``` helm completion bash >> ~/.bash_completion . /etc/profile.d/bash_completion.sh . 
~/.bash_completion diff --git a/content/using_ec2_spot_instances_with_eks/helm_root/install_kube_ops_view.md b/content/using_ec2_spot_instances_with_eks/030_k8s_tools/install_kube_ops_view.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/helm_root/install_kube_ops_view.md rename to content/using_ec2_spot_instances_with_eks/030_k8s_tools/install_kube_ops_view.md diff --git a/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/_index.md b/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/_index.md new file mode 100644 index 00000000..8d3fdbec --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/_index.md @@ -0,0 +1,12 @@ +--- +title: "Add EKS managed Spot workers" +chapter: true +weight: 40 +draft: false +--- + +# Add EKS managed Spot workers + +In this module, you will learn how to provision, manage, and maintain your Kubernetes clusters with Amazon EKS managed node groups with Spot capacity to optimize cost and scale. + +![Title Image](/images/using_ec2_spot_instances_with_eks/spotworkers/eks_spot_managed_architecture.png) diff --git a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/selecting_instance_types.md b/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/selecting_instance_types.md similarity index 59% rename from content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/selecting_instance_types.md rename to content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/selecting_instance_types.md index 5b9aaac9..255e1067 100644 --- a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/selecting_instance_types.md +++ b/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/selecting_instance_types.md @@ -1,5 +1,5 @@ --- -title: "Selecting Instance Types" +title: "Select Instance Types for Diversification" date: 2018-08-07T11:05:19-07:00 weight: 20 draft: false @@ -7,35 +7,35 @@ draft: false [Amazon EC2 Spot Instances](https://aws.amazon.com/ec2/spot/) offer spare compute capacity available in the AWS Cloud at steep discounts compared to On-Demand prices. EC2 can interrupt Spot Instances with two minutes of notification when EC2 needs the capacity back. You can use Spot Instances for various fault-tolerant and flexible applications. Some examples are analytics, containerized workloads, high-performance computing (HPC), stateless web servers, rendering, CI/CD, and other test and development workloads. -One of the best practices to successfully adopt Spot Instances is to implement **Spot Instance diversification** as part of your configuration. Spot Instance diversification helps to procure capacity from multiple Spot Instance pools, both for scaling up and for replacing Spot Instances that may receive a Spot Instance termination notification. A Spot Instance pool is a set of unused EC2 instances with the same instance type (for example, m5.large), operating system, Availability Zone. +One of the best practices to successfully adopt Spot Instances is to implement **Spot Instance diversification** as part of your configuration. Spot Instance diversification helps to procure capacity from multiple Spot Instance pools, both for scaling up and for replacing Spot Instances that may receive a Spot Instance termination notification. 
A Spot Instance pool is a set of unused EC2 instances with the same Instance type, operating system and Availability Zone (for example, m5.large on Red Hat Enterprise Linux in us-east-1a). ### Cluster Autoscaler And Spot Instance Diversification Cluster Autoscaler is a tool that automatically adjusts the size of the Kubernetes cluster when there are pods that fail to run in the cluster due to insufficient resources (Scale Out) or there are nodes in the cluster that have been underutilized for a period of time (Scale In). {{% notice info %}} -When using Spot Instances with [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) there are a few things that [should be considered](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md). For example Cluster Autoscaler makes the assumption that all nodes within a nodegroup will have the same number of vCPUs and Memory. +When using Spot Instances with [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) there are a few things that [should be considered](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md). One key consideration is, each Auto Scaling group should be composed of instance types that provide approximately equal capacity. Cluster Autoscaler will attempt to determine the CPU, memory, and GPU resources provided by an Auto Scaling Group based on first override provided in an ASG's Mixed Instances Policy. If any such overrides are found, only the first instance type found will be used. See [Using Mixed Instances Policies and Spot Instances](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md#Using-Mixed-Instances-Policies-and-Spot-Instances) for details. {{% /notice %}} -When applying Spot Diversification best practices to EKS and K8s clusters, using Cluster Autoscaler to dynamically scale capacity, we must implement diversification in a way that adheres to Cluster Autoscaler expected operational mode. In this workshop we will assume that our cluster nodegroups should be provisioned with instance types that adhere to a 1vCPU:4GB RAM ratio. +When applying Spot Diversification best practices to EKS and K8s clusters, using Cluster Autoscaler to dynamically scale capacity, we must implement diversification in a way that adheres to Cluster Autoscaler expected operational mode. In this workshop we will assume that our cluster node groups should be provisioned with instance types that adhere to a 1vCPU:4GB RAM ratio. We can diversify Spot Instance pools using two strategies: - - By creating multiple node groups, each of different sizes. For example a node group of size 4VCPU's and 16GB Ram, and another node group of 8vCPU's and 32GB Ram. + - By creating multiple node groups, each of different sizes. For example, a node group of size 4 vCPUs and 16GB RAM, and another node group of 8 vCPUs and 32GB RAM. - - By Implementing instance diversification within the node groups, by selecting a mix of instance types and families from different Spot Instance pools that meet the same vCPU's and memory criteria. + - By Implementing instance diversification within the node groups, by selecting a mix of instance types and families from different Spot Instance pools that meet the same vCPUs and memory criteria. -Our goal in this workshop, is to create at least 2 diversified groups of instances that adhere the 1vCPU:4GB RAM ratio. 
+Our goal in this workshop is to create 2 diversified node groups that adhere to the 1 vCPU:4 GB RAM ratio. We will use **[amazon-ec2-instance-selector](https://github.com/aws/amazon-ec2-instance-selector)** to help us select the relevant instance -types and familes with sufficient number of vCPUs and RAM. +types and families with sufficient number of vCPUs and RAM. -There are over 350 different instance types available on EC2 which can make the process of selecting appropriate instance types difficult. **[amazon-ec2-instance-selector](https://github.com/aws/amazon-ec2-instance-selector)** helps you select compatible instance types for your application to run on. The command line interface can be passed resource criteria like vcpus, memory, network performance, and much more and then return the available, matching instance types. +There are over 350 different instance types available on EC2 which can make the process of selecting appropriate instance types difficult. amazon-ec2-instance-selector helps you select compatible instance types for your application to run on. The command line interface can be passed resource criteria like vcpus, memory, network performance, and much more and then return the available, matching instance types. Let's first install **amazon-ec2-instance-selector** : ``` -curl -Lo ec2-instance-selector https://github.com/aws/amazon-ec2-instance-selector/releases/download/v1.3.0/ec2-instance-selector-`uname | tr '[:upper:]' '[:lower:]'`-amd64 && chmod +x ec2-instance-selector +curl -Lo ec2-instance-selector https://github.com/aws/amazon-ec2-instance-selector/releases/download/v2.0.3/ec2-instance-selector-`uname | tr '[:upper:]' '[:lower:]'`-amd64 && chmod +x ec2-instance-selector sudo mv ec2-instance-selector /usr/local/bin/ ec2-instance-selector --version ``` @@ -43,51 +43,64 @@ ec2-instance-selector --version Now that you have ec2-instance-selector installed, you can run `ec2-instance-selector --help` to understand how you could use it for selecting instances that match your workload requirements. For the purpose of this workshop -we need to first get a group of instances that meet the 4vCPUs and 16GB of RAM. +we first need to get a group of instances that have 4 vCPUs and 16 GB of RAM. Run the following command to get the list of instances. -```bash -ec2-instance-selector --vcpus 4 --memory 16384 --gpus 0 --current-generation -a x86_64 --deny-list '.*n.*' +{{% notice note %}} +The results might differ if you created Cloud9 in any other region than the five regions (N. Virginia, Oregon, Ireland, Ohio and Singapore) suggested in the [Create a Workspace]({{< relref "/using_ec2_spot_instances_with_eks/010_prerequisites/workspace" >}}) chapter. We will use **`--deny-list`** for filtering out the instances that are not supported across these five regions. +{{% /notice %}} + +``` +ec2-instance-selector --vcpus 4 --memory 16 --gpus 0 --current-generation -a x86_64 --deny-list '.*d3en.*' ``` -This should display a list like the one that follows (note results might differ depending on the region). We will use this instances as part of one of our node groups. +This should display a list like the one that follows. We will use these instances as part of one of our node groups.
+ + ``` m4.xlarge m5.xlarge m5a.xlarge +m5ad.xlarge m5d.xlarge +m5dn.xlarge +m5n.xlarge +m5zn.xlarge +m6i.xlarge t2.xlarge t3.xlarge -t3a.xlarge +t3a.xlarge ``` -Internally ec2-instance-selector is making calls to the [DescribeInstanceTypes](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstanceTypes.html) for the specific region and filtering the intstances based on the criteria selected in the command line, in our case we did filter for instances that meet the following criteria: +Internally ec2-instance-selector is making calls to the [DescribeInstanceTypes](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstanceTypes.html) for the specific region and filtering the instances based on the criteria selected in the command line, in our case we did filter for instances that meet the following criteria: + * Instances with no GPUs * of x86_64 Architecture (no ARM instances like A1 or m6g instances for example) - * Instances that have 4 vCPUs and 16GB of Ram + * Instances that have 4 vCPUs and 16 GB of RAM * Instances of current generation (4th gen onwards) - * Instances that don't meet the regular expresion `.*n.*`, so effectively m5n, m5dn. + * Instances that don’t meet the regular expression .*d3en.*, so effectively d3en. {{% notice warning %}} Your workload may have other constraints that you should consider when selecting instance types. For example. **t2** and **t3** instance types are [burstable instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/burstable-performance-instances.html) and might not be appropriate for CPU bound workloads that require CPU execution determinism. Instances such as m5**a** are [AMD Instances](https://aws.amazon.com/ec2/amd/), if your workload is sensitive to numerical differences (i.e: financial risk calculations, industrial simulations) mixing these instance types might not be appropriate. {{% /notice %}} {{% notice note %}} -You are encouraged to test what are the options that `ec2-instance-selector` provides and run a few commands with it to familiarize yourself with the tool. -For example, try running the same commands as you did before with the extra parameter **`--output table-wide`**. +You are encouraged to test what are the options that `ec2-instance-selector` provides and run a few commands with it to familiarize yourself with the tool.
+
+For example, try running the same commands as you did before with the extra parameters, like **`--output table-wide`** for a tabular view. {{% /notice %}} ### Challenge -Find out another group that adheres to a 1vCPU:4GB ratio, this time using instances with 8vCPU's and 32GB of RAM. +Find out another group that adheres to a 1 vCPU:4 GB ratio, this time using instances with 8 vCPUs and 32 GB of RAM. {{%expand "Expand this for an example on the list of instances" %}} That should be easy. You can run the command: -```bash -ec2-instance-selector --vcpus 8 --memory 32768 --gpus 0 --current-generation -a x86_64 --deny-list '.*n.*|.*h.*' +``` +ec2-instance-selector --vcpus 8 --memory 32 --gpus 0 --current-generation -a x86_64 --deny-list '.*d3en.*' ``` which should yield a list as follows @@ -96,7 +109,12 @@ which should yield a list as follows m4.2xlarge m5.2xlarge m5a.2xlarge +m5ad.2xlarge m5d.2xlarge +m5dn.2xlarge +m5n.2xlarge +m5zn.2xlarge +m6i.2xlarge t2.2xlarge t3.2xlarge t3a.2xlarge diff --git a/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/spotlifecycle.md b/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/spotlifecycle.md new file mode 100644 index 00000000..b51e6399 --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/spotlifecycle.md @@ -0,0 +1,33 @@ +--- +title: "Spot Best Practices and Interruption Handling" +date: 2021-04-07T12:00:00-00:00 +weight: 40 +draft: false +--- + +### View EKS managed node groups Configurations + +Use the AWS Management Console to inspect the managed node groups deployed in your Kubernetes cluster. + +* Go to **Elastic Kubernetes Service** >> click on **Clusters** >> select **eksworkshop-eksctl** cluster >> select **Configuration** tab >> go to **Compute** tab in the bottom pane. +* You can see 3 node groups created; one On-Demand node group and two Spot node groups. +* Click on **mng-spot-4vcpu-16gb** node group and you can see the instance types we selected in earlier section. +* Click on the Auto Scaling Group name in the **Details** tab. Scroll to the Purchase options and instance types settings. Note how Spot best practices are applied out of the box: + * **Capacity Optimized** allocation strategy, which will launch Spot Instances from the most-available spare capacity pools. This results in minimizing the Spot Interruptions. + * **Capacity Rebalance** helps EKS managed node groups manage the lifecycle of the Spot Instance by proactively replacing instances that are at higher risk of being interrupted. Node groups use Auto Scaling Group's Capacity Rebalance feature to launch replacement nodes in response to Rebalance Recommendation notice, thus proactively maintaining desired node capacity. + +![Spot Best Practices](/images/using_ec2_spot_instances_with_eks/spotworkers/asg_spot_best_practices.png) + +### Interruption Handling in EKS managed node groups with Spot capacity + +To handle Spot interruptions, you do not need to install any extra automation tools on the cluster such as the AWS Node Termination Handler. A managed node group configures an Amazon EC2 Auto Scaling group on your behalf and handles the Spot interruption in following manner: + +* Amazon EC2 Spot Capacity Rebalancing is enabled so that Amazon EKS can gracefully drain and rebalance your Spot nodes to minimize application disruption when a Spot node is at elevated risk of interruption. 
For more information, see [Amazon EC2 Auto Scaling Capacity Rebalancing](https://docs.aws.amazon.com/autoscaling/ec2/userguide/capacity-rebalance.html) in the Amazon EC2 Auto Scaling User Guide. + +* When a replacement Spot node is bootstrapped and in the Ready state on Kubernetes, Amazon EKS cordons and drains the Spot node that received the rebalance recommendation. Cordoning the Spot node ensures that the node is marked as unschedulable and kube-scheduler will not schedule any new pods on it. It also removes it from its list of healthy, active Spot nodes. [Draining](https://kubernetes.io/docs/tasks/administer-cluster/safely-drain-node/) the Spot node ensures that running pods are evicted gracefully. + +* If a Spot two-minute interruption notice arrives before the replacement Spot node is in a Ready state, Amazon EKS starts draining the Spot node that received the rebalance recommendation. + +This process avoids waiting for replacement Spot node till Spot interruption arrives, instead it procures replacement in advance and helps in minimizing the scheduling time for pending pods. + +![Spot Rebalance Recommendation](/images/using_ec2_spot_instances_with_eks/spotworkers/rebalance_recommendation.png) \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/why_select_managed_node_group.md b/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/why_select_managed_node_group.md new file mode 100644 index 00000000..9a45fb02 --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/why_select_managed_node_group.md @@ -0,0 +1,27 @@ +--- +title: "EKS managed node groups" +date: 2018-08-07T11:05:19-07:00 +weight: 10 +draft: false +--- + +[Amazon EKS managed node groups](https://docs.aws.amazon.com/eks/latest/userguide/managed-node-groups.html) automate the provisioning and lifecycle management of nodes (Amazon EC2 instances) for Amazon EKS clusters. This greatly simplifies operational activities such as rolling updates for new AMIs or Kubernetes version deployments. + +Advantages of running Amazon EKS managed node groups: + +* Create, automatically update, or terminate nodes with a single operation using the Amazon EKS console, eksctl, AWS CLI, AWS API, or infrastructure as code tools including AWS CloudFormation. +* Provisioned nodes run using the latest Amazon EKS optimized AMIs. +* Nodes provisioned under managed node group are automatically tagged for auto-discovery by the Kubernetes cluster autoscaler via node labels: **k8s.io/cluster-autoscaler/enabled=true** and **k8s.io/cluster-autoscaler/** +* Node updates and terminations automatically and gracefully drain nodes to ensure that your applications stay available. +* No additional costs to use Amazon EKS managed node groups, pay only for the AWS resources provisioned. + +### EKS managed node groups with Spot capacity + +Amazon EKS managed node groups with Spot capacity enhances the managed node group experience with ease to provision and manage EC2 Spot Instances. EKS managed node groups launch an EC2 Auto Scaling group with Spot best practices and handle [Spot Instance interruptions](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-interruptions.html) automatically. This enables you to take advantage of the steep savings that Spot Instances provide for your interruption tolerant containerized applications. 
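+If you want to confirm how a node group was provisioned, you can query its capacity type with the AWS CLI. The snippet below is only a sketch: it assumes the **eksworkshop-eksctl** cluster and the **mng-spot-4vcpu-16gb** node group that are created later in this module.
+
+```
+# Sketch: check whether a managed node group runs Spot or On-Demand capacity.
+# Assumes the eksworkshop-eksctl cluster and the mng-spot-4vcpu-16gb node group
+# created later in this module.
+aws eks describe-nodegroup \
+  --cluster-name eksworkshop-eksctl \
+  --nodegroup-name mng-spot-4vcpu-16gb \
+  --query "nodegroup.capacityType" \
+  --output text
+# A Spot node group returns: SPOT
+```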
+ +In addition to the advantages of managed node groups, EKS managed node groups with Spot capacity have these additional advantages: + +* The allocation strategy used to provision Spot capacity is set to [Capacity Optimized](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-best-practices.html#use-capacity-optimized-allocation-strategy) to ensure that Spot nodes are provisioned in the optimal Spot capacity pools. +* Specify [multiple instance types](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-best-practices.html#be-instance-type-flexible) during managed node group creation to increase the number of Spot capacity pools available for allocating capacity. +* Nodes provisioned under managed node groups with Spot capacity are automatically labeled with the capacity type **eks.amazonaws.com/capacityType: SPOT**. You can use this label to schedule fault-tolerant applications on Spot nodes. +* Amazon EC2 Spot [Capacity Rebalancing](https://docs.aws.amazon.com/autoscaling/ec2/userguide/ec2-auto-scaling-capacity-rebalancing.html) is enabled to ensure that Amazon EKS can gracefully drain and rebalance your Spot nodes to minimize application disruption when a Spot node is at elevated risk of interruption. \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/workers_eksctl.md b/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/workers_eksctl.md new file mode 100644 index 00000000..424390b9 --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/workers_eksctl.md @@ -0,0 +1,185 @@ +--- +title: "Create EKS managed node groups with Spot capacity" +date: 2018-08-07T11:05:19-07:00 +weight: 30 +draft: false +--- + +In this section we will deploy the instance types we selected in the previous chapter and create managed node groups that adhere to Spot diversification best practices. We will use **[`eksctl create nodegroup`](https://eksctl.io/usage/managing-nodegroups/)** to achieve this.
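+If you want to sanity check the diversified instance list before writing the configuration, you can re-run `ec2-instance-selector` from the previous chapter. The command below is a sketch: the 4 vCPU / 16 GB flags are assumed to mirror the 8 vCPU / 32 GB example shown earlier.
+
+```
+# Sketch: regenerate the 4 vCPU / 16 GB candidate list used in the first node group below.
+ec2-instance-selector --vcpus 4 --memory 16 --gpus 0 --current-generation -a x86_64 --deny-list '.*d3en.*'
+```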
+ +Let's first create the configuration file: + +``` +cat << EOF > add-mngs-spot.yaml +--- +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +managedNodeGroups: +- name: mng-spot-4vcpu-16gb + amiFamily: AmazonLinux2 + desiredCapacity: 2 + minSize: 0 + maxSize: 4 + spot: true + instanceTypes: + - m4.xlarge + - m5.xlarge + - m5a.xlarge + - m5ad.xlarge + - m5d.xlarge + - m5dn.xlarge + - m5n.xlarge + - m5zn.xlarge + - m6i.xlarge + - t2.xlarge + - t3.xlarge + - t3a.xlarge + iam: + withAddonPolicies: + autoScaler: true + privateNetworking: true + taints: + - key: spotInstance + value: "true" + effect: PreferNoSchedule + labels: + alpha.eksctl.io/cluster-name: eksworkshop-eksctl + alpha.eksctl.io/nodegroup-name: mng-spot-4vcpu-16gb + intent: apps + tags: + alpha.eksctl.io/nodegroup-name: mng-spot-4vcpu-16gb + alpha.eksctl.io/nodegroup-type: managed + k8s.io/cluster-autoscaler/node-template/label/intent: apps + k8s.io/cluster-autoscaler/node-template/taint/spotInstance: "true:PreferNoSchedule" + +- name: mng-spot-8vcpu-32gb + amiFamily: AmazonLinux2 + desiredCapacity: 1 + minSize: 0 + maxSize: 2 + spot: true + instanceTypes: + - m4.2xlarge + - m5.2xlarge + - m5a.2xlarge + - m5ad.2xlarge + - m5d.2xlarge + - m5dn.2xlarge + - m5n.2xlarge + - m5zn.2xlarge + - m6i.2xlarge + - t2.2xlarge + - t3.2xlarge + - t3a.2xlarge + iam: + withAddonPolicies: + autoScaler: true + privateNetworking: true + taints: + - key: spotInstance + value: "true" + effect: PreferNoSchedule + labels: + alpha.eksctl.io/cluster-name: eksworkshop-eksctl + alpha.eksctl.io/nodegroup-name: mng-spot-8vcpu-32gb + intent: apps + tags: + alpha.eksctl.io/nodegroup-name: mng-spot-8vcpu-32gb + alpha.eksctl.io/nodegroup-type: managed + k8s.io/cluster-autoscaler/node-template/label/intent: apps + k8s.io/cluster-autoscaler/node-template/taint/spotInstance: "true:PreferNoSchedule" + +metadata: + name: eksworkshop-eksctl + region: ${AWS_REGION} + version: "1.21" + +EOF +``` +Create new EKS managed node groups with Spot Instances. + +``` +eksctl create nodegroup --config-file=add-mngs-spot.yaml +``` +{{% notice info %}} +Creation of node groups will take 3-4 minutes. +{{% /notice %}} + + +There are a few things to note in the configuration that we just used to create these node groups. + + * Node groups configurations are set under the **managedNodeGroups** section, this indicates that the node groups are managed by EKS. + * First node group has **xlarge** (4 vCPU and 16 GB) instance types with **minSize** 0, **maxSize** 4 and **desiredCapacity** 2. + * Second node group has **2xlarge** (8 vCPU and 32 GB) instance types with **minSize** 0, **maxSize** 2 and **desiredCapacity** 1. + * The configuration **spot: true** indicates that the node group being created is a EKS managed node group with Spot capacity. + * We applied a **[Taint](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/)** `spotInstance: "true:PreferNoSchedule"`. **PreferNoSchedule** is used to indicate we prefer pods not be scheduled on Spot Instances. This is a “preference” or “soft” version of **NoSchedule** – the system will try to avoid placing a pod that does not tolerate the taint on the node, but it is not required. + * Notice that the we added 3 node labels per node: + + * **alpha.eksctl.io/cluster-name**, to indicate the nodes belong to **eksworkshop-eksctl** cluster. + * **alpha.eksctl.io/nodegroup-name**, to indicate the nodes belong to **mng-spot-4vcpu-16gb** or **mng-spot-8vcpu-32gb** node groups. 
+ * **intent**, to allow you to deploy stateless applications on nodes that have been labeled with value **apps** + + * Notice that the we added 2 cluster autoscaler related tags to node groups: + * **k8s.io/cluster-autoscaler/node-template/label/intent** and **k8s.io/cluster-autoscaler/node-template/taint** are used by cluster autoscaler when node groups scale down to 0 (and scale up from 0). Cluster autoscaler acts on Auto Scaling groups belonging to node groups, therefore it requires same tags on ASG as well. Currently managed node groups do not auto propagate tags to ASG, see this [open issue](https://github.com/aws/containers-roadmap/issues/1524). Therefore, we will be adding these tags to ASG manually. + +Let's add these tags to Auto Scaling groups of each node group using AWS cli. + +``` +ASG_4VCPU_16GB=$(eksctl get nodegroup -n mng-spot-4vcpu-16gb --cluster eksworkshop-eksctl -o json | jq -r '.[].AutoScalingGroupName') +ASG_8VCPU_32GB=$(eksctl get nodegroup -n mng-spot-8vcpu-32gb --cluster eksworkshop-eksctl -o json | jq -r '.[].AutoScalingGroupName') + +aws autoscaling create-or-update-tags --tags \ +ResourceId=$ASG_4VCPU_16GB,ResourceType=auto-scaling-group,Key=k8s.io/cluster-autoscaler/node-template/label/intent,Value=apps,PropagateAtLaunch=true \ +ResourceId=$ASG_4VCPU_16GB,ResourceType=auto-scaling-group,Key=k8s.io/cluster-autoscaler/node-template/taint/spotInstance,Value="true:PreferNoSchedule",PropagateAtLaunch=true + +aws autoscaling create-or-update-tags --tags \ +ResourceId=$ASG_8VCPU_32GB,ResourceType=auto-scaling-group,Key=k8s.io/cluster-autoscaler/node-template/label/intent,Value=apps,PropagateAtLaunch=true \ +ResourceId=$ASG_8VCPU_32GB,ResourceType=auto-scaling-group,Key=k8s.io/cluster-autoscaler/node-template/taint/spotInstance,Value="true:PreferNoSchedule",PropagateAtLaunch=true + +``` + +{{% notice info %}} +If you are wondering at this stage: *Where is spot bidding price ?* you are missing some of the changes EC2 Spot Instances had since 2017. Since November 2017 [EC2 Spot price changes infrequently](https://aws.amazon.com/blogs/compute/new-amazon-ec2-spot-pricing/) based on long term supply and demand of spare capacity in each pool independently. You can still set up a **maxPrice** in scenarios where you want to set maximum budget. By default *maxPrice* is set to the On-Demand price; Regardless of what the *maxPrice* value, Spot Instances will still be charged at the current Spot market price. +{{% /notice %}} + +### Confirm the Nodes + +{{% notice tip %}} +Aside from familiarizing yourself with the kubectl commands below to obtain the cluster information, you should also explore your cluster using **kube-ops-view** and find out the nodes that were just created. +{{% /notice %}} + +Confirm that the new nodes joined the cluster correctly. You should see the nodes added to the cluster. + +``` +kubectl get nodes +``` + +Managed node groups automatically create a label **eks.amazonaws.com/capacityType** to identify which nodes are Spot Instances and which are On-Demand Instances so that we can schedule the appropriate workloads to run on Spot Instances. You can use this node label to identify the lifecycle of the nodes + +``` +kubectl get nodes \ + --label-columns=eks.amazonaws.com/capacityType \ + --selector=eks.amazonaws.com/capacityType=SPOT +``` +The output of this command should return nodes running on Spot Instances. The output of the command shows the **CAPACITYTYPE** for the current nodes is set to **SPOT**. 
+ +``` +NAME STATUS ROLES AGE VERSION CAPACITYTYPE +ip-192-168-101-235.ap-southeast-1.compute.internal Ready 14m v1.21.4-eks-033ce7e SPOT +ip-192-168-130-210.ap-southeast-1.compute.internal Ready 14m v1.21.4-eks-033ce7e SPOT +ip-192-168-176-250.ap-southeast-1.compute.internal Ready 14m v1.21.4-eks-033ce7e SPOT +``` + +Now we will show all nodes running on On Demand Instances. The output of the command shows the **CAPACITYTYPE** for the current nodes is set to **ON_DEMAND**. + +``` +kubectl get nodes \ + --label-columns=eks.amazonaws.com/capacityType \ + --selector=eks.amazonaws.com/capacityType=ON_DEMAND +``` +``` +NAME STATUS ROLES AGE VERSION CAPACITYTYPE +ip-192-168-165-163.ap-southeast-1.compute.internal Ready 51m v1.21.4-eks-033ce7e ON_DEMAND +ip-192-168-99-237.ap-southeast-1.compute.internal Ready 51m v1.21.4-eks-033ce7e ON_DEMAND +``` diff --git a/content/using_ec2_spot_instances_with_eks/deploy/_index.md b/content/using_ec2_spot_instances_with_eks/050_deploy/_index.md similarity index 89% rename from content/using_ec2_spot_instances_with_eks/deploy/_index.md rename to content/using_ec2_spot_instances_with_eks/050_deploy/_index.md index d9069851..32969dc5 100644 --- a/content/using_ec2_spot_instances_with_eks/deploy/_index.md +++ b/content/using_ec2_spot_instances_with_eks/050_deploy/_index.md @@ -1,10 +1,10 @@ --- -title: "Deploying the example Microservice" +title: "Deploy an example Microservice" chapter: true -weight: 60 +weight: 50 --- -# Deploying the example Microservice +# Deploy an example Microservice To illustrate application scaling using [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) (HPA) and cluster scaling using [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) (CA), we will deploy a microservice that generates CPU load. 
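+As a rough sketch of the scaling rule involved (the HPA itself is configured later, in the Autoscale Cluster and Application chapter; the 50% CPU target matches the HPA output shown there, while the replica bounds here are illustrative), the behavior is equivalent to:
+
+```
+# Sketch only: an HPA targeting 50% average CPU utilization for the example service.
+# The workshop configures this in the Autoscale Cluster and Application chapter.
+kubectl autoscale deployment monte-carlo-pi-service --cpu-percent=50 --min=3 --max=100
+```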
diff --git a/content/using_ec2_spot_instances_with_eks/deploy/deploy.md b/content/using_ec2_spot_instances_with_eks/050_deploy/deploy.md similarity index 89% rename from content/using_ec2_spot_instances_with_eks/deploy/deploy.md rename to content/using_ec2_spot_instances_with_eks/050_deploy/deploy.md index dc4abe0e..870ef3db 100644 --- a/content/using_ec2_spot_instances_with_eks/deploy/deploy.md +++ b/content/using_ec2_spot_instances_with_eks/050_deploy/deploy.md @@ -1,11 +1,8 @@ --- -title: "Deploying the Application" +title: "Deploy Application" date: 2018-08-07T08:30:11-07:00 weight: 30 --- -{{% notice warning %}} -Before proceeding, check that your file `~/environment/monte-carlo-pi-service.yml` looks like: **[monte-carlo-pi-service-final.yml](tolerations_and_affinity.files/monte-carlo-pi-service-final.yml)** -{{% /notice %}} To deploy the application we just need to run: ``` @@ -62,11 +59,4 @@ You can also execute a request with the additional parameter from the console: ``` URL=$(kubectl get svc monte-carlo-pi-service | tail -n 1 | awk '{ print $4 }') time curl ${URL}/?iterations=100000000 -``` - - - - - - - +``` \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/deploy/monte_carlo_pi.md b/content/using_ec2_spot_instances_with_eks/050_deploy/monte_carlo_pi.md similarity index 70% rename from content/using_ec2_spot_instances_with_eks/deploy/monte_carlo_pi.md rename to content/using_ec2_spot_instances_with_eks/050_deploy/monte_carlo_pi.md index 8f351bbf..c4db28ea 100644 --- a/content/using_ec2_spot_instances_with_eks/deploy/monte_carlo_pi.md +++ b/content/using_ec2_spot_instances_with_eks/050_deploy/monte_carlo_pi.md @@ -4,9 +4,9 @@ date: 2018-08-07T08:30:11-07:00 weight: 10 --- -## Monte Carlo Pi +## Monte Carlo Pi Template -We will use this base configuration to deploy our application: +Let's create a template configuration file for monte carlo pi application: ``` cat < ~/environment/monte-carlo-pi-service.yml @@ -60,7 +60,7 @@ EoF ``` -This should create a `monte-carlo-pi-service.yml` file that defines a **Service** and a **Deployment**. The configuration instructs the cluster to deploy two replicas of a pod with a single container, that sets up [Resource request and limits](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container) to a fixed value 1vCPU and 512Mi of RAM. You can read more about the differences between Resource requests and limits [here](https://docs.aws.amazon.com/eks/latest/userguide/platform-versions.html). +This should create a `monte-carlo-pi-service.yml` template file that defines a **Service** and a **Deployment**. The configuration instructs the cluster to deploy two replicas of a pod with a single container, that sets up [Resource request and limits](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container) to a fixed value 1vCPU and 512Mi of RAM. You can read more about the differences between Resource requests and limits [here](https://docs.aws.amazon.com/eks/latest/userguide/platform-versions.html). The deployment does not include any toleration or affinities. If deployed as is, it will be scheduled to the on-demand nodes that we created during the cluster creation phase! @@ -72,13 +72,8 @@ There are a few best practices for managing multi-tenant dynamic clusters. 
One o Before we deploy our application and start scaling it, there are two requirements that we should apply and implement in the configuration file: - 1.- The first requirement is for the application to be deployed only on nodes that have been labeled with `intent: apps` - - 2.- The second requirement is for the application to prefer Spot Instances over on-demand instances. - - -In the next section we will explore how to implement this requirements. - - + 1. The first requirement is for the application to be deployed only on nodes that have been labeled with `intent: apps` + 2. The second requirement is for the application to prefer Spot Instances over on-demand instances. + +In the next section we will explore how to implement these requirements. \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/deploy/tolerations_and_affinity.files/monte-carlo-pi-service-final-self-managed.yml b/content/using_ec2_spot_instances_with_eks/050_deploy/tolerations_and_affinity.files/monte-carlo-pi-service-final-self-managed.yml similarity index 100% rename from content/using_ec2_spot_instances_with_eks/deploy/tolerations_and_affinity.files/monte-carlo-pi-service-final-self-managed.yml rename to content/using_ec2_spot_instances_with_eks/050_deploy/tolerations_and_affinity.files/monte-carlo-pi-service-final-self-managed.yml diff --git a/content/using_ec2_spot_instances_with_eks/deploy/tolerations_and_affinity.files/monte-carlo-pi-service-final.yml b/content/using_ec2_spot_instances_with_eks/050_deploy/tolerations_and_affinity.files/monte-carlo-pi-service-final.yml similarity index 100% rename from content/using_ec2_spot_instances_with_eks/deploy/tolerations_and_affinity.files/monte-carlo-pi-service-final.yml rename to content/using_ec2_spot_instances_with_eks/050_deploy/tolerations_and_affinity.files/monte-carlo-pi-service-final.yml diff --git a/content/using_ec2_spot_instances_with_eks/050_deploy/tolerations_and_affinity.md b/content/using_ec2_spot_instances_with_eks/050_deploy/tolerations_and_affinity.md new file mode 100644 index 00000000..adee45f0 --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/050_deploy/tolerations_and_affinity.md @@ -0,0 +1,142 @@ +--- +title: "Tolerations and Affinity" +date: 2018-08-07T08:30:11-07:00 +weight: 20 +--- + +## Add Tolerations + +In the previous chapter [Create EKS managed node groups with Spot capacity]({{< ref "/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/workers_eksctl.md" >}}) we added a taint `spotInstance: "true:PreferNoSchedule"` to both node groups. **PreferNoSchedule** is used to indicate we prefer pods not to be scheduled on Spot Instances. **NoSchedule** can also be used as a taint effect to enforce a hard restriction. To overcome this taint, we need to add a toleration in the deployment. Read about how [tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) are applied and modify the **monte-carlo-pi-service.yml** file accordingly. + +{{%expand "Show me a hint for implementing this." %}} +As per the [tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) documentation +the objective is to add the following section to the `monte-carlo-pi-service.yml`.
+ +The following toleration must be added at the *spec.template.spec* level + +``` + tolerations: + - key: "spotInstance" + operator: "Equal" + value: "true" + effect: "PreferNoSchedule" +``` + +{{% /expand %}} + +## Add Affinities + +Our next task before deployment is to add [affinities](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) to the configuration. + +In the previous chapters we labeled managed node groups with On-Demand capacity with **intent: control-apps** and managed node groups with Spot capacity with **intent: apps**. Additionally EKS adds a label **eks.amazonaws.com/capacityType** and sets its value to **ON_DEMAND** for node groups with On-Demand capacity and **SPOT** for node group with Spot capacity. + +To meet the requirements we defined in previous chapter: *application to be deployed only on nodes that have been labeled with `intent: apps`* and *application to prefer Spot Instances over on-demand Instances*, we need to add two affinity properties: + +- a *requiredDuringSchedulingIgnoredDuringExecution* affinity: also known as "hard" affinity that will limit our deployment to nodes label with **intent: apps** +- a *preferredDuringSchedulingIgnoredDuringExecution* affinity: also known as "soft" affinity that express our preference for nodes of a specific type. In this case Spot Instances labeled with **eks.amazonaws.com/capacityType: SPOT**. + +Read about how affinities can be used to [assign pods to nodes](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) and modify the **monte-carlo-pi-service.yml** file accordingly. + +{{%expand "Show me a hint for implementing this." %}} +As per the [Assign Pods to Nodes](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) documentation the objective is to add the following section to the `monte-carlo-pi-service.yml`. + +The following affinities must be added at the *spec.template.spec* level + +``` + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + operator: In + values: + - SPOT + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: intent + operator: In + values: + - apps +``` + +{{% /expand %}} + + +If you are still struggling with the implementation, then run below command to overwrite `monte-carlo-pi-service.yml` template with the final solution. 
+ +``` +cat < ~/environment/monte-carlo-pi-service.yml +--- +apiVersion: v1 +kind: Service +metadata: + name: monte-carlo-pi-service +spec: + type: LoadBalancer + ports: + - port: 80 + targetPort: 8080 + selector: + app: monte-carlo-pi-service +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: monte-carlo-pi-service + labels: + app: monte-carlo-pi-service +spec: + replicas: 2 + selector: + matchLabels: + app: monte-carlo-pi-service + template: + metadata: + labels: + app: monte-carlo-pi-service + spec: + tolerations: + - key: "spotInstance" + operator: "Equal" + value: "true" + effect: "PreferNoSchedule" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + operator: In + values: + - SPOT + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: intent + operator: In + values: + - apps + containers: + - name: monte-carlo-pi-service + image: ruecarlo/monte-carlo-pi-service + resources: + requests: + memory: "512Mi" + cpu: "1024m" + limits: + memory: "512Mi" + cpu: "1024m" + securityContext: + privileged: false + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + ports: + - containerPort: 8080 + +EoF + +``` \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/scaling/_index.md b/content/using_ec2_spot_instances_with_eks/060_scaling/_index.md similarity index 93% rename from content/using_ec2_spot_instances_with_eks/scaling/_index.md rename to content/using_ec2_spot_instances_with_eks/060_scaling/_index.md index d46e37d9..ec6d82b6 100644 --- a/content/using_ec2_spot_instances_with_eks/scaling/_index.md +++ b/content/using_ec2_spot_instances_with_eks/060_scaling/_index.md @@ -1,7 +1,7 @@ --- -title: "Autoscaling our Applications and Clusters" +title: "Autoscale Cluster and Application" chapter: true -weight: 70 +weight: 60 --- # Implement AutoScaling with HPA and CA diff --git a/content/using_ec2_spot_instances_with_eks/scaling/cleanup.md b/content/using_ec2_spot_instances_with_eks/060_scaling/cleanup.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/scaling/cleanup.md rename to content/using_ec2_spot_instances_with_eks/060_scaling/cleanup.md diff --git a/content/using_ec2_spot_instances_with_eks/scaling/deploy_ca.files/cluster_autoscaler.yml b/content/using_ec2_spot_instances_with_eks/060_scaling/deploy_ca.files/cluster_autoscaler.yml similarity index 94% rename from content/using_ec2_spot_instances_with_eks/scaling/deploy_ca.files/cluster_autoscaler.yml rename to content/using_ec2_spot_instances_with_eks/060_scaling/deploy_ca.files/cluster_autoscaler.yml index 3b9e6fc9..7dc2a33e 100644 --- a/content/using_ec2_spot_instances_with_eks/scaling/deploy_ca.files/cluster_autoscaler.yml +++ b/content/using_ec2_spot_instances_with_eks/060_scaling/deploy_ca.files/cluster_autoscaler.yml @@ -48,7 +48,7 @@ rules: resources: ["storageclasses"] verbs: ["watch","list","get"] - apiGroups: ["storage.k8s.io"] - resources: ["csinodes"] + resources: ["csinodes","csistoragecapacities","csidrivers"] verbs: ["watch","list","get"] - apiGroups: ["batch"] resources: ["jobs"] @@ -132,15 +132,15 @@ spec: nodeSelector: intent: control-apps containers: - - image: k8s.gcr.io/autoscaling/cluster-autoscaler:v1.20.0 + - image: k8s.gcr.io/autoscaling/cluster-autoscaler:v1.21.1 name: cluster-autoscaler resources: limits: - cpu: 100m - memory: 300Mi + cpu: 1024m + memory: 1024Mi requests: 
- cpu: 100m - memory: 300Mi + cpu: 1024m + memory: 1024Mi command: - ./cluster-autoscaler - --v=4 diff --git a/content/using_ec2_spot_instances_with_eks/scaling/deploy_ca.md b/content/using_ec2_spot_instances_with_eks/060_scaling/deploy_ca.md similarity index 80% rename from content/using_ec2_spot_instances_with_eks/scaling/deploy_ca.md rename to content/using_ec2_spot_instances_with_eks/060_scaling/deploy_ca.md index ea89714e..f0a580ff 100644 --- a/content/using_ec2_spot_instances_with_eks/scaling/deploy_ca.md +++ b/content/using_ec2_spot_instances_with_eks/060_scaling/deploy_ca.md @@ -6,27 +6,27 @@ weight: 10 We will start by deploying [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler). Cluster Autoscaler for AWS provides integration with Auto Scaling groups. It enables users to choose from four different options of deployment: -* One Auto Scaling group +* One Auto Scaling group * Multiple Auto Scaling groups * **Auto-Discovery** - This is what we will use * Master Node setup -In this workshop we will configure Cluster Autoscaler to scale using **[Cluster Autoscaler Auto-Discovery functionality](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md)**. When configured in Auto-Discovery mode on AWS, Cluster Autoscaler will look for Auto Scaling Groups that match a set of pre-set AWS tags. As a convention we use the tags : `k8s.io/cluster-autoscaler/enabled`, and `k8s.io/cluster-autoscaler/eksworkshop-eksctl` . +In this workshop we will configure Cluster Autoscaler to scale using **[Cluster Autoscaler Auto-Discovery functionality](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md)**. When configured in Auto-Discovery mode on AWS, Cluster Autoscaler will look for Auto Scaling groups that match a set of pre-set AWS tags. As a convention we use the tags : `k8s.io/cluster-autoscaler/enabled`, and `k8s.io/cluster-autoscaler/eksworkshop-eksctl` . -This will select the two Auto Scaling groups that have been created for Spot instances. +This will select the two Auto Scaling groups that have been created for Spot Instances. {{% notice note %}} -The **[following link](https://console.aws.amazon.com/ec2/autoscaling/home?#AutoScalingGroups:filter=eksctl-eksworkshop-eksctl-nodegroup-dev;view=details)** Should take you to the -Auto Scaling Group console and select the two spot node-group we have previously created; You should check that +The **[following link](https://console.aws.amazon.com/ec2autoscaling/home?#/details)** should take you to the +Auto Scaling groups console. Type 'eks' in the search bar to see managed node groups we created previously . Verify the tags `k8s.io/cluster-autoscaler/enabled`, and `k8s.io/cluster-autoscaler/eksworkshop-eksctl` are present -in both groups. This has been done automatically by **eksctl** upon creation of the groups. +in all managed node groups. These tags were automatically added by EKS upon the creation of managed node groups. {{% /notice %}} We have provided a manifest file to deploy the CA. Copy the commands below into your Cloud9 Terminal. 
``` mkdir -p ~/environment/cluster-autoscaler -curl -o ~/environment/cluster-autoscaler/cluster_autoscaler.yml https://raw.githubusercontent.com/awslabs/ec2-spot-workshops/master/content/using_ec2_spot_instances_with_eks/scaling/deploy_ca.files/cluster_autoscaler.yml +curl -o ~/environment/cluster-autoscaler/cluster_autoscaler.yml https://raw.githubusercontent.com/awslabs/ec2-spot-workshops/master/content/using_ec2_spot_instances_with_eks/060_scaling/deploy_ca.files/cluster_autoscaler.yml sed -i "s/--AWS_REGION--/${AWS_REGION}/g" ~/environment/cluster-autoscaler/cluster_autoscaler.yml ``` diff --git a/content/using_ec2_spot_instances_with_eks/scaling/deploy_hpa.md b/content/using_ec2_spot_instances_with_eks/060_scaling/deploy_hpa.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/scaling/deploy_hpa.md rename to content/using_ec2_spot_instances_with_eks/060_scaling/deploy_hpa.md diff --git a/content/using_ec2_spot_instances_with_eks/scaling/test_ca.md b/content/using_ec2_spot_instances_with_eks/060_scaling/test_ca.md similarity index 54% rename from content/using_ec2_spot_instances_with_eks/scaling/test_ca.md rename to content/using_ec2_spot_instances_with_eks/060_scaling/test_ca.md index a1a98dd4..e5fb079d 100644 --- a/content/using_ec2_spot_instances_with_eks/scaling/test_ca.md +++ b/content/using_ec2_spot_instances_with_eks/060_scaling/test_ca.md @@ -1,5 +1,5 @@ --- -title: "Scale a Cluster with CA" +title: "Scale Cluster with CA" date: 2018-08-07T08:30:11-07:00 weight: 30 --- @@ -37,9 +37,7 @@ kubectl scale deployment/monte-carlo-pi-service --replicas=0 **Question:** Can you predict what would be the result of scaling down to 0 replicas? {{%expand "Show me the answer" %}} -The configuration that we applied to procure our nodegroups states that the minimum number of instances in the auto scaling group is 2 for both Spot managed node groups. - -(Self managed node group) The configuration that we applied to procure our nodegroups states that the minimum number of instances in the auto scaling group is 0 for both nodegroups. Starting from 1.14 version Cluster Autoscaler does support scaling down to 0. +The configuration that we applied to procure our node groups states that the minimum number of instances in the auto scaling group is 0 for both node groups. Starting from `1.14` version Cluster Autoscaler supports scaling down to 0. By setting the number of replicas to 0, Cluster Autoscaler will detect that the current instances are idle and can be removed to the minSize of the Auto Scaling Group. This may take up to 3 minutes. Cluster autoscaler will log lines such as the one below flagging that the instance is unneeded. 
@@ -47,25 +45,21 @@ By setting the number of replicas to 0, Cluster Autoscaler will detect that the I1120 00:22:37.204988 1 static_autoscaler.go:382] ip-192-168-54-241.eu-west-1.compute.internal is unneeded since 2021-03-20 00:21:16.651612719 +0000 UTC m=+4789.747568996 duration 1m20.552551794s ``` -After some time, you should be able to confirm that running `kubectl get nodes` return only our 6 initial nodes: +After some time, you should be able to confirm that running `kubectl get nodes` return only our 2 initial On-Demand nodes: ``` -$ kubectl get nodes -NAME STATUS ROLES AGE VERSION -ip-192-168-100-154.us-west-2.compute.internal Ready 87m v1.20.4-eks-6b7464 -ip-192-168-186-233.us-west-2.compute.internal Ready 87m v1.20.4-eks-6b7464 -ip-192-168-33-105.us-west-2.compute.internal Ready 69m v1.20.4-eks-6b7464 -ip-192-168-37-117.us-west-2.compute.internal Ready 17m v1.20.4-eks-6b7464 -ip-192-168-73-255.us-west-2.compute.internal Ready 73m v1.20.4-eks-6b7464 -ip-192-168-85-132.us-west-2.compute.internal Ready 69m v1.20.4-eks-6b7464 +$ kubectl get nodes --label-columns=eks.amazonaws.com/capacityType +``` +``` +NAME STATUS ROLES AGE VERSION CAPACITYTYPE +ip-192-168-165-163.ap-southeast-1.compute.internal Ready 4h1m v1.21.4-eks-033ce7e ON_DEMAND +ip-192-168-99-237.ap-southeast-1.compute.internal Ready 4h1m v1.21.4-eks-033ce7e ON_DEMAND ``` {{% notice tip %}} -Check in the AWS console that both auto-scaling groups have now the Desired capacity set to 0. You can **[follow this link](https://console.aws.amazon.com/ec2/autoscaling/home?#AutoScalingGroups:filter=eksctl-eksworkshop-eksctl-nodegroup-dev;view=details)** to get into the Auto Scaling Group AWS console. +Check in the AWS console that Spot auto-scaling groups have now the Desired capacity set to 0. You can **[follow this link](https://console.aws.amazon.com/ec2/autoscaling/home?#AutoScalingGroups:filter=eksctl-eksworkshop-eksctl-nodegroup-dev;view=details)** to get into the Auto Scaling Group AWS console. {{% /notice %}} - - {{% /expand %}} @@ -84,23 +78,23 @@ kubectl get pods --watch ``` NAME READY STATUS RESTARTS AGE -monte-carlo-pi-service-584f6ddff-fk2nj 1/1 Running 0 20m21s +monte-carlo-pi-service-584f6ddff-fk2nj 1/1 Running 0 103s monte-carlo-pi-service-584f6ddff-fs9x6 1/1 Running 0 103s monte-carlo-pi-service-584f6ddff-jst55 1/1 Running 0 103s ``` You should also be able to visualize the scaling action using kube-ops-view. Kube-ops-view provides an option to highlight pods meeting a regular expression. All pods in green are **monte-carlo-pi-service** pods. -![Scaling up to 10 replicas](/images/using_ec2_spot_instances_with_eks/scaling/scaling-kov-10-replicas.png) +![Scaling up to 3 replicas](/images/using_ec2_spot_instances_with_eks/scaling/scaling-to-3-replicas.png) {{% notice info %}} -Given we started from 2 node capacity in both Spot node groups, this should trigger a scaling event for Cluster Autoscaler. Can you predict which size (and type!) of node will be provided? +Given we started from 0 nodes in both Spot node groups, this should trigger a scaling event for Cluster Autoscaler. Can you predict which size (and type!) of node will be provided? {{% /notice %}} #### Challenge Try to answer the following questions: - - Could you predict what should happen if we increase the number of replicas to 23? - - How would you scale up the replicas to 23? + - Could you predict what should happen if we increase the number of replicas to 20? + - How would you scale up the replicas to 20? 
- If you are expecting a new node, which size will it be: (a) 4vCPU's 16GB RAM or (b) 8vCPU's 32GB RAM? - Which EC2 Instance type you would expect to be selected? - How would you confirm your predictions? @@ -109,18 +103,18 @@ Try to answer the following questions: {{%expand "Show me the answers" %}} To scale up the number of replicas run: ``` -kubectl scale deployment/monte-carlo-pi-service --replicas=23 +kubectl scale deployment/monte-carlo-pi-service --replicas=20 ``` -When the number of replicas scales up, there will be a few pods pending. You can confirm which pods are pending running the command below. +When the number of replicas scales up, there will be pods pending. You can confirm which pods are pending by running the command below. ``` kubectl get pods --watch ``` -Kube-ops-view, will show 3 pending yellow pods outside the node. -![Scale Up](/images/using_ec2_spot_instances_with_eks/scaling/scaling-asg-up-kov.png) +Kube-ops-view, will show pending yellow pods outside the node. +![Scale Up](/images/using_ec2_spot_instances_with_eks/scaling/scaling-to-20replicas.png) -When inspecting cluster-autoscaler logs with the command line below +When inspecting cluster-autoscaler logs with the command line below. ``` kubectl logs -f deployment/cluster-autoscaler -n kube-system ``` @@ -135,32 +129,28 @@ kubectl get node --selector=intent=apps --show-labels You can verify in AWS Management Console to confirm that the Auto Scaling groups are scaling up to meet demand. This may take a few minutes. You can also follow along with the pod deployment from the command line. You should see the pods transition from pending to running as nodes are scaled up. -![Scale Up](/images/using_ec2_spot_instances_with_eks/scaling/scaling-asg-up.png) +![Scale Up](/images/using_ec2_spot_instances_with_eks/scaling/scaling-asg-20.png) {{% notice info %}} -Cluster Autoscaler expands capacity according to the [Expander](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-are-expanders) configuration. By default, Cluster Autoscaler uses the **random** expander. This means that there is equal probability of cluster autoscaler selecting the 4vCPUs 16GB RAM group or the 8vCPUs 32GB RAM group. You may consider also using other expanders like **least-waste**, or the **priority** expander. +Cluster Autoscaler expands capacity according to the [**Expander**](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-are-expanders) configuration. By default, Cluster Autoscaler uses the **random** expander. This means that there is equal probability of cluster autoscaler selecting the 4vCPUs 16GB RAM group or the 8vCPUs 32GB RAM group. You may consider also using other expanders like **least-waste**, or the **priority** expander. {{% /notice %}} -As for the node that was selected, by default the Autoscaling Groups that we created with eksctl use [capacity-optimized allocation strategy](https://docs.aws.amazon.com/en_pv/autoscaling/ec2/userguide/asg-purchase-options.html#asg-allocation-strategies), this makes the Auto Scaling Group procure capacity from the pools that has less chances of interruptions. +As for the EC2 Instance type that was selected, by default the Auto Scaling groups that we created with eksctl use [capacity-optimized allocation strategy](https://docs.aws.amazon.com/en_pv/autoscaling/ec2/userguide/asg-purchase-options.html#asg-allocation-strategies), this makes the Auto Scaling group procure capacity from the pools that has less chances of interruptions. 
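+If you later want to experiment with a different expander (see the optional exercises further down), one way to switch it is sketched below, assuming the Cluster Autoscaler deployment name used in this workshop.
+
+```
+# Sketch: switch Cluster Autoscaler from the default random expander to least-waste.
+kubectl -n kube-system edit deployment cluster-autoscaler
+# In the editor, add the following flag to the cluster-autoscaler container command and save:
+#   - --expander=least-waste
+# Then wait for the new pod to roll out:
+kubectl -n kube-system rollout status deployment cluster-autoscaler
+```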
{{% /expand %}} -After you've completed the exercise, scale down your replicas back down in preparation for the configuration of Horizontal Pod Autoscheduler. +After you've completed the exercise, scale down your replicas back down in preparation for the configuration of Horizontal Pod Autoscaler. ``` kubectl scale deployment/monte-carlo-pi-service --replicas=3 ``` -{{% notice info %}} -It is a recommended to use **[capacity-optimized](https://aws.amazon.com/blogs/compute/introducing-the-capacity-optimized-allocation-strategy-for-amazon-ec2-spot-instances/)** as an allocation strategy for your mixed instances EC2 Spot nodegroups. Other Strategies like *[Lowest Price](https://docs.aws.amazon.com/autoscaling/ec2/userguide/asg-purchase-options.html)* might be still considered for nodes that just process [Kubernetes retriable Jobs](https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/) -{{% /notice %}} - ### Optional Exercises {{% notice warning %}} Some of this exercises will take time for Cluster Autoscaler to scale up and down. If you are running this workshop at a AWS event or with limited time, we recommend to come back to this section once you have completed the workshop, and before getting into the **cleanup** section. {{% /notice %}} - * What will happen when modifying Cluster Autoscaler **expander** configuration from **random** to **least-waste**. What happens when we increase the replicas back to 23? What happens if we increase the number of replicas to 30? Can you predict which node group will be expanded in each case: (a) 4vCPUs 16GB RAM (b) 8vCPUs 32GB RAM? What's Cluster Autoscaler log looking like in this case? + * What will happen when modifying Cluster Autoscaler **expander** configuration from **random** to **least-waste**. What happens if we increase the number of replicas to 40? Can you predict which node group will be expanded in each case: (a) 4vCPUs 16GB RAM (b) 8vCPUs 32GB RAM? What's Cluster Autoscaler log looking like in this case? * How would you expect Cluster Autoscaler to Scale in the cluster? How about scaling out? How much time you'll expect for it to take? @@ -168,8 +158,4 @@ Some of this exercises will take time for Cluster Autoscaler to scale up and dow * Scheduling in Kubernetes is the process of binding pending pods to nodes, and is performed by a component of Kubernetes called [kube-scheduler](https://kubernetes.io/docs/concepts/scheduling/kube-scheduler/). When running on Spot the cluster is expected to be dynamic; the state is expected to change over time; The original scheduling decision may not be adequate after the state changes. Could you think or research for a project that could help address this? ([hint_1](https://github.com/kubernetes-sigs/descheduler)) [hint_2](https://github.com/pusher/k8s-spot-rescheduler). If so apply the solution and see what is the impact on scale-in operations. - * During the workshop, we did use nodegroups that expand across multiple AZ's; There are scenarios where might create issues. Could you think which scenarios? ([hint](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/cloudprovider/aws#common-notes-and-gotchas)). Could you think of ways of mitigating the risk in those scenarios? 
([hint 1](https://github.com/aws-samples/amazon-k8s-node-drainer), [hint 2](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#im-running-cluster-with-nodes-in-multiple-zones-for-ha-purposes-is-that-supported-by-cluster-autoscaler)) - - * **Managed Spot node groups only:** EC2 Auto Scaling Group automatically sets the node group to use the [capacity-optimized allocation strategy](https://docs.aws.amazon.com/en_pv/autoscaling/ec2/userguide/asg-purchase-options.html#asg-allocation-strategies). - - * **Self managed Spot node groups only:** At the moment EC2 Auto Scaling Group backing up the node group are setup to use the [capacity-optimized allocation strategy](https://docs.aws.amazon.com/en_pv/autoscaling/ec2/userguide/asg-purchase-options.html#asg-allocation-strategies). What do you think is the trade-off when you switch to [lowest price](https://docs.aws.amazon.com/en_pv/autoscaling/ec2/userguide/asg-purchase-options.html#asg-allocation-strategies) allocation strategy? \ No newline at end of file + * During the workshop, we used node groups that expand across multiple AZ's; There are scenarios where might create issues. Could you think which scenarios? ([hint](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/cloudprovider/aws#common-notes-and-gotchas)). Could you think of ways of mitigating the risk in those scenarios? ([hint 1](https://github.com/aws-samples/amazon-k8s-node-drainer), [hint 2](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#im-running-cluster-with-nodes-in-multiple-zones-for-ha-purposes-is-that-supported-by-cluster-autoscaler)) \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/scaling/test_hpa.md b/content/using_ec2_spot_instances_with_eks/060_scaling/test_hpa.md similarity index 96% rename from content/using_ec2_spot_instances_with_eks/scaling/test_hpa.md rename to content/using_ec2_spot_instances_with_eks/060_scaling/test_hpa.md index bc4e7746..2f604c45 100644 --- a/content/using_ec2_spot_instances_with_eks/scaling/test_hpa.md +++ b/content/using_ec2_spot_instances_with_eks/060_scaling/test_hpa.md @@ -1,5 +1,5 @@ --- -title: "Scale the Application" +title: "Stress test the Application" date: 2018-08-07T08:30:11-07:00 weight: 50 --- @@ -42,7 +42,7 @@ kubectl get svc kube-ops-view | tail -n 1 | awk '{ print "Kube-ops-view URL = ht Run the stress test ! This time around we will run 2000 requests each expected to take ~1.3sec or so. 
``` -time ~/environment/submit_mc_pi_k8s_requests/submit_mc_pi_k8s_requests.py -p 100 -r 30 -i 30000000 -u "http://${URL}" +time ~/environment/submit_mc_pi_k8s_requests/submit_mc_pi_k8s_requests.py -p 100 -r 20 -i 30000000 -u "http://${URL}" ``` ### Challenge @@ -78,10 +78,10 @@ monte-carlo-pi-service Deployment/monte-carlo-pi-service 92%/50% 4 monte-carlo-pi-service Deployment/monte-carlo-pi-service 92%/50% 4 100 19 37m monte-carlo-pi-service Deployment/monte-carlo-pi-service 94%/50% 4 100 19 38m monte-carlo-pi-service Deployment/monte-carlo-pi-service 85%/50% 4 100 19 39m -monte-carlo-pi-service Deployment/monte-carlo-pi-service 85%/50% 4 100 29 39m -monte-carlo-pi-service Deployment/monte-carlo-pi-service 54%/50% 4 100 29 40m -monte-carlo-pi-service Deployment/monte-carlo-pi-service 0%/50% 4 100 29 41m -monte-carlo-pi-service Deployment/monte-carlo-pi-service 0%/50% 4 100 29 45m +monte-carlo-pi-service Deployment/monte-carlo-pi-service 85%/50% 4 100 19 39m +monte-carlo-pi-service Deployment/monte-carlo-pi-service 54%/50% 4 100 19 40m +monte-carlo-pi-service Deployment/monte-carlo-pi-service 0%/50% 4 100 19 41m +monte-carlo-pi-service Deployment/monte-carlo-pi-service 0%/50% 4 100 19 45m monte-carlo-pi-service Deployment/monte-carlo-pi-service 0%/50% 4 100 12 46m monte-carlo-pi-service Deployment/monte-carlo-pi-service 0%/50% 4 100 12 47m monte-carlo-pi-service Deployment/monte-carlo-pi-service 0%/50% 4 100 4 48m diff --git a/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/_index.md b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/_index.md new file mode 100644 index 00000000..059f4e6b --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/_index.md @@ -0,0 +1,12 @@ +--- +title: "(Optional) Add self managed Spot workers" +chapter: true +weight: 70 +draft: false +--- + +# (Optional) Add self managed Spot workers + +In this module, you will learn how to provision self managed node groups with Spot Instances to optimize cost and scale. + +![Title Image](/images/using_ec2_spot_instances_with_eks/spotworkers/eks_self_managed_architecture.png) diff --git a/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/deploy_scale.md b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/deploy_scale.md new file mode 100644 index 00000000..1bd37934 --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/deploy_scale.md @@ -0,0 +1,108 @@ +--- +title: "Test Autoscaling of Cluster and Application" +date: 2018-08-07T08:30:11-07:00 +weight: 50 +--- + +{{% notice info %}} +We added same labels (**intent: apps** and **eks.amazonaws.com/capacityType: SPOT**) and taint (**spotInstance: "true:PreferNoSchedule"**) to EKS managed node groups and Self managed node groups, therefore the same example service can be scaled on both types of node groups. +{{% /notice %}} + +{{% notice note %}} +If you are starting with **self managed Spot workers** chapter directly and planning to run only self managed node groups with Spot Instances, then complete below chapters and proceed to cleanup chapter directly.
+
+[Deploy an example Microservice]({{< relref "/using_ec2_spot_instances_with_eks/050_deploy" >}})
+[Autoscale Cluster and Application]({{< relref "/using_ec2_spot_instances_with_eks/060_scaling" >}}) +{{% /notice %}} + +{{% notice info %}} + +If you are have already completed **EKS managed Spot workers** chapters and still want to explore self managed node groups with Spot Instances, then continue with this chapter. + +{{% /notice %}} + +At this point we have 5 node groups in our cluster: + +* One **EKS managed** node group with **On-Demand** capacity (mng-od-m5large) +* Two **EKS managed** node groups with **Spot** capacity (mng-spot-4vcpu-16gb and mng-spot-8vcpu-32gb) +* Two **self managed** node groups with **Spot** Instances (ng-spot-4vcpu-16gb and ) + + +### Stress test the application + +Let's do a repeat of earlier Stress test with double the number of requests. We will test to see if stressing the same application can trigger autoscaling of both **EKS managed** node groups with Spot capacity and **self managed** node groups with Spot Instances. + +{{% notice note %}} +Before starting the stress test, predict what would be the expected outcome. Use **kube-ops-view** to verify that the changes you were expecting to happen, do in fact happen over time. +{{% /notice %}} +{{%expand "Show me how to get kube-ops-view url" %}} +Execute the following command on Cloud9 terminal +``` +kubectl get svc kube-ops-view | tail -n 1 | awk '{ print "Kube-ops-view URL = http://"$4 }' +``` +{{% /expand %}} + +Run the stress test ! This time around we will run **4000** requests (as we have twice the capacity) each expected to take ~1.3sec or so. +``` +time ~/environment/submit_mc_pi_k8s_requests/submit_mc_pi_k8s_requests.py -p 100 -r 40 -i 30000000 -u "http://${URL}" +``` + +To display the progress of the rule was setup in Horizontal Pod Autoscaler we can run: +``` +kubectl get hpa -w +``` +This should show the current progress and target pods, and refresh a new line every few seconds. 
+``` +:~/environment $ kubectl get hpa -w +monte-carlo-pi-service Deployment/monte-carlo-pi-service 0%/50% 3 100 0 11s +monte-carlo-pi-service Deployment/monte-carlo-pi-service 50%/50% 3 100 6 15s +monte-carlo-pi-service Deployment/monte-carlo-pi-service 100%/50% 3 100 3 75s +monte-carlo-pi-service Deployment/monte-carlo-pi-service 100%/50% 3 100 6 90s +monte-carlo-pi-service Deployment/monte-carlo-pi-service 60%/50% 3 100 6 2m16s +monte-carlo-pi-service Deployment/monte-carlo-pi-service 100%/50% 3 100 6 3m16s +monte-carlo-pi-service Deployment/monte-carlo-pi-service 100%/50% 3 100 12 3m31s +monte-carlo-pi-service Deployment/monte-carlo-pi-service 74%/50% 3 100 21 7m17s +monte-carlo-pi-service Deployment/monte-carlo-pi-service 80%/50% 3 100 21 8m17s +monte-carlo-pi-service Deployment/monte-carlo-pi-service 14%/50% 3 100 21 9m17s +monte-carlo-pi-service Deployment/monte-carlo-pi-service 14%/50% 3 100 21 10m +monte-carlo-pi-service Deployment/monte-carlo-pi-service 14%/50% 3 100 21 11m +monte-carlo-pi-service Deployment/monte-carlo-pi-service 14%/50% 3 100 21 14m +monte-carlo-pi-service Deployment/monte-carlo-pi-service 50%/50% 3 100 6 14m +monte-carlo-pi-service Deployment/monte-carlo-pi-service 33%/50% 3 100 6 18m +monte-carlo-pi-service Deployment/monte-carlo-pi-service 30%/50% 3 100 6 21m +monte-carlo-pi-service Deployment/monte-carlo-pi-service 0%/50% 3 100 6 22m +monte-carlo-pi-service Deployment/monte-carlo-pi-service 0%/50% 3 100 6 23m +``` + + +To display the node or pod you can use +``` +kubectl top nodes +``` +or +``` +kubectl top pods +``` + +{{% notice info %}} +Cluster Autoscaler will use default **random** expander to scale both types of node groups providing they have matching labels and taints. +{{% /notice %}} + +After some time, you should be able to confirm that running `kubectl get nodes` return both self managed and EKS managed Spot nodes: + +``` +kubectl get nodes --label-columns=alpha.eksctl.io/nodegroup-name,eks.amazonaws.com/capacityType,type +``` + +``` +NAME STATUS ROLES AGE VERSION NODEGROUP-NAME CAPACITYTYPE TYPE +ip-192-168-111-213.ap-southeast-1.compute.internal Ready 22h v1.21.4-eks-033ce7e mng-od-m5large ON_DEMAND +ip-192-168-140-47.ap-southeast-1.compute.internal Ready 22h v1.21.4-eks-033ce7e mng-od-m5large ON_DEMAND +ip-192-168-189-229.ap-southeast-1.compute.internal Ready 3h2m v1.21.4-eks-033ce7e mng-spot-4vcpu-16gb SPOT +ip-192-168-34-125.ap-southeast-1.compute.internal Ready 3m3s v1.21.4-eks-033ce7e ng-spot-4vcpu-16gb SPOT self-managed-spot +ip-192-168-6-44.ap-southeast-1.compute.internal Ready 3m8s v1.21.4-eks-033ce7e ng-spot-4vcpu-16gb SPOT self-managed-spot +ip-192-168-64-221.ap-southeast-1.compute.internal Ready 6m v1.21.4-eks-033ce7e ng-spot-4vcpu-16gb SPOT self-managed-spot +``` + +You should also be able to visualize the scaling action using kube-ops-view. Kube-ops-view provides an option to highlight pods meeting a regular expression. All pods in green are **monte-carlo-pi-service** pods. 
+![Scaling up to 10 replicas](/images/using_ec2_spot_instances_with_eks/scaling/scaling-kov-10-replicas.png) diff --git a/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/deployhandler.md b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/deployhandler.md new file mode 100644 index 00000000..ddf6542d --- /dev/null +++ b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/deployhandler.md @@ -0,0 +1,57 @@ +--- +title: "Deploy AWS Node Termination Handler" +date: 2018-08-07T12:32:40-07:00 +weight: 40 +draft: false +--- + +When users requests On-Demand Instances from a pool to the point that the pool is depleted, the system will select a set of Spot Instances from the pool to be terminated. A Spot Instance pool is a set of unused EC2 instances with the same instance type (for example, m5.large), operating system, Availability Zone, and network platform. The Spot Instance is sent an interruption notice two minutes ahead to gracefully wrap up things. + +We will deploy a pod on each Spot Instance to detect the instance termination notification signal so that we can both terminate gracefully any pod that was running on that node, drain from load balancers and redeploy applications elsewhere in the cluster. + +**[AWS Node Termination Handler](https://github.com/aws/aws-node-termination-handler)** + ensures that the Kubernetes control plane responds appropriately to events that can cause your EC2 instance to become unavailable, such as [EC2 maintenance events](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/monitoring-instances-status-check_sched.html), [EC2 Spot interruptions](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-interruptions.html), [ASG Scale-In](https://docs.aws.amazon.com/autoscaling/ec2/userguide/AutoScalingGroupLifecycle.html#as-lifecycle-scale-in), [ASG AZ Rebalance](https://docs.aws.amazon.com/autoscaling/ec2/userguide/auto-scaling-benefits.html#AutoScalingBehavior.InstanceUsage), and EC2 Instance Termination via the API or Console. If not handled, your application code may not stop gracefully, take longer to recover full availability, or accidentally schedule work to nodes that are going down. + +The aws-node-termination-handler (NTH) can operate in two different modes: **Instance Metadata Service (IMDS)** or the **Queue Processor**. + +* The aws-node-termination-handler **Instance Metadata Service Monitor** will run a small pod on each host to perform monitoring of IMDS paths like /spot or /events and react accordingly to drain and/or cordon the corresponding node. +* The aws-node-termination-handler **Queue Processor** will monitor an SQS queue of events from Amazon EventBridge for ASG lifecycle events, EC2 status change events, Spot Interruption Termination Notice events, and Spot Rebalance Recommendation events. When NTH detects an instance is going down, we use the Kubernetes API to cordon the node to ensure no new work is scheduled there, then drain it, removing any existing work. The termination handler Queue Processor requires AWS IAM permissions to monitor and manage the SQS queue and to query the EC2 API. 
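+To make the IMDS mode more concrete, the DaemonSet essentially polls instance metadata paths such as the Spot interruption notice from every node it runs on. The sketch below (run from a Spot node, using IMDSv2) shows the kind of check involved; the path returns HTTP 404 until an interruption has actually been scheduled.
+
+```
+# Sketch: the Spot interruption path the IMDS monitor watches on each node (IMDSv2).
+TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" \
+  -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
+# Returns 404 until a Spot interruption is scheduled for this instance,
+# then a JSON document with the action and time.
+curl -s -o /dev/null -w "%{http_code}\n" \
+  -H "X-aws-ec2-metadata-token: $TOKEN" \
+  http://169.254.169.254/latest/meta-data/spot/instance-action
+```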
Review the table below to decide which option to use:
+
+| Feature | IMDS Processor | Queue Processor |
+| ------------ | ----------- | ----------- |
+| K8s DaemonSet | ✅ | ❌ |
+| K8s Deployment | ❌ | ✅ |
+| Spot Instance Interruptions (ITN) | ✅ | ✅ |
+| Scheduled Events | ✅ | ✅ |
+| EC2 Instance Rebalance Recommendation | ✅ | ✅ |
+| ASG Lifecycle Hooks | ❌ | ✅ |
+| EC2 Status Changes | ❌ | ✅ |
+| Setup Required | ❌ | ✅ |
+
+To keep it simple, we will use a Helm chart to deploy aws-node-termination-handler in IMDS mode as a [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) on each Spot Instance. With aws-node-termination-handler running in IMDS mode, the workflow can be summarized as:
+
+* Identify that a Spot Instance is being reclaimed.
+* Use the 2-minute notification window to gracefully prepare the node for termination.
+* [**Taint**](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) the node and cordon it off to prevent new pods from being placed.
+* [**Drain**](https://kubernetes.io/docs/tasks/administer-cluster/safely-drain-node/) connections on the running pods.
+* Replace the pods on remaining nodes to maintain the desired capacity.
+
+By default, **[aws-node-termination-handler](https://github.com/aws/aws-node-termination-handler)** will run on all of your nodes. Let's limit its scope to only the self managed node groups with Spot Instances.
+
+```
+helm repo add eks https://aws.github.io/eks-charts
+helm install aws-node-termination-handler \
+  --namespace kube-system \
+  --version 0.15.4 \
+  --set nodeSelector.type=self-managed-spot \
+  eks/aws-node-termination-handler
+```
+
+Verify that the handler pods are running on all of the self managed Spot nodes:
+```
+kubectl get daemonsets --all-namespaces
+```
+
+Use **kube-ops-view** to confirm that the *AWS Node Termination Handler* DaemonSet has been deployed to all self managed Spot nodes.
\ No newline at end of file
diff --git a/content/using_ec2_spot_instances_with_eks/spotworkers/preferspot.files/deployment-solution.yml b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/preferspot.files/deployment-solution.yml
similarity index 100%
rename from content/using_ec2_spot_instances_with_eks/spotworkers/preferspot.files/deployment-solution.yml
rename to content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/preferspot.files/deployment-solution.yml
diff --git a/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/self_managed_cleanup.md b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/self_managed_cleanup.md
new file mode 100644
index 00000000..e369f6bc
--- /dev/null
+++ b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/self_managed_cleanup.md
@@ -0,0 +1,20 @@
+---
+title: "Self managed Spot workers cleanup"
+date: 2018-08-07T08:30:11-07:00
+weight: 100
+---
+
+{{% notice note %}}
+If you're running in an account that was created for you as part of an AWS event, there's no need to go through the cleanup stage - the account will be closed automatically.\
+If you're running in your own account, make sure you run through these steps so that you don't incur unwanted costs.
+{{% /notice %}}
+
+## Cleaning up NTH
+```
+helm delete aws-node-termination-handler --namespace kube-system
+```
+
+## Removing self managed node groups
+```
+eksctl delete nodegroup -f add-ngs-spot.yaml --approve
+```
diff --git a/content/using_ec2_spot_instances_with_eks/spotworkers/spotworkers.files/cloudformation_mixed_workers.yml b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/spotworkers.files/cloudformation_mixed_workers.yml
similarity index 100%
rename from content/using_ec2_spot_instances_with_eks/spotworkers/spotworkers.files/cloudformation_mixed_workers.yml
rename to content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/spotworkers.files/cloudformation_mixed_workers.yml
diff --git a/content/using_ec2_spot_instances_with_eks/spotworkers/spotworkers.files/eksctl_mixed_workers_bootstrap.yml b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/spotworkers.files/eksctl_mixed_workers_bootstrap.yml
similarity index 100%
rename from content/using_ec2_spot_instances_with_eks/spotworkers/spotworkers.files/eksctl_mixed_workers_bootstrap.yml
rename to content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/spotworkers.files/eksctl_mixed_workers_bootstrap.yml
diff --git a/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/workers_eksctl.md b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/workers_eksctl.md
new file mode 100644
index 00000000..8097a52e
--- /dev/null
+++ b/content/using_ec2_spot_instances_with_eks/070_selfmanagednodegroupswithspot/workers_eksctl.md
@@ -0,0 +1,148 @@
+---
+title: "Create self managed node groups with Spot Instances"
+date: 2018-08-07T11:05:19-07:00
+weight: 30
+draft: false
+---
+{{% notice warning %}}
+
+If you are starting directly with the **self managed Spot workers** chapter and plan to run only self managed node groups with Spot Instances, first complete the chapters below and then return here:
+
+[Start the workshop]({{< relref "/using_ec2_spot_instances_with_eks/010_prerequisites" >}})
+[Launch using eksctl]({{< relref "/using_ec2_spot_instances_with_eks/020_eksctl" >}})
+[Install Kubernetes Tools]({{< relref "/using_ec2_spot_instances_with_eks/030_k8s_tools" >}})
[Select Instance Types for Diversification]({{< relref "/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/selecting_instance_types.md" >}})
+
+{{% /notice %}}
+
+{{% notice info %}}
+
+If you have already completed the **EKS managed Spot workers** chapters and still want to explore self managed node groups with Spot Instances, then continue with this chapter.
+
+{{% /notice %}}
+
+In this section we will create self managed node groups following Spot best practices. To adhere to the best practice of instance diversification, we will include the instance types we identified in the [Select Instance Types for Diversification]({{< relref "/using_ec2_spot_instances_with_eks/040_eksmanagednodegroupswithspot/selecting_instance_types.md" >}}) chapter.
+
+Let's first create the configuration file:
+```
+cat <<EoF > ~/environment/add-ngs-spot.yaml
+apiVersion: eksctl.io/v1alpha5
+kind: ClusterConfig
+metadata:
+  name: eksworkshop-eksctl
+  region: $AWS_REGION
+nodeGroups:
+  - name: ng-spot-4vcpu-16gb
+    minSize: 0
+    maxSize: 4
+    desiredCapacity: 2
+    instancesDistribution:
+      instanceTypes: ["m4.xlarge", "m5.xlarge", "m5a.xlarge", "m5ad.xlarge", "m5d.xlarge", "m5dn.xlarge", "m5n.xlarge", "m5zn.xlarge", "m6i.xlarge", "t2.xlarge", "t3.xlarge", "t3a.xlarge"]
+      onDemandBaseCapacity: 0
+      onDemandPercentageAboveBaseCapacity: 0
+      spotAllocationStrategy: capacity-optimized
+    labels:
+      eks.amazonaws.com/capacityType: SPOT
+      intent: apps
+      type: self-managed-spot
+    taints:
+      spotInstance: "true:PreferNoSchedule"
+    tags:
+      k8s.io/cluster-autoscaler/node-template/label/eks.amazonaws.com/capacityType: SPOT
+      k8s.io/cluster-autoscaler/node-template/label/intent: apps
+      k8s.io/cluster-autoscaler/node-template/label/type: self-managed-spot
+      k8s.io/cluster-autoscaler/node-template/taint/spotInstance: "true:PreferNoSchedule"
+    iam:
+      withAddonPolicies:
+        autoScaler: true
+        cloudWatch: true
+        albIngress: true
+  - name: ng-spot-8vcpu-32gb
+    minSize: 0
+    maxSize: 2
+    desiredCapacity: 1
+    instancesDistribution:
+      instanceTypes: ["m4.2xlarge", "m5.2xlarge", "m5a.2xlarge", "m5ad.2xlarge", "m5d.2xlarge", "m5dn.2xlarge", "m5n.2xlarge", "m5zn.2xlarge", "m6i.2xlarge", "t2.2xlarge", "t3.2xlarge", "t3a.2xlarge"]
+      onDemandBaseCapacity: 0
+      onDemandPercentageAboveBaseCapacity: 0
+      spotAllocationStrategy: capacity-optimized
+    labels:
+      eks.amazonaws.com/capacityType: SPOT
+      intent: apps
+      type: self-managed-spot
+    taints:
+      spotInstance: "true:PreferNoSchedule"
+    tags:
+      k8s.io/cluster-autoscaler/node-template/label/eks.amazonaws.com/capacityType: SPOT
+      k8s.io/cluster-autoscaler/node-template/label/intent: apps
+      k8s.io/cluster-autoscaler/node-template/label/type: self-managed-spot
+      k8s.io/cluster-autoscaler/node-template/taint/spotInstance: "true:PreferNoSchedule"
+    iam:
+      withAddonPolicies:
+        autoScaler: true
+        cloudWatch: true
+        albIngress: true
+EoF
+```
+
+This will create an `add-ngs-spot.yaml` file that we will use to instruct eksctl to create the two node groups.
+
+```
+eksctl create nodegroup -f add-ngs-spot.yaml
+```
+
+{{% notice note %}}
+The creation of the workers will take about 3-4 minutes.
+{{% /notice %}}
+
+There are a few things to note in the configuration that we just used to create these node groups.
+
+ * The first node group has **xlarge** (4 vCPU and 16 GB) instance types with **minSize** 0, **maxSize** 4 and **desiredCapacity** 2.
+ * The second node group has **2xlarge** (8 vCPU and 32 GB) instance types with **minSize** 0, **maxSize** 2 and **desiredCapacity** 1.
+ * **onDemandBaseCapacity** and **onDemandPercentageAboveBaseCapacity** are both set to **0**, which implies all nodes in the node group will be **Spot Instances**.
+ * **spotAllocationStrategy** is set to **[Capacity Optimized](https://aws.amazon.com/about-aws/whats-new/2019/08/new-capacity-optimized-allocation-strategy-for-provisioning-amazon-ec2-spot-instances/)**. This will ensure the capacity we provision in our node groups is procured from the pools with a lower chance of being interrupted.
+ * We applied a **[Taint](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/)** `spotInstance: "true:PreferNoSchedule"`. **PreferNoSchedule** is used to indicate we prefer pods not to be scheduled on Spot Instances. This is a “preference” or “soft” version of **NoSchedule** – the system will try to avoid placing a pod that does not tolerate the taint on the node, but it is not required.
+ * Notice that we added 3 node labels per node:
+    * **eks.amazonaws.com/capacityType: SPOT**, to identify Spot nodes and use as [affinities](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/).
+    * **intent: apps**, to allow you to deploy stateless applications on nodes that have been labeled with the value **apps**.
+    * **type: self-managed-spot**, to identify self managed Spot nodes and use as [nodeSelectors](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector).
+
+ * Notice that we added 4 cluster autoscaler related tags to the node groups. These tags are used by cluster autoscaler when node groups scale down to 0 (and scale up from 0). Cluster autoscaler acts on the Auto Scaling groups belonging to the node groups, therefore it requires the same tags on the ASGs as well. eksctl adds these tags to the ASGs automatically when creating self managed node groups.
+
+{{% notice info %}}
+If you are wondering at this stage *Where is the Spot bidding price?*, you are missing some of the changes EC2 Spot Instances have had since 2017. Since November 2017, [EC2 Spot prices change infrequently](https://aws.amazon.com/blogs/compute/new-amazon-ec2-spot-pricing/) based on long-term supply and demand of spare capacity in each pool independently. You can still set a **maxPrice** in scenarios where you want to set a maximum budget. By default, *maxPrice* is set to the On-Demand price; regardless of the *maxPrice* value, Spot Instances will still be charged at the current Spot market price.
+{{% /notice %}}
+
+### Confirm the Nodes
+
+{{% notice tip %}}
+Aside from familiarizing yourself with the kubectl commands below to obtain the cluster information, you should also explore your cluster using **kube-ops-view** and find out the nodes that were just created.
+{{% /notice %}}
+
+Confirm that the new nodes joined the cluster correctly. You should see the nodes added to the cluster.
+
+```bash
+kubectl get nodes
+```
+
+You can use the node labels to identify the lifecycle of the nodes:
+
+```bash
+kubectl get nodes --label-columns=eks.amazonaws.com/capacityType --selector=type=self-managed-spot | grep SPOT
+```
+
+```
+ip-192-168-11-135.ap-southeast-1.compute.internal NotReady 13s v1.21.4-eks-033ce7e SPOT
+ip-192-168-78-145.ap-southeast-1.compute.internal NotReady 5s v1.21.4-eks-033ce7e SPOT
+ip-192-168-78-41.ap-southeast-1.compute.internal NotReady 12s v1.21.4-eks-033ce7e SPOT
+```
+
+You can use `kubectl describe node` with one of the Spot nodes to see the taints applied to the EC2 Spot Instances.
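+
+For example, the commands below pick one of the new nodes by the `type=self-managed-spot` label we applied above and print its taints (this is just one possible way to select a node; any of the Spot node names from the output above works too):
+
+```bash
+SPOT_NODE=$(kubectl get nodes --selector=type=self-managed-spot -o jsonpath='{.items[0].metadata.name}')
+kubectl describe node $SPOT_NODE | grep -A1 Taints
+```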
+ +![Spot Taints](/images/using_ec2_spot_instances_with_eks/spotworkers/spot-self-mng-taint.png) + +{{% notice note %}} +Explore your cluster using kube-ops-view and find out the nodes that have just been created. +{{% /notice %}} \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/jenkins/_index.md b/content/using_ec2_spot_instances_with_eks/080_jenkins/_index.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/jenkins/_index.md rename to content/using_ec2_spot_instances_with_eks/080_jenkins/_index.md diff --git a/content/using_ec2_spot_instances_with_eks/jenkins/autoscaling_nodes.md b/content/using_ec2_spot_instances_with_eks/080_jenkins/autoscaling_nodes.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/jenkins/autoscaling_nodes.md rename to content/using_ec2_spot_instances_with_eks/080_jenkins/autoscaling_nodes.md diff --git a/content/using_ec2_spot_instances_with_eks/jenkins/increasing_resilience.md b/content/using_ec2_spot_instances_with_eks/080_jenkins/increasing_resilience.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/jenkins/increasing_resilience.md rename to content/using_ec2_spot_instances_with_eks/080_jenkins/increasing_resilience.md diff --git a/content/using_ec2_spot_instances_with_eks/jenkins/jenkins_cleanup.md b/content/using_ec2_spot_instances_with_eks/080_jenkins/jenkins_cleanup.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/jenkins/jenkins_cleanup.md rename to content/using_ec2_spot_instances_with_eks/080_jenkins/jenkins_cleanup.md diff --git a/content/using_ec2_spot_instances_with_eks/jenkins/running_jobs.md b/content/using_ec2_spot_instances_with_eks/080_jenkins/running_jobs.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/jenkins/running_jobs.md rename to content/using_ec2_spot_instances_with_eks/080_jenkins/running_jobs.md diff --git a/content/using_ec2_spot_instances_with_eks/jenkins/setup_agents.md b/content/using_ec2_spot_instances_with_eks/080_jenkins/setup_agents.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/jenkins/setup_agents.md rename to content/using_ec2_spot_instances_with_eks/080_jenkins/setup_agents.md diff --git a/content/using_ec2_spot_instances_with_eks/jenkins/setup_jenkins.md b/content/using_ec2_spot_instances_with_eks/080_jenkins/setup_jenkins.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/jenkins/setup_jenkins.md rename to content/using_ec2_spot_instances_with_eks/080_jenkins/setup_jenkins.md diff --git a/content/using_ec2_spot_instances_with_eks/cleanup.md b/content/using_ec2_spot_instances_with_eks/200_cleanup.md similarity index 69% rename from content/using_ec2_spot_instances_with_eks/cleanup.md rename to content/using_ec2_spot_instances_with_eks/200_cleanup.md index 71f18ea5..c4da1bb3 100644 --- a/content/using_ec2_spot_instances_with_eks/cleanup.md +++ b/content/using_ec2_spot_instances_with_eks/200_cleanup.md @@ -1,7 +1,7 @@ --- title: "Cleanup" date: 2018-08-07T08:30:11-07:00 -weight: 100 +weight: 200 --- {{% notice note %}} @@ -21,36 +21,19 @@ kubectl delete -f monte-carlo-pi-service.yml helm delete kube-ops-view metrics-server ``` -## Removing Spot node groups -```bash -eksctl delete nodegroup -f spot_nodegroup_4vcpu_16gb.yml --approve -eksctl delete nodegroup -f spot_nodegroup_8vcpu_32gb.yml --approve +## Removing EKS managed node groups ``` - -This operation may take 3-5 minutes to complete. 
- -## (Optional) Removing Self managed node groups -If you have followed the optional path to create self managed node groups, run this command to delete the node group. -```bash -eksctl delete nodegroup -f spot_nodegroups.yml --approve +eksctl delete nodegroup -f add-mngs-spot.yaml --approve ``` - This operation may take 3-5 minutes to complete. -## Removing On-Demand node group -```bash -od_nodegroup=$(eksctl get nodegroup --cluster eksworkshop-eksctl | tail -n 1 | awk '{print $2}') -eksctl delete nodegroup --cluster eksworkshop-eksctl --name $od_nodegroup -``` - -This operation may take some time. Once that it completes you can proceed with the deletion of the cluster. ## Removing the cluster ``` -eksctl delete cluster --name eksworkshop-eksctl +eksctl delete cluster -f eksworkshop.yaml ``` -## Clean Cloud 9 +## Delete SSH Key Pair and Cloud 9 ``` aws ec2 delete-key-pair --key-name eksworkshop CLOUD_9_IDS=$(aws cloud9 list-environments | jq -c ".environmentIds | flatten(0)" | sed -E -e 's/\[|\]|\"|//g' | sed 's/,/ /g') diff --git a/content/using_ec2_spot_instances_with_eks/conclusion/_index.md b/content/using_ec2_spot_instances_with_eks/300_conclusion/_index.md similarity index 98% rename from content/using_ec2_spot_instances_with_eks/conclusion/_index.md rename to content/using_ec2_spot_instances_with_eks/300_conclusion/_index.md index 9539b489..27f8a349 100644 --- a/content/using_ec2_spot_instances_with_eks/conclusion/_index.md +++ b/content/using_ec2_spot_instances_with_eks/300_conclusion/_index.md @@ -1,7 +1,7 @@ --- title: "Conclusion" chapter: true -weight: 110 +weight: 300 --- # Conclusion diff --git a/content/using_ec2_spot_instances_with_eks/conclusion/conclusion.md b/content/using_ec2_spot_instances_with_eks/300_conclusion/conclusion.md similarity index 100% rename from content/using_ec2_spot_instances_with_eks/conclusion/conclusion.md rename to content/using_ec2_spot_instances_with_eks/300_conclusion/conclusion.md diff --git a/content/using_ec2_spot_instances_with_eks/deploy/tolerations_and_affinity.md b/content/using_ec2_spot_instances_with_eks/deploy/tolerations_and_affinity.md deleted file mode 100644 index a0d1bcdc..00000000 --- a/content/using_ec2_spot_instances_with_eks/deploy/tolerations_and_affinity.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: "Tolerations and Affinity" -date: 2018-08-07T08:30:11-07:00 -weight: 20 ---- - -## Adding Affinities - -Our next task before deployment is to add [affinities](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) to the configuration. - -In the previous section we created nodegroups with the label `intent` and values **control-apps** and **apps**. Managed node groups automatically create a label **eks.amazonaws.com/capacityType** and sets its value to *SPOT* and *ON_DEMAND* respectively to identify which nodes are Spot Instances and which are On-Demand Instances. For us to adhere to the criteria above we will need to add two affinity properties: - -- a *requiredDuringSchedulingIgnoredDuringExecution* affinity: also known as "hard" affinity that will limit our deployment to nodes label with **intent: apps** -- a *preferredDuringSchedulingIgnoredDuringExecution* affinity: also known as "soft" affinity that express our preference for nodes of a specific type. In this case Spot instances labeled with **eks.amazonaws.com/capacityType: SPOT**. 
- -Read about how affinities can be used to [assign pods to nodes](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) and modify the **monte-carlo-pi-service.yml** file accordingly. - -{{%expand "Show me a hint for implementing this." %}} -As per the [Assign Pods to Nodes](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity) documentation the objective is to add the following section to the `monte-carlo-pi-service.yml`. - -The following affinities must be added at the *spec.template.spec* level - -``` - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 1 - preference: - matchExpressions: - - key: eks.amazonaws.com/capacityType - operator: In - values: - - SPOT - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: intent - operator: In - values: - - apps -``` - -If you are still struggling with the implementation, the solution file is available here : **[monte-carlo-pi-service-final.yml](tolerations_and_affinity.files/monte-carlo-pi-service-final.yml)** - -{{% /expand %}} - -{{%attachments title="Related files" pattern="monte-carlo-pi-service-final.yml"/%}} - -## Adding Tolerations (Self Managed Spot Node Group only) - -When creating self managed node groups, we created two nodegroups that were tainted with -`spotInstance: "true:PreferNoSchedule"`. **PreferNoSchedule** is used to indicate we prefer pods not to be scheduled on Spot Instances. **NoSchedule** can also be used to enforce a hard discrimination as a taint. To overcome the `spotInstance: "true:PreferNoSchedule"` taint, we need to create a toleration in the deployment. Read about how [tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) are applied and modify the **monte-carlo-pi-service.yml** file accordingly. - -{{%expand "Show me a hint for implementing this." %}} -As per the [tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) documentation -the objective is to add the following section to the `monte-carlo-pi-service.yml`. 
- -The following toleration must be added at the *spec.template.spec* level - -``` - tolerations: - - key: "spotInstance" - operator: "Equal" - value: "true" - effect: "PreferNoSchedule" -``` - -{{% /expand %}} - -{{%attachments title="Related files" pattern="monte-carlo-pi-service-final-self-managed.yml"/%}} - diff --git a/content/using_ec2_spot_instances_with_eks/eksctl/create_eks_cluster_eksctl_command.md b/content/using_ec2_spot_instances_with_eks/eksctl/create_eks_cluster_eksctl_command.md deleted file mode 100644 index edf453d9..00000000 --- a/content/using_ec2_spot_instances_with_eks/eksctl/create_eks_cluster_eksctl_command.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -title: "Create EKS cluster Command" -chapter: false -disableToc: true -hidden: true ---- - - -```bash -eksctl create cluster \ - --version=1.20 \ - --name=eksworkshop-eksctl \ - --node-private-networking \ - --managed --nodes=2 \ - --alb-ingress-access \ - --region=${AWS_REGION} \ - --node-labels="intent=control-apps" \ - --asg-access -``` diff --git a/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.md b/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.md deleted file mode 100644 index 6abc101f..00000000 --- a/content/using_ec2_spot_instances_with_eks/eksctl/launcheks.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: "Launch EKS" -date: 2018-08-07T13:34:24-07:00 -weight: 20 ---- - - -{{% notice warning %}} -**DO NOT PROCEED** with this step unless you have [validated the IAM role]({{< relref "../prerequisites/update_workspaceiam.md#validate_iam" >}}) in use by the Cloud9 IDE. You will not be able to run the necessary kubectl commands in the later modules unless the EKS cluster is built using the IAM role. -{{% /notice %}} - -#### Challenge: -**How do I check the IAM role on the workspace?** - -{{%expand "Expand here to see the solution" %}} - -{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/prerequisites/validate_workspace_role.md" %}} - -If you do not see the correct role, please go back and **[validate the IAM role]({{< relref "../prerequisites/update_workspaceiam.md" >}})** for troubleshooting. - -If you do see the correct role, proceed to next step to create an EKS cluster. -{{% /expand %}} - - -### Create an EKS cluster - -The following command will create an eks cluster with the name `eksworkshop-eksctl`. It will also create a nodegroup with 2 on-demand instances. - -{{% insert-md-from-file file="using_ec2_spot_instances_with_eks/eksctl/create_eks_cluster_eksctl_command.md" %}} - -eksctl allows us to pass parameters to initialize the cluster. While initializing the cluster, eksctl does also allow us to create nodegroups. - -The managed nodegroup will have two m5.large nodes (m5.large is the default instance type used if no instance types are specified) and it will bootstrap with the label **intent=control-apps**. - -Amazon EKS adds the following Kubernetes label to all nodes in your managed node group: **eks.amazonaws.com/capacityType: ON_DEMAND**. You can use this label to schedule stateful or fault intolerant applications on On-Demand nodes. - -{{% notice info %}} -Launching EKS and all the dependencies will take approximately **15 minutes** -{{% /notice %}} - -The command above, created a **Managed Node Group**. [Amazon EKS managed node groups](https://docs.aws.amazon.com/eks/latest/userguide/managed-node-groups.html) automate the provisioning and lifecycle management of nodes. 
Managed Nodegroups use the latest [EKS-optimized AMIs](https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html). The node run in your AWS account provisioned as apart of an EC2 Auto Scaling group that is managed for you by Amazon EKS. This means EKS takes care of the lifecycle management and undifferentiated heavy lifting on operations such as node updates, handling of terminations, gracefully drain of nodes to ensure that your applications stay available. \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/eksctl/test.md b/content/using_ec2_spot_instances_with_eks/eksctl/test.md deleted file mode 100644 index 9d979992..00000000 --- a/content/using_ec2_spot_instances_with_eks/eksctl/test.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: "Test the Cluster" -date: 2018-08-07T13:36:57-07:00 -weight: 30 ---- -#### Test the cluster: -Confirm your Nodes, if we see our 2 nodes, we know we have authenticated correctly: - -``` -kubectl get nodes -``` - -Export the Managed Group Worker Role Name for use throughout the workshop. - -{{% notice tip %}} -Some of the optional exercises may require you to add extra IAM policies to the managed group role -for the nodes to get access to services like Cloudwatch, AppMesh, X-Ray. You can always com back to this section or the environment variable `$ROLE_NAME` to refer to the role. -{{% /notice %}} - -``` -NODE_GROUP_NAME=$(eksctl get nodegroup --cluster eksworkshop-eksctl -o json | jq -r '.[].Name') -ROLE_NAME=$(aws eks describe-nodegroup --cluster-name eksworkshop-eksctl --nodegroup-name $NODE_GROUP_NAME | jq -r '.nodegroup["nodeRole"]' | cut -f2 -d/) -echo "export ROLE_NAME=${ROLE_NAME}" >> ~/.bash_profile -``` - - - - -#### Congratulations! - -You now have a fully working Amazon EKS Cluster that is ready to use! - -{{% notice tip %}} -Explore the Elastic Kubernetes Service (EKS) section in the AWS Console and the properties of the newly created EKS cluster. -{{% /notice %}} - -{{% notice warning %}} -You might see **Error loading Namespaces** while exploring the cluster on the AWS Console. It could be because the console user role doesnt have necessary permissions on the EKS cluster's RBAC configuration in the control plane. Please expand and follow the below instructions to add necessary permissions. -{{% /notice %}} - -{{%expand "Click to reveal detailed instructions" %}} - -### Add your IAM role Arn as cluster-admin on RBAC - -Get the ARN for your IAM role, it should look something like - -``` -arn:aws:iam:::role/ -``` - -Edit the ConfigMap **aws-auth** using the below command - -``` -kubectl edit configmap -n kube-system aws-auth -``` - -Add the below snippet at the end, that will add the IAM role to the **masters** group on EKS cluster RBAC, thereby assigning a **cluster-admin** role on the cluster. 
Please refer the documentation [here](https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html) - -Please make sure to replace the `` and `` with your AWS Account Number and IAM Role Name respectively - -``` - - groups: - - system:masters - rolearn: arn:aws:iam:::role/ - username: -``` - -{{% /expand%}} diff --git a/content/using_ec2_spot_instances_with_eks/helm_root/_index.md b/content/using_ec2_spot_instances_with_eks/helm_root/_index.md deleted file mode 100644 index 347dd274..00000000 --- a/content/using_ec2_spot_instances_with_eks/helm_root/_index.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -title: "Helm" -chapter: true -weight: 30 ---- - -# Kubernetes Helm - -[Helm](https://helm.sh/) is a package manager for Kubernetes that packages multiple Kubernetes resources into a single logical deployment unit called **Chart**. - -Helm is a tool that streamlines installing and managing Kubernetes applications. Think of it like apt/yum/homebrew for Kubernetes. We will use Helm during the workshop to install other components out from the list of available charts. - -Helm helps you to: - -- Achieve a simple (one command) and repeatable deployment -- Manage application dependency, using specific versions of other application and services -- Manage multiple deployment configurations: test, staging, production and others -- Execute post/pre deployment jobs during application deployment -- Update/rollback and test application deployments - -![Helm Logo](/images/using_ec2_spot_instances_with_eks/helm/helm-logo.svg) - diff --git a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/_index.md b/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/_index.md deleted file mode 100644 index 2ee5e249..00000000 --- a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/_index.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: "Creating Spot managed node groups on EKS" -chapter: true -weight: 40 -draft: false ---- - -# Using Spot Instances with EKS (via Spot managed node groups) - -In this module, you will learn how to provision, manage, and maintain your Kubernetes clusters with Amazon EKS Spot managed node groups on Spot Instances to optimize cost and scale. - -![Title Image](/images/using_ec2_spot_instances_with_eks/spotworkers/eks_spot_managed_architecture.png) diff --git a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/spotlifecycle.md b/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/spotlifecycle.md deleted file mode 100644 index d9011d9d..00000000 --- a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/spotlifecycle.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: "Spot configuration and lifecycle" -date: 2021-04-07T12:00:00-00:00 -weight: 40 -draft: false ---- - -### View the Spot Managed Node Group Configuration - -Use the AWS Management Console to inspect the Spot managed node group deployed in your Kubernetes cluster. Select **Elastic Kubernetes Service**, click on **Clusters**, and then on **eksworkshop-eksctl** cluster. Select the **Configuration** tab and **Compute** sub tab. You can see 3 node groups created - one On-Demand node group and two Spot node groups. - -Click on **dev-4vcpu-16gb-spot** group and you can see the instance types set from the create command. - -Click on the Auto Scaling Group name in the **Details** tab. Scroll to the Purchase options and instance types settings. 
Note how Spot best practices are applied out of the box: - -* **Capacity Optimized** allocation strategy, which will launch Spot Instances from the most-available spare capacity pools. This results in minimizing the Spot Interruptions. -* **Capacity Rebalance** helps EKS managed node groups manage the lifecycle of the Spot Instance by proactively replacing instances that are at higher risk of being interrupted. Node groups use Auto Scaling Group's Capacity Rebalance feature to launch replacement nodes in response to Rebalance Recommendation notice, thus proactively maintaining desired node capacity. - -![Spot Best Practices](/images/using_ec2_spot_instances_with_eks/spotworkers/asg_spot_best_practices.png) - -### Interruption Handling in Spot Managed Node Groups - -To handle Spot interruptions, you do not need to install any extra automation tools on the cluster such as the AWS Node Termination Handler. The managed node group handles Spot interruptions for you in the following way: the underlying EC2 Auto Scaling Group is opted-in to Capacity Rebalancing, which means that when one of the Spot Instances in your node group is at elevated risk of interruption and gets an EC2 instance rebalance recommendation, it will attempt to launch a replacement instance. The more instance types you configure in the managed node group, the more chances EC2 Auto Scaling Group has of launching a replacement Spot Instance. -sw replacement Spot node and waits until it successfully joins the cluster. -* When a replacement Spot node is bootstrapped and in the Ready state on Kubernetes, Amazon EKS cordons and drains the Spot node that received the rebalance recommendation. Cordoning the Spot node ensures that the node is marked as ‘unschedulable’ and kube-scheduler will not schedule any new pods on it. It also removes it from its list of healthy, active Spot nodes. [Draining](https://kubernetes.io/docs/tasks/administer-cluster/safely-drain-node/) the Spot node ensures that running pods are evicted gracefully. -* If a Spot two-minute interruption notice arrives before the replacement Spot node is in a Ready state, Amazon EKS starts draining the Spot node that received the rebalance recommendation. - -This process avoids waiting for new capacity to be available when there is a termination notice, and instead procures capacity in advance, limiting the time that pods might be left pending. - -![Spot Rebalance Recommendation](/images/using_ec2_spot_instances_with_eks/spotworkers/rebalance_recommendation.png) \ No newline at end of file diff --git a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/why_select_managed_node_group.md b/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/why_select_managed_node_group.md deleted file mode 100644 index aafa7d7f..00000000 --- a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/why_select_managed_node_group.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -title: "Advantages of EKS Spot Managed Node Group" -date: 2018-08-07T11:05:19-07:00 -weight: 10 -draft: false ---- - -### Why EKS Managed Node Groups? - -[Amazon EKS managed node groups](https://docs.aws.amazon.com/eks/latest/userguide/managed-node-groups.html) automate the provisioning and lifecycle management of nodes (Amazon EC2 instances) for Amazon EKS Kubernetes clusters. This greatly simplifies operational activities such as rolling updates for new AMIs or Kubernetes version deployments. 
- -Advantages of running Amazon EKS managed node groups: - -* Create, automatically update, or terminate nodes with a single operation using the Amazon EKS console, eksctl, AWS CLI, AWS API, or infrastructure as code tools including AWS CloudFormation. -* Provisioned nodes run using the latest Amazon EKS optimized AMIs. -* Nodes provisioned under managed node group are automatically tagged for auto-discovery by the Kubernetes cluster autoscaler via node labels: **k8s.io/cluster-autoscaler/enabled=true** and **k8s.io/cluster-autoscaler/** -* Node updates and terminations automatically and gracefully drain nodes to ensure that your applications stay available. -* No additional costs to use Amazon EKS managed node groups, pay only for the AWS resources provisioned. - -### Why EKS Spot Managed Node Groups? - -**Amazon EKS Spot managed node groups** enhances the managed node group experience in using EKS managed node groups to easily provision and manage EC2 Spot Instances. EKS managed node group will configure and launch an EC2 Autoscaling group of Spot Instances following Spot best practices and draining Spot worker nodes automatically before the instances are interrupted by AWS. This enables you to take advantage of the steep savings that Spot Instances provide for your interruption tolerant containerized applications. - -In addition to the advantages of managed node groups, Amazon EKS Spot managed node groups have these additional advantages: - -* Allocation strategy to provision Spot capacity is set to **Capacity Optimized** to ensure that Spot nodes are provisioned in the optimal Spot capacity pools. -* Specify **multiple instance types** during EKS Spot managed Node Group creation, to increase the number of Spot capacity pools available for allocating capacity. -* Nodes provisioned under Spot managed node group are automatically tagged with capacity type: **eks.amazonaws.com/capacityType: SPOT**. You can use this label to schedule fault tolerant applications on Spot nodes. -* Amazon EC2 Spot **Capacity Rebalancing** enabled to ensure Amazon EKS can gracefully drain and rebalance your Spot nodes to minimize application disruption when a Spot node is at elevated risk of interruption. 
diff --git a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/workers_eksctl.files/spot_nodegroup_4vcpu_16gb.yml b/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/workers_eksctl.files/spot_nodegroup_4vcpu_16gb.yml deleted file mode 100644 index a3ce9be6..00000000 --- a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/workers_eksctl.files/spot_nodegroup_4vcpu_16gb.yml +++ /dev/null @@ -1,58 +0,0 @@ -apiVersion: eksctl.io/v1alpha5 -kind: ClusterConfig -managedNodeGroups: -- amiFamily: AmazonLinux2 - desiredCapacity: 2 - disableIMDSv1: false - disablePodIMDS: false - iam: - withAddonPolicies: - albIngress: false - appMesh: false - appMeshPreview: false - autoScaler: false - certManager: false - cloudWatch: false - ebs: false - efs: false - externalDNS: false - fsx: false - imageBuilder: false - xRay: false - instanceTypes: - - m4.xlarge - - m5.xlarge - - m5a.xlarge - - m5ad.xlarge - - m5d.xlarge - - t2.xlarge - - t3.xlarge - - t3a.xlarge - labels: - alpha.eksctl.io/cluster-name: eksworkshop-eksctl - alpha.eksctl.io/nodegroup-name: dev-4vcpu-16gb-spot - intent: apps - maxSize: 5 - minSize: 1 - name: dev-4vcpu-16gb-spot - privateNetworking: false - securityGroups: - withLocal: null - withShared: null - spot: true - ssh: - allow: false - enableSsm: false - publicKeyPath: "" - tags: - alpha.eksctl.io/nodegroup-name: dev-4vcpu-16gb-spot - alpha.eksctl.io/nodegroup-type: managed - k8s.io/cluster-autoscaler/node-template/label/intent: apps - volumeIOPS: 3000 - volumeSize: 80 - volumeThroughput: 125 - volumeType: gp3 -metadata: - name: eksworkshop-eksctl - region: $AWS_REGION - version: auto diff --git a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/workers_eksctl.files/spot_nodegroup_8vcpu_32gb.yml b/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/workers_eksctl.files/spot_nodegroup_8vcpu_32gb.yml deleted file mode 100644 index b5f43a1b..00000000 --- a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/workers_eksctl.files/spot_nodegroup_8vcpu_32gb.yml +++ /dev/null @@ -1,58 +0,0 @@ -apiVersion: eksctl.io/v1alpha5 -kind: ClusterConfig -managedNodeGroups: -- amiFamily: AmazonLinux2 - desiredCapacity: 2 - disableIMDSv1: false - disablePodIMDS: false - iam: - withAddonPolicies: - albIngress: false - appMesh: false - appMeshPreview: false - autoScaler: false - certManager: false - cloudWatch: false - ebs: false - efs: false - externalDNS: false - fsx: false - imageBuilder: false - xRay: false - instanceTypes: - - m4.2xlarge - - m5.2xlarge - - m5a.2xlarge - - m5ad.2xlarge - - m5d.2xlarge - - t2.2xlarge - - t3.2xlarge - - t3a.2xlarge - labels: - alpha.eksctl.io/cluster-name: eksworkshop-eksctl - alpha.eksctl.io/nodegroup-name: dev-8vcpu-32gb-spot - intent: apps - maxSize: 5 - minSize: 1 - name: dev-8vcpu-32gb-spot - privateNetworking: false - securityGroups: - withLocal: null - withShared: null - spot: true - ssh: - allow: false - enableSsm: false - publicKeyPath: "" - tags: - alpha.eksctl.io/nodegroup-name: dev-8vcpu-32gb-spot - alpha.eksctl.io/nodegroup-type: managed - k8s.io/cluster-autoscaler/node-template/label/intent: apps - volumeIOPS: 3000 - volumeSize: 80 - volumeThroughput: 125 - volumeType: gp3 -metadata: - name: eksworkshop-eksctl - region: $AWS_REGION - version: auto diff --git a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/workers_eksctl.md b/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/workers_eksctl.md deleted file mode 100644 index 00e1a903..00000000 --- 
a/content/using_ec2_spot_instances_with_eks/spotmanagednodegroups/workers_eksctl.md +++ /dev/null @@ -1,120 +0,0 @@ ---- -title: "Adding Spot Workers with eksctl" -date: 2018-08-07T11:05:19-07:00 -weight: 30 -draft: false ---- - -In this section we will deploy the instance types we selected and request nodegroups that adhere to Spot diversification best practices. For that we will use **[eksctl create nodegroup](https://eksctl.io/usage/managing-nodegroups/)** to generate the ClusterConfig file and save the output. The ClusterConfig file will be edited with additional configuration and use it to add the new nodes to the cluster. - -Let's first create the configuration file: - -```bash -eksctl create nodegroup \ - --cluster=eksworkshop-eksctl \ - --region=$AWS_REGION \ - --dry-run \ - --managed \ - --spot \ - --name=dev-4vcpu-16gb-spot \ - --nodes=2 \ - --nodes-min=1 \ - --nodes-max=5 \ - --node-labels="intent=apps" \ - --tags="k8s.io/cluster-autoscaler/node-template/label/intent=apps" \ - --instance-types m4.xlarge,m5.xlarge,m5a.xlarge,m5ad.xlarge,m5d.xlarge,t2.xlarge,t3.xlarge,t3a.xlarge \ - --asg-access \ - > ~/environment/spot_nodegroup_4vcpu_16gb.yml - -eksctl create nodegroup \ - --cluster=eksworkshop-eksctl \ - --region=$AWS_REGION \ - --dry-run \ - --managed \ - --spot \ - --name=dev-8vcpu-32gb-spot \ - --nodes=2 \ - --nodes-min=1 \ - --nodes-max=5 \ - --node-labels="intent=apps" \ - --tags="k8s.io/cluster-autoscaler/node-template/label/intent=apps" \ - --instance-types m4.2xlarge,m5.2xlarge,m5a.2xlarge,m5ad.2xlarge,m5d.2xlarge,t2.2xlarge,t3.2xlarge,t3a.2xlarge \ - --asg-access \ - > ~/environment/spot_nodegroup_8vcpu_32gb.yml -``` - -This will create 2 files, `spot_nodegroups_4vcpu_16gb.yml` and `spot_nodegroups_8vcpu_32gb.yml`, that we will use to instruct eksctl to create two nodegroups, both with a diversified configuration. - -Let's edit the 2 configuration files before using it to create the node groups: - -1. Remove the section **instanceSelector:**. - -If you are still struggling with the implementation, the solution files are available here - -Note: Remember to change the region *$AWS_REGION* to *your region* before creating the node groups. - -{{%attachments title="Related files" pattern=".yml"/%}} - -Create the two node groups: - -```bash -eksctl create nodegroup -f spot_nodegroup_4vcpu_16gb.yml -``` - -```bash -eksctl create nodegroup -f spot_nodegroup_8vcpu_32gb.yml -``` - -{{% notice note %}} -The creation of each node group will take about 3 minutes. -{{% /notice %}} - -There are a few things to note in the configuration that we just used to create these nodegroups. - - * The configuration setup the nodes under the **managedNodeGroups** section. This is to indicate the node group being created is a managed node group. - * Notice that the configuration setup a **minSize** to 1, **maxSize** to 5 and **desiredCapacity** to 2. Spot managed nodegroups are created with 2 nodes, but minimum number of instances for managed node group is 1 instance. - * The configuration setup a configuration **spot: true** to indicate that the node group being created is a Spot managed node group, which implies all nodes in the nodegroup would be **Spot Instances**. - * We did also add an extra label **intent: apps**. We will use this label to force a hard partition of the cluster for our applications. During this workshop we will deploy control applications on nodes that have been labeled with **intent: control-apps** while our applications get deployed to nodes labeled with **intent: apps**. 
- * Notice that the configuration setup 2 node label under **labels** - **alpha.eksctl.io/cluster-name: : eksworkshop-eksctl** to indicate the node label belongs to **eksworkshop-eksctl** cluster, and **alpha.eksctl.io/nodegroup-name: dev-4vcpu-16gb-spot** node group. - -{{% notice info %}} -If you are wondering at this stage: *Where is spot bidding price ?* you are missing some of the changes EC2 Spot Instances had since 2017. Since November 2017 [EC2 Spot price changes infrequently](https://aws.amazon.com/blogs/compute/new-amazon-ec2-spot-pricing/) based on long term supply and demand of spare capacity in each pool independently. You can still set up a **maxPrice** in scenarios where you want to set maximum budget. By default *maxPrice* is set to the On-Demand price; Regardless of what the *maxPrice* value, Spot Instances will still be charged at the current spot market price. -{{% /notice %}} - -### Confirm the Nodes - -{{% notice tip %}} -Aside from familiarizing yourself with the kubectl commands below to obtain the cluster information, you should also explore your cluster using **kube-ops-view** and find out the nodes that were just created. -{{% /notice %}} - -Confirm that the new nodes joined the cluster correctly. You should see the nodes added to the cluster. - -```bash -kubectl get nodes -``` - -Managed node groups automatically create a label **eks.amazonaws.com/capacityType** to identify which nodes are Spot Instances and which are On-Demand Instances so that we can schedule the appropriate workloads to run on Spot Instances. You can use this node label to identify the lifecycle of the nodes - -```bash -kubectl get nodes \ - --label-columns=eks.amazonaws.com/capacityType \ - --selector=eks.amazonaws.com/capacityType=SPOT -``` - -The output of this command should return nodes running on Spot Instances. The output of the command shows the CAPACITYTYPE for the current nodes is set to SPOT. - -![Spot Output](/images/using_ec2_spot_instances_with_eks/spotworkers/spot_get_spot.png) - -Now we will show all nodes running on On Demand Instances. The output of the command shows the CAPACITYTYPE for the current nodes is set to ON_DEMAND. - -```bash -kubectl get nodes \ - --label-columns=eks.amazonaws.com/capacityType \ - --selector=eks.amazonaws.com/capacityType=ON_DEMAND -``` -![OnDemand Output](/images/using_ec2_spot_instances_with_eks/spotworkers/spot_get_od.png) - -{{% notice note %}} -Explore your cluster using kube-ops-view and find out the nodes that have just been created. -{{% /notice %}} - diff --git a/content/using_ec2_spot_instances_with_eks/spotworkers/_index.md b/content/using_ec2_spot_instances_with_eks/spotworkers/_index.md deleted file mode 100644 index 2f931519..00000000 --- a/content/using_ec2_spot_instances_with_eks/spotworkers/_index.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: "(Optional) Creating Spot self managed nodegroups on EKS" -chapter: true -weight: 50 -draft: false ---- - -# (Optional) Using Spot Instances with EKS self managed node groups - -In this module, you will learn how to provision, manage, and maintain your Kubernetes clusters with Amazon EKS self managed node groups at any scale on Spot Instances to optimize cost and scale. 
- -![Title Image](/images/using_ec2_spot_instances_with_eks/spotworkers/eks_self_managed_architecture.png) diff --git a/content/using_ec2_spot_instances_with_eks/spotworkers/deployhandler.md b/content/using_ec2_spot_instances_with_eks/spotworkers/deployhandler.md deleted file mode 100644 index ce1f34a0..00000000 --- a/content/using_ec2_spot_instances_with_eks/spotworkers/deployhandler.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: "Deploy The Node Termination Handler" -date: 2018-08-07T12:32:40-07:00 -weight: 40 -draft: false ---- - -When users requests On-Demand instances from a pool to the point that the pool is depleted, the system will select a set of Spot Instances from the pool to be terminated. A Spot Instance pool is a set of unused EC2 instances with the same instance type (for example, m5.large), operating system, Availability Zone, and network platform. The Spot Instance is sent an interruption notice two minutes ahead to gracefully wrap up things. - -We will deploy a pod on each Spot Instance to detect the instance termination notification signal so that we can both terminate gracefully any pod that was running on that node, drain from load balancers and redeploy applications elsewhere in the cluster. - -AWS Node Termination Handler does far more than just capture EC2 Spot Instance notification for terminations. There are other events such as [Scheduled Maintenance Events](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/monitoring-instances-status-check_sched.html) that are taken into consideration. AWS Node Termination handler does also offer a Webhook that can be used to integrate with other applications to monitor and instrument this events. You can find more information about **[AWS Node Termination Handler following this link](https://github.com/aws/aws-node-termination-handler)** - -The Helm chart we will use to deploy AWS Node Termination Handler on each Spot Instance uses a [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/). This will monitor the EC2 meta-data service on each of the EC2 Spot Instances to capture EC2 interruption notices. - -Within the Node Termination Handler DaemonSet, the workflow can be summarized as: - -* Identify that a Spot Instance is being reclaimed. -* Use the 2-minute notification window to gracefully prepare the node for termination. -* [**Taint**](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) the node and cordon it off to prevent new pods from being placed. -* [**Drain**](https://kubernetes.io/docs/tasks/administer-cluster/safely-drain-node/) connections on the running pods. -* Replace the pods on remaining nodes to maintain the desired capacity. - -By default, **[aws-node-termination-handler](https://github.com/aws/aws-node-termination-handler)** will run on all of your nodes (on-demand and spot). -This also is our recommendation. Remember the termination handler does also handle maintenance events that can impact OnDemand instances! - - -``` -helm repo add eks https://aws.github.io/eks-charts -helm install aws-node-termination-handler \ - --namespace kube-system \ - --version 0.12.0 \ - eks/aws-node-termination-handler -``` - -Verify that the pods are running on all nodes: -``` -kubectl get daemonsets --all-namespaces -``` - -Use **kube-ops-view** to confirm *AWS Node Termination Handler* DaemonSet has been deployed to all nodes. 
- - diff --git a/content/using_ec2_spot_instances_with_eks/spotworkers/selecting_instance_types.md b/content/using_ec2_spot_instances_with_eks/spotworkers/selecting_instance_types.md deleted file mode 100644 index 430c2da5..00000000 --- a/content/using_ec2_spot_instances_with_eks/spotworkers/selecting_instance_types.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: "Selecting Instance Types" -date: 2018-08-07T11:05:19-07:00 -weight: 10 -draft: false ---- -### Selecting instances for Spot Instance Diversification - -[See "Selecting Instance Types" under Spot managed node groups]({{< relref "/using_ec2_spot_instances_with_eks/spotmanagednodegroups/selecting_instance_types.md" >}}) - -Once you finish selecting instance types, proceed to "Adding Spot Workers with eksctl" section to continue creating Self Managed Spot node groups. diff --git a/content/using_ec2_spot_instances_with_eks/spotworkers/workers_eksctl.md b/content/using_ec2_spot_instances_with_eks/spotworkers/workers_eksctl.md deleted file mode 100644 index 817947e8..00000000 --- a/content/using_ec2_spot_instances_with_eks/spotworkers/workers_eksctl.md +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: "Adding Spot Workers with eksctl" -date: 2018-08-07T11:05:19-07:00 -weight: 30 -draft: false ---- - -In this section we will deploy the instance types we selected and request nodegroups that adhere to Spot diversification best practices. For that we will use **[eksctl create nodegroup](https://eksctl.io/usage/managing-nodegroups/)** and eksctl configuration files to add the new nodes to the cluster. - -Let's first create the configuration file: -``` -cat < ~/environment/spot_nodegroups.yml -apiVersion: eksctl.io/v1alpha5 -kind: ClusterConfig -metadata: - name: eksworkshop-eksctl - region: $AWS_REGION -nodeGroups: - - name: dev-4vcpu-16gb-spot - minSize: 0 - maxSize: 5 - desiredCapacity: 1 - instancesDistribution: - instanceTypes: ["m5.xlarge", "m5d.xlarge", "m4.xlarge","t3.xlarge","t3a.xlarge","m5a.xlarge","t2.xlarge"] - onDemandBaseCapacity: 0 - onDemandPercentageAboveBaseCapacity: 0 - spotAllocationStrategy: capacity-optimized - labels: - lifecycle: Ec2Spot - intent: apps - aws.amazon.com/spot: "true" - taints: - spotInstance: "true:PreferNoSchedule" - tags: - k8s.io/cluster-autoscaler/node-template/label/lifecycle: Ec2Spot - k8s.io/cluster-autoscaler/node-template/label/intent: apps - k8s.io/cluster-autoscaler/node-template/label/aws.amazon.com/spot: "true" - k8s.io/cluster-autoscaler/node-template/taint/spotInstance: "true:PreferNoSchedule" - iam: - withAddonPolicies: - autoScaler: true - cloudWatch: true - albIngress: true - - name: dev-8vcpu-32gb-spot - minSize: 0 - maxSize: 5 - desiredCapacity: 1 - instancesDistribution: - instanceTypes: ["m5.2xlarge", "m5d.2xlarge", "m4.2xlarge","t3.2xlarge","t3a.2xlarge","m5a.2xlarge","t2.2xlarge"] - onDemandBaseCapacity: 0 - onDemandPercentageAboveBaseCapacity: 0 - spotAllocationStrategy: capacity-optimized - labels: - lifecycle: Ec2Spot - intent: apps - aws.amazon.com/spot: "true" - taints: - spotInstance: "true:PreferNoSchedule" - tags: - k8s.io/cluster-autoscaler/node-template/label/lifecycle: Ec2Spot - k8s.io/cluster-autoscaler/node-template/label/intent: apps - k8s.io/cluster-autoscaler/node-template/label/aws.amazon.com/spot: "true" - k8s.io/cluster-autoscaler/node-template/taint/spotInstance: "true:PreferNoSchedule" - iam: - withAddonPolicies: - autoScaler: true - cloudWatch: true - albIngress: true -EoF -``` - -This will create a `spot_nodegroups.yml` file that we will use to instruct eksctl to 
create two nodegroups, both with a diversified configuration. - -``` -eksctl create nodegroup -f spot_nodegroups.yml -``` - -{{% notice note %}} -The creation of the workers will take about 3 minutes. -{{% /notice %}} - -There are a few things to note in the configuration that we just used to create these nodegroups. - - * We did set up **minSize** to 0, **maxSize** to 5 and **desiredCapacity** to 1. Nodegroups can be scaled down to 0. - * We did set up **onDemandBaseCapacity** and **onDemandPercentageAboveBaseCapacity** both to **0**. which implies all nodes in the nodegroup would be **Spot instances**. - * We did set up a **lifecycle: Ec2Spot** label so we can identify Spot nodes and use [affinities](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) and [nodeSelectors](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector) later on. - * We did specify **spotAllocationStrategy** pointing it to use **[Capacity Optimized](https://aws.amazon.com/about-aws/whats-new/2019/08/new-capacity-optimized-allocation-strategy-for-provisioning-amazon-ec2-spot-instances/)**. This will ensure the capacity we provision in our nodegroups is procured from the pools that will have less chances of being interrupted. - * We did also add an extra label **intent: apps**. We will use this label to force a hard partition - of the cluster for our applications. During this workshop we will deploy control applications on - nodes that have been labeled with **intent: control-apps** while our applications get deployed to nodes labeled with **intent: apps**. - * We are also applying a **[Taint](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/)** using `spotInstance: "true:PreferNoSchedule"`. **PreferNoSchedule** is used to indicate we prefer pods not be scheduled on Spot Instances. This is a “preference” or “soft” version of **NoSchedule** – the system will try to avoid placing a pod that does not tolerate the taint on the node, but it is not required. - * We did apply **k8s.io/cluster-autoscaler/node-template/label** and **k8s.io/cluster-autoscaler/node-template/taint** tags to the nodegroups. [This tags are used by cluster autoscaler](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-can-i-scale-a-node-group-to-0) when nodegroups scale down to 0. They ensure cluster autoscaler considers tolerations and placement preferences of pending pods on nodegroups of 0 size. - -{{% notice info %}} -If you are wondering at this stage: *Where is spot bidding price ?* you are missing some of the changes EC2 Spot Instances had since 2017. Since November 2017 [EC2 Spot price changes infrequently](https://aws.amazon.com/blogs/compute/new-amazon-ec2-spot-pricing/) based on long term supply and demand of spare capacity in each pool independently. You can still set up a **maxPrice** in scenarios where you want to set maximum budget. By default *maxPrice* is set to the On-Demand price; Regardless of what the *maxPrice* value, Spot Instances will still be charged at the current spot market price. -{{% /notice %}} - -### Confirm the Nodes - -{{% notice tip %}} -Aside from familiarizing yourself with the kubectl commands below to obtain the cluster information, you should also explore your cluster using **kube-ops-view** and find out the nodes that were just created. -{{% /notice %}} - -Confirm that the new nodes joined the cluster correctly. You should see the nodes added to the cluster. 
-
-### Confirm the Nodes
-
-{{% notice tip %}}
-Aside from familiarizing yourself with the kubectl commands below to obtain the cluster information, you should also explore your cluster using **kube-ops-view** and find the nodes that were just created.
-{{% /notice %}}
-
-Confirm that the new nodes joined the cluster correctly. You should see the nodes added to the cluster.
-
-```bash
-kubectl get nodes
-```
-
-You can use the node labels to identify the lifecycle of the nodes:
-
-```bash
-kubectl get nodes --show-labels --selector=lifecycle=Ec2Spot | grep Ec2Spot
-```
-
-The output of this command should return the **Ec2Spot** nodes. At the end of each node's output you should see the label **lifecycle=Ec2Spot**.
-
-![Spot Output](/images/using_ec2_spot_instances_with_eks/spotworkers/spot_get_spot.png)
-
-Now we will show all nodes with the label **lifecycle=OnDemand**. The output of this command should return the On-Demand nodes (the ones that we labeled when creating the cluster).
-
-```bash
-kubectl get nodes --show-labels --selector=lifecycle=OnDemand | grep OnDemand
-```
-
-![OnDemand Output](/images/using_ec2_spot_instances_with_eks/spotworkers/spot_get_od.png)
-
-You can run `kubectl describe nodes` against one of the Spot nodes to see the taints applied to the EC2 Spot Instances.
-
-![Spot Taints](/images/using_ec2_spot_instances_with_eks/spotworkers/instance_taints.png)
-
-{{% notice note %}}
-Explore your cluster using kube-ops-view and find the nodes that have just been created.
-{{% /notice %}}
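If you prefer a single command over scanning the `kubectl describe` output, a query along these lines should print each Spot node together with the key and effect of its taints; the column names are arbitrary:

```bash
# Print each Spot node with the key and effect of any taints applied to it
kubectl get nodes --selector=lifecycle=Ec2Spot \
  -o custom-columns='NAME:.metadata.name,TAINT-KEY:.spec.taints[*].key,TAINT-EFFECT:.spec.taints[*].effect'
```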
diff --git a/static/images/running-emr-spark-apps-on-spot/emrinstancefleetsnetwork.png b/static/images/running-emr-spark-apps-on-spot/emrinstancefleetsnetwork.png
index 375bd312..36df46f2 100644
Binary files a/static/images/running-emr-spark-apps-on-spot/emrinstancefleetsnetwork.png and b/static/images/running-emr-spark-apps-on-spot/emrinstancefleetsnetwork.png differ
diff --git a/static/images/using_ec2_spot_instances_with_eks/prerequisites/cloud9-role.png b/static/images/using_ec2_spot_instances_with_eks/prerequisites/cloud9-role.png
new file mode 100644
index 00000000..611033a5
Binary files /dev/null and b/static/images/using_ec2_spot_instances_with_eks/prerequisites/cloud9-role.png differ
diff --git a/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-asg-20.png b/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-asg-20.png
new file mode 100644
index 00000000..bd1d9f46
Binary files /dev/null and b/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-asg-20.png differ
diff --git a/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-asg-up.png b/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-asg-up.png
deleted file mode 100644
index abdcca2a..00000000
Binary files a/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-asg-up.png and /dev/null differ
diff --git a/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-to-20replicas.png b/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-to-20replicas.png
new file mode 100644
index 00000000..0eaeb00e
Binary files /dev/null and b/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-to-20replicas.png differ
diff --git a/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-to-3-replicas.png b/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-to-3-replicas.png
new file mode 100644
index 00000000..4da909d6
Binary files /dev/null and b/static/images/using_ec2_spot_instances_with_eks/scaling/scaling-to-3-replicas.png differ
diff --git a/static/images/using_ec2_spot_instances_with_eks/spotworkers/Screenshot 2021-10-21 at 11.09.29 AM.png b/static/images/using_ec2_spot_instances_with_eks/spotworkers/Screenshot 2021-10-21 at 11.09.29 AM.png
new file mode 100644
index 00000000..0cff3107
Binary files /dev/null and b/static/images/using_ec2_spot_instances_with_eks/spotworkers/Screenshot 2021-10-21 at 11.09.29 AM.png differ
diff --git a/static/images/using_ec2_spot_instances_with_eks/spotworkers/asg_spot_best_practices.png b/static/images/using_ec2_spot_instances_with_eks/spotworkers/asg_spot_best_practices.png
index 3226bab5..0cff3107 100644
Binary files a/static/images/using_ec2_spot_instances_with_eks/spotworkers/asg_spot_best_practices.png and b/static/images/using_ec2_spot_instances_with_eks/spotworkers/asg_spot_best_practices.png differ
diff --git a/static/images/using_ec2_spot_instances_with_eks/spotworkers/spot-self-mng-taint.png b/static/images/using_ec2_spot_instances_with_eks/spotworkers/spot-self-mng-taint.png
new file mode 100644
index 00000000..c55bc545
Binary files /dev/null and b/static/images/using_ec2_spot_instances_with_eks/spotworkers/spot-self-mng-taint.png differ