From d6747a49e6987e936ab25bc04109d32ae7ce248a Mon Sep 17 00:00:00 2001
From: Mike Nomitch
Date: Thu, 23 Jun 2022 16:58:54 -0700
Subject: [PATCH] Initial commit

---
 .gitignore                                |   5 +
 README.md                                 |  11 +
 aws_cluster.tf                            | 384 ++++++++++++++++++
 provider.tf                               |   3 +
 security.tf                               | 135 ++++++
 templates/consul_template.sh.tpl          |  14 +
 templates/docker.sh.tpl                   |   9 +
 templates/nomad.sh.tpl                    | 116 ++++++
 .../services/consul_template.service.tpl  |  15 +
 templates/services/nomad.service.tpl      |  21 +
 templates/startup.sh.tpl                  |  39 ++
 variables.tf                              | 156 +++++++
 12 files changed, 908 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 aws_cluster.tf
 create mode 100644 provider.tf
 create mode 100644 security.tf
 create mode 100644 templates/consul_template.sh.tpl
 create mode 100644 templates/docker.sh.tpl
 create mode 100644 templates/nomad.sh.tpl
 create mode 100644 templates/services/consul_template.service.tpl
 create mode 100644 templates/services/nomad.service.tpl
 create mode 100644 templates/startup.sh.tpl
 create mode 100644 variables.tf

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0432583
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.tool-versions
+.terraform
+.terraform.lock.hcl
+terraform.tfstate
+terraform.tfstate.backup
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4581146
--- /dev/null
+++ b/README.md
@@ -0,0 +1,11 @@
+# Quick Nomad Stack (AWS)
+
+This is Terraform code for setting up a Nomad cluster on AWS.
+
+It sets up Nomad servers, a Nomad client autoscaling group, load balancers
+for the servers and for client workloads, and the supporting subnets and security groups.
+
+### Dependencies
+
+- Terraform 0.12+
+- An AWS access key and secret key
diff --git a/aws_cluster.tf b/aws_cluster.tf
new file mode 100644
index 0000000..76969df
--- /dev/null
+++ b/aws_cluster.tf
@@ -0,0 +1,384 @@
+locals {
+  // general config values
+
+  base_config_values = {
+    use_docker           = var.use_docker
+    datacenter           = var.region
+    region               = var.region
+    authoritative_region = var.authoritative_region
+    replication_token    = var.replication_token
+    retry_provider       = var.retry_join.provider
+    retry_tag_key        = var.retry_join.tag_key
+    retry_tag_value      = "${var.retry_join.tag_value_prefix}-${var.cluster_name}"
+    rpc_port             = var.rpc_port
+  }
+
+  nomad_base_config = merge(local.base_config_values, {
+    desired_servers      = var.desired_servers
+    nomad_version        = var.nomad_version
+    nomad_service_config = local.nomad_service_config
+  })
+
+  common_tags = {
+    Use = var.common_tag
+  }
+
+  // service config files
+
+  nomad_service_config = templatefile(
+    "${path.module}/templates/services/nomad.service.tpl",
+    {}
+  )
+
+  consul_template_service_config = templatefile(
+    "${path.module}/templates/services/consul_template.service.tpl",
+    {}
+  )
+
+  // service setup files
+
+  docker_config = templatefile(
+    "${path.module}/templates/docker.sh.tpl",
+    {}
+  )
+
+  consul_template_config = templatefile(
+    "${path.module}/templates/consul_template.sh.tpl",
+    { consul_template_service_config = local.consul_template_service_config }
+  )
+
+  nomad_server_config = templatefile(
+    "${path.module}/templates/nomad.sh.tpl",
+    merge(local.nomad_base_config, { is_server = true })
+  )
+
+  nomad_client_config = templatefile(
+    "${path.module}/templates/nomad.sh.tpl",
+    merge(local.nomad_base_config, { is_server = false })
+  )
+
+  launch_base_user_data = merge(local.base_config_values, {
+    consul_template_config         = local.consul_template_config
+    docker_config                  = local.docker_config
+    consul_template_service_config = 
local.consul_template_service_config + }) +} + +# VPC AND SUBNETS + +resource "aws_vpc" "nomadstack" { + cidr_block = "10.0.0.0/16" + enable_dns_support = true + enable_dns_hostnames = true + enable_classiclink = false + instance_tenancy = "default" + + tags = local.common_tags +} + +resource "aws_subnet" "public" { + count = 2 + + vpc_id = aws_vpc.nomadstack.id + cidr_block = "10.0.10${count.index}.0/24" + availability_zone = var.availability_zones[var.region][count.index] + map_public_ip_on_launch = true + + tags = local.common_tags +} + +resource "aws_internet_gateway" "nomadstack" { + vpc_id = aws_vpc.nomadstack.id + + tags = local.common_tags +} + +resource "aws_route_table" "nomadstack" { + vpc_id = aws_vpc.nomadstack.id + + route { + //associated subnet can reach everywhere + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.nomadstack.id + } + + tags = local.common_tags +} + +resource "aws_route_table_association" "main" { + count = 2 + subnet_id = aws_subnet.public[count.index].id + route_table_id = aws_route_table.nomadstack.id +} + +# INSTANCES & CONFIG + +resource "aws_launch_configuration" "server_launch" { + name_prefix = "nomadstack-server" + image_id = var.base_amis[var.region] + instance_type = var.server_instance_type + key_name = var.key_name + + security_groups = [aws_security_group.nomadstack.id] + associate_public_ip_address = var.associate_public_ip_address + + iam_instance_profile = aws_iam_instance_profile.auto-join.name + + user_data = templatefile( + "${path.module}/templates/startup.sh.tpl", + merge(local.launch_base_user_data, { + nomad_config = local.nomad_server_config + is_server = true + }) + ) + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_launch_configuration" "client_launch" { + name_prefix = "nomadstack-client" + image_id = var.base_amis[var.region] + instance_type = var.client_instance_type + key_name = var.key_name + + security_groups = [aws_security_group.nomadstack.id] + associate_public_ip_address = var.associate_public_ip_address + + iam_instance_profile = aws_iam_instance_profile.auto-join.name + + user_data = templatefile( + "${path.module}/templates/startup.sh.tpl", + merge(local.launch_base_user_data, { + nomad_config = local.nomad_client_config + is_server = false + }) + ) + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_autoscaling_group" "servers" { + desired_capacity = var.desired_servers + max_size = var.max_servers + min_size = var.min_servers + + launch_configuration = aws_launch_configuration.server_launch.name + vpc_zone_identifier = aws_subnet.public.*.id + + target_group_arns = [ aws_alb_target_group.nomad_servers.arn ] + + tags = [ + { + key = "Name" + value = "${var.cluster_name}-server" + propagate_at_launch = true + }, + { + key = var.retry_join.tag_key + value = "${var.retry_join.tag_value_prefix}-${var.cluster_name}" + propagate_at_launch = true + }, + { + key = "Use" + value = var.common_tag + propagate_at_launch = true + } + ] +} + +resource "aws_autoscaling_group" "clients" { + desired_capacity = var.desired_clients + max_size = var.max_servers + min_size = var.min_servers + + launch_configuration = aws_launch_configuration.client_launch.name + vpc_zone_identifier = aws_subnet.public.*.id + + target_group_arns = [ + aws_alb_target_group.nomad_clients.arn + ] + + tags = [ + { + key = "Name" + value = "${var.cluster_name}-client" + propagate_at_launch = true + }, + { + key = var.retry_join.tag_key + value = "${var.retry_join.tag_value_prefix}-${var.cluster_name}" + 
propagate_at_launch = true
+    },
+    {
+      key                 = "Use"
+      value               = var.common_tag
+      propagate_at_launch = true
+    }
+  ]
+}
+
+# LOAD BALANCING
+
+resource "aws_alb" "nomad_servers" {
+  name            = "${var.cluster_name}-nomad-servers"
+  security_groups = [aws_security_group.nomadstack.id]
+  subnets         = aws_subnet.public.*.id
+  internal        = false
+  idle_timeout    = 60
+
+  tags = local.common_tags
+}
+
+resource "aws_alb_target_group" "nomad_servers" {
+  name     = "${var.cluster_name}-nomad-servers"
+  port     = 4646
+  protocol = "HTTP"
+  vpc_id   = aws_vpc.nomadstack.id
+
+  health_check {
+    healthy_threshold   = 3
+    unhealthy_threshold = 10
+    timeout             = 5
+    interval            = 10
+    path                = "/v1/agent/health"
+    port                = 4646
+  }
+
+  tags = local.common_tags
+}
+
+resource "aws_alb_listener" "nomad_servers" {
+  load_balancer_arn = aws_alb.nomad_servers.arn
+  port              = 80
+  protocol          = "HTTP"
+
+  default_action {
+    type             = "forward"
+    target_group_arn = aws_alb_target_group.nomad_servers.arn
+  }
+
+  tags = local.common_tags
+}
+
+resource "aws_autoscaling_attachment" "nomad_servers" {
+  autoscaling_group_name = aws_autoscaling_group.servers.id
+  alb_target_group_arn   = aws_alb_target_group.nomad_servers.arn
+}
+
+# LOAD BALANCING - NOMAD CLIENTS
+
+# NOTE: The first LB and associated resources exist to reach
+# the Nomad UI on the clients. When attached, it makes Terraform wait
+# for the client ASG to become healthy before the apply completes.
+
+# This might not be necessary, but there may be some reason you
+# would want to reach the Nomad client UI, and I'm keeping it
+# as an easy way to block on the client ASG & Nomad health.
+
+# Scroll down for the second client load balancer, which is meant
+# for exposing load balancers or applications running on the clients
+# to the public.
+
+resource "aws_alb" "nomad_clients" {
+  name            = "${var.cluster_name}-nomad-clients"
+  security_groups = [aws_security_group.nomadstack.id]
+  subnets         = aws_subnet.public.*.id
+  internal        = false
+  idle_timeout    = 60
+
+  tags = local.common_tags
+}
+
+resource "aws_alb_target_group" "nomad_clients" {
+  name     = "${var.cluster_name}-nomad-clients"
+  port     = 4646
+  protocol = "HTTP"
+  vpc_id   = aws_vpc.nomadstack.id
+
+  health_check {
+    healthy_threshold   = 3
+    unhealthy_threshold = 10
+    timeout             = 5
+    interval            = 10
+    path                = "/v1/agent/health"
+    port                = 4646
+  }
+
+  tags = local.common_tags
+}
+
+resource "aws_alb_listener" "nomad_clients" {
+  load_balancer_arn = aws_alb.nomad_clients.arn
+  port              = 80
+  protocol          = "HTTP"
+
+  default_action {
+    type             = "forward"
+    target_group_arn = aws_alb_target_group.nomad_clients.arn
+  }
+
+  tags = local.common_tags
+}
+
+resource "aws_autoscaling_attachment" "nomad_clients" {
+  autoscaling_group_name = aws_autoscaling_group.clients.id
+  alb_target_group_arn   = aws_alb_target_group.nomad_clients.arn
+}
+
+# NOTE: This load balancer is meant to expose a load balancer
+# running on the clients to the general public.
+
+# It does not have a custom health check associated with it, as
+# the Nomad job that configures that load balancer has likely
+# not been deployed yet.
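+# As a hedged illustration of what ends up behind this second load balancer:
+# any job that binds a static port matching var.nomad_client_application_port
+# (8080 by default) on every client will receive its traffic. A minimal sketch,
+# where the job name and image are illustrative and not part of this module:
+#
+#   job "edge-proxy" {
+#     datacenters = ["us-east-1"]
+#     type        = "system"
+#
+#     group "proxy" {
+#       network {
+#         port "http" { static = 8080 }
+#       }
+#
+#       task "proxy" {
+#         driver = "docker"
+#         config {
+#           image = "traefik:v2.9"
+#           ports = ["http"]
+#         }
+#       }
+#     }
+#   }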
+
+resource "aws_alb" "nomad_clients_lb" {
+  name            = "${var.cluster_name}-nomad-clients-lb"
+  security_groups = [aws_security_group.nomadstack.id]
+  subnets         = aws_subnet.public.*.id
+  internal        = false
+  idle_timeout    = 60
+
+  tags = local.common_tags
+}
+
+resource "aws_alb_target_group" "nomad_clients_lb" {
+  name     = "${var.cluster_name}-nomad-clients-lb"
+  port     = var.nomad_client_application_port // 8080 default
+  protocol = "HTTP"
+  vpc_id   = aws_vpc.nomadstack.id
+
+  tags = local.common_tags
+}
+
+resource "aws_alb_listener" "nomad_clients_lb" {
+  load_balancer_arn = aws_alb.nomad_clients_lb.arn
+  port              = 80
+  protocol          = "HTTP"
+
+  default_action {
+    type             = "forward"
+    target_group_arn = aws_alb_target_group.nomad_clients_lb.arn
+  }
+
+  tags = local.common_tags
+}
+
+resource "aws_autoscaling_attachment" "nomad_clients_lb" {
+  autoscaling_group_name = aws_autoscaling_group.clients.id
+  alb_target_group_arn   = aws_alb_target_group.nomad_clients_lb.arn
+}
+
+
+# OUTPUTS
+
+output "nomad_server_url" {
+  value = "http://${aws_alb.nomad_servers.dns_name}"
+}
+
+output "nomad_client_lb_url" {
+  value = "http://${aws_alb.nomad_clients_lb.dns_name}"
+}
diff --git a/provider.tf b/provider.tf
new file mode 100644
index 0000000..66428ae
--- /dev/null
+++ b/provider.tf
@@ -0,0 +1,3 @@
+provider "aws" {
+  region = var.region
+}
diff --git a/security.tf b/security.tf
new file mode 100644
index 0000000..0cf758d
--- /dev/null
+++ b/security.tf
@@ -0,0 +1,135 @@
+// ==========================
+// == SECURITY GROUP RULES ==
+// ==========================
+
+// == HTTP ==
+
+resource "aws_security_group_rule" "allow_http_inbound" {
+  type        = "ingress"
+  from_port   = var.http_port_from
+  to_port     = var.http_port_to
+  protocol    = "tcp"
+  cidr_blocks = [var.whitelist_ip]
+
+  security_group_id = aws_security_group.nomadstack.id
+}
+
+// == RPC ==
+
+resource "aws_security_group_rule" "allow_rpc_inbound" {
+  type        = "ingress"
+  from_port   = var.rpc_port
+  to_port     = var.rpc_port
+  protocol    = "tcp"
+  cidr_blocks = [var.whitelist_ip]
+
+  security_group_id = aws_security_group.nomadstack.id
+}
+
+// == SERF TCP ==
+
+resource "aws_security_group_rule" "allow_serf_tcp_inbound" {
+  type        = "ingress"
+  from_port   = var.serf_port
+  to_port     = var.serf_port
+  protocol    = "tcp"
+  cidr_blocks = [var.whitelist_ip]
+
+  security_group_id = aws_security_group.nomadstack.id
+}
+
+// == SERF UDP ==
+
+resource "aws_security_group_rule" "allow_serf_udp_inbound" {
+  type        = "ingress"
+  from_port   = var.serf_port
+  to_port     = var.serf_port
+  protocol    = "udp"
+  cidr_blocks = [var.whitelist_ip]
+
+  security_group_id = aws_security_group.nomadstack.id
+}
+
+// == SSH ==
+
+resource "aws_security_group_rule" "allow_ssh_inbound" {
+  type        = "ingress"
+  from_port   = var.ssh_port
+  to_port     = var.ssh_port
+  protocol    = "tcp"
+  cidr_blocks = [var.whitelist_ip]
+
+  security_group_id = aws_security_group.nomadstack.id
+}
+
+// == OUTBOUND ==
+
+resource "aws_security_group_rule" "allow_all_outbound" {
+  type        = "egress"
+  from_port   = 0
+  to_port     = 0
+  protocol    = "-1"
+  cidr_blocks = ["0.0.0.0/0"]
+
+  security_group_id = aws_security_group.nomadstack.id
+}
+
+// =====================
+// == SECURITY GROUPS ==
+// =====================
+
+resource "aws_security_group" "nomadstack" {
+  name_prefix = var.cluster_name
+  description = "Security group for the ${var.cluster_name} launch configuration"
+  // without an explicit vpc_id this would land in the region's default VPC, so pin it to ours
+  vpc_id = aws_vpc.nomadstack.id
+}
+
+// =================
+// == PERMISSIONS ==
+// =================
+
+// Allow Nomad auto-join
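+// The ec2:DescribeInstances permission below is what backs that auto-join.
+// As a rough, hedged sketch (the full rendered config from nomad.sh.tpl is
+// not reproduced here), the server_join stanza the templates aim to produce
+// with the default variable values looks like:
+//
+//   server_join {
+//     retry_join = ["provider=aws tag_key=NomadAutoJoin tag_value=auto-join-nomad-stack"]
+//   }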
+
+data "aws_iam_policy_document" "describe-instances" {
+  statement {
+    effect    = "Allow"
+    actions   = ["ec2:DescribeInstances"]
+    resources = ["*"]
+  }
+}
+
+data "aws_iam_policy_document" "assume-role" {
+  statement {
+    sid     = ""
+    effect  = "Allow"
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type        = "Service"
+      identifiers = ["ec2.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_policy" "auto-join" {
+  name        = "auto-join-${var.region}-${var.cluster_name}"
+  description = "Allows Nomad nodes to describe instances for joining."
+  policy      = data.aws_iam_policy_document.describe-instances.json
+}
+
+resource "aws_iam_role" "auto-join" {
+  name               = "auto-join-${var.region}-${var.cluster_name}"
+  assume_role_policy = data.aws_iam_policy_document.assume-role.json
+}
+
+resource "aws_iam_policy_attachment" "auto-join" {
+  name       = "auto-join-${var.region}-${var.cluster_name}"
+  roles      = [aws_iam_role.auto-join.name]
+  policy_arn = aws_iam_policy.auto-join.arn
+}
+
+resource "aws_iam_instance_profile" "auto-join" {
+  name = "auto-join-${var.region}-${var.cluster_name}"
+  role = aws_iam_role.auto-join.name
+}
diff --git a/templates/consul_template.sh.tpl b/templates/consul_template.sh.tpl
new file mode 100644
index 0000000..a8f28b6
--- /dev/null
+++ b/templates/consul_template.sh.tpl
@@ -0,0 +1,14 @@
+echo "=== Setting up Consul Template ==="
+sudo mkdir -p /mnt/consul-template
+sudo mkdir -p /etc/consul-template.d
+
+sudo tee /etc/consul-template.d/consul-template.hcl > /dev/null <<EOF
+# (consul-template.hcl contents were not recoverable from this copy of the patch)
+EOF
+
+sudo tee /etc/systemd/system/consul-template.service > /dev/null <<"EOF"
+${consul_template_service_config}
+EOF
diff --git a/templates/docker.sh.tpl b/templates/docker.sh.tpl
new file mode 100644
index 0000000..5b2436f
--- /dev/null
+++ b/templates/docker.sh.tpl
@@ -0,0 +1,9 @@
+echo "=== Getting Docker ==="
+
+// TODO: Why was this necessary?
+// sleep 30 + +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - +sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" +sudo apt-get -yqq update +sudo apt-get -yqq install docker-ce diff --git a/templates/nomad.sh.tpl b/templates/nomad.sh.tpl new file mode 100644 index 0000000..a5e7d82 --- /dev/null +++ b/templates/nomad.sh.tpl @@ -0,0 +1,116 @@ +PRIVATE_IP=$(curl http://169.254.169.254/latest/meta-data/local-ipv4) +PUBLIC_IP=$(curl http://169.254.169.254/latest/meta-data/public-ipv4) + +echo "=== Fetching Nomad ===" +cd /tmp +curl -sLo nomad.zip https://releases.hashicorp.com/nomad/${nomad_version}/nomad_${nomad_version}_linux_amd64.zip + +echo "=== Installing Nomad ===" +unzip nomad.zip >/dev/null +sudo chmod +x nomad +sudo mv nomad /usr/local/bin/nomad + +sudo mkdir -p /mnt/nomad +sudo mkdir -p /etc/nomad.d + +if [ ${is_server} == true ] || [ ${is_server} == 1 ]; then + echo "=== Setting up Nomad as Server ===" + echo "=== Writing Server Config ===" + + sudo tee /etc/nomad.d/config.hcl > /dev/null < /dev/null < /dev/null <<"EOF" +${nomad_service_config} +EOF diff --git a/templates/services/consul_template.service.tpl b/templates/services/consul_template.service.tpl new file mode 100644 index 0000000..b02878e --- /dev/null +++ b/templates/services/consul_template.service.tpl @@ -0,0 +1,15 @@ +[Unit] +Description=Consul Template Agent +Requires=network-online.target +After=network-online.target + +[Service] +Restart=on-failure +ExecStart=/usr/local/bin/consul-template -config="/etc/consul-template.d/consul-template.hcl" +ExecReload=/bin/kill -HUP $MAINPID +KillSignal=SIGTERM +User=root +Group=root + +[Install] +WantedBy=multi-user.target \ No newline at end of file diff --git a/templates/services/nomad.service.tpl b/templates/services/nomad.service.tpl new file mode 100644 index 0000000..3f8029e --- /dev/null +++ b/templates/services/nomad.service.tpl @@ -0,0 +1,21 @@ +[Unit] +Description=Nomad Agent +Documentation=https://nomadproject.io/docs/ +Wants=network-online.target +After=network-online.target + +[Service] +ExecReload=/bin/kill -HUP $MAINPID +ExecStart=/usr/local/bin/nomad agent -config /etc/nomad.d +KillMode=process +KillSignal=SIGINT +LimitNOFILE=infinity +LimitNPROC=infinity +Restart=on-failure +RestartSec=2 +StartLimitBurst=3 +StartLimitIntervalSec=10 +TasksMax=infinity + +[Install] +WantedBy=multi-user.target \ No newline at end of file diff --git a/templates/startup.sh.tpl b/templates/startup.sh.tpl new file mode 100644 index 0000000..4fff217 --- /dev/null +++ b/templates/startup.sh.tpl @@ -0,0 +1,39 @@ +#!/bin/bash + +set -e + +echo "========================" +echo "=== Setting up Nomad ===" +echo "========================" + +sudo apt-get -yqq update +sudo apt-get -yqq install apt-transport-https ca-certificates curl gnupg-agent software-properties-common unzip jq + +PRIVATE_IP=$(curl http://169.254.169.254/latest/meta-data/local-ipv4) + +if [ ${use_docker} == true ] || [ ${use_docker} == 1 ]; then + echo "==============" + echo "=== Docker ===" + echo "==============" + ${docker_config} +fi + +echo "=======================" +echo "=== Consul Template ===" +echo "=======================" +${consul_template_config} + +echo "=============" +echo "=== Nomad ===" +echo "=============" +${nomad_config} + +sudo systemctl daemon-reload + +echo "=== Starting Consul Template ===" +sudo systemctl enable consul-template.service +sudo systemctl start consul-template.service + +echo "=== Starting Nomad 
===" +sudo systemctl enable nomad.service +sudo systemctl start nomad.service diff --git a/variables.tf b/variables.tf new file mode 100644 index 0000000..eb491a0 --- /dev/null +++ b/variables.tf @@ -0,0 +1,156 @@ +// == VERSIONS == + +variable "nomad_version" { + type = string + default = "1.3.1" +} + +// == TOOLS AND DRIVERS == + +variable "use_docker" { + type = bool + default = true +} + +// == HIGH LEVEL AWS INFO == + +variable "region" { + type = string + default = "us-east-1" +} + +variable "authoritative_region" { + type = string + default = "us-east-1" +} + +variable "replication_token" { + type = string + default = "" +} + +variable "availability_zones" { + type = map(any) + + default = { + "us-east-1" = ["us-east-1a"], + "us-west-2" = ["us-west-2a"] + } + + description = "The AZs to make subnets on for any given cloud region" +} + +variable "common_tag" { + type = string + default = "nomad-stack" +} + + +// PORTS + +variable "serf_port" { + type = string + default = "4648" +} + +variable "ssh_port" { + type = string + default = "22" +} + +variable "rpc_port" { + type = string + default = "8502" +} + +variable "http_port_from" { + type = string + default = "80" +} + +variable "http_port_to" { + type = string + default = "65535" +} + +variable "nomad_client_appliicaton_port" { + type = number + default = 8080 +} + +// CIDR + +variable "whitelist_ip" { + type = string + default = "0.0.0.0/0" +} + +// == ALB == + +variable "base_amis" { + type = map(any) + + default = { + "us-east-1" = "ami-0745d55d209ff6afd" + "us-west-2" = "ami-089668cd321f3cf82" + } + + description = "The id of the machine image (AMI) to use for the server. Ubuntu 20.04 LTS AMD 64" +} + +variable "key_name" { + type = string + default = "nomad-stack" +} + +variable "server_instance_type" { + type = string + default = "t2.micro" +} + +variable "client_instance_type" { + type = string + default = "t2.small" +} + +variable "desired_servers" { + type = number + default = 1 +} + +variable "desired_clients" { + type = number + default = 2 +} + +variable "max_servers" { + type = number + default = 3 +} + +variable "min_servers" { + type = number + default = 1 +} + +variable "cluster_name" { + type = string + default = "nomad-stack" +} + +variable "associate_public_ip_address" { + type = bool + default = true +} + +// == SERVER DATA == + +variable "retry_join" { + type = map(any) + + default = { + provider = "aws" + tag_key = "NomadAutoJoin" + tag_value_prefix = "auto-join" + } +}