From 6c0458ad13c64dc8dc7b45600c6b0d5617668eda Mon Sep 17 00:00:00 2001 From: Brandon Bjelland Date: Tue, 8 Aug 2023 15:40:02 -0700 Subject: [PATCH] aws-up and aws-down working in a containerized context via makefile targets --- Makefile | 83 ++++++++++++------- docs/development.md | 6 +- install/Dockerfile | 1 + .../kubernetes/karpenter-provisioner.yaml.tpl | 1 - install/scripts/aws-down.sh | 32 +++++-- install/scripts/aws-up.sh | 68 +++++++++------ 6 files changed, 123 insertions(+), 68 deletions(-) diff --git a/Makefile b/Makefile index 737ebb72..ecd0f522 100644 --- a/Makefile +++ b/Makefile @@ -73,7 +73,7 @@ all: build .PHONY: help help: ## Display this help. - @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) ##@ Development @@ -120,40 +120,64 @@ skaffold-dev-gcpmanager: protoc skaffold protogen render-skaffold-manifests ## R build: manifests generate fmt vet ## Build manager binary. go build -o bin/manager cmd/controllermanager/main.go -.PHONY: dev-up -dev-up: - docker build ./install -t substratus-installer && \ +.PHONY: build-installer +build-installer: + docker build ./install -t substratus-installer + +.PHONY: gcp-dev-up +gcp-dev-up: build-installer docker run -it \ - -v ${HOME}/.kube:/root/.kube \ - -e PROJECT=$(shell gcloud config get project) \ - -e TOKEN=$(shell gcloud auth print-access-token) \ - -e TF_VAR_attach_gpu_nodepools=${ATTACH_GPU_NODEPOOLS} \ - -e INSTALL_OPERATOR=false \ - substratus-installer gcp-up.sh + -v ${HOME}/.kube:/root/.kube \ + -e PROJECT=$(shell gcloud config get project) \ + -e TOKEN=$(shell gcloud auth print-access-token) \ + -e TF_VAR_attach_gpu_nodepools=${ATTACH_GPU_NODEPOOLS} \ + -e INSTALL_OPERATOR=false \ + substratus-installer gcp-up.sh mkdir -p secrets gcloud iam service-accounts keys create --iam-account=substratus-gcp-manager@$(shell gcloud config get project).iam.gserviceaccount.com ./secrets/gcp-manager-key.json -.PHONY: dev-down -dev-down: +.PHONY: gcp-dev-down +gcp-dev-down: build-installer docker run -it \ - -v ${HOME}/.kube:/root/.kube \ - -e PROJECT=$(shell gcloud config get project) \ - -e TOKEN=$(shell gcloud auth print-access-token) \ - -e TF_VAR_attach_gpu_nodepools=${ATTACH_GPU_NODEPOOLS} \ - substratus-installer gcp-down.sh + -v ${HOME}/.kube:/root/.kube \ + -e PROJECT=$(shell gcloud config get project) \ + -e TOKEN=$(shell gcloud auth print-access-token) \ + -e TF_VAR_attach_gpu_nodepools=${ATTACH_GPU_NODEPOOLS} \ + substratus-installer gcp-down.sh rm ./secrets/gcp-manager-key.json -.PHONY: dev-run +.PHONY: aws-dev-up +aws-dev-up: build-installer + docker run -it \ + -v ${HOME}/.kube:/root/.kube \ + -e AWS_ACCOUNT_ID="$(shell aws sts get-caller-identity --query Account --output text)" \ + -e AWS_ACCESS_KEY_ID=$(shell aws configure get aws_access_key_id) \ + -e AWS_SECRET_ACCESS_KEY=$(shell aws configure get aws_secret_access_key) \ + -e AWS_SESSION_TOKEN=$(shell aws configure get aws_session_token) \ + -e INSTALL_OPERATOR=false \ + substratus-installer aws-up.sh + +.PHONY: aws-dev-down +aws-dev-down: build-installer + docker run -it \ + -v ${HOME}/.kube:/root/.kube \ + -e AWS_ACCOUNT_ID="$(shell aws sts get-caller-identity --query Account --output text)" \ + -e AWS_ACCESS_KEY_ID=$(shell aws configure get aws_access_key_id) \ + -e AWS_SECRET_ACCESS_KEY=$(shell aws configure get aws_secret_access_key) \ + -e AWS_SESSION_TOKEN=$(shell aws configure get aws_session_token) \ + substratus-installer aws-down.sh + +.PHONY: gcp-dev-run # Controller manager configuration # -dev-run: export CLOUD=gcp -dev-run: export GPU_TYPE=nvidia-l4 -dev-run: export PROJECT_ID=$(shell gcloud config get project) -dev-run: export CLUSTER_NAME=substratus -dev-run: export CLUSTER_LOCATION=us-central1 +gcp-dev-run: export CLOUD=gcp +gcp-dev-run: export GPU_TYPE=nvidia-l4 +gcp-dev-run: export PROJECT_ID=$(shell gcloud config get project) +gcp-dev-run: export CLUSTER_NAME=substratus +gcp-dev-run: export CLUSTER_LOCATION=us-central1 # Cloud manager configuration # -dev-run: export GOOGLE_APPLICATION_CREDENTIALS=./secrets/gcp-manager-key.json +gcp-dev-run: export GOOGLE_APPLICATION_CREDENTIALS=./secrets/gcp-manager-key.json # Run the controller manager and the cloud manager. -dev-run: manifests kustomize install-crds +gcp-dev-run: manifests kustomize install-crds go run ./cmd/gcpmanager & \ go run ./cmd/controllermanager/main.go \ --sci-address=localhost:10080 \ @@ -176,16 +200,17 @@ docker-push: ## Push docker image with the manager. .PHONY: docs docs: crd-ref-docs embedmd - $(CRD_REF_DOCS) --config=./docs/api/config.yaml \ + $(CRD_REF_DOCS) \ + --config=./docs/api/config.yaml \ --log-level=INFO \ --output-path=./docs/api/generated.md \ --source-path=./api \ - --templates-dir=./docs/api/templates/markdown \ + --templates-dir=./docs/api/templates/markdown \ --renderer=markdown # TODO: Embed YAML examples into the generate API documentation. # $(EMBEDMD) -w ./docs/api/generated.md -# PLATFORMS defines the target platforms for the manager image be build to provide support to multiple +# PLATFORMS defines the target platforms for the manager image be build to provide support to multiple # architectures. (i.e. make docker-buildx IMG=myregistry/mypoperator:0.0.1). To use this option you need to: # - able to use docker buildx . More info: https://docs.docker.com/build/buildx/ # - have enable BuildKit, More info: https://docs.docker.com/develop/develop-images/build_enhancements/ @@ -209,7 +234,7 @@ protogen: protoc ## Generate protobuf files. ##@ Deployment ifndef ignore-not-found - ignore-not-found = false + ignore-not-found = false endif .PHONY: install-crds diff --git a/docs/development.md b/docs/development.md index bcdb9c25..b5000690 100644 --- a/docs/development.md +++ b/docs/development.md @@ -5,19 +5,19 @@ Create a GCP environment. ```sh -make dev-up +make gcp-dev-up ``` Run Substratus control plane locally. ```sh -make dev-run +make gcp-dev-run ``` Delete GCP infra. ```sh -make dev-down +make gcp-dev-down ``` TODO: Automate the cleanup of PVs... Don't forget to manually clean them up for now. diff --git a/install/Dockerfile b/install/Dockerfile index 8d3c0b2d..30ee639d 100644 --- a/install/Dockerfile +++ b/install/Dockerfile @@ -32,6 +32,7 @@ RUN DEBIAN_FRONTEND="noninteractive" \ curl \ git \ tzdata \ + gettext-base \ keyboard-configuration # AWS CLI diff --git a/install/kubernetes/karpenter-provisioner.yaml.tpl b/install/kubernetes/karpenter-provisioner.yaml.tpl index 1fafec36..ea63a3e0 100644 --- a/install/kubernetes/karpenter-provisioner.yaml.tpl +++ b/install/kubernetes/karpenter-provisioner.yaml.tpl @@ -20,7 +20,6 @@ spec: karpenter.sh/discovery: ${CLUSTER_NAME} securityGroupSelector: karpenter.sh/discovery: ${CLUSTER_NAME} - ttlSecondsAfterEmpty: 30 consolidation: enabled: true taints: diff --git a/install/scripts/aws-down.sh b/install/scripts/aws-down.sh index b776f377..9cc5e5a9 100755 --- a/install/scripts/aws-down.sh +++ b/install/scripts/aws-down.sh @@ -4,21 +4,37 @@ set -e set -u # Required env variables: -# : "$TOKEN $PROJECT" +: "$AWS_ACCOUNT_ID $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" KUBERENTES_DIR=${SCRIPT_DIR}/../kubernetes export EKSCTL_ENABLE_CREDENTIAL_CACHE=1 export CLUSTER_NAME=substratus export REGION=us-west-2 -export ARTIFACTS_REPO_NAME=substratus -export AWS_ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)" -export ARTIFACTS_BUCKET_NAME=${AWS_ACCOUNT_ID}-substratus-artifacts +export ARTIFACTS_REPO_NAME=${CLUSTER_NAME} +export ARTIFACTS_BUCKET_NAME=${AWS_ACCOUNT_ID}-${CLUSTER_NAME}-artifacts + +aws eks update-kubeconfig \ + --region ${REGION} \ + --name ${CLUSTER_NAME} && + kubectl delete deployments --namespace=karpenter --all && + kubectl delete deployments --namespace=kube-system --all || + true + +aws iam delete-policy \ + --policy-arn arn:aws:iam::${AWS_ACCOUNT_ID}:policy/KarpenterControllerPolicy-${CLUSTER_NAME} || + true -aws s3 rb s3://${ARTIFACTS_BUCKET_NAME} --region ${REGION} >/dev/null || true -aws ecr delete-repository --repository-name ${ARTIFACTS_REPO_NAME} >/dev/null || true aws cloudformation delete-stack \ - --stack-name "Karpenter-${CLUSTER_NAME}" || true + --stack-name "Karpenter-${CLUSTER_NAME}" \ + --region ${REGION} || true envsubst <${KUBERENTES_DIR}/eks-cluster.yaml.tpl >${KUBERENTES_DIR}/eks-cluster.yaml -eksctl delete cluster -f ${KUBERENTES_DIR}/eks-cluster.yaml +eksctl delete cluster -f ${KUBERENTES_DIR}/eks-cluster.yaml || true + +aws ecr delete-repository \ + --repository-name ${ARTIFACTS_REPO_NAME} \ + --region ${REGION} >/dev/null || true + +aws s3 rb s3://${ARTIFACTS_BUCKET_NAME} \ + --region ${REGION} >/dev/null || true diff --git a/install/scripts/aws-up.sh b/install/scripts/aws-up.sh index 424c4eeb..a660a998 100755 --- a/install/scripts/aws-up.sh +++ b/install/scripts/aws-up.sh @@ -4,26 +4,32 @@ set -e set -u # Required env variables: -# : "$TOKEN $PROJECT" +: "$AWS_ACCOUNT_ID $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY" + +INSTALL_OPERATOR="${INSTALL_OPERATOR:-yes}" -# # TODO(bjb): pass AWS creds into script -# export CLOUDSDK_AUTH_ACCESS_TOKEN=${TOKEN} SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" KUBERENTES_DIR=${SCRIPT_DIR}/../kubernetes -# INSTALL_OPERATOR="${INSTALL_OPERATOR:-yes}" -export EKSCTL_ENABLE_CREDENTIAL_CACHE=1 -export CLUSTER_NAME=substratus -export REGION=us-west-2 -export ARTIFACTS_REPO_NAME=substratus -export AWS_ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)" -export ARTIFACTS_BUCKET_NAME=${AWS_ACCOUNT_ID}-substratus-artifacts - -aws s3 mb s3://${ARTIFACTS_BUCKET_NAME} --region ${REGION} >/dev/null || true -aws ecr create-repository --repository-name ${ARTIFACTS_REPO_NAME} --region ${REGION} >/dev/null || true + +EKSCTL_ENABLE_CREDENTIAL_CACHE=1 +CLUSTER_NAME=substratus +REGION=us-west-2 +ARTIFACTS_REPO_NAME=${CLUSTER_NAME} +ARTIFACTS_BUCKET_NAME=${AWS_ACCOUNT_ID}-${CLUSTER_NAME}-artifacts +KARPENTER_VERSION=v0.29.2 +AWS_PARTITION="aws" +KARPENTER_IAM_ROLE_ARN="arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:role/${CLUSTER_NAME}-karpenter" + +TEMPOUT=$(mktemp) + +aws s3 mb s3://${ARTIFACTS_BUCKET_NAME} \ + --region ${REGION} >/dev/null || true + +aws ecr create-repository \ + --repository-name ${ARTIFACTS_REPO_NAME} \ + --region ${REGION} >/dev/null || true + # install karpenter: https://karpenter.sh/docs/getting-started/getting-started-with-karpenter/ -export KARPENTER_VERSION=v0.29.2 -export AWS_PARTITION="aws" -export TEMPOUT=$(mktemp) curl -fsSL https://raw.githubusercontent.com/aws/karpenter/"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml >$TEMPOUT && aws cloudformation deploy \ --stack-name "Karpenter-${CLUSTER_NAME}" \ @@ -33,15 +39,23 @@ curl -fsSL https://raw.githubusercontent.com/aws/karpenter/"${KARPENTER_VERSION} --region ${REGION} envsubst <${KUBERENTES_DIR}/eks-cluster.yaml.tpl >${KUBERENTES_DIR}/eks-cluster.yaml -eksctl create cluster -f ${KUBERENTES_DIR}/eks-cluster.yaml || eksctl upgrade cluster -f ${KUBERENTES_DIR}/eks-cluster.yaml +eksctl create cluster -f ${KUBERENTES_DIR}/eks-cluster.yaml || + eksctl upgrade cluster -f ${KUBERENTES_DIR}/eks-cluster.yaml + +aws iam create-service-linked-role \ + --aws-service-name spot.amazonaws.com || true + +aws eks update-kubeconfig \ + --region ${REGION} \ + --name ${CLUSTER_NAME} -export KARPENTER_IAM_ROLE_ARN="arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:role/${CLUSTER_NAME}-karpenter" -aws iam create-service-linked-role --aws-service-name spot.amazonaws.com || true -aws eks --region ${REGION} update-kubeconfig --name ${CLUSTER_NAME} # Logout of helm registry to perform an unauthenticated pull against the public ECR helm registry logout public.ecr.aws || true - -helm upgrade --install karpenter oci://public.ecr.aws/karpenter/karpenter --version ${KARPENTER_VERSION} --namespace karpenter --create-namespace \ +helm upgrade \ + --create-namespace \ + --install karpenter oci://public.ecr.aws/karpenter/karpenter \ + --version ${KARPENTER_VERSION} \ + --namespace karpenter \ --set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"=${KARPENTER_IAM_ROLE_ARN} \ --set settings.aws.clusterName=${CLUSTER_NAME} \ --set settings.aws.defaultInstanceProfile=KarpenterNodeInstanceProfile-${CLUSTER_NAME} \ @@ -64,8 +78,8 @@ helm upgrade \ eks/aws-node-termination-handler # Install the substratus operator. -# if [ "${INSTALL_OPERATOR}" == "yes" ]; then -# kubectl apply -f kubernetes/namespace.yaml -# kubectl apply -f kubernetes/config.yaml -# kubectl apply -f kubernetes/system.yaml -# fi +if [ "${INSTALL_OPERATOR}" == "yes" ]; then + kubectl apply -f kubernetes/namespace.yaml + kubectl apply -f kubernetes/config.yaml + kubectl apply -f kubernetes/system.yaml +fi