diff --git a/.github/actions/configure-aws-credentials/action.yml b/.github/actions/configure-aws-credentials/action.yml index 42f1e963..6b573ce9 100644 --- a/.github/actions/configure-aws-credentials/action.yml +++ b/.github/actions/configure-aws-credentials/action.yml @@ -1,19 +1,59 @@ name: 'Configure AWS Credentials' -description: 'Configure AWS Credentials for a given application and | - environment so that the GitHub Actions workflow can access AWS resources. | +description: 'Configure AWS Credentials for an AWS account so that | + the GitHub Actions workflow can access AWS resources. | This is a wrapper around https://github.com/aws-actions/configure-aws-credentials | - that first determines the account, role, and region based on the | - account_names_by_environment configuration in app-config' + that first determines the account, role, and region. | + Chose one of the following three authentication options: | + 1. Authenticate by account_name | + 2. Authenticate by network_name | + 3. Authenticate by app_name and environment.' + inputs: + account_name: + description: 'Name of account, must match in ..s3.tfbackend file in /infra/accounts' + network_name: + description: 'Name of network, must match in .s3.tfbackend file in /infra/networks' app_name: description: 'Name of application folder under /infra' - required: true environment: description: 'Name of environment (dev, staging, prod) that AWS resources live in, or "shared" for resources that are shared across environments' - required: true runs: using: "composite" steps: + - name: Get network name from app and environment + id: get-network-name + if: ${{ inputs.app_name && inputs.environment }} + run: | + echo "Get network name for app_name=${{ inputs.app_name }} and environment=${{ inputs.environment }}" + + terraform -chdir="infra/${{ inputs.app_name }}/app-config" init > /dev/null + terraform -chdir="infra/${{ inputs.app_name }}/app-config" apply -auto-approve > /dev/null + + if [[ "${{ inputs.environment }}" == "shared" ]]; then + network_name=$(terraform -chdir="infra/${{ inputs.app_name }}/app-config" output -raw shared_network_name) + else + network_name=$(terraform -chdir="infra/${{ inputs.app_name }}/app-config" output -json environment_configs | jq -r ".${{ inputs.environment }}.network_name") + fi + + echo "Network name retrieved: ${network_name}" + echo "network_name=${network_name}" >> "$GITHUB_OUTPUT" + shell: bash + + - name: Get account name from network + id: get-account-name + if: ${{ inputs.network_name || steps.get-network-name.outputs.network_name }} + run: | + network_name="${{ inputs.network_name || steps.get-network-name.outputs.network_name }}" + echo "Get account name for network: ${network_name}" + + terraform -chdir="infra/project-config" init > /dev/null + terraform -chdir="infra/project-config" apply -auto-approve > /dev/null + account_name=$(terraform -chdir="infra/project-config" output -json network_configs | jq -r ".[\"${network_name}\"].account_name") + + echo "Account name retrieved: ${account_name}" + echo "account_name=${account_name}" >> "$GITHUB_OUTPUT" + shell: bash + - name: Get AWS account authentication details (AWS account, IAM role, AWS region) run: | # Get AWS account authentication details (AWS account, IAM role, AWS region) @@ -22,34 +62,31 @@ runs: echo "::group::AWS account authentication details" - terraform -chdir=infra/project-config init > /dev/null - terraform -chdir=infra/project-config apply -auto-approve > /dev/null - AWS_REGION=$(terraform -chdir=infra/project-config 
output -raw default_region) - echo "AWS_REGION=$AWS_REGION" - GITHUB_ACTIONS_ROLE_NAME=$(terraform -chdir=infra/project-config output -raw github_actions_role_name) - echo "GITHUB_ACTIONS_ROLE_NAME=$GITHUB_ACTIONS_ROLE_NAME" + account_name="${{ inputs.account_name || steps.get-account-name.outputs.account_name }}" - terraform -chdir=infra/${{ inputs.app_name }}/app-config init > /dev/null - terraform -chdir=infra/${{ inputs.app_name }}/app-config apply -auto-approve > /dev/null - ACCOUNT_NAME=$(terraform -chdir=infra/${{ inputs.app_name }}/app-config output -json account_names_by_environment | jq -r .${{ inputs.environment }}) - echo "ACCOUNT_NAME=$ACCOUNT_NAME" + terraform -chdir="infra/project-config" init > /dev/null + terraform -chdir="infra/project-config" apply -auto-approve > /dev/null + aws_region=$(terraform -chdir="infra/project-config" output -raw default_region) + echo "aws_region=${aws_region}" + github_actions_role_name=$(terraform -chdir="infra/project-config" output -raw github_actions_role_name) + echo "github_actions_role_name=${github_actions_role_name}" # Get the account id associated with the account name extracting the # ACCOUNT_ID part of the tfbackend file name which looks like # ..s3.tfbackend. - # The cut command splits the string with period as the delimeter and + # The cut command splits the string with period as the delimiter and # extracts the second field. - ACCOUNT_ID=$(ls infra/accounts/$ACCOUNT_NAME.*.s3.tfbackend | cut -d. -f2) - echo "ACCOUNT_ID=$ACCOUNT_ID" + account_id=$(ls infra/accounts/${account_name}.*.s3.tfbackend | cut -d. -f2) + echo "account_id=${account_id}" - AWS_ROLE_TO_ASSUME=arn:aws:iam::$ACCOUNT_ID:role/$GITHUB_ACTIONS_ROLE_NAME - echo "AWS_ROLE_TO_ASSUME=$AWS_ROLE_TO_ASSUME" + aws_role_to_assume="arn:aws:iam::${account_id}:role/${github_actions_role_name}" + echo "aws_role_to_assume=${aws_role_to_assume}" echo "::endgroup::" echo "Setting env vars AWS_ROLE_TO_ASSUME and AWS_REGION..." 
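For illustration (the account name and id below are made up), the cut call above recovers the account id purely from the backend file name, and the GitHub Actions role ARN is then assembled from that id:

    # hypothetical file: infra/accounts/my-account.123456789012.s3.tfbackend
    echo "my-account.123456789012.s3.tfbackend" | cut -d. -f2   # prints 123456789012
    # resulting role ARN: arn:aws:iam::123456789012:role/<github_actions_role_name>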
- echo "AWS_ROLE_TO_ASSUME=$AWS_ROLE_TO_ASSUME" >> "$GITHUB_ENV" - echo "AWS_REGION=$AWS_REGION" >> "$GITHUB_ENV" + echo "AWS_ROLE_TO_ASSUME=${aws_role_to_assume}" >> "$GITHUB_ENV" + echo "AWS_REGION=${aws_region}" >> "$GITHUB_ENV" shell: bash - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v3 diff --git a/.github/workflows/README.md b/.github/workflows/README.md index d10928f7..db3692f6 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -11,6 +11,10 @@ Each app should have: - `ci-[app_name]`: must be created; should run linting and testing - `ci-[app_name]-vulnerability-scans`: calls `vulnerability-scans` - Based on [ci-app-vulnerability-scans](https://github.com/navapbc/template-infra/blob/main/.github/workflows/ci-app-vulnerability-scans.yml) +- `ci-[app_name]-pr-environment-checks.yml`: calls `pr-environment-checks.yml` to create or update a pull request environment (see [pull request environments](/docs/infra/pull-request-environments.md)) + - Based on [ci-app-pr-environment-checks.yml](/.github/workflows/ci-app-pr-environment-checks.yml) +- `ci-[app_name]-pr-environment-destroy.yml`: calls `pr-environment-destroy.yml` to destroy the pull request environment (see [pull request environments](/docs/infra/pull-request-environments.md)) + - Based on [ci-app-pr-environment-destroy.yml](https://github.com/navapbc/template-infra/blob/main/.github/workflows/ci-app-pr-environment-destroy.yml) ### App-agnostic workflows @@ -43,5 +47,4 @@ graph TD ## ⛑️ Helper workflows -- [`check-infra-auth`](./check-infra-auth.yml): verifes that the project's Github repo is able to connect to AWS - +- [`check-ci-cd-auth`](./check-ci-cd-auth.yml): verifes that the project's Github repo is able to connect to AWS diff --git a/.github/workflows/build-and-publish.yml b/.github/workflows/build-and-publish.yml index 8f129609..feb7f9af 100644 --- a/.github/workflows/build-and-publish.yml +++ b/.github/workflows/build-and-publish.yml @@ -24,10 +24,26 @@ on: type: string jobs: + get-commit-hash: + name: Get commit hash + runs-on: ubuntu-latest + outputs: + commit_hash: ${{ steps.get-commit-hash.outputs.commit_hash }} + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ inputs.ref }} + - name: Get commit hash + id: get-commit-hash + run: | + COMMIT_HASH=$(git rev-parse ${{ inputs.ref }}) + echo "Commit hash: $COMMIT_HASH" + echo "commit_hash=$COMMIT_HASH" >> "$GITHUB_OUTPUT" build-and-publish: name: Build and publish runs-on: ubuntu-latest - concurrency: ${{ github.workflow }}-${{ github.sha }} + needs: get-commit-hash + concurrency: ${{ github.workflow }}-${{ needs.get-commit-hash.outputs.commit_hash }} permissions: contents: read @@ -38,14 +54,23 @@ jobs: with: ref: ${{ inputs.ref }} - - name: Build release - run: make APP_NAME=${{ inputs.app_name }} release-build - - name: Configure AWS credentials uses: ./.github/actions/configure-aws-credentials with: app_name: ${{ inputs.app_name }} environment: shared + - name: Check if image is already published + id: check-image-published + run: | + is_image_published=$(./bin/is-image-published "${{ inputs.app_name }}" "${{ inputs.ref }}") + echo "Is image published: $is_image_published" + echo "is_image_published=$is_image_published" >> "$GITHUB_OUTPUT" + + - name: Build release + if: steps.check-image-published.outputs.is_image_published == 'false' + run: make APP_NAME=${{ inputs.app_name }} release-build + - name: Publish release + if: steps.check-image-published.outputs.is_image_published == 'false' run: make 
APP_NAME=${{ inputs.app_name }} release-publish diff --git a/.github/workflows/check-infra-auth.yml b/.github/workflows/check-ci-cd-auth.yml similarity index 93% rename from .github/workflows/check-infra-auth.yml rename to .github/workflows/check-ci-cd-auth.yml index 5b85d560..4a54b9e9 100644 --- a/.github/workflows/check-infra-auth.yml +++ b/.github/workflows/check-ci-cd-auth.yml @@ -1,4 +1,4 @@ -name: Check GitHub Actions AWS Authentication +name: Check CI/CD AWS authentication on: workflow_dispatch: diff --git a/.github/workflows/check-infra-deploy-status.yml b/.github/workflows/check-infra-deploy-status.yml new file mode 100644 index 00000000..be2f62aa --- /dev/null +++ b/.github/workflows/check-infra-deploy-status.yml @@ -0,0 +1,72 @@ +# This workflow checks the status of infrastructure deployments to see whether +# infrastructure code configuration matches the actual state of the infrastructure. +# It does this by checking that Terraform plans show an empty diff (no changes) +# across all root modules and backend configurations. +name: Check infra deploy status + +on: + workflow_dispatch: + schedule: + # Run every day at 07:00 UTC (3am ET, 12am PT) after engineers are likely done with work + - cron: "0 7 * * *" + +jobs: + collect-configs: + name: Collect configs + runs-on: ubuntu-latest + outputs: + root_module_configs: ${{ steps.collect-infra-deploy-status-check-configs.outputs.root_module_configs }} + steps: + - uses: actions/checkout@v4 + - name: Collect root module configurations + id: collect-infra-deploy-status-check-configs + run: | + root_module_configs="$(./bin/infra-deploy-status-check-configs)" + echo "${root_module_configs}" + echo "root_module_configs=${root_module_configs}" >> "$GITHUB_OUTPUT" + check: + name: ${{ matrix.root_module_subdir }} ${{ matrix.backend_config_name }} + runs-on: ubuntu-latest + needs: collect-configs + + # Skip this job if there are no root module configurations to check, + # otherwise the GitHub actions will give the error: "Matrix must define at least one vector" + if: ${{ needs.collect-configs.outputs.root_module_configs != '[]' }} + + strategy: + fail-fast: false + matrix: + include: ${{ fromJson(needs.collect-configs.outputs.root_module_configs) }} + + permissions: + contents: read + id-token: write + + steps: + - uses: actions/checkout@v4 + - uses: hashicorp/setup-terraform@v2 + with: + terraform_version: 1.8.3 + terraform_wrapper: false + + - name: Configure AWS credentials + uses: ./.github/actions/configure-aws-credentials + with: + account_name: ${{ matrix.infra_layer == 'accounts' && matrix.account_name || null }} + network_name: ${{ matrix.infra_layer == 'networks' && matrix.backend_config_name || null }} + app_name: ${{ contains(fromJSON('["build-repository", "database", "service"]'), matrix.infra_layer) && matrix.app_name || null }} + environment: ${{ contains(fromJSON('["build-repository", "database", "service"]'), matrix.infra_layer) && matrix.backend_config_name || null }} + + - name: Check Terraform plan + run: | + echo "::group::Initialize Terraform" + echo terraform -chdir="infra/${{ matrix.root_module_subdir }}" init -input=false -reconfigure -backend-config="${{ matrix.backend_config_name }}.s3.tfbackend" + terraform -chdir="infra/${{ matrix.root_module_subdir }}" init -input=false -reconfigure -backend-config="${{ matrix.backend_config_name }}.s3.tfbackend" + echo "::endgroup::" + + echo "::group::Check Terraform plan" + echo terraform -chdir="infra/${{ matrix.root_module_subdir }}" plan -input=false -detailed-exitcode ${{ 
matrix.extra_params }} + terraform -chdir="infra/${{ matrix.root_module_subdir }}" plan -input=false -detailed-exitcode ${{ matrix.extra_params }} + echo "::endgroup::" + env: + TF_IN_AUTOMATION: "true" diff --git a/.github/workflows/ci-app-pr-environment-checks.yml b/.github/workflows/ci-app-pr-environment-checks.yml new file mode 100644 index 00000000..345bddd8 --- /dev/null +++ b/.github/workflows/ci-app-pr-environment-checks.yml @@ -0,0 +1,21 @@ +name: CI App PR Environment Checks +on: + workflow_dispatch: + inputs: + pr_number: + required: true + type: string + commit_hash: + required: true + type: string + # !! Uncomment the following lines once you've set up the dev environment and are ready to enable PR environments + # pull_request: +jobs: + update: + name: " " # GitHub UI is noisy when calling reusable workflows, so use whitespace for name to reduce noise + uses: ./.github/workflows/pr-environment-checks.yml + with: + app_name: "app" + environment: "dev" + pr_number: ${{ inputs.pr_number || github.event.number }} + commit_hash: ${{ inputs.commit_hash || github.event.pull_request.head.sha }} diff --git a/.github/workflows/ci-app-pr-environment-destroy.yml b/.github/workflows/ci-app-pr-environment-destroy.yml new file mode 100644 index 00000000..ad4cbb01 --- /dev/null +++ b/.github/workflows/ci-app-pr-environment-destroy.yml @@ -0,0 +1,18 @@ +name: CI App PR Environment Destroy +on: + workflow_dispatch: + inputs: + pr_number: + required: true + type: string + # !! Uncomment the following lines once you've set up the dev environment and are ready to enable PR environments + # pull_request: + # types: [closed] +jobs: + destroy: + name: " " # GitHub UI is noisy when calling reusable workflows, so use whitespace for name to reduce noise + uses: ./.github/workflows/pr-environment-destroy.yml + with: + app_name: "app" + environment: "dev" + pr_number: ${{ inputs.pr_number || github.event.number }} diff --git a/.github/workflows/ci-docs.yml b/.github/workflows/ci-docs.yml index 1700033e..93ac1e3a 100644 --- a/.github/workflows/ci-docs.yml +++ b/.github/workflows/ci-docs.yml @@ -14,9 +14,9 @@ jobs: name: Lint markdown runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # This is the GitHub Actions-friendly port of the linter used in the Makefile. - uses: gaurav-nelson/github-action-markdown-link-check@1.0.15 with: - use-quiet-mode: 'yes' # errors only. - config-file: '.github/workflows/markdownlint-config.json' + use-quiet-mode: "yes" # errors only. 
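Since the pull_request triggers ship commented out, the PR-environment workflows above can be exercised manually while the dev environment is being set up; for example (the PR number below is a placeholder):

    # manually dispatch a PR environment update for an open pull request
    gh workflow run ci-app-pr-environment-checks.yml \
      --field pr_number=123 \
      --field commit_hash="$(git rev-parse HEAD)"   # or the PR's head commit sha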
+ config-file: ".github/workflows/markdownlint-config.json" diff --git a/.github/workflows/ci-infra-service.yml b/.github/workflows/ci-infra-service.yml index 6e3bed7c..7c486c99 100644 --- a/.github/workflows/ci-infra-service.yml +++ b/.github/workflows/ci-infra-service.yml @@ -28,7 +28,7 @@ jobs: id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: hashicorp/setup-terraform@v2 with: diff --git a/.github/workflows/ci-infra.yml b/.github/workflows/ci-infra.yml index f7f9a4bf..d9e4698d 100644 --- a/.github/workflows/ci-infra.yml +++ b/.github/workflows/ci-infra.yml @@ -21,7 +21,7 @@ jobs: name: Lint GitHub Actions workflows runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Download actionlint id: get_actionlint run: bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash) @@ -33,14 +33,14 @@ jobs: name: Lint scripts runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Shellcheck run: make infra-lint-scripts check-terraform-format: name: Check Terraform format runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: hashicorp/setup-terraform@v2 with: terraform_version: 1.8.3 @@ -53,7 +53,7 @@ jobs: name: Validate Terraform modules runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: hashicorp/setup-terraform@v2 with: terraform_version: 1.8.3 @@ -64,7 +64,7 @@ jobs: name: Check compliance with checkov runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: python-version: "3.10" @@ -88,7 +88,7 @@ jobs: pull-requests: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Run tfsec check uses: aquasecurity/tfsec-pr-commenter-action@v1.2.0 with: diff --git a/.github/workflows/database-migrations.yml b/.github/workflows/database-migrations.yml index 669d0bc0..12185189 100644 --- a/.github/workflows/database-migrations.yml +++ b/.github/workflows/database-migrations.yml @@ -31,7 +31,7 @@ jobs: id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Terraform uses: ./.github/actions/setup-terraform diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 4ce48e8c..6e37dffb 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -34,7 +34,7 @@ jobs: contents: read id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Terraform uses: ./.github/actions/setup-terraform diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml new file mode 100644 index 00000000..8b3c9ffe --- /dev/null +++ b/.github/workflows/e2e-tests.yml @@ -0,0 +1,37 @@ +name: E2E Tests + +on: + workflow_call: + inputs: + service_endpoint: + required: true + type: string + app_name: + required: false + type: string + +jobs: + e2e: + name: " " # GitHub UI is noisy when calling reusable workflows, so use whitespace for name to reduce noise + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install Playwright browsers + run: make e2e-setup-ci + + - name: Run e2e tests + run: make e2e-test APP_NAME=${{ inputs.app_name }} BASE_URL=${{ inputs.service_endpoint }} + + - name: Upload Playwright report + 
uses: actions/upload-artifact@v4 + with: + name: playwright-report + path: ./e2e/playwright-report diff --git a/.github/workflows/pr-environment-checks.yml b/.github/workflows/pr-environment-checks.yml new file mode 100644 index 00000000..9e0fcbe7 --- /dev/null +++ b/.github/workflows/pr-environment-checks.yml @@ -0,0 +1,73 @@ +name: PR Environment Update +run-name: Update PR Environment ${{ inputs.pr_number }} +on: + workflow_call: + inputs: + app_name: + required: true + type: string + environment: + required: true + type: string + pr_number: + required: true + type: string + commit_hash: + required: true + type: string +jobs: + build-and-publish: + name: " " # GitHub UI is noisy when calling reusable workflows, so use whitespace for name to reduce noise + uses: ./.github/workflows/build-and-publish.yml + with: + app_name: ${{ inputs.app_name }} + ref: ${{ inputs.commit_hash }} + + update: + name: Update environment + needs: [build-and-publish] + runs-on: ubuntu-latest + + permissions: + contents: read + id-token: write + pull-requests: write # Needed to comment on PR + repository-projects: read # Workaround for GitHub CLI bug https://github.com/cli/cli/issues/6274 + + concurrency: pr-environment-${{ inputs.pr_number }} + + outputs: + service_endpoint: ${{ steps.update-environment.outputs.service_endpoint }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v2 + with: + terraform_version: 1.8.3 + terraform_wrapper: false + + - name: Configure AWS credentials + uses: ./.github/actions/configure-aws-credentials + with: + app_name: ${{ inputs.app_name }} + environment: ${{ inputs.environment }} + + - name: Update environment + id: update-environment + run: | + ./bin/update-pr-environment "${{ inputs.app_name }}" "${{ inputs.environment }}" "${{ inputs.pr_number }}" "${{ inputs.commit_hash }}" + service_endpoint=$(terraform -chdir="infra/${{ inputs.app_name }}/service" output -raw service_endpoint) + echo "service_endpoint=${service_endpoint}" + echo "service_endpoint=${service_endpoint}" >> "$GITHUB_OUTPUT" + env: + GH_TOKEN: ${{ github.token }} + + e2e-tests: + name: Run E2E Tests + needs: [update] + uses: ./.github/workflows/e2e-tests.yml + with: + service_endpoint: ${{ needs.update.outputs.service_endpoint }} + app_name: ${{ inputs.app_name }} diff --git a/.github/workflows/pr-environment-destroy.yml b/.github/workflows/pr-environment-destroy.yml new file mode 100644 index 00000000..346b8c11 --- /dev/null +++ b/.github/workflows/pr-environment-destroy.yml @@ -0,0 +1,46 @@ +name: PR Environment Destroy +run-name: Destroy PR Environment ${{ inputs.pr_number }} +on: + workflow_call: + inputs: + app_name: + required: true + type: string + environment: + required: true + type: string + pr_number: + required: true + type: string +jobs: + destroy: + name: Destroy environment + runs-on: ubuntu-latest + + permissions: + contents: read + id-token: write + pull-requests: write # Needed to comment on PR + repository-projects: read # Workaround for GitHub CLI bug https://github.com/cli/cli/issues/6274 + + concurrency: pr-environment-${{ inputs.pr_number }} + + steps: + - uses: actions/checkout@v4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v2 + with: + terraform_version: 1.8.3 + terraform_wrapper: false + + - name: Configure AWS credentials + uses: ./.github/actions/configure-aws-credentials + with: + app_name: ${{ inputs.app_name }} + environment: ${{ inputs.environment }} + + - name: Destroy environment + run: 
./bin/destroy-pr-environment "${{ inputs.app_name }}" "${{ inputs.environment }}" "${{ inputs.pr_number }}" + env: + GH_TOKEN: ${{ github.token }} diff --git a/.github/workflows/vulnerability-scans.yml b/.github/workflows/vulnerability-scans.yml index 53e5968f..232f34b6 100644 --- a/.github/workflows/vulnerability-scans.yml +++ b/.github/workflows/vulnerability-scans.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # Scans Dockerfile for any bad practices or issues - name: Scan Dockerfile by hadolint @@ -37,7 +37,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build and tag Docker image for scanning id: build-image @@ -67,7 +67,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build and tag Docker image for scanning id: build-image @@ -91,7 +91,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build and tag Docker image for scanning id: build-image diff --git a/.template-version b/.template-version index 5b17916e..ab6d47de 100644 --- a/.template-version +++ b/.template-version @@ -1 +1 @@ -a16c6247afc979c69511316c89bc79d940362476 +9616fcf8f156206aea4c3cb0a81459d7becef1ef diff --git a/Makefile b/Makefile index 7812952f..29424d56 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ PROJECT_ROOT ?= $(notdir $(PWD)) -# Use `=` instead of `:=` so that we only execute `./bin/current-account-alias.sh` when needed +# Use `=` instead of `:=` so that we only execute `./bin/current-account-alias` when needed # See https://www.gnu.org/software/make/manual/html_node/Flavors.html#Flavors -CURRENT_ACCOUNT_ALIAS = `./bin/current-account-alias.sh` +CURRENT_ACCOUNT_ALIAS = `./bin/current-account-alias` -CURRENT_ACCOUNT_ID = $(./bin/current-account-id.sh) +CURRENT_ACCOUNT_ID = $(./bin/current-account-id) # Get the list of reusable terraform modules by getting out all the modules # in infra/modules and then stripping out the "infra/modules/" prefix @@ -32,6 +32,7 @@ __check_defined = \ infra-check-compliance-checkov \ infra-check-compliance-tfsec \ infra-check-compliance \ + infra-check-github-actions-auth \ infra-configure-app-build-repository \ infra-configure-app-database \ infra-configure-app-service \ @@ -42,6 +43,7 @@ __check_defined = \ infra-lint-scripts \ infra-lint-terraform \ infra-lint-workflows \ + infra-module-database-role-manager \ infra-set-up-account \ infra-test-service \ infra-update-app-build-repository \ @@ -57,40 +59,40 @@ __check_defined = \ release-image-name \ release-image-tag \ release-publish \ - release-run-database-migrations - - + release-run-database-migrations \ + e2e-setup \ + e2e-test infra-set-up-account: ## Configure and create resources for current AWS profile and save tfbackend file to infra/accounts/$ACCOUNT_NAME.ACCOUNT_ID.s3.tfbackend @:$(call check_defined, ACCOUNT_NAME, human readable name for account e.g. 
"prod" or the AWS account alias) - ./bin/set-up-current-account.sh $(ACCOUNT_NAME) + ./bin/set-up-current-account $(ACCOUNT_NAME) infra-configure-network: ## Configure network $NETWORK_NAME @:$(call check_defined, NETWORK_NAME, the name of the network in /infra/networks) - ./bin/create-tfbackend.sh infra/networks $(NETWORK_NAME) + ./bin/create-tfbackend infra/networks $(NETWORK_NAME) infra-configure-app-build-repository: ## Configure infra/$APP_NAME/build-repository tfbackend and tfvars files @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) - ./bin/create-tfbackend.sh "infra/$(APP_NAME)/build-repository" shared + ./bin/create-tfbackend "infra/$(APP_NAME)/build-repository" shared infra-configure-app-database: ## Configure infra/$APP_NAME/database module's tfbackend and tfvars files for $ENVIRONMENT @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @:$(call check_defined, ENVIRONMENT, the name of the application environment e.g. "prod" or "staging") - ./bin/create-tfbackend.sh "infra/$(APP_NAME)/database" "$(ENVIRONMENT)" + ./bin/create-tfbackend "infra/$(APP_NAME)/database" "$(ENVIRONMENT)" infra-configure-monitoring-secrets: ## Set $APP_NAME's incident management service integration URL for $ENVIRONMENT @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @:$(call check_defined, ENVIRONMENT, the name of the application environment e.g. "prod" or "staging") @:$(call check_defined, URL, incident management service (PagerDuty or VictorOps) integration URL) - ./bin/configure-monitoring-secret.sh $(APP_NAME) $(ENVIRONMENT) $(URL) + ./bin/configure-monitoring-secret $(APP_NAME) $(ENVIRONMENT) $(URL) infra-configure-app-service: ## Configure infra/$APP_NAME/service module's tfbackend and tfvars files for $ENVIRONMENT @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @:$(call check_defined, ENVIRONMENT, the name of the application environment e.g. 
"prod" or "staging") - ./bin/create-tfbackend.sh "infra/$(APP_NAME)/service" "$(ENVIRONMENT)" + ./bin/create-tfbackend "infra/$(APP_NAME)/service" "$(ENVIRONMENT)" infra-update-current-account: ## Update infra resources for current AWS profile - ./bin/terraform-init-and-apply.sh infra/accounts `./bin/current-account-config-name.sh` + ./bin/terraform-init-and-apply infra/accounts `./bin/current-account-config-name` infra-update-network: ## Update network @:$(call check_defined, NETWORK_NAME, the name of the network in /infra/networks) @@ -99,7 +101,7 @@ infra-update-network: ## Update network infra-update-app-build-repository: ## Create or update $APP_NAME's build repository @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) - ./bin/terraform-init-and-apply.sh infra/$(APP_NAME)/build-repository shared + ./bin/terraform-init-and-apply infra/$(APP_NAME)/build-repository shared infra-update-app-database: ## Create or update $APP_NAME's database module for $ENVIRONMENT @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @@ -107,10 +109,14 @@ infra-update-app-database: ## Create or update $APP_NAME's database module for $ terraform -chdir="infra/$(APP_NAME)/database" init -input=false -reconfigure -backend-config="$(ENVIRONMENT).s3.tfbackend" terraform -chdir="infra/$(APP_NAME)/database" apply -var="environment_name=$(ENVIRONMENT)" +infra-module-database-role-manager-archive: ## Build/rebuild role manager code package for Lambda deploys + pip3 install -r infra/modules/database/role_manager/requirements.txt -t infra/modules/database/role_manager/vendor --upgrade + zip -r infra/modules/database/role_manager.zip infra/modules/database/role_manager + infra-update-app-database-roles: ## Create or update database roles and schemas for $APP_NAME's database in $ENVIRONMENT @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @:$(call check_defined, ENVIRONMENT, the name of the application environment e.g. "prod" or "staging") - ./bin/create-or-update-database-roles.sh $(APP_NAME) $(ENVIRONMENT) + ./bin/create-or-update-database-roles $(APP_NAME) $(ENVIRONMENT) infra-update-app-service: ## Create or update $APP_NAME's web service module @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @@ -131,11 +137,16 @@ infra-validate-module-%: infra-check-app-database-roles: ## Check that app database roles have been configured properly @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @:$(call check_defined, ENVIRONMENT, the name of the application environment e.g. 
"prod" or "staging") - ./bin/check-database-roles.sh $(APP_NAME) $(ENVIRONMENT) + ./bin/check-database-roles $(APP_NAME) $(ENVIRONMENT) infra-check-compliance: ## Run compliance checks infra-check-compliance: infra-check-compliance-checkov infra-check-compliance-tfsec +infra-check-github-actions-auth: ## Check that GitHub actions can authenticate to the AWS account + @:$(call check_defined, ACCOUNT_NAME, the name of account in infra/accounts) + ./bin/check-github-actions-auth $(ACCOUNT_NAME) + + infra-check-compliance-checkov: ## Run checkov compliance checks checkov --directory infra @@ -161,7 +172,7 @@ infra-test-service: ## Run service layer infra test suite cd infra/test && go test -run TestService -v -timeout 30m lint-markdown: ## Lint Markdown docs for broken links - ./bin/lint-markdown.sh + ./bin/lint-markdown ######################## ## Release Management ## @@ -192,17 +203,17 @@ release-build: ## Build release for $APP_NAME and tag it with current git hash release-publish: ## Publish release to $APP_NAME's build repository @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) - ./bin/publish-release.sh $(APP_NAME) $(IMAGE_NAME) $(IMAGE_TAG) + ./bin/publish-release $(APP_NAME) $(IMAGE_NAME) $(IMAGE_TAG) release-run-database-migrations: ## Run $APP_NAME's database migrations in $ENVIRONMENT @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @:$(call check_defined, ENVIRONMENT, the name of the application environment e.g. "prod" or "dev") - ./bin/run-database-migrations.sh $(APP_NAME) $(IMAGE_TAG) $(ENVIRONMENT) + ./bin/run-database-migrations $(APP_NAME) $(IMAGE_TAG) $(ENVIRONMENT) release-deploy: ## Deploy release to $APP_NAME's web service in $ENVIRONMENT @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @:$(call check_defined, ENVIRONMENT, the name of the application environment e.g. 
"prod" or "dev") - ./bin/deploy-release.sh $(APP_NAME) $(IMAGE_TAG) $(ENVIRONMENT) + ./bin/deploy-release $(APP_NAME) $(IMAGE_TAG) $(ENVIRONMENT) release-image-name: ## Prints the image name of the release image @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @@ -211,6 +222,28 @@ release-image-name: ## Prints the image name of the release image release-image-tag: ## Prints the image tag of the release image @echo $(IMAGE_TAG) +############################## +## End-to-end (E2E) Testing ## +############################## + +e2e-setup: ## Setup end-to-end tests + @cd e2e && npm install + @cd e2e && npx playwright install --with-deps + +e2e-setup-ci: ## Install system dependencies, Node dependencies, and Playwright browsers + sudo apt-get update + sudo apt-get install -y libwoff1 libopus0 libvpx7 libevent-2.1-7 libopus0 libgstreamer1.0-0 \ + libgstreamer-plugins-base1.0-0 libgstreamer-plugins-good1.0-0 libharfbuzz-icu0 libhyphen0 \ + libenchant-2-2 libflite1 libgles2 libx264-dev + cd e2e && npm ci + cd e2e && npx playwright install --with-deps + + +e2e-test: ## Run end-to-end tests + @:$(call check_defined, APP_NAME, You must pass in a specific APP_NAME) + @:$(call check_defined, BASE_URL, You must pass in a BASE_URL) + @cd e2e/$(APP_NAME) && APP_NAME=$(APP_NAME) BASE_URL=$(BASE_URL) npx playwright test $(E2E_ARGS) + ######################## ## Scripts and Helper ## ######################## diff --git a/app/app/controllers/webhooks/pinwheel/events_controller.rb b/app/app/controllers/webhooks/pinwheel/events_controller.rb index 9745aadd..c2bd4122 100644 --- a/app/app/controllers/webhooks/pinwheel/events_controller.rb +++ b/app/app/controllers/webhooks/pinwheel/events_controller.rb @@ -15,6 +15,7 @@ def create PinwheelAccount .create_with(cbv_flow: @cbv_flow, supported_jobs: supported_jobs) .find_or_create_by(pinwheel_account_id: params["payload"]["account_id"]) + track_account_created_event(@cbv_flow, params["payload"]["platform_name"]) end if PinwheelAccount::EVENTS_MAP.keys.include?(params["event"]) @@ -26,7 +27,7 @@ def create end if pinwheel_account.has_fully_synced? 
- track_event(@cbv_flow, pinwheel_account) + track_account_synced_event(@cbv_flow, pinwheel_account) PaystubsChannel.broadcast_to(@cbv_flow, { event: "cbv.payroll_data_available", @@ -52,7 +53,7 @@ def authorize_webhook end end - def track_event(cbv_flow, pinwheel_account) + def track_account_synced_event(cbv_flow, pinwheel_account) NewRelicEventTracker.track("PinwheelAccountSyncFinished", { cbv_flow_id: cbv_flow.id, identity_success: pinwheel_account.job_succeeded?("identity"), @@ -69,6 +70,13 @@ def track_event(cbv_flow, pinwheel_account) Rails.logger.error "Unable to track NewRelic event (PinwheelAccountSyncFinished): #{ex}" end + def track_account_created_event(cbv_flow, platform_name) + NewRelicEventTracker.track("PinwheelAccountCreated", { + cbv_flow_id: cbv_flow.id, + platform_name: platform_name + }) + end + def set_cbv_flow @cbv_flow = CbvFlow.find_by_pinwheel_end_user_id(params["payload"]["end_user_id"]) end diff --git a/app/spec/controllers/webhooks/pinwheel/events_controller_spec.rb b/app/spec/controllers/webhooks/pinwheel/events_controller_spec.rb index 3503873b..d35a2cb0 100644 --- a/app/spec/controllers/webhooks/pinwheel/events_controller_spec.rb +++ b/app/spec/controllers/webhooks/pinwheel/events_controller_spec.rb @@ -38,11 +38,18 @@ { "platform_id" => "00000000-0000-0000-0000-000000011111", "end_user_id" => cbv_flow.pinwheel_end_user_id, - "account_id" => account_id + "account_id" => account_id, + "platform_name" => "acme" } end it "creates a PinwheelAccount object" do + expect(NewRelicEventTracker).to receive(:track) + .with("PinwheelAccountCreated", { + cbv_flow_id: cbv_flow.id, + platform_name: "acme" + }) + expect do post :create, params: valid_params end.to change(PinwheelAccount, :count).by(1) diff --git a/app/spec/mailers/weekly_report_mailer_spec.rb b/app/spec/mailers/weekly_report_mailer_spec.rb index 58ce08a7..411fa8a9 100644 --- a/app/spec/mailers/weekly_report_mailer_spec.rb +++ b/app/spec/mailers/weekly_report_mailer_spec.rb @@ -2,6 +2,8 @@ require 'csv' RSpec.describe WeeklyReportMailer, type: :mailer do + include ActiveSupport::Testing::TimeHelpers + let(:now) { DateTime.new(2024, 9, 9, 9, 0, 0, "-04:00") } let(:site_id) { "nyc" } let(:invitation_sent_at) { now - 5.days } @@ -37,6 +39,10 @@ CSV.parse(mail.attachments.first.body.encoded, headers: :first_row).map(&:to_h) end + before do + travel_to(now) + end + it "renders the subject" do expect(mail.subject).to eq("CBV Pilot - Weekly Report Email") end diff --git a/bin/account-ids-by-name b/bin/account-ids-by-name new file mode 100755 index 00000000..0169934e --- /dev/null +++ b/bin/account-ids-by-name @@ -0,0 +1,23 @@ +#!/bin/bash +# Prints a JSON dictionary that maps account names to account ids for the list +# of accounts given by the terraform backend files of the form +# ..s3.tfbackend in the infra/accounts directory. +set -euo pipefail + +# We use script dir to make this script agnostic to where it's called from. +# This is needed since this script its called from infra//build-repository +# in an external data source +script_dir=$(dirname "$0") + +key_value_pairs=() +backend_config_file_paths=$(ls -1 "${script_dir}"/../infra/accounts/*.*.s3.tfbackend) + +for backend_config_file_path in ${backend_config_file_paths}; do + backend_config_file=$(basename "${backend_config_file_path}") + backend_config_name="${backend_config_file/.s3.tfbackend/}" + IFS='.' 
read -r account_name account_id <<< "${backend_config_name}" + key_value_pairs+=("\"${account_name}\":\"${account_id}\"") +done + +IFS="," +echo "{${key_value_pairs[*]}}" diff --git a/bin/account-ids-by-name.sh b/bin/account-ids-by-name.sh deleted file mode 100755 index a12df150..00000000 --- a/bin/account-ids-by-name.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -# Prints a JSON dictionary that maps account names to account ids for the list -# of accounts given by the terraform backend files of the form -# ..s3.tfbackend in the infra/accounts directory. -set -euo pipefail - - -# We use script dir to make this script agnostic to where it's called from. -# This is needed since this script its called from infra//build-repository -# in an external data source -SCRIPT_DIR=$(dirname "$0") - -KEY_VALUE_PAIRS=() -BACKEND_CONFIG_FILE_PATHS=$(ls -1 "$SCRIPT_DIR"/../infra/accounts/*.*.s3.tfbackend) - -for BACKEND_CONFIG_FILE_PATH in $BACKEND_CONFIG_FILE_PATHS; do - BACKEND_CONFIG_FILE=$(basename "$BACKEND_CONFIG_FILE_PATH") - BACKEND_CONFIG_NAME="${BACKEND_CONFIG_FILE/.s3.tfbackend/}" - IFS='.' read -r ACCOUNT_NAME ACCOUNT_ID <<< "$BACKEND_CONFIG_NAME" - KEY_VALUE_PAIRS+=("\"$ACCOUNT_NAME\":\"$ACCOUNT_ID\"") -done - -IFS="," -echo "{${KEY_VALUE_PAIRS[*]}}" diff --git a/bin/check-database-roles b/bin/check-database-roles new file mode 100755 index 00000000..50d06963 --- /dev/null +++ b/bin/check-database-roles @@ -0,0 +1,53 @@ +#!/bin/bash +# ----------------------------------------------------------------------------- +# Script that invokes the database role-manager AWS Lambda function to check +# that the Postgres users were configured properly. +# +# Positional parameters: +# app_name (required) – the name of subdirectory of /infra that holds the +# application's infrastructure code. +# environment (required) - the name of the application environment (e.g. 
dev, +# staging, prod) +# ----------------------------------------------------------------------------- +set -euo pipefail + +app_name="$1" +environment="$2" + +terraform -chdir="infra/${app_name}/app-config" init > /dev/null +terraform -chdir="infra/${app_name}/app-config" apply -auto-approve > /dev/null +./bin/terraform-init "infra/${app_name}/database" "${environment}" +db_role_manager_function_name=$(terraform -chdir="infra/${app_name}/database" output -raw role_manager_function_name) +db_config=$(terraform -chdir="infra/${app_name}/app-config" output -json environment_configs | jq -r ".${environment}.database_config") +payload="{\"action\":\"check\",\"config\":${db_config}}" + +echo "=======================" +echo "Checking database roles" +echo "=======================" +echo "Input parameters" +echo " app_name=${app_name}" +echo " environment=${environment}" +echo +echo "Invoking Lambda function: ${db_role_manager_function_name}" +echo " Payload: ${payload}" +echo +cli_response=$(aws lambda invoke \ + --function-name "${db_role_manager_function_name}" \ + --no-cli-pager \ + --log-type Tail \ + --payload "$(echo -n "${payload}" | base64)" \ + --output json \ + response.json) + +# Print logs out (they are returned base64 encoded) +echo "${cli_response}" | jq -r '.LogResult' | base64 --decode +echo +echo "Lambda function response:" +cat response.json +rm response.json + +# Exit with nonzero status if function failed +function_error=$(echo "${cli_response}" | jq -r '.FunctionError') +if [ "${function_error}" != "null" ]; then + exit 1 +fi diff --git a/bin/check-database-roles.sh b/bin/check-database-roles.sh deleted file mode 100755 index fd192617..00000000 --- a/bin/check-database-roles.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -# ----------------------------------------------------------------------------- -# Script that invokes the database role-manager AWS Lambda function to check -# that the Postgres users were configured properly. -# -# Positional parameters: -# APP_NAME (required) – the name of subdirectory of /infra that holds the -# application's infrastructure code. -# ENVIRONMENT (required) - the name of the application environment (e.g. 
dev -# staging, prod) -# ----------------------------------------------------------------------------- -set -euo pipefail - -APP_NAME=$1 -ENVIRONMENT=$2 - -./bin/terraform-init.sh "infra/$APP_NAME/database" "$ENVIRONMENT" -DB_ROLE_MANAGER_FUNCTION_NAME=$(terraform -chdir="infra/$APP_NAME/database" output -raw role_manager_function_name) - -echo "=======================" -echo "Checking database roles" -echo "=======================" -echo "Input parameters" -echo " APP_NAME=$APP_NAME" -echo " ENVIRONMENT=$ENVIRONMENT" -echo -echo "Invoking Lambda function: $DB_ROLE_MANAGER_FUNCTION_NAME" -echo -CLI_RESPONSE=$(aws lambda invoke \ - --function-name "$DB_ROLE_MANAGER_FUNCTION_NAME" \ - --no-cli-pager \ - --log-type Tail \ - --payload "$(echo -n '"check"' | base64)" \ - --output json \ - response.json) - -# Print logs out (they are returned base64 encoded) -echo "$CLI_RESPONSE" | jq -r '.LogResult' | base64 --decode -echo -echo "Lambda function response:" -cat response.json -rm response.json - -# Exit with nonzero status if function failed -FUNCTION_ERROR=$(echo "$CLI_RESPONSE" | jq -r '.FunctionError') -if [ "$FUNCTION_ERROR" != "null" ]; then - exit 1 -fi diff --git a/bin/check-github-actions-auth b/bin/check-github-actions-auth new file mode 100755 index 00000000..202d5fbd --- /dev/null +++ b/bin/check-github-actions-auth @@ -0,0 +1,94 @@ +#!/bin/bash +# ----------------------------------------------------------------------------- +# This script configures the database module for the specified application +# and environment by creating the .tfvars file and .tfbackend file for the module. +# +# Positional parameters: +# account_name (required) – the name of AWS account name in infra/accounts +# ----------------------------------------------------------------------------- +set -euo pipefail + +account_name="$1" + +# This is used later to determine the run id of the workflow run +# See comment below about "Getting workflow run id" +prev_run_create_time=$(gh run list --workflow check-ci-cd-auth.yml --limit 1 --json createdAt --jq ".[].createdAt") + +code_repository=$(terraform -chdir="infra/project-config" output --raw code_repository) + +echo "=========================" +echo "Check GitHub Actions Auth" +echo "=========================" +echo "Input parameters" +echo " account_name=${account_name}" +echo + +# Get AWS account authentication details (AWS account, IAM role, AWS region) +echo "::group::AWS account authentication details" + +terraform -chdir="infra/project-config" init > /dev/null +terraform -chdir="infra/project-config" apply -auto-approve > /dev/null +aws_region=$(terraform -chdir="infra/project-config" output -raw default_region) +echo "aws_region=${aws_region}" +github_actions_role_name=$(terraform -chdir="infra/project-config" output -raw github_actions_role_name) +echo "github_actions_role_name=${github_actions_role_name}" + +# Get the account id associated with the account name extracting the +# account_id part of the tfbackend file name which looks like +# ..s3.tfbackend. +# The cut command splits the string with period as the delimiter and +# extracts the second field. +account_id=$(find "infra/accounts/${account_name}."*.s3.tfbackend | cut -d. 
-f2) +echo "account_id=${account_id}" + +aws_role_to_assume="arn:aws:iam::${account_id}:role/${github_actions_role_name}" +echo "aws_role_to_assume=${aws_role_to_assume}" + +echo "::endgroup::" + +################## +## Run workflow ## +################## + +gh workflow run check-ci-cd-auth.yml --field "aws_region=${aws_region}" --field "role_to_assume=${aws_role_to_assume}" + +######################### +## Get workflow run id ## +######################### + +echo "Get workflow run id" +# The following commands aims to get the workflow run id of the run that was +# just triggered by the previous workflow dispatch event. There's currently no +# simple and reliable way to do this, so for now we are going to accept that +# there is a race condition. +# +# The current implementation involves getting the create time of the previous +# run. Then continuously checking the list of workflow runs until we see a +# newly created run. Then we get the id of this new run. +# +# References: +# * This stack overflow article suggests a complicated overengineered approach: +# https://stackoverflow.com/questions/69479400/get-run-id-after-triggering-a-github-workflow-dispatch-event +# * This GitHub community discussion also requests this feature: +# https://github.com/orgs/community/discussions/17389 + +echo "Previous workflow run created at ${prev_run_create_time}" +echo "Check workflow run create time until we find a newer workflow run" +while : ; do + echo -n "." + run_create_time=$(gh run list --workflow check-ci-cd-auth.yml --limit 1 --json createdAt --jq ".[].createdAt") + [[ "${run_create_time}" > "${prev_run_create_time}" ]] && break +done +echo "Found newer workflow run created at ${run_create_time}" + +echo "Get id of workflow run" +workflow_run_id=$(gh run list --workflow check-ci-cd-auth.yml --limit 1 --json databaseId --jq ".[].databaseId") +echo "Workflow run id: ${workflow_run_id}" + +workflow_run_url="https://github.com/${code_repository}/actions/runs/${workflow_run_id}" +echo "See run logs at:" +echo " ${workflow_run_url}" + +echo "Watch workflow run until it exits" +# --exit-status causes command to exit with non-zero status if run fails +gh run watch "${workflow_run_id}" --exit-status diff --git a/bin/check-github-actions-auth.sh b/bin/check-github-actions-auth.sh deleted file mode 100755 index 2c5ec695..00000000 --- a/bin/check-github-actions-auth.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -set -euo pipefail - -GITHUB_ACTIONS_ROLE=$1 - -# This is used later to determine the run id of the workflow run -# See comment below about "Getting workflow run id" -PREV_RUN_CREATE_TIME=$(gh run list --workflow check-infra-auth.yml --limit 1 --json createdAt --jq ".[].createdAt") - -echo "Run check-infra-auth workflow with role_to_assume=$GITHUB_ACTIONS_ROLE" -gh workflow run check-infra-auth.yml --field role_to_assume="$GITHUB_ACTIONS_ROLE" - -######################### -## Get workflow run id ## -######################### - -echo "Get workflow run id" -# The following commands aims to get the workflow run id of the run that was -# just triggered by the previous workflow dispatch event. There's currently no -# simple and reliable way to do this, so for now we are going to accept that -# there is a race condition. -# -# The current implementation involves getting the create time of the previous -# run. Then continuously checking the list of workflow runs until we see a -# newly created run. Then we get the id of this new run. 
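The polling loop relies on the fact that the ISO 8601 UTC timestamps gh returns for createdAt sort lexicographically in chronological order, so a plain string comparison with [[ ... > ... ]] is enough to detect a newer run; a quick sanity check:

    [[ "2024-09-09T12:00:05Z" > "2024-09-09T12:00:00Z" ]] && echo "newer run detected"   # prints the message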
-# -# References: -# * This stack overflow article suggests a complicated overengineered approach: -# https://stackoverflow.com/questions/69479400/get-run-id-after-triggering-a-github-workflow-dispatch-event -# * This GitHub community discussion also requests this feature: -# https://github.com/orgs/community/discussions/17389 - -echo "Previous workflow run created at $PREV_RUN_CREATE_TIME" -echo "Check workflow run create time until we find a newer workflow run" -while : ; do - echo -n "." - RUN_CREATE_TIME=$(gh run list --workflow check-infra-auth.yml --limit 1 --json createdAt --jq ".[].createdAt") - [[ $RUN_CREATE_TIME > $PREV_RUN_CREATE_TIME ]] && break -done -echo "Found newer workflow run created at $RUN_CREATE_TIME" - -echo "Get id of workflow run" -WORKFLOW_RUN_ID=$(gh run list --workflow check-infra-auth.yml --limit 1 --json databaseId --jq ".[].databaseId") -echo "Workflow run id: $WORKFLOW_RUN_ID" - -echo "Watch workflow run until it exits" -# --exit-status causes command to exit with non-zero status if run fails -gh run watch "$WORKFLOW_RUN_ID" --exit-status diff --git a/bin/configure-monitoring-secret b/bin/configure-monitoring-secret new file mode 100755 index 00000000..ed3720af --- /dev/null +++ b/bin/configure-monitoring-secret @@ -0,0 +1,43 @@ +#!/bin/bash +# ----------------------------------------------------------------------------- +# This script creates SSM parameter for storing integration URL for incident management +# services. Script creates new SSM attribute or updates existing. +# +# Positional parameters: +# app_name (required) – the name of subdirectory of /infra that holds the +# application's infrastructure code. +# environment is the name of the application environment (e.g. dev, staging, prod) +# integration_endpoint_url is the url for the integration endpoint for external +# incident management services (e.g. Pagerduty, Splunk-On-Call) +# ----------------------------------------------------------------------------- +set -euo pipefail + +app_name="$1" +environment="$2" +integration_endpoint_url="$3" + +terraform -chdir="infra/${app_name}/app-config" init > /dev/null +terraform -chdir="infra/${app_name}/app-config" apply -auto-approve > /dev/null + +has_incident_management_service=$(terraform -chdir="infra/${app_name}/app-config" output -raw has_incident_management_service) +if [ "${has_incident_management_service}" = "false" ]; then + echo "Application does not have incident management service, no secret to create" + exit 0 +fi + +secret_name=$(terraform -chdir="infra/${app_name}/app-config" output -json environment_configs | jq -r ".${environment}.incident_management_service_integration.integration_url_param_name") + +echo "=====================" +echo "Setting up SSM secret" +echo "=====================" +echo "app_name=${app_name}" +echo "environment=${environment}" +echo "integration_endpoint_url=${integration_endpoint_url}" +echo +echo "Creating SSM secret: ${secret_name}" + +aws ssm put-parameter \ + --name "${secret_name}" \ + --value "${integration_endpoint_url}" \ + --type SecureString \ + --overwrite diff --git a/bin/configure-monitoring-secret.sh b/bin/configure-monitoring-secret.sh deleted file mode 100755 index 7dd7600d..00000000 --- a/bin/configure-monitoring-secret.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -# ----------------------------------------------------------------------------- -# This script creates SSM parameter for storing integration URL for incident management -# services. 
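After configure-monitoring-secret runs, the stored value can be spot-checked with the AWS CLI; the parameter name below is a placeholder, and the real one is read from the app-config environment_configs output:

    aws ssm get-parameter \
      --name "/example/incident-management/integration-url" \
      --with-decryption \
      --query "Parameter.Value" \
      --output text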
Script creates new SSM attribute or updates existing. -# -# Positional parameters: -# APP_NAME (required) – the name of subdirectory of /infra that holds the -# application's infrastructure code. -# ENVIRONMENT is the name of the application environment (e.g. dev, staging, prod) -# INTEGRATION_ENDPOINT_URL is the url for the integration endpoint for external -# incident management services (e.g. Pagerduty, Splunk-On-Call) -# ----------------------------------------------------------------------------- -set -euo pipefail - -APP_NAME=$1 -ENVIRONMENT=$2 -INTEGRATION_ENDPOINT_URL=$3 - -terraform -chdir="infra/$APP_NAME/app-config" init > /dev/null -terraform -chdir="infra/$APP_NAME/app-config" apply -auto-approve > /dev/null - -HAS_INCIDENT_MANAGEMENT_SERVICE=$(terraform -chdir="infra/$APP_NAME/app-config" output -raw has_incident_management_service) -if [ "$HAS_INCIDENT_MANAGEMENT_SERVICE" = "false" ]; then - echo "Application does not have incident management service, no secret to create" - exit 0 -fi - -SECRET_NAME=$(terraform -chdir="infra/$APP_NAME/app-config" output -json environment_configs | jq -r ".$ENVIRONMENT.incident_management_service_integration.integration_url_param_name") - -echo "=====================" -echo "Setting up SSM secret" -echo "=====================" -echo "APPLICATION_NAME=$APP_NAME" -echo "ENVIRONMENT=$ENVIRONMENT" -echo "INTEGRATION_URL=$INTEGRATION_ENDPOINT_URL" -echo -echo "Creating SSM secret: $SECRET_NAME" - -aws ssm put-parameter \ - --name "$SECRET_NAME" \ - --value "$INTEGRATION_ENDPOINT_URL" \ - --type SecureString \ - --overwrite - diff --git a/bin/create-or-update-database-roles b/bin/create-or-update-database-roles new file mode 100755 index 00000000..a0865901 --- /dev/null +++ b/bin/create-or-update-database-roles @@ -0,0 +1,55 @@ +#!/bin/bash +# ----------------------------------------------------------------------------- +# Script that invokes the database role-manager AWS Lambda function to create +# or update the Postgres user roles for a particular environment. +# The Lambda function is created by the infra/app/database root module and is +# defined in the infra/app/database child module. +# +# Positional parameters: +# app_name (required) – the name of subdirectory of /infra that holds the +# application's infrastructure code. +# environment (required) - the name of the application environment (e.g. 
dev, +# staging, prod) +# ----------------------------------------------------------------------------- +set -euo pipefail + +app_name="$1" +environment="$2" + +terraform -chdir="infra/${app_name}/app-config" init > /dev/null +terraform -chdir="infra/${app_name}/app-config" apply -auto-approve > /dev/null +./bin/terraform-init "infra/${app_name}/database" "${environment}" +db_role_manager_function_name=$(terraform -chdir="infra/${app_name}/database" output -raw role_manager_function_name) +db_config=$(terraform -chdir="infra/${app_name}/app-config" output -json environment_configs | jq -r ".${environment}.database_config") +payload="{\"action\":\"manage\",\"config\":${db_config}}" + +echo "================================" +echo "Creating/updating database users" +echo "================================" +echo "Input parameters" +echo " app_name=${app_name}" +echo " environment=${environment}" +echo +echo "Invoking Lambda function: ${db_role_manager_function_name}" +echo " Payload: ${payload}" +echo +cli_response=$(aws lambda invoke \ + --function-name "${db_role_manager_function_name}" \ + --no-cli-pager \ + --log-type Tail \ + --payload "$(echo -n "${payload}" | base64)" \ + --output json \ + response.json) + +# Print logs out (they are returned base64 encoded) +echo "${cli_response}" | jq -r '.LogResult' | base64 --decode +echo +echo "Lambda function response:" +cat response.json +rm response.json + +# Exit with nonzero status if function failed +function_error=$(echo "${cli_response}" | jq -r '.FunctionError') +if [ "${function_error}" != "null" ]; then + exit 1 +fi diff --git a/bin/create-or-update-database-roles.sh b/bin/create-or-update-database-roles.sh deleted file mode 100755 index af98f25a..00000000 --- a/bin/create-or-update-database-roles.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -# ----------------------------------------------------------------------------- -# Script that invokes the database role-manager AWS Lambda function to create -# or update the Postgres user roles for a particular environment. -# The Lambda function is created by the infra/app/database root module and is -# defined in the infra/app/database child module. -# -# Positional parameters: -# APP_NAME (required) – the name of subdirectory of /infra that holds the -# application's infrastructure code. -# ENVIRONMENT (required) - the name of the application environment (e.g. 
dev -# staging, prod) -# ----------------------------------------------------------------------------- -set -euo pipefail - -APP_NAME=$1 -ENVIRONMENT=$2 - -./bin/terraform-init.sh "infra/$APP_NAME/database" "$ENVIRONMENT" -DB_ROLE_MANAGER_FUNCTION_NAME=$(terraform -chdir="infra/$APP_NAME/database" output -raw role_manager_function_name) - -echo "================================" -echo "Creating/updating database users" -echo "================================" -echo "Input parameters" -echo " APP_NAME=$APP_NAME" -echo " ENVIRONMENT=$ENVIRONMENT" -echo -echo "Invoking Lambda function: $DB_ROLE_MANAGER_FUNCTION_NAME" -CLI_RESPONSE=$(aws lambda invoke \ - --function-name "$DB_ROLE_MANAGER_FUNCTION_NAME" \ - --no-cli-pager \ - --log-type Tail \ - --output json \ - response.json) - -# Print logs out (they are returned base64 encoded) -echo "$CLI_RESPONSE" | jq -r '.LogResult' | base64 --decode -echo -echo "Lambda function response:" -cat response.json -rm response.json - -# Exit with nonzero status if function failed -FUNCTION_ERROR=$(echo "$CLI_RESPONSE" | jq -r '.FunctionError') -if [ "$FUNCTION_ERROR" != "null" ]; then - exit 1 -fi diff --git a/bin/create-tfbackend b/bin/create-tfbackend new file mode 100755 index 00000000..0c7f67cb --- /dev/null +++ b/bin/create-tfbackend @@ -0,0 +1,56 @@ +#!/bin/bash +# ----------------------------------------------------------------------------- +# This script creates a terraform backend config file for a terraform module. +# It is not meant to be used directly. Instead, it is called by other scripts +# that set up and configure the infra/accounts module and the infra/app/ modules +# such as infra/app/build-repository and infra/app/service +# +# Positional parameters: +# module_dir (required) - the directory of the root module that will be configured +# backend_config_name (required) - the name of the backend that will be created. +# For environment specific configs, the backend_config_name will be the same +# as environment. For shared configs, the backend_config_name will be "shared". +# tf_state_key (optional) - the S3 object key of the tfstate file in the S3 bucket +# Defaults to [module_dir]/[backend_config_name].tfstate +# ----------------------------------------------------------------------------- +set -euo pipefail + +module_dir="$1" +backend_config_name="$2" +tf_state_key="${3:-${module_dir}/${backend_config_name}.tfstate}" + +# The local tfbackend config file that will store the terraform backend config +backend_config_file="${module_dir}/${backend_config_name}.s3.tfbackend" + +# Get the name of the S3 bucket that was created to store the tf state +# and the name of the DynamoDB table that was created for tf state locks. 
+# This will be used to configure the S3 backends in all the application +# modules +tf_state_bucket_name=$(terraform -chdir="infra/accounts" output --raw tf_state_bucket_name) +tf_locks_table_name=$(terraform -chdir="infra/accounts" output --raw tf_locks_table_name) +region=$(terraform -chdir="infra/accounts" output --raw region) + +echo "====================================" +echo "Create terraform backend config file" +echo "====================================" +echo "Input parameters" +echo " module_dir=${module_dir}" +echo " backend_config_name=${backend_config_name}" +echo + +# Create output file from example file +cp infra/example.s3.tfbackend "${backend_config_file}" + +# Replace the placeholder values +sed -i.bak "s//${tf_state_bucket_name}/g" "${backend_config_file}" +sed -i.bak "s||${tf_state_key}|g" "${backend_config_file}" +sed -i.bak "s//${tf_locks_table_name}/g" "${backend_config_file}" +sed -i.bak "s//${region}/g" "${backend_config_file}" + +# Remove the backup file created by sed +rm "${backend_config_file}.bak" + +echo "Created file: ${backend_config_file}" +echo "------------------ file contents ------------------" +cat "${backend_config_file}" +echo "----------------------- end -----------------------" diff --git a/bin/create-tfbackend.sh b/bin/create-tfbackend.sh deleted file mode 100755 index b2887758..00000000 --- a/bin/create-tfbackend.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# ----------------------------------------------------------------------------- -# This script creates a terraform backend config file for a terraform module. -# It is not meant to be used directly. Instead, it is called by other scripts -# that set up and configure the infra/accounts module and the infra/app/ modules -# such as infra/app/build-repository and infra/app/service -# -# Positional parameters: -# MODULE_DIR (required) - the directory of the root module that will be configured -# BACKEND_CONFIG_NAME (required) - the name of the backend that will be created. -# For environment specific configs, the BACKEND_CONFIG_NAME will be the same -# as ENVIRONMENT. For shared configs, the BACKEND_CONFIG_NAME will be "shared". -# TF_STATE_KEY (optional) - the S3 object key of the tfstate file in the S3 bucket -# Defaults to [MODULE_DIR]/[BACKEND_CONFIG_NAME].tfstate -# ----------------------------------------------------------------------------- -set -euo pipefail - -MODULE_DIR=$1 -BACKEND_CONFIG_NAME=$2 -TF_STATE_KEY="${3:-$MODULE_DIR/$BACKEND_CONFIG_NAME.tfstate}" - -# The local tfbackend config file that will store the terraform backend config -BACKEND_CONFIG_FILE="$MODULE_DIR/$BACKEND_CONFIG_NAME.s3.tfbackend" - -# Get the name of the S3 bucket that was created to store the tf state -# and the name of the DynamoDB table that was created for tf state locks. 
-# This will be used to configure the S3 backends in all the application -# modules -TF_STATE_BUCKET_NAME=$(terraform -chdir=infra/accounts output -raw tf_state_bucket_name) -TF_LOCKS_TABLE_NAME=$(terraform -chdir=infra/accounts output -raw tf_locks_table_name) -REGION=$(terraform -chdir=infra/accounts output -raw region) - -echo "====================================" -echo "Create terraform backend config file" -echo "====================================" -echo "Input parameters" -echo " MODULE_DIR=$MODULE_DIR" -echo " BACKEND_CONFIG_NAME=$BACKEND_CONFIG_NAME" -echo - -# Create output file from example file -cp infra/example.s3.tfbackend "$BACKEND_CONFIG_FILE" - -# Replace the placeholder values -sed -i.bak "s//$TF_STATE_BUCKET_NAME/g" "$BACKEND_CONFIG_FILE" -sed -i.bak "s||$TF_STATE_KEY|g" "$BACKEND_CONFIG_FILE" -sed -i.bak "s//$TF_LOCKS_TABLE_NAME/g" "$BACKEND_CONFIG_FILE" -sed -i.bak "s//$REGION/g" "$BACKEND_CONFIG_FILE" - -# Remove the backup file created by sed -rm "$BACKEND_CONFIG_FILE.bak" - - -echo "Created file: $BACKEND_CONFIG_FILE" -echo "------------------ file contents ------------------" -cat "$BACKEND_CONFIG_FILE" -echo "----------------------- end -----------------------" diff --git a/bin/current-account-alias.sh b/bin/current-account-alias similarity index 100% rename from bin/current-account-alias.sh rename to bin/current-account-alias diff --git a/bin/current-account-config-name b/bin/current-account-config-name new file mode 100755 index 00000000..80af12e3 --- /dev/null +++ b/bin/current-account-config-name @@ -0,0 +1,12 @@ +#!/bin/bash +# Print the config name for the current AWS account +# Do this by getting the current account and searching for a file in +# infra/accounts that matches "..s3.tfbackend". +# The config name is "."" +set -euo pipefail + +current_account_id=$(./bin/current-account-id) +backend_config_file_path=$(ls -1 infra/accounts/*."${current_account_id}".s3.tfbackend) +backend_config_file=$(basename "${backend_config_file_path}") +backend_config_name="${backend_config_file/.s3.tfbackend/}" +echo "${backend_config_name}" diff --git a/bin/current-account-config-name.sh b/bin/current-account-config-name.sh deleted file mode 100755 index f7bd3b1c..00000000 --- a/bin/current-account-config-name.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -# Print the config name for the current AWS account -# Do this by getting the current account and searching for a file in -# infra/accounts that matches "..s3.tfbackend". 
-# The config name is "."" -set -euo pipefail -CURRENT_ACCOUNT_ID=$(./bin/current-account-id.sh) -BACKEND_CONFIG_FILE_PATH=$(ls -1 infra/accounts/*."$CURRENT_ACCOUNT_ID".s3.tfbackend) -BACKEND_CONFIG_FILE=$(basename "$BACKEND_CONFIG_FILE_PATH") -BACKEND_CONFIG_NAME="${BACKEND_CONFIG_FILE/.s3.tfbackend/}" -echo "$BACKEND_CONFIG_NAME" diff --git a/bin/current-account-id.sh b/bin/current-account-id similarity index 100% rename from bin/current-account-id.sh rename to bin/current-account-id diff --git a/bin/current-region.sh b/bin/current-region similarity index 100% rename from bin/current-region.sh rename to bin/current-region diff --git a/bin/deploy-release b/bin/deploy-release new file mode 100755 index 00000000..e5b56d82 --- /dev/null +++ b/bin/deploy-release @@ -0,0 +1,30 @@ +#!/bin/bash +set -euo pipefail + +app_name="$1" +image_tag="$2" +environment="$3" + +echo "--------------" +echo "Deploy release" +echo "--------------" +echo "Input parameters:" +echo " app_name=${app_name}" +echo " image_tag=${image_tag}" +echo " environment=${environment}" +echo + +# Update task definition and update service to use new task definition + +echo "::group::Starting ${app_name} deploy of ${image_tag} to ${environment}" +TF_CLI_ARGS_apply="-input=false -auto-approve -var=image_tag=${image_tag}" make infra-update-app-service APP_NAME="${app_name}" ENVIRONMENT="${environment}" +echo "::endgroup::" + +# Wait for the service to become stable + +cluster_name=$(terraform -chdir="infra/${app_name}/service" output -raw service_cluster_name) +service_name=$(terraform -chdir="infra/${app_name}/service" output -raw service_name) +echo "Wait for service ${service_name} to become stable" +aws ecs wait services-stable --cluster "${cluster_name}" --services "${service_name}" + +echo "Completed ${app_name} deploy of ${image_tag} to ${environment}" diff --git a/bin/deploy-release.sh b/bin/deploy-release.sh deleted file mode 100755 index a5cf2219..00000000 --- a/bin/deploy-release.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -set -euo pipefail - -APP_NAME=$1 -IMAGE_TAG=$2 -ENVIRONMENT=$3 - -echo "--------------" -echo "Deploy release" -echo "--------------" -echo "Input parameters:" -echo " APP_NAME=$APP_NAME" -echo " IMAGE_TAG=$IMAGE_TAG" -echo " ENVIRONMENT=$ENVIRONMENT" -echo -echo "Starting $APP_NAME deploy of $IMAGE_TAG to $ENVIRONMENT" - -TF_CLI_ARGS_apply="-input=false -auto-approve -var=image_tag=$IMAGE_TAG" make infra-update-app-service APP_NAME="$APP_NAME" ENVIRONMENT="$ENVIRONMENT" - -echo "Completed $APP_NAME deploy of $IMAGE_TAG to $ENVIRONMENT" diff --git a/bin/destroy-pr-environment b/bin/destroy-pr-environment new file mode 100755 index 00000000..92c97eb1 --- /dev/null +++ b/bin/destroy-pr-environment @@ -0,0 +1,53 @@ +#!/bin/bash +# ----------------------------------------------------------------------------- +# Destroy the temporary environment that was created for the pull request. +# +# Positional parameters: +# app_name (required) – the name of subdirectory of /infra that holds the +# application's infrastructure code. +# environment - the name of the application environment (e.g. 
dev, staging, prod) +# pr_number - the pull request number in GitHub +# ----------------------------------------------------------------------------- +set -euo pipefail + +app_name="$1" +environment="$2" +pr_number="$3" + +workspace="p-${pr_number}" + +echo "::group::Initialize Terraform with backend for environment: ${environment}" +terraform -chdir="infra/${app_name}/service" init -backend-config="${environment}.s3.tfbackend" +echo "::endgroup::" + +echo "Select Terraform workspace: ${workspace}" +terraform -chdir="infra/${app_name}/service" workspace select "${workspace}" + +echo "::group::Destroy resources" +terraform -chdir="infra/${app_name}/service" destroy -var="environment_name=${environment}" -input=false -auto-approve +echo "::endgroup::" + +echo "Select default workspace" +terraform -chdir="infra/${app_name}/service" workspace select default + +echo "Delete workspace: ${workspace}" +terraform -chdir="infra/${app_name}/service" workspace delete "${workspace}" + +pr_info=$(cat < +## Preview environment +♻️ Environment destroyed ♻️ + +EOF +) + +pr_body="$(gh pr view "${pr_number}" --json body | jq --raw-output .body)" +if [[ $pr_body == *""*""* ]]; then + pr_body="${pr_body//*/$pr_info}" +else + pr_body="${pr_body}"$'\n\n'"${pr_info}" +fi + +echo "Update PR description with PR environment info" +echo "${pr_info}" +gh pr edit "${pr_number}" --body "${pr_body}" diff --git a/bin/infra-deploy-status-check-configs b/bin/infra-deploy-status-check-configs new file mode 100755 index 00000000..9dadc1c7 --- /dev/null +++ b/bin/infra-deploy-status-check-configs @@ -0,0 +1,127 @@ +#!/bin/bash +# ----------------------------------------------------------------------------- +# This script is used by the GitHub Action workflow check-infra-deploy-status.yml +# to generate the matrix strategy for all the jobs in that workflow. +# It retrieves all the root modules in the repository, the backend configurations +# for each of those root modules, information that GitHub Actions needs to +# authenticate with AWS, and any additional parameters that need to be passed to +# terraform plan. +# +# An example of this script's output is as follows (the actual output is minified JSON, +# but this is a pretty-printed version to be more readable): +# +# [ +# { +# "backend_config_name": "dev.111111111111", +# "infra_layer": "accounts", +# "root_module_subdir": "accounts", +# "account_name": "dev" +# }, +# { +# "backend_config_name": "dev", +# "infra_layer": "networks", +# "root_module_subdir": "networks", +# "extra_params": "-var=\"network_name=dev\"" +# }, +# { +# "backend_config_name": "shared", +# "infra_layer": "build-repository", +# "root_module_subdir": "app/build-repository", +# "app_name": "app" +# }, +# { +# "backend_config_name": "dev", +# "infra_layer": "database", +# "root_module_subdir": "app/database", +# "app_name": "app", +# "extra_params": "-var=\"environment_name=dev\"" +# }, +# { +# "backend_config_name": "dev", +# "infra_layer": "service", +# "root_module_subdir": "app/service", +# "app_name": "app", +# "extra_params": "-var=\"environment_name=dev\"" +# } +# ] +# ----------------------------------------------------------------------------- +set -euo pipefail + +# Return the names of Terraform backend configuration files in (without the ".s3.tfbackend" suffix) +# for the root module given by "infra/${root_module_subdir}". +# +# Parameters: +# - root_module_subdir: The subdirectory of the root module where the backend configuration files are located. 
+# Returns: +# - The names of the backend configuration files, separated by newlines +function get_backend_config_names() { + local root_module_subdir="$1" + local root_module="infra/${root_module_subdir}" + if [ -d "${root_module}" ]; then + find "${root_module}" -name "*.s3.tfbackend" -exec basename {} .s3.tfbackend \; + fi +} + +# Get deploy status check configurations for the given infrastructure layer (and application name if relevant). +# Parameters: +# - infra_layer: The infrastructure layer (one of "accounts", "networks", "build-repository", "database", "service") +# - app_name (optional): The application name (only required for the "build-repository", "database", and "service" layers) +# Returns: +# - JSON objects containing backend configuration name, infrastructure layer, and root module subdirectory, separated by newlines +function get_root_module_configs() { + local infra_layer="$1" + local app_name + local backend_config_names + local root_module_subdir + if [[ "${infra_layer}" == "build-repository" || "${infra_layer}" == "database" || "${infra_layer}" == "service" ]]; then + app_name="$2" + root_module_subdir="${app_name}/${infra_layer}" + else + root_module_subdir="${infra_layer}" + fi + backend_config_names="$(get_backend_config_names "${root_module_subdir}")" + for backend_config_name in ${backend_config_names}; do + echo "{\"backend_config_name\": \"${backend_config_name}\", \"infra_layer\": \"${infra_layer}\", \"root_module_subdir\": \"${root_module_subdir}\"}" + done +} + +# Retrieve the names of the applications in the repo by listing the directories in the "infra" directory +# and filtering out the directories that are not applications. +# Returns: A list of application names. +function get_app_names() { + find "infra" -maxdepth 1 -type d -not -name "infra" -not -name "accounts" -not -name "modules" -not -name "networks" -not -name "project-config" -not -name "test" -exec basename {} \; +} + +function get_account_layer_configs() { + local configs + configs=$(get_root_module_configs "accounts") + echo "${configs}" | jq -c '. + {account_name: (.backend_config_name | split(".")[0])}' +} + +function get_network_layer_configs() { + local configs + configs=$(get_root_module_configs "networks") + echo "${configs}" | jq -c '. + {extra_params: "-var=\"network_name=\(.backend_config_name)\""}' +} + +function get_app_configs() { + local app_name="$1" + local configs="" + for infra_layer in "build-repository" "database" "service"; do + configs+="$(get_root_module_configs "${infra_layer}" "${app_name}")" + configs+=$'\n' + done + echo "${configs}" | jq -c 'if .backend_config_name != "shared" then . + {app_name: "'"${app_name}"'", extra_params: "-var=\"environment_name=\(.backend_config_name)\""} else . + {app_name: "'"${app_name}"'"} end' +} + +function main() { + local root_module_configs + root_module_configs="$(get_account_layer_configs)" + root_module_configs+="$(get_network_layer_configs)" + for app_name in $(get_app_names); do + root_module_configs+="$(get_app_configs "${app_name}")" + done + echo "${root_module_configs}" | jq -s -c . 
+} + +main diff --git a/bin/is-image-published b/bin/is-image-published new file mode 100755 index 00000000..64c19f51 --- /dev/null +++ b/bin/is-image-published @@ -0,0 +1,25 @@ +#!/bin/bash +# Checks if an image tag has already been published to the container repository +# Prints "true" if so, "false" otherwise + +set -euo pipefail + +app_name="$1" +git_ref="$2" + +# Get commit hash +image_tag=$(git rev-parse "${git_ref}") + +# Need to init module when running in CD since GitHub actions does a fresh checkout of repo +terraform -chdir="infra/${app_name}/app-config" init > /dev/null +terraform -chdir="infra/${app_name}/app-config" apply -auto-approve > /dev/null +image_repository_name="$(terraform -chdir="infra/${app_name}/app-config" output -json build_repository_config | jq -r ".name")" +region=$(./bin/current-region) + +result="" +result=$(aws ecr describe-images --repository-name "${image_repository_name}" --image-ids "imageTag=${image_tag}" --region "${region}" 2> /dev/null ) || true +if [ -n "${result}" ];then + echo "true" +else + echo "false" +fi diff --git a/bin/lint-markdown.sh b/bin/lint-markdown similarity index 80% rename from bin/lint-markdown.sh rename to bin/lint-markdown index d70a06e3..73733a10 100755 --- a/bin/lint-markdown.sh +++ b/bin/lint-markdown @@ -4,15 +4,14 @@ # running, regardless where the user is when invoking this script. # Grab the full directory name for where this script lives. -SCRIPT_DIR=$(readlink -f "$0" | xargs dirname) +script_dir=$(readlink -f "$0" | xargs dirname) # Move up to the root since we want to do everything relative to that. Note that this only impacts # this script, but will leave the user wherever they were when the script exists. -cd "${SCRIPT_DIR}/.." >/dev/null || exit 1 +cd "${script_dir}/.." >/dev/null || exit 1 - -LINK_CHECK_CONFIG=".github/workflows/markdownlint-config.json" +link_check_config=".github/workflows/markdownlint-config.json" # Recursively find all markdown files (*.md) in the current directory, excluding node_modules and .venv subfolders. # Pass them in as args to the lint command using the handy `xargs` command. -find . -name \*.md -not -path "*/node_modules/*" -not -path "*/.venv/*" -print0 | xargs -0 -n1 npx markdown-link-check --config $LINK_CHECK_CONFIG +find . 
-name \*.md -not -path "*/node_modules/*" -not -path "*/.venv/*" -print0 | xargs -0 -n1 npx markdown-link-check --config "${link_check_config}" diff --git a/bin/publish-release b/bin/publish-release new file mode 100755 index 00000000..f359befd --- /dev/null +++ b/bin/publish-release @@ -0,0 +1,46 @@ +#!/bin/bash + +set -euo pipefail + +app_name="$1" +image_name="$2" +image_tag="$3" + +echo "---------------" +echo "Publish release" +echo "---------------" +echo "Input parameters:" +echo " app_name=${app_name}" +echo " image_name=${image_name}" +echo " image_tag=${image_tag}" + +# Need to init module when running in CD since GitHub actions does a fresh checkout of repo +terraform -chdir="infra/${app_name}/app-config" init > /dev/null +terraform -chdir="infra/${app_name}/app-config" apply -auto-approve > /dev/null +image_repository_name="$(terraform -chdir="infra/${app_name}/app-config" output -json build_repository_config | jq -r ".name")" + +region=$(./bin/current-region) +read -r image_registry_id image_repository_url <<< "$(aws ecr describe-repositories --repository-names "${image_repository_name}" --query "repositories[0].[registryId,repositoryUri]" --output text)" +image_registry="${image_registry_id}.dkr.ecr.${region}.amazonaws.com" + +echo "Build repository info:" +echo " region=${region}" +echo " image_registry=${image_registry}" +echo " image_repository_name=${image_repository_name}" +echo " image_repository_url=${image_repository_url}" +echo +echo "Authenticating Docker with ECR" +aws ecr get-login-password --region "${region}" \ + | docker login --username AWS --password-stdin "${image_registry}" +echo +echo "Check if tag has already been published..." +result="" +result=$(aws ecr describe-images --repository-name "${image_repository_name}" --image-ids "imageTag=${image_tag}" --region "${region}" 2> /dev/null ) || true +if [ -n "${result}" ];then + echo "Image with tag ${image_tag} already published" + exit 0 +fi + +echo "New tag. 
Publishing image" +docker tag "${image_name}:${image_tag}" "${image_repository_url}:${image_tag}" +docker push "${image_repository_url}:${image_tag}" diff --git a/bin/publish-release.sh b/bin/publish-release.sh deleted file mode 100755 index d1ca50a4..00000000 --- a/bin/publish-release.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -APP_NAME=$1 -IMAGE_NAME=$2 -IMAGE_TAG=$3 - -echo "---------------" -echo "Publish release" -echo "---------------" -echo "Input parameters:" -echo " APP_NAME=$APP_NAME" -echo " IMAGE_NAME=$IMAGE_NAME" -echo " IMAGE_TAG=$IMAGE_TAG" - -# Need to init module when running in CD since GitHub actions does a fresh checkout of repo -terraform -chdir="infra/$APP_NAME/app-config" init > /dev/null -terraform -chdir="infra/$APP_NAME/app-config" apply -auto-approve > /dev/null -IMAGE_REPOSITORY_NAME=$(terraform -chdir="infra/$APP_NAME/app-config" output -json build_repository_config | jq -r .name) -IMAGE_REPOSITORY_ACCOUNT_ID=$(terraform -chdir="infra/$APP_NAME/app-config" output -json build_repository_config | jq -r .account_id) -REGION=$(terraform -chdir="infra/$APP_NAME/app-config" output -json build_repository_config | jq -r .region) - -IMAGE_REPOSITORY_URL="$IMAGE_REPOSITORY_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$IMAGE_REPOSITORY_NAME" - -echo "Build repository info:" -echo " REGION=$REGION" -echo " IMAGE_REPOSITORY_NAME=$IMAGE_REPOSITORY_NAME" -echo " IMAGE_REPOSITORY_URL=$IMAGE_REPOSITORY_URL" -echo -echo "Authenticating Docker with ECR" -aws ecr get-login-password --region "$REGION" \ - | docker login --username AWS --password-stdin "$IMAGE_REPOSITORY_URL" -echo -echo "Check if tag has already been published..." -RESULT="" -RESULT=$(aws ecr describe-images --repository-name "$IMAGE_REPOSITORY_NAME" --image-ids "imageTag=$IMAGE_TAG" --region "$REGION" 2> /dev/null ) || true -if [ -n "$RESULT" ];then - echo "Image with tag $IMAGE_TAG already published" - exit 0 -fi - -echo "New tag. Publishing image" -docker tag "$IMAGE_NAME:$IMAGE_TAG" "$IMAGE_REPOSITORY_URL:$IMAGE_TAG" -docker push "$IMAGE_REPOSITORY_URL:$IMAGE_TAG" diff --git a/bin/run-command b/bin/run-command new file mode 100755 index 00000000..edde6ba6 --- /dev/null +++ b/bin/run-command @@ -0,0 +1,220 @@ +#!/bin/bash +# ----------------------------------------------------------------------------- +# Run an application command using the application image +# +# Optional parameters: +# --environment_variables - a JSON list of environment variables to add to the +# the container. Each environment variable is an object with the "name" key +# specifying the name of the environment variable and the "value" key +# specifying the value of the environment variable. +# e.g. '[{ "name" : "DB_USER", "value" : "migrator" }]' +# --task_role_arn - the IAM role ARN that the task should assume. Overrides the +# task role specified in the task definition. +# +# Positional parameters: +# app_name (required) - the name of subdirectory of /infra that holds the +# application's infrastructure code. +# environment (required) - the name of the application environment (e.g. dev, +# staging, prod) +# command (required) - a JSON list representing the command to run +# e.g. To run the command `db-migrate-up` with no arguments, set +# command='["db-migrate-up"]' +# e.g. 
To run the command `echo "Hello, world"` set +# command='["echo", "Hello, world"]') +# ----------------------------------------------------------------------------- +set -euo pipefail + +# Parse optional parameters +environment_variables="" +task_role_arn="" +while :; do + case "$1" in + --environment-variables) + environment_variables="$2" + shift 2 + ;; + --task-role-arn) + task_role_arn="$2" + shift 2 + ;; + *) + break + ;; + esac +done + +app_name="$1" +environment="$2" +command="$3" + +echo "===============" +echo "Running command" +echo "===============" +echo "Input parameters" +echo " app_name=${app_name}" +echo " environment=${environment}" +echo " command=${command}" +echo " environment_variables=${environment_variables:-}" +echo " task_role_arn=${task_role_arn:-}" +echo + +# Use the same cluster, task definition, and network configuration that the application service uses +cluster_name=$(terraform -chdir="infra/${app_name}/service" output -raw service_cluster_name) +service_name=$(terraform -chdir="infra/${app_name}/service" output -raw service_name) + +# Get the log group and log stream prefix so that we can print out the ECS task's logs after running the task +log_group=$(terraform -chdir="infra/${app_name}/service" output -raw application_log_group) +log_stream_prefix=$(terraform -chdir="infra/${app_name}/service" output -raw application_log_stream_prefix) + +service_task_definition_arn=$(aws ecs describe-services --no-cli-pager --cluster "${cluster_name}" --services "${service_name}" --query "services[0].taskDefinition" --output text) +# For subsequent commands, use the task definition family rather than the service's task definition ARN +# because in the case of migrations, we'll deploy a new task definition revision before updating the +# service, so the service will be using an old revision, but we want to use the latest revision. +task_definition_family=$(aws ecs describe-task-definition --no-cli-pager --task-definition "${service_task_definition_arn}" --query "taskDefinition.family" --output text) + +network_config=$(aws ecs describe-services --no-cli-pager --cluster "${cluster_name}" --services "${service_name}" --query "services[0].networkConfiguration") +current_region=$(./bin/current-region) +aws_user_id=$(aws sts get-caller-identity --no-cli-pager --query UserId --output text) + +container_name=$(aws ecs describe-task-definition --task-definition "${task_definition_family}" --query "taskDefinition.containerDefinitions[0].name" --output text) + +overrides=$(cat << EOF +{ + "containerOverrides": [ + { + "name": "${container_name}", + "command": ${command} + } + ] +} +EOF +) + +if [ -n "${environment_variables}" ]; then + overrides=$(echo "${overrides}" | jq ".containerOverrides[0].environment |= ${environment_variables}") +fi + +if [ -n "${task_role_arn}" ]; then + overrides=$(echo "${overrides}" | jq ".taskRoleArn |= \"${task_role_arn}\"") +fi + +task_start_time=$(date +%s) +task_start_time_millis=$((task_start_time * 1000)) + +aws_args=( + ecs run-task + --region="${current_region}" + --cluster="${cluster_name}" + --task-definition="${task_definition_family}" + --started-by="${aws_user_id}" + --launch-type=FARGATE + --platform-version=1.4.0 + --network-configuration "${network_config}" + --overrides "${overrides}" +) +echo "::group::Running AWS CLI command" +printf " ... 
%s\n" "${aws_args[@]}" +task_arn=$(aws --no-cli-pager "${aws_args[@]}" --query "tasks[0].taskArn" --output text) +echo "::endgroup::" +echo + +# Get the task id by extracting the substring after the last '/' since the task ARN is of +# the form "arn:aws:ecs:::task//" +ecs_task_id=$(basename "${task_arn}") + +# The log stream has the format "prefix-name/container-name/ecs-task-id" +# See https://docs.aws.amazon.com/AmazonECS/latest/userguide/using_awslogs.html +log_stream="${log_stream_prefix}/${container_name}/${ecs_task_id}" + +# Wait for log stream to be created before fetching the logs. +# The reason we don't use the `aws ecs wait tasks-running` command is because +# that command can fail on short-lived tasks. In particular, the command polls +# every 6 seconds with describe-tasks until tasks[].lastStatus is RUNNING. A +# task that completes quickly can go from PENDING to STOPPED, causing the wait +# command to error out. +echo "Waiting for log stream to be created" +echo " task_arn=${task_arn}" +echo " task_id=${ecs_task_id}" +echo " log_stream=${log_stream}" + +num_retries_waiting_for_logs=0 +while true; do + num_retries_waiting_for_logs=$((num_retries_waiting_for_logs+1)) + if [ "${num_retries_waiting_for_logs}" -eq 20 ]; then + echo "Timing out task ${ecs_task_id} waiting for logs" + exit 1 + fi + is_log_stream_created=$(aws logs describe-log-streams --no-cli-pager --log-group-name "${log_group}" --query "length(logStreams[?logStreamName==\`${log_stream}\`])") + if [ "${is_log_stream_created}" == "1" ]; then + break + fi + sleep 5 + echo -n "." +done +echo +echo + +# Tail logs until task stops using a loop that polls for new logs. +# The reason why we don't use `aws logs tail` is because that command is meant +# for interactive use. In particular, it will wait forever for new logs, even +# after a task stops, until the user hits Ctrl+C. And the reason why we don't +# wait until the task completes first before fetching logs is so that we can +# show logs in near real-time, which can be useful for long running tasks. 
+echo "::group::Tailing logs until task stops" +echo " log_group=${log_group}" +echo " log_stream=${log_stream}" +echo " task_start_time_millis=${task_start_time_millis}" +# Initialize the logs start time filter to the time we started the task +logs_start_time_millis="${task_start_time_millis}" +while true; do + # Print logs with human readable timestamps by fetching the log events as JSON + # then transforming them afterwards using jq + log_events=$(aws logs get-log-events \ + --no-cli-pager \ + --log-group-name "${log_group}" \ + --log-stream-name "${log_stream}" \ + --start-time "${logs_start_time_millis}" \ + --start-from-head \ + --no-paginate \ + --output json) + # Divide timestamp by 1000 since AWS timestamps are in milliseconds + echo "${log_events}" | jq -r '.events[] | ((.timestamp / 1000 | strftime("%Y-%m-%d %H:%M:%S")) + "\t" + .message)' + + # If the task stopped, then stop tailing logs + last_task_status=$(aws ecs describe-tasks --cluster "${cluster_name}" --tasks "${task_arn}" --query "tasks[0].containers[?name=='${container_name}'].lastStatus" --output text) + if [ "${last_task_status}" = "STOPPED" ]; then + break + fi + + # If there were new logs printed, then update the logs start time filter + # to be the last log's timestamp + 1 + last_log_timestamp=$(echo "${log_events}" | jq -r '.events[-1].timestamp' ) + if [ "${last_log_timestamp}" != "null" ]; then + logs_start_time_millis=$((last_log_timestamp + 1)) + fi + + # Give the application a moment to generate more logs before fetching again + sleep 1 +done +echo "::endgroup::" +echo + +container_exit_code=$( + aws ecs describe-tasks \ + --cluster "${cluster_name}" \ + --tasks "${task_arn}" \ + --query "tasks[0].containers[?name=='${container_name}'].exitCode" \ + --output text +) + +if [[ "${container_exit_code}" == "null" || "${container_exit_code}" != "0" ]]; then + echo "Task failed" >&2 + # Although we could avoid extra calls to AWS CLI if we just got the full JSON response from + # `aws ecs describe-tasks` and parsed it with jq, we are trying to avoid unnecessary dependencies. + container_status=$(aws ecs describe-tasks --cluster "${cluster_name}" --tasks "${task_arn}" --query "tasks[0].containers[?name=='${container_name}'].[lastStatus,exitCode,reason]" --output text) + task_status=$(aws ecs describe-tasks --cluster "${cluster_name}" --tasks "${task_arn}" --query "tasks[0].[lastStatus,stopCode,stoppedAt,stoppedReason]" --output text) + + echo "Container status (lastStatus, exitCode, reason): ${container_status}" >&2 + echo "Task status (lastStatus, stopCode, stoppedAt, stoppedReason): ${task_status}" >&2 + exit 1 +fi diff --git a/bin/run-command.sh b/bin/run-command.sh deleted file mode 100755 index 304ff505..00000000 --- a/bin/run-command.sh +++ /dev/null @@ -1,220 +0,0 @@ -#!/bin/bash -# ----------------------------------------------------------------------------- -# Run an application command using the application image -# -# Optional parameters: -# --environment-variables - a JSON list of environment variables to add to the -# the container. Each environment variable is an object with the "name" key -# specifying the name of the environment variable and the "value" key -# specifying the value of the environment variable. -# e.g. '[{ "name" : "DB_USER", "value" : "migrator" }]' -# --task-role-arn - the IAM role ARN that the task should assume. Overrides the -# task role specified in the task definition. 
-# -# Positional parameters: -# APP_NAME (required) - the name of subdirectory of /infra that holds the -# application's infrastructure code. -# ENVIRONMENT (required) - the name of the application environment (e.g. dev, -# staging, prod) -# COMMAND (required) - a JSON list representing the command to run -# e.g. To run the command `db-migrate-up` with no arguments, set -# COMMAND='["db-migrate-up"]' -# e.g. To run the command `echo "Hello, world"` set -# COMMAND='["echo", "Hello, world"]') -# ----------------------------------------------------------------------------- -set -euo pipefail - -# Parse optional parameters -ENVIRONMENT_VARIABLES="" -TASK_ROLE_ARN="" -while :; do - case $1 in - --environment-variables) - ENVIRONMENT_VARIABLES=$2 - shift 2 - ;; - --task-role-arn) - TASK_ROLE_ARN=$2 - shift 2 - ;; - *) - break - ;; - esac -done - -APP_NAME="$1" -ENVIRONMENT="$2" -COMMAND="$3" - -echo "===============" -echo "Running command" -echo "===============" -echo "Input parameters" -echo " APP_NAME=$APP_NAME" -echo " ENVIRONMENT=$ENVIRONMENT" -echo " COMMAND=$COMMAND" -echo " ENVIRONMENT_VARIABLES=${ENVIRONMENT_VARIABLES:-}" -echo " TASK_ROLE_ARN=${TASK_ROLE_ARN:-}" -echo - -# Use the same cluster, task definition, and network configuration that the application service uses -CLUSTER_NAME=$(terraform -chdir="infra/$APP_NAME/service" output -raw service_cluster_name) -SERVICE_NAME=$(terraform -chdir="infra/$APP_NAME/service" output -raw service_name) - -# Get the log group and log stream prefix so that we can print out the ECS task's logs after running the task -LOG_GROUP=$(terraform -chdir="infra/$APP_NAME/service" output -raw application_log_group) -LOG_STREAM_PREFIX=$(terraform -chdir="infra/$APP_NAME/service" output -raw application_log_stream_prefix) - -SERVICE_TASK_DEFINITION_ARN=$(aws ecs describe-services --no-cli-pager --cluster "$CLUSTER_NAME" --services "$SERVICE_NAME" --query "services[0].taskDefinition" --output text) -# For subsequent commands, use the task definition family rather than the service's task definition ARN -# because in the case of migrations, we'll deploy a new task definition revision before updating the -# service, so the service will be using an old revision, but we want to use the latest revision. 
-TASK_DEFINITION_FAMILY=$(aws ecs describe-task-definition --no-cli-pager --task-definition "$SERVICE_TASK_DEFINITION_ARN" --query "taskDefinition.family" --output text) - -NETWORK_CONFIG=$(aws ecs describe-services --no-cli-pager --cluster "$CLUSTER_NAME" --services "$SERVICE_NAME" --query "services[0].networkConfiguration") -CURRENT_REGION=$(./bin/current-region.sh) -AWS_USER_ID=$(aws sts get-caller-identity --no-cli-pager --query UserId --output text) - -CONTAINER_NAME=$(aws ecs describe-task-definition --task-definition "$TASK_DEFINITION_FAMILY" --query "taskDefinition.containerDefinitions[0].name" --output text) - -OVERRIDES=$(cat << EOF -{ - "containerOverrides": [ - { - "name": "$CONTAINER_NAME", - "command": $COMMAND - } - ] -} -EOF -) - -if [ -n "$ENVIRONMENT_VARIABLES" ]; then - OVERRIDES=$(echo "$OVERRIDES" | jq ".containerOverrides[0].environment |= $ENVIRONMENT_VARIABLES") -fi - -if [ -n "$TASK_ROLE_ARN" ]; then - OVERRIDES=$(echo "$OVERRIDES" | jq ".taskRoleArn |= \"$TASK_ROLE_ARN\"") -fi - -TASK_START_TIME=$(date +%s) -TASK_START_TIME_MILLIS=$((TASK_START_TIME * 1000)) - -AWS_ARGS=( - ecs run-task - --region="$CURRENT_REGION" - --cluster="$CLUSTER_NAME" - --task-definition="$TASK_DEFINITION_FAMILY" - --started-by="$AWS_USER_ID" - --launch-type=FARGATE - --platform-version=1.4.0 - --network-configuration "$NETWORK_CONFIG" - --overrides "$OVERRIDES" -) -echo "::group::Running AWS CLI command" -printf " ... %s\n" "${AWS_ARGS[@]}" -TASK_ARN=$(aws --no-cli-pager "${AWS_ARGS[@]}" --query "tasks[0].taskArn" --output text) -echo "::endgroup::" -echo - -# Get the task id by extracting the substring after the last '/' since the task ARN is of -# the form "arn:aws:ecs:::task//" -ECS_TASK_ID=$(basename "$TASK_ARN") - -# The log stream has the format "prefix-name/container-name/ecs-task-id" -# See https://docs.aws.amazon.com/AmazonECS/latest/userguide/using_awslogs.html -LOG_STREAM="$LOG_STREAM_PREFIX/$CONTAINER_NAME/$ECS_TASK_ID" - -# Wait for log stream to be created before fetching the logs. -# The reason we don't use the `aws ecs wait tasks-running` command is because -# that command can fail on short-lived tasks. In particular, the command polls -# every 6 seconds with describe-tasks until tasks[].lastStatus is RUNNING. A -# task that completes quickly can go from PENDING to STOPPED, causing the wait -# command to error out. -echo "Waiting for log stream to be created" -echo " TASK_ARN=$TASK_ARN" -echo " TASK_ID=$ECS_TASK_ID" -echo " LOG_STREAM=$LOG_STREAM" - -NUM_RETRIES_WAITIN_FOR_LOGS=0 -while true; do - NUM_RETRIES_WAITIN_FOR_LOGS=$((NUM_RETRIES_WAITIN_FOR_LOGS+1)) - if [ $NUM_RETRIES_WAITIN_FOR_LOGS -eq 20 ]; then - echo "Timing out task $ECS_TASK_ID waiting for logs" - exit 1 - fi - IS_LOG_STREAM_CREATED=$(aws logs describe-log-streams --no-cli-pager --log-group-name "$LOG_GROUP" --query "length(logStreams[?logStreamName==\`$LOG_STREAM\`])") - if [ "$IS_LOG_STREAM_CREATED" == "1" ]; then - break - fi - sleep 5 - echo -n "." -done -echo -echo - -# Tail logs until task stops using a loop that polls for new logs. -# The reason why we don't use `aws logs tail` is because that command is meant -# for interactive use. In particular, it will wait forever for new logs, even -# after a task stops, until the user hits Ctrl+C. And the reason why we don't -# wait until the task completes first before fetching logs is so that we can -# show logs in near real-time, which can be useful for long running tasks. 
-echo "::group::Tailing logs until task stops" -echo " LOG_GROUP=$LOG_GROUP" -echo " LOG_STREAM=$LOG_STREAM" -echo " TASK_START_TIME_MILLIS=$TASK_START_TIME_MILLIS" -# Initialize the logs start time filter to the time we started the task -LOGS_START_TIME_MILLIS=$TASK_START_TIME_MILLIS -while true; do - # Print logs with human readable timestamps by fetching the log events as JSON - # then transforming them afterwards using jq - LOG_EVENTS=$(aws logs get-log-events \ - --no-cli-pager \ - --log-group-name "$LOG_GROUP" \ - --log-stream-name "$LOG_STREAM" \ - --start-time "$LOGS_START_TIME_MILLIS" \ - --start-from-head \ - --no-paginate \ - --output json) - # Divide timestamp by 1000 since AWS timestamps are in milliseconds - echo "$LOG_EVENTS" | jq -r '.events[] | ((.timestamp / 1000 | strftime("%Y-%m-%d %H:%M:%S")) + "\t" + .message)' - - # If the task stopped, then stop tailing logs - LAST_TASK_STATUS=$(aws ecs describe-tasks --cluster "$CLUSTER_NAME" --tasks "$TASK_ARN" --query "tasks[0].containers[?name=='$CONTAINER_NAME'].lastStatus" --output text) - if [ "$LAST_TASK_STATUS" = "STOPPED" ]; then - break - fi - - # If there were new logs printed, then update the logs start time filter - # to be the last log's timestamp + 1 - LAST_LOG_TIMESTAMP=$(echo "$LOG_EVENTS" | jq -r '.events[-1].timestamp' ) - if [ "$LAST_LOG_TIMESTAMP" != "null" ]; then - LOGS_START_TIME_MILLIS=$((LAST_LOG_TIMESTAMP + 1)) - fi - - # Give the application a moment to generate more logs before fetching again - sleep 1 -done -echo "::endgroup::" -echo - -CONTAINER_EXIT_CODE=$( - aws ecs describe-tasks \ - --cluster "$CLUSTER_NAME" \ - --tasks "$TASK_ARN" \ - --query "tasks[0].containers[?name=='$CONTAINER_NAME'].exitCode" \ - --output text -) - -if [[ "$CONTAINER_EXIT_CODE" == "null" || "$CONTAINER_EXIT_CODE" != "0" ]]; then - echo "Task failed" >&2 - # Although we could avoid extra calls to AWS CLI if we just got the full JSON response from - # `aws ecs describe-tasks` and parsed it with jq, we are trying to avoid unnecessary dependencies. - CONTAINER_STATUS=$(aws ecs describe-tasks --cluster "$CLUSTER_NAME" --tasks "$TASK_ARN" --query "tasks[0].containers[?name=='$CONTAINER_NAME'].[lastStatus,exitCode,reason]" --output text) - TASK_STATUS=$(aws ecs describe-tasks --cluster "$CLUSTER_NAME" --tasks "$TASK_ARN" --query "tasks[0].[lastStatus,stopCode,stoppedAt,stoppedReason]" --output text) - - echo "Container status (lastStatus, exitCode, reason): $CONTAINER_STATUS" >&2 - echo "Task status (lastStatus, stopCode, stoppedAt, stoppedReason): $TASK_STATUS" >&2 - exit 1 -fi diff --git a/bin/run-database-migrations b/bin/run-database-migrations new file mode 100755 index 00000000..ad24b608 --- /dev/null +++ b/bin/run-database-migrations @@ -0,0 +1,67 @@ +#!/bin/bash +# ----------------------------------------------------------------------------- +# Run database migrations +# 1. Update the application's task definition with the latest build, but +# do not update the service +# 2. Run the "db-migrate" command in the container as a new task +# +# Positional parameters: +# app_name (required) – the name of subdirectory of /infra that holds the +# application's infrastructure code. +# image_tag (required) – the tag of the latest build +# environment (required) – the name of the application environment (e.g. 
dev, +# staging, prod) +# ----------------------------------------------------------------------------- + +set -euo pipefail + +app_name="$1" +image_tag="$2" +environment="$3" + +echo "==================" +echo "Running migrations" +echo "==================" +echo "Input parameters" +echo " app_name=${app_name}" +echo " image_tag=${image_tag}" +echo " environment=${environment}" +echo +echo "Step 0. Check if app has a database" + +terraform -chdir="infra/${app_name}/app-config" init > /dev/null +terraform -chdir="infra/${app_name}/app-config" apply -auto-approve > /dev/null +has_database=$(terraform -chdir="infra/${app_name}/app-config" output -raw has_database) +if [ "${has_database}" = "false" ]; then + echo "Application does not have a database, no migrations to run" + exit 0 +fi + +db_migrator_user=$(terraform -chdir="infra/${app_name}/app-config" output -json environment_configs | jq -r ".${environment}.database_config.migrator_username") + +./bin/terraform-init "infra/${app_name}/service" "${environment}" +migrator_role_arn=$(terraform -chdir="infra/${app_name}/service" output -raw migrator_role_arn) + +echo +echo "::group::Step 1. Update task definition without updating service" + +TF_CLI_ARGS_apply="-input=false -auto-approve -var=image_tag=${image_tag} + -target=module.service.aws_ecs_task_definition.app + -target=module.service.aws_iam_role_policy.task_executor" \ + make infra-update-app-service APP_NAME="${app_name}" ENVIRONMENT="${environment}" + +echo "::endgroup::" +echo +echo 'Step 2. Run "db-migrate" command' + +command='["db-migrate"]' + + + +# Indent the later lines more to make the output of run-command prettier +environment_variables=$(cat << EOF +[{ "name" : "DB_USER", "value" : "${db_migrator_user}" }] +EOF +) + +./bin/run-command --task-role-arn "${migrator_role_arn}" --environment-variables "${environment_variables}" "${app_name}" "${environment}" "${command}" diff --git a/bin/run-database-migrations.sh b/bin/run-database-migrations.sh deleted file mode 100755 index 9122bc83..00000000 --- a/bin/run-database-migrations.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash -# ----------------------------------------------------------------------------- -# Run database migrations -# 1. Update the application's task definition with the latest build, but -# do not update the service -# 2. Run the "db-migrate" command in the container as a new task -# -# Positional parameters: -# APP_NAME (required) – the name of subdirectory of /infra that holds the -# application's infrastructure code. -# IMAGE_TAG (required) – the tag of the latest build -# ENVIRONMENT (required) – the name of the application environment (e.g. dev, -# staging, prod) -# ----------------------------------------------------------------------------- - -set -euo pipefail - -APP_NAME="$1" -IMAGE_TAG="$2" -ENVIRONMENT="$3" - -echo "==================" -echo "Running migrations" -echo "==================" -echo "Input parameters" -echo " APP_NAME=$APP_NAME" -echo " IMAGE_TAG=$IMAGE_TAG" -echo " ENVIRONMENT=$ENVIRONMENT" -echo -echo "Step 0. 
Check if app has a database" - -terraform -chdir="infra/$APP_NAME/app-config" init > /dev/null -terraform -chdir="infra/$APP_NAME/app-config" apply -auto-approve > /dev/null -HAS_DATABASE=$(terraform -chdir="infra/$APP_NAME/app-config" output -raw has_database) -if [ "$HAS_DATABASE" = "false" ]; then - echo "Application does not have a database, no migrations to run" - exit 0 -fi - -DB_MIGRATOR_USER=$(terraform -chdir="infra/$APP_NAME/app-config" output -json environment_configs | jq -r ".$ENVIRONMENT.database_config.migrator_username") - -./bin/terraform-init.sh "infra/$APP_NAME/service" "$ENVIRONMENT" -MIGRATOR_ROLE_ARN=$(terraform -chdir="infra/$APP_NAME/service" output -raw migrator_role_arn) - -echo -echo "::group::Step 1. Update task definition without updating service" - -TF_CLI_ARGS_apply="-input=false -auto-approve -var=image_tag=$IMAGE_TAG \ - -target=module.service.aws_ecs_task_definition.app \ - -target=module.service.aws_iam_role_policy.task_executor" \ - make infra-update-app-service APP_NAME="$APP_NAME" ENVIRONMENT="$ENVIRONMENT" - -echo "::endgroup::" -echo -echo 'Step 2. Run "db-migrate" command' - -COMMAND='["db-migrate"]' - - - -# Indent the later lines more to make the output of run-command prettier -ENVIRONMENT_VARIABLES=$(cat << EOF -[{ "name" : "DB_USER", "value" : "$DB_MIGRATOR_USER" }] -EOF -) - -./bin/run-command.sh --task-role-arn "$MIGRATOR_ROLE_ARN" --environment-variables "$ENVIRONMENT_VARIABLES" "$APP_NAME" "$ENVIRONMENT" "$COMMAND" diff --git a/bin/set-up-current-account.sh b/bin/set-up-current-account similarity index 73% rename from bin/set-up-current-account.sh rename to bin/set-up-current-account index d8f15c65..8bfc8eea 100755 --- a/bin/set-up-current-account.sh +++ b/bin/set-up-current-account @@ -11,7 +11,7 @@ # can change and is not guaranteed to exist. # # Positional parameters: -# ACCOUNT_NAME (required) - human readable name for the AWS account that you're +# account_name (required) - human readable name for the AWS account that you're # authenticated into. The account name will be used to prefix the created # tfbackend file so that it's easier to visually identify as opposed to # identifying the file using the account id. 
@@ -22,42 +22,42 @@ # ----------------------------------------------------------------------------- set -euo pipefail -ACCOUNT_NAME=$1 +account_name="$1" -ACCOUNT_ID=$(./bin/current-account-id.sh) -REGION=$(./bin/current-region.sh) +account_id=$(./bin/current-account-id) +region=$(./bin/current-region) # Get project name terraform -chdir="infra/project-config" apply -auto-approve > /dev/null -PROJECT_NAME=$(terraform -chdir=infra/project-config output -raw project_name) +project_name=$(terraform -chdir="infra/project-config" output --raw project_name) -TF_STATE_BUCKET_NAME="$PROJECT_NAME-$ACCOUNT_ID-$REGION-tf" -TF_STATE_KEY="infra/account.tfstate" +tf_state_bucket_name="${project_name}-${account_id}-${region}-tf" +tf_state_key="infra/account.tfstate" echo "==================" echo "Setting up account" echo "==================" -echo "ACCOUNT_NAME=$ACCOUNT_NAME" -echo "ACCOUNT_ID=$ACCOUNT_ID" -echo "PROJECT_NAME=$PROJECT_NAME" -echo "TF_STATE_BUCKET_NAME=$TF_STATE_BUCKET_NAME" -echo "TF_STATE_KEY=$TF_STATE_KEY" -echo "REGION=$REGION" +echo "account_name=${account_name}" +echo "account_id=${account_id}" +echo "project_name=${project_name}" +echo "tf_state_bucket_name=${tf_state_bucket_name}" +echo "tf_state_key=${tf_state_key}" +echo "region=${region}" echo echo "------------------------------------------------------------------------------" echo "Bootstrapping the account by creating an S3 backend with minimal configuration" echo "------------------------------------------------------------------------------" echo -echo "Creating bucket: $TF_STATE_BUCKET_NAME" +echo "Creating bucket: ${tf_state_bucket_name}" # For creating buckets outside of us-east-1, a LocationConstraint needs to be set # For creating buckets in us-east-1, LocationConstraint cannot be set # See https://docs.aws.amazon.com/cli/latest/reference/s3api/create-bucket.html -CREATE_BUCKET_CONFIGURATION=("") -if [ "$REGION" != "us-east-1" ]; then - CREATE_BUCKET_CONFIGURATION=("--create-bucket-configuration" "LocationConstraint=$REGION") +create_bucket_configuration=("") +if [ "${region}" != "us-east-1" ]; then + create_bucket_configuration=("--create-bucket-configuration" "LocationConstraint=${region}") fi -aws s3api create-bucket --bucket "$TF_STATE_BUCKET_NAME" --region "$REGION" "${CREATE_BUCKET_CONFIGURATION[@]}" > /dev/null +aws s3api create-bucket --bucket "${tf_state_bucket_name}" --region "${region}" "${create_bucket_configuration[@]}" > /dev/null echo echo "----------------------------------" echo "Creating rest of account resources" @@ -74,7 +74,7 @@ cd infra/accounts # account. github_arn=$(aws iam list-open-id-connect-providers | jq -r ".[] | .[] | .Arn" | grep github || echo "") -if [[ -z ${github_arn} ]]; then +if [[ -z "${github_arn}" ]]; then aws iam create-open-id-connect-provider \ --url "https://token.actions.githubusercontent.com" \ --client-id-list "sts.amazonaws.com" \ @@ -88,15 +88,15 @@ fi terraform init \ -reconfigure \ -input=false \ - -backend-config="bucket=$TF_STATE_BUCKET_NAME" \ - -backend-config="key=$TF_STATE_KEY" \ - -backend-config="region=$REGION" + -backend-config="bucket=${tf_state_bucket_name}" \ + -backend-config="key=${tf_state_key}" \ + -backend-config="region=${region}" # Import the bucket that we created in the previous step so we don't recreate it # But first check if the bucket already exists in the state file. If we are # re-running account setup and the bucket already exists then skip the import step if ! 
terraform state list module.backend.aws_s3_bucket.tf_state; then - terraform import module.backend.aws_s3_bucket.tf_state "$TF_STATE_BUCKET_NAME" + terraform import module.backend.aws_s3_bucket.tf_state "${tf_state_bucket_name}" fi terraform apply \ @@ -105,6 +105,6 @@ terraform apply \ cd - -MODULE_DIR=infra/accounts -BACKEND_CONFIG_NAME="$ACCOUNT_NAME.$ACCOUNT_ID" -./bin/create-tfbackend.sh "$MODULE_DIR" "$BACKEND_CONFIG_NAME" "$TF_STATE_KEY" +module_dir="infra/accounts" +backend_config_name="${account_name}.${account_id}" +./bin/create-tfbackend "${module_dir}" "${backend_config_name}" "${tf_state_key}" diff --git a/bin/terraform-apply.sh b/bin/terraform-apply similarity index 58% rename from bin/terraform-apply.sh rename to bin/terraform-apply index dc6f8fc0..1afc845e 100755 --- a/bin/terraform-apply.sh +++ b/bin/terraform-apply @@ -4,38 +4,38 @@ # The configuration name is used to determine which .tfvars file to use for the -var-file # option of terraform apply. # -# Additional arguments to terraform apply can also be passed in using terraform's built in environment variables +# Additional arguments to terraform apply can also be passed in using terraform's built-in environment variables # TF_CLI_ARGS and TF_CLI_ARGS_name. For example, in CI/CD pipelines, you may want to set # TF_CLI_ARGS="-input=false -auto-approve" to skip the confirmation prompt. # See https://developer.hashicorp.com/terraform/cli/config/environment-variables#tf_cli_args-and-tf_cli_args_name # # Positional parameters: -# MODULE_DIR (required) – The location of the root module to initialize and apply -# CONFIG_NAME (required) – The name of the tfvars config. For accounts, the config name is the AWS account alias. -# For application modules the config name is the name of the environment (e.g. "dev", "staging", "prod"). +# module_dir (required) – The location of the root module to initialize and apply +# config_name (required) – The name of the tfvars config. For accounts, the config name is the AWS account alias. +# For application modules, the config name is the name of the environment (e.g. "dev", "staging", "prod"). # For application modules that are shared across environments, the config name is "shared". -# For example if a backend config file is named "myaccount.s3.tfbackend", then the CONFIG_NAME would be "myaccount" +# For example, if a backend config file is named "myaccount.s3.tfbackend", then the config_name would be "myaccount" # ----------------------------------------------------------------------------- set -euo pipefail -MODULE_DIR="$1" -CONFIG_NAME="$2" +module_dir="$1" +config_name="$2" # Convenience script for running terraform apply -# CONFIG_NAME – the name of the backend config. -# For example if a backend config file is named "myaccount.s3.tfbackend", then the CONFIG_NAME would be "myaccount" -# MODULE_DIR – the location of the root module to initialize and apply +# config_name – the name of the backend config. +# For example, if a backend config file is named "myaccount.s3.tfbackend", then the config_name would be "myaccount" +# module_dir – the location of the root module to initialize and apply # 1. Set working directory to the terraform root module directory -cd "$MODULE_DIR" +cd "${module_dir}" # 2. 
Run terraform apply with the tfvars file (if it exists) that has the same name as the backend config file -TF_VARS_FILE="$CONFIG_NAME.tfvars" -TF_VARS_OPTION="" -if [ -f "$TF_VARS_FILE" ]; then - TF_VARS_OPTION="-var-file=$TF_VARS_FILE" +tf_vars_file="${config_name}.tfvars" +tf_vars_option="" +if [ -f "${tf_vars_file}" ]; then + tf_vars_option="-var-file=${tf_vars_file}" fi -terraform apply "$TF_VARS_OPTION" +terraform apply "${tf_vars_option}" diff --git a/bin/terraform-init.sh b/bin/terraform-init similarity index 52% rename from bin/terraform-init.sh rename to bin/terraform-init index 7cc22fdf..39bf8f67 100755 --- a/bin/terraform-init.sh +++ b/bin/terraform-init @@ -5,23 +5,23 @@ # option of terraform init. # # Positional parameters: -# MODULE_DIR (required) – The location of the root module to initialize and apply -# CONFIG_NAME (required) – The name of the backend config. For accounts, the config name is the AWS account alias. -# For application modules the config name is the name of the environment (e.g. "dev", "staging", "prod"). +# module_dir (required) – The location of the root module to initialize and apply +# config_name (required) – The name of the backend config. For accounts, the config name is the AWS account alias. +# For application modules, the config name is the name of the environment (e.g. "dev", "staging", "prod"). # For application modules that are shared across environments, the config name is "shared". -# For example if a backend config file is named "myaccount.s3.tfbackend", then the CONFIG_NAME would be "myaccount" +# For example, if a backend config file is named "myaccount.s3.tfbackend", then the config_name would be "myaccount" # ----------------------------------------------------------------------------- set -euo pipefail -MODULE_DIR="$1" -CONFIG_NAME="$2" +module_dir="$1" +config_name="$2" # Run terraform init with the named backend config file -BACKEND_CONFIG_FILE="$CONFIG_NAME.s3.tfbackend" +backend_config_file="${config_name}.s3.tfbackend" -# Note that the BACKEND_CONFIG_FILE path is relative to MODULE_DIR, not the current working directory -terraform -chdir="$MODULE_DIR" init \ +# Note that the backend_config_file path is relative to module_dir, not the current working directory +terraform -chdir="${module_dir}" init \ -input=false \ -reconfigure \ - -backend-config="$BACKEND_CONFIG_FILE" + -backend-config="${backend_config_file}" diff --git a/bin/terraform-init-and-apply.sh b/bin/terraform-init-and-apply similarity index 52% rename from bin/terraform-init-and-apply.sh rename to bin/terraform-init-and-apply index 70896ceb..fd07e0ae 100755 --- a/bin/terraform-init-and-apply.sh +++ b/bin/terraform-init-and-apply @@ -1,23 +1,23 @@ #!/bin/bash # ----------------------------------------------------------------------------- # Convenience script for running terraform init followed by terraform apply -# See ./bin/terraform-init.sh and ./bin/terraform-apply.sh for more details. +# See ./bin/terraform-init and ./bin/terraform-apply for more details. # # Positional parameters: -# MODULE_DIR (required) – The location of the root module to initialize and apply -# CONFIG_NAME (required) – The name of the tfbackend and tfvars config. The name +# module_dir (required) – The location of the root module to initialize and apply +# config_name (required) – The name of the tfbackend and tfvars config. The name # is expected to be consistent for both the tfvars file and the tfbackend file. 
# -----------------------------------------------------------------------------
set -euo pipefail
-MODULE_DIR="$1"
-CONFIG_NAME="$2"
+module_dir="$1"
+config_name="$2"
# Convenience script for running terraform init and terraform apply
-# CONFIG_NAME – the name of the backend config.
-# For example if a backend config file is named "myaccount.s3.tfbackend", then the CONFIG_NAME would be "myaccount"
-# MODULE_DIR – the location of the root module to initialize and apply
+# config_name – the name of the backend config.
+# For example, if a backend config file is named "myaccount.s3.tfbackend", then the config_name would be "myaccount"
+# module_dir – the location of the root module to initialize and apply
-./bin/terraform-init.sh "$MODULE_DIR" "$CONFIG_NAME"
+./bin/terraform-init "${module_dir}" "${config_name}"
-./bin/terraform-apply.sh "$MODULE_DIR" "$CONFIG_NAME"
+./bin/terraform-apply "${module_dir}" "${config_name}"
diff --git a/bin/update-pr-environment b/bin/update-pr-environment
new file mode 100755
index 00000000..54b57965
--- /dev/null
+++ b/bin/update-pr-environment
@@ -0,0 +1,57 @@
+#!/bin/bash
+# -----------------------------------------------------------------------------
+# Create or update a temporary environment that will exist while a pull request
+# is open.
+#
+# Positional parameters:
+#   app_name (required) – the name of the subdirectory of /infra that holds the
+#     application's infrastructure code.
+#   environment – the name of the application environment (e.g. dev, staging, prod)
+#   pr_number – the pull request number in GitHub
+#   image_tag – the commit hash to deploy for the temporary environment
+# -----------------------------------------------------------------------------
+set -euo pipefail
+
+app_name="$1"
+environment="$2"
+pr_number="$3"
+image_tag="$4"
+
+workspace="p-${pr_number}"
+
+echo "::group::Initialize Terraform with backend for environment: ${environment}"
+terraform -chdir="infra/${app_name}/service" init -backend-config="${environment}.s3.tfbackend"
+echo "::endgroup::"
+
+echo "Select or create Terraform workspace: ${workspace}"
+terraform -chdir="infra/${app_name}/service" workspace select -or-create "${workspace}"
+
+echo "::group::Apply changes to environment using image tag: ${image_tag}"
+terraform -chdir="infra/${app_name}/service" apply -input=false -auto-approve -var="environment_name=${environment}" -var="image_tag=${image_tag}"
+echo "::endgroup::"
+
+cluster_name="$(terraform -chdir="infra/$app_name/service" output -raw service_cluster_name)"
+service_name="$(terraform -chdir="infra/$app_name/service" output -raw service_name)"
+echo "Wait for service ${service_name} to become stable"
+aws ecs wait services-stable --cluster "${cluster_name}" --services "${service_name}"
+
+service_endpoint="$(terraform -chdir="infra/${app_name}/service" output -raw service_endpoint)"
+pr_info=$(cat <<EOF
+<!-- begin PR environment info -->
+## Preview environment
+- Service endpoint: ${service_endpoint}
+- Deployed commit: ${image_tag}
+<!-- end PR environment info -->
+EOF
+)
+
+pr_body="$(gh pr view "${pr_number}" --json body | jq --raw-output .body)"
+if [[ $pr_body == *"<!-- begin PR environment info -->"*"<!-- end PR environment info -->"* ]]; then
+  pr_body="${pr_body//<!-- begin PR environment info -->*<!-- end PR environment info -->/$pr_info}"
+else
+  pr_body="${pr_body}"$'\n\n'"${pr_info}"
+fi
+
+echo "Update PR description with PR environment info"
+echo "${pr_info}"
+gh pr edit "${pr_number}" --body "${pr_body}"
diff --git a/docs/e2e/e2e-checks.md b/docs/e2e/e2e-checks.md
new file mode 100644
index 00000000..f9e93e93
--- /dev/null
+++ b/docs/e2e/e2e-checks.md
@@ -0,0 +1,73 @@
+# End-to-End (E2E) Tests
+
+## Overview
+
+This repository uses [Playwright](https://playwright.dev/) to perform end-to-end (E2E) tests. The tests can be run locally, and they also run on [Pull Request preview environments](../infra/pull-request-environments.md). This ensures that any new code changes are validated through E2E tests before being merged.
+
+## Folder Structure
+In order to support E2E tests for multiple apps, the folder structure includes a base playwright config (`./e2e/playwright.config.js`) and app-specific derived playwright configs that override the base config. See the example folder structure below:
+```
+- e2e
+  - playwright.config.js
+  - app/
+    - playwright.config.js
+    - tests/
+      - index.spec.js
+  - app2/
+    - playwright.config.js
+    - tests/
+      - index.spec.js
+```
+
+Some highlights:
+- By default, the base config is defined to run on a minimal browser set (desktop and mobile Chrome). Browsers can be added in the app-specific playwright config.
+- Snapshots will be output locally or in the artifacts of the CI job
+- HTML reports are output to the `playwright-report` folder
+- Parallelism is limited on CI to ensure stable execution
+- Accessibility testing can be performed using the `@axe-core/playwright` package (https://playwright.dev/docs/accessibility-testing)
+
+## Running Locally
+
+### Running Locally From the Root Directory
+
+Make targets are set up to easily pass in a particular app name and URL to run tests against:
+
+```bash
+make e2e-setup # install playwright deps
+make e2e-test APP_NAME=app BASE_URL=http://localhost:3000 # run tests on a particular app
+```
+
+### Running Locally From the `./e2e` Directory
+
+If you prefer to run package.json run scripts, you can do so from the e2e folder:
+
+```
+cd e2e
+
+npm install
+
+APP_NAME=app npm run e2e-test
+```
+
+### PR Environments
+
+The E2E tests are triggered in PR preview environments on each PR update. For more information on how PR environments work, please refer to the [PR Environments Documentation](../infra/pull-request-environments.md).
+
+### Workflows
+
+The following workflows trigger E2E tests:
+- [PR Environment Update](../../.github/workflows/pr-environment-checks.yml)
+- [E2E Tests Workflow](../../.github/workflows/e2e-tests.yml)
+
+The [E2E Tests Workflow](../../.github/workflows/e2e-tests.yml) takes a `service_endpoint` URL and an `app_name` to run the tests against specific configurations for your app.
+
+## Configuration
+
+The E2E tests are configured using the following files:
+- [Base Configuration](../../e2e/playwright.config.js)
+- [App-specific Configuration](../../e2e/app/playwright.config.js)
+
+The app-specific configuration files extend the common base configuration.
+
+By default, when running `make e2e-test APP_NAME=app BASE_URL=http://localhost:3000`, you don't necessarily need to pass a `BASE_URL`, since the default is defined in the app-specific playwright config (`./e2e/app/playwright.config.js`).
diff --git a/docs/infra/background-jobs.md b/docs/infra/background-jobs.md
index 9455948d..21a53d3f 100644
--- a/docs/infra/background-jobs.md
+++ b/docs/infra/background-jobs.md
@@ -2,7 +2,7 @@
The application may have background jobs that support the application. Types of background jobs include:

-* Jobs that occur on a fixed schedule (e.g. every hour or every night) — This type of job is useful for ETL jobs that can't be event-driven, such as ETL jobs that ingest source files from an SFTP server or from an S3 bucket managed by another team that we have little control or influence over. **This functionality has not yet been implemented**
+* Jobs that occur on a fixed schedule (e.g. every hour or every night) — This type of job is useful for ETL jobs that can't be event-driven, such as ETL jobs that ingest source files from an SFTP server or from an S3 bucket managed by another team that we have little control or influence over.
* Jobs that trigger on an event (e.g. when a file is uploaded to the document storage service). This type of job can be processed by two types of tasks:
  * Tasks that spin up on demand to process the job — This type of task is appropriate for low-frequency ETL jobs **This is currently the only type that's supported**
  * Worker tasks that are running continuously, waiting for jobs to enter a queue that the worker then processes — This type of task is ideal for high-frequency, low-latency jobs such as processing user uploads or submitting claims to an unreliable or high-latency legacy system **This functionality has not yet been implemented**
@@ -13,4 +13,10 @@ Background jobs for the application are configured via the application's `env-co

## How it works

-File upload jobs use AWS EventBridge to listen to "Object Created" events when files are uploaded to S3. An event rule is created for each job configuration, and each event rule has a single event target that targets the application's ECS cluster. The task uses the same container image that the service uses, and the task's configuration is the same as the service's configuration with the exception of the entrypoint, which is specified by the job configuration's `task_command` setting, which can reference the bucket and path of the file that triggered the event by using the template values `` and ``.
+### File Upload Jobs
+
+File upload jobs use AWS EventBridge to listen to "Object Created" events when files are uploaded to S3. An event rule is created for each job configuration, and each event rule has a single event target that targets the application's ECS cluster. The task uses the same container image that the service uses, and the task's configuration is the same as the service's configuration with the exception of the entry point, which is specified by the job configuration's `task_command` setting, which can reference the bucket and path of the file that triggered the event by using the template values `` and ``.
+
+### Scheduled Jobs
+
+Scheduled jobs use AWS EventBridge to trigger AWS Step Functions jobs on a recurring basis. The trigger can use either a cron expression or a rate expression (hourly, daily, etc.) via the job's `schedule_expression`. As with file upload jobs, the task uses the same container image and configuration as the service, with the exception of the entry point, which is specified by the job configuration's `task_command` setting. Scheduled jobs can be configured with retries, to trigger multiple jobs in a row, or to run in a certain timezone, although we do not configure any of these settings by default.
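For illustration, a scheduled job entry in the application's `env-config` module might look roughly like the sketch below. This is an assumption-laden sketch rather than the template's actual schema: the `scheduled_jobs` map name, the job name, and the command shown are hypothetical, and only the `task_command` and `schedule_expression` settings are described in the documentation above.

```terraform
# Hypothetical sketch of a scheduled job configuration (names and values are
# illustrative; check the env-config module for the real structure).
scheduled_jobs = {
  nightly-etl = {
    task_command        = ["python", "-m", "jobs.nightly_etl"]
    schedule_expression = "cron(0 5 * * ? *)" # every day at 05:00 UTC
  }
}
```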
diff --git a/docs/infra/database-access-control.md b/docs/infra/database-access-control.md index 5b645f93..5977becd 100644 --- a/docs/infra/database-access-control.md +++ b/docs/infra/database-access-control.md @@ -22,3 +22,17 @@ The database authenticates connections with [IAM database authentication](https: * The system leverages IAM to centrally manage access to the database * There are no long-lived user database credentials that need to be stored as database authentication tokens are generated by IAM and have a lifetime of 15 minutes + +## Update the role manager + +If you need to update the role manager code or dependencies, first build the role manager Lambda zip file by running: + +```bash +make infra-module-database-role-manager-archive +``` + +Then deploy the changes to the role manager by running + +```bash +make infra-update-app-database APP_NAME= ENVIRONMENT= +``` diff --git a/docs/infra/develop-and-test-infrastructure-in-isolation-using-workspaces.md b/docs/infra/develop-and-test-infrastructure-in-isolation-using-workspaces.md new file mode 100644 index 00000000..2ea9235c --- /dev/null +++ b/docs/infra/develop-and-test-infrastructure-in-isolation-using-workspaces.md @@ -0,0 +1,68 @@ +# Develop and test infrastructure in isolation using workspaces + +When developing infrastructure code, you often want to develop and test your changes in isolation so that: + +- Your changes do not impact other engineers on the team +- Other engineers working on infrastructure do not revert your changes when making their own changes +- Other engineers can review your changes before applying them + +This document describes a workflow that leverages [Terraform workspaces](https://developer.hashicorp.com/terraform/language/state/workspaces) for developing and testing infrastructure changes in isolation so that they can be tested and peer reviewed before being merged into main. + +## Overview + +By default, each Terraform root module has a single workspace named `default`. Workspaces allow you to deploy multiple instances of the root module configuration without configuring a new backend. When you run `terraform apply` in a separate workspace, a parallel set of infrastructure resources are created. + +There are a few notable differences with resources created in a non-default workspace: + +1. When using terraform, you cannot deploy the same resource with the same name. The `terraform apply` will fail with an error like "A resource with the ID already exists". Therefore, to avoid naming conflicts, we prefix the resource names with the workspace name. +2. Some resources, such as database and storage buckets, enable deletion protection to prevent accidental deletion. However, non-default workspaces are intended to be temporary, so deletion protection is disabled in non-default workspaces. +3. Resources that are difficult to create in isolation, such as [DNS records](https://github.com/navapbc/template-infra/blob/2cda6da18c84aa5a3dfb038ab32be4fac363af21/infra/modules/service/dns.tf#L3), are not created at all. + +## Development workflow + +Follow these steps if you want to develop and test a change to the service layer. Make the appropriate changes to the `-chdir` flag if you want to make a change to a different layer, such as the database layer or network layer. + +### 1. 
Create a new workspace
+
+First, make sure that the Terraform root module is initialized to the dev environment:
+
+```bash
+terraform -chdir=infra/<APP_NAME>/service init -reconfigure -backend-config=dev.s3.tfbackend
+```
+
+Then create a new workspace. Since the workspace name is used to prefix resource names, use a short workspace name to avoid hitting resource name character limits. Assuming you're only working on one thing at a time (following the Kanban principle of limiting work in progress), your initials would make a good workspace name. For example, if your name were Loren Yu, you could use `ly` as your workspace name.
+
+```bash
+terraform -chdir=infra/<APP_NAME>/service workspace new <WORKSPACE_NAME>
+```
+
+Verify that the new workspace was created and selected:
+
+```bash
+# List all workspaces, with a * next to the selected workspace
+terraform -chdir=infra/<APP_NAME>/service workspace list
+# - OR -
+# Show your current selected workspace
+terraform -chdir=infra/<APP_NAME>/service workspace show
+```
+
+### 2. Create resources in your workspace
+
+```bash
+terraform -chdir=infra/<APP_NAME>/service apply -var=environment_name=dev
+# - OR -
+make infra-update-app-service "APP_NAME=<APP_NAME>" ENVIRONMENT=dev
+```
+
+### 3. Clean up after merging to main and deploying changes to default workspace
+
+Finally, after you have merged your pull request and deployed your changes to the default workspace, clean up your workspace so that you don't continue to accrue costs from the infrastructure resources.
+
+```bash
+# Destroy all infrastructure resources within the workspace
+terraform -chdir=infra/<APP_NAME>/service destroy -var=environment_name=dev
+# Select the default workspace so that you can delete your workspace, since you can't delete the currently selected workspace
+terraform -chdir=infra/<APP_NAME>/service workspace select default
+# Delete your workspace
+terraform -chdir=infra/<APP_NAME>/service workspace delete <WORKSPACE_NAME>
+```
diff --git a/docs/infra/environment-variables-and-secrets.md b/docs/infra/environment-variables-and-secrets.md
index f05d39df..e468b95f 100644
--- a/docs/infra/environment-variables-and-secrets.md
+++ b/docs/infra/environment-variables-and-secrets.md
@@ -40,19 +40,23 @@ module "dev_config" {
Secrets are a specific category of environment variables that need to be handled sensitively. Examples of secrets are authentication credentials such as API keys for external services. Secrets first need to be stored in AWS SSM Parameter Store as a `SecureString`. This section then describes how to make those secrets accessible to the ECS task as environment variables through the `secrets` configuration in the container definition (see AWS documentation on [retrieving Secrets Manager secrets through environment variables](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/secrets-envvar-secrets-manager.html)).

-Secrets are defined in the same file that non-sensitive environment variables are defined, in the `app-config` module in the [environment-variables.tf file](/infra/app/app-config/env-config/environment-variables.tf). Modify the `secrets` list to define the secrets that the application will have access to. For each secret, `name` defines the environment variable name, and `ssm_param_name` defines the SSM parameter name that stores the secret value. For example:
+Secrets are defined in the same file that non-sensitive environment variables are defined, in the `app-config` module in the [environment-variables.tf file](/infra/app/app-config/env-config/environment-variables.tf). Modify the `secrets` map to define the secrets that the application will have access to. For each secret, the map key defines the environment variable name. The `manage_method` property, which can be set to `"generated"` or `"manual"`, defines whether to generate a random secret or to reference an existing secret that was manually created and stored in AWS SSM. The `secret_store_name` property defines the SSM parameter name that stores the secret value. If `manage_method = "generated"`, then `secret_store_name` is where terraform will store the secret. If `manage_method = "manual"`, then `secret_store_name` is where terraform will look for the existing secret. For example:
```terraform
# environment-variables.tf

locals {
-  secrets = [
-    {
-      name           = "SOME_API_KEY"
-      ssm_param_name = "/${var.app_name}-${var.environment}/secret-sauce"
+  secrets = {
+    GENERATED_SECRET = {
+      manage_method     = "generated"
+      secret_store_name = "/${var.app_name}-${var.environment}/generated-secret"
     }
-  ]
+    MANUALLY_CREATED_SECRET = {
+      manage_method     = "manual"
+      secret_store_name = "/${var.app_name}-${var.environment}/manually-created-secret"
+    }
+  }
}
```

-> ⚠️ Make sure you store the secret in SSM Parameter Store before you try to add secrets to your application service, or else the service won't be able to start since the ECS Task Executor won't be able to fetch the configured secret.
+> ⚠️ For secrets with `manage_method = "manual"`, make sure you store the secret in SSM Parameter Store *before* you try to configure your application service with the secrets, or else the service won't be able to start since the ECS Task Executor won't be able to fetch the configured secret.
diff --git a/docs/infra/infrastructure-configuration.md b/docs/infra/infrastructure-configuration.md
new file mode 100644
index 00000000..350dacf9
--- /dev/null
+++ b/docs/infra/infrastructure-configuration.md
@@ -0,0 +1,40 @@
+# Infrastructure configuration
+
+## Configure infrastructure with configuration modules
+
+The infrastructure derives all of its configuration from the following modules:
+
+- Project config ([/infra/project-config/](/infra/project-config/))
+- App config (`/infra/<APP_NAME>/app-config` per application)
+
+Shell scripts running in CI jobs or locally on developer machines treat config modules as root modules and fetch configuration values by running `terraform apply -auto-approve` followed by `terraform output`.
+
+Root modules across the infrastructure layers fetch configuration values by calling the config modules as child modules:
+
+```terraform
+module "project_config" {
+  source = "../../project-config"
+}
+
+module "app_config" {
+  source = "../app-config"
+}
+```
+
+### Design config module outputs to be static
+
+Config modules are designed to be static. This means that all of the outputs can be statically determined without needing to execute the code. In particular:
+
+- All config module outputs are either constant or derived from constants via deterministic functions.
+- Config module outputs do not rely on the environment, including which root module is being applied, which workspace is selected, or the current timestamp.
+- Config modules have no side effects. In particular, they do not create any infrastructure resources.
+
+When configuring your project and application, keep these principles in mind to avoid violating the static nature of config modules.
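To make the "static" requirement concrete, here is a minimal, hypothetical sketch of a config module. None of these names come from the repository; the point is simply that outputs are built from constants, input variables, and deterministic string interpolation, with no data sources, resources, or workspace lookups.

```terraform
# Hypothetical static config module (illustrative names only).

variable "app_name" {
  type = string
}

variable "environment" {
  type = string
}

locals {
  # Constants only: no lookups against AWS, the workspace, or the clock
  project_name = "my-project"
  environments = ["dev", "staging", "prod"]
}

output "environments" {
  value = local.environments
}

output "service_name" {
  # Derived deterministically from constants and inputs
  value = "${local.project_name}-${var.app_name}-${var.environment}"
}
```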
+
+## Benefits of config modules over variable definitions (.tfvars) files
+
+Putting configuration in static configuration modules has a number of benefits over managing configuration in Terraform [variable definitions (.tfvars) files](https://developer.hashicorp.com/terraform/language/values/variables#assigning-values-to-root-module-variables):
+
+1. Environment-specific configuration can be forced to adopt a common convention by generating the configuration value through code. For example, each application's service name is defined as `"${local.prefix}${var.app_name}-${var.environment}"`.
+2. Configuration values can be used outside of Terraform by shell scripts and CI/CD workflows by calling `terraform output` after calling `terraform apply -auto-approve`. If configuration values were embedded in `.tfvars` files, the scripts would need to parse the `.tfvars` files for those values. Note that `-auto-approve` is safe for config modules since they are entirely static and have no side effects.
+3. Eliminate the possibility of passing in the incorrect `.tfvars` file to `terraform plan/apply`. Since we [reuse the same root module with multiple terraform backend configs](https://github.com/navapbc/template-infra/blob/main/docs/decisions/infra/0004-separate-terraform-backend-configs-into-separate-config-files.md), having separate `.tfvars` files requires that after `terraform init` is called with a specific `-backend-config` file, the corresponding `.tfvars` file needs to be passed to `terraform plan`/`terraform apply`. This creates an opportunity for error if the incorrect variable definitions file is used when a particular backend has been initialized.
diff --git a/docs/infra/intro-to-terraform-workspaces.md b/docs/infra/intro-to-terraform-workspaces.md
deleted file mode 100644
index 0a4a88d4..00000000
--- a/docs/infra/intro-to-terraform-workspaces.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Workspaces
-
-Terraform workspaces are created by default, the default workspace is named "default." Workspaces are used to allow multiple engineers to deploy their own stacks for development and testing. This allows multiple engineers to develop new features in parallel using a single environment without destroying each other's infrastructure. Separate resources will be created for each engineer when using the prefix variable.
-
-## Terraform workspace commands
-
-`terraform workspace show [Name]` - This command will show the workspace you working in.
-
-`terraform workspace list [Name]` - This command will list all workspaces.
-
-`terraform workspace new [Name]` - This command will create a new workspace.
-
-`terraform workspace select [Name]` - This command will switch your workspace to the workspace you select.
-
-`terraform workspace delete [Name]` - This command will delete the specified workspace. (does not delete infrastructure, that step will done first)
-
-## Workspaces and prefix - A How-To
-
- Workspaces are used to allow multiple developers to deploy their own stacks for development and testing. By default "prefix~ is set to `terraform.workspace` in the envs/dev environment, it is `staging` and `prod` in those respective environments.
- -### envs/dev/main.tf - -``` tf -locals { - prefix = terraform.workspace -} - -module "example" { - source = "../../modules/example" - prefix = local.prefix -} - -``` - -### modules/example/variables.tf - When creating a new module create the variable "prefix" in your variables.tf - -``` tf - -variable "prefix" { - type = string - description = "prefix used to uniquely identify resources, allows parallel development" - -} - -``` - -### modules/example/main.tf - Use `var.prefix` to uniquely name resources for parallel development - -``` tf - -# Create the S3 bucket with a unique prefix from terraform.workspace. -resource "aws_s3_bucket" "example" { - bucket = "${var.prefix}-bucket" - -} - -``` - -When in the workspace "shawn", the resulting bucket name created in the aws account will be `shawn-bucket`. This prevents the following undesirable situation: If resources are not actively prefixed and two developers deploy the same resource, the developer who runs their deployment second will overwrite the deployment of the first. diff --git a/docs/infra/intro-to-terraform.md b/docs/infra/intro-to-terraform.md deleted file mode 100644 index f91eddfe..00000000 --- a/docs/infra/intro-to-terraform.md +++ /dev/null @@ -1,33 +0,0 @@ -# Introduction to Terraform - -## Basic Terraform Commands - -The `terraform init` command is used to initialize a working directory containing Terraform configuration files. This is the first command that should be run after writing a new Terraform configuration or cloning an existing one from version control. - -The `terraform plan` command creates an execution plan, which lets you preview the changes that Terraform plans to make to your infrastructure. By default, when Terraform creates a plan it: - -- Reads the current state of any already existing remote objects to make sure that the Terraform state is up-to-date. -- Compares the current configuration to the prior state, noting any differences. -- Proposes a set of change actions that should, if applied, make the remote objects match the configuration. - -The `terraform apply` command executes the actions proposed in a Terraform plan deploying the infrastructure specified in the configuration. Use with caution. The configuration becomes idempotent once a subsequent apply returns 0 changes. - -The `terraform destroy` command is a convenient way to destroy all remote objects managed by a particular Terraform configuration. Use `terraform plan -destroy` to preview what remote objects will be destroyed if you run `terraform destroy`. - -⚠️ WARNING! ⚠️ This is a destructive command! As a best practice, it's recommended that you comment out resources in non-development environments rather than running this command. `terraform destroy` should only be used as a way to clean up a development environment. e.g. a developer's workspace after they are done with it. - -For more information about terraform commands follow the link below: - -- [Basic CLI Features](https://www.terraform.io/cli/commands) - -## Terraform Dependency Lock File - -The [dependency lock file](https://www.terraform.io/language/files/dependency-lock) tracks provider dependencies. It belongs to the configuration as a whole and is created when running `terraform ini`. The lock file is always named `.terraform.lock.hcl`, and this name is intended to signify that it is a lock file for various items that Terraform caches in the `.terraform` subdirectory of your working directory. 
You should include this file in your version control repository so that you can discuss potential changes to your external dependencies via code review, just as you would discuss potential changes to your configuration itself. - -## Modules - -A module is a container for multiple resources that are used together. Modules can be used to create lightweight abstractions, so that you can describe your infrastructure in terms of its architecture, rather than directly in terms of physical objects. The `.tf` files in your working directory when you run `terraform plan` or `terraform apply` together form the root module. In this root module, you will call modules that you create from the module directory to build the infrastructure required to provide any functionality needed for the application. - -## Terraform Workspaces - -Workspaces are used to allow multiple engineers to deploy their own stacks for development and testing. Read more about it in [Terraform Workspaces](./intro-to-terraform-workspaces.md) diff --git a/docs/infra/making-infra-changes.md b/docs/infra/making-infra-changes.md index 5989134e..3133c7b2 100644 --- a/docs/infra/making-infra-changes.md +++ b/docs/infra/making-infra-changes.md @@ -14,6 +14,12 @@ Make changes to the account: make infra-update-current-account ``` +Make changes to the network: + +```bash +make infra-update-network NETWORK_NAME=dev +``` + Make changes to the application service in the dev environment: ```bash @@ -39,18 +45,30 @@ TF_CLI_ARGS_apply='-var=image_tag=abcdef1' make infra-update-app-service APP_NAM An alternative to using the Makefile is to directly use the terraform wrapper scripts that the Makefile uses: ```bash -project-root$ ./bin/terraform-init.sh app/service dev -project-root$ ./bin/terraform-apply.sh app/service dev -project-root$ ./bin/terraform-init-and-apply.sh app/service dev # calls init and apply in the same script +project-root$ ./bin/terraform-init app/service dev +project-root$ ./bin/terraform-apply app/service dev +project-root$ ./bin/terraform-init-and-apply app/service dev # calls init and apply in the same script ``` Look in the script files for more details on usage. ## Using Terraform CLI directly -Finally, if the wrapper scripts don't meet your needs, you can always run `terraform` directly from the root module directory. You may need to do this if you are running terraform commands other than `terraform plan` and `terraform apply`, such as `terraform import`, `terraform taint`, etc. To do this, you'll need to pass in the appropriate `tfvars` and `tfbackend` files to `terraform init` and `terraform apply`. For example, to make changes to the application's service resources in the dev environment, cd to the `infra/app/service` directory and run: +Finally, if the wrapper scripts don't meet your needs, you can always run `terraform` directly. You may need to do this if you are running terraform commands other than `terraform plan` and `terraform apply`, such as `terraform import`, `terraform taint`, etc. To do this, you'll need to remember to run `terraform init` with the appropriate `tfbackend` file since the root modules are shared across multiple backends. For example, to make changes to the application's service resources in the dev environment: + +```bash +project-root$ cd infra/app/service +infra/app-service$ terraform init -backend-config=dev.s3.tfbackend +infra/app-service$ terraform apply -var-file=dev.tfvars +``` + +or you can run the commands from the project root by using the `-chdir` flag. 
```bash
-infra/app/service$ terraform init -backend-config=dev.s3.tfbackend
-infra/app/service$ terraform apply -var-file=dev.tfvars
+project-root$ terraform -chdir=infra/app/service init -backend-config=dev.s3.tfbackend
+project-root$ terraform -chdir=infra/app/service apply -var="environment_name=dev"
```
+
+## See also
+
+While developing infrastructure, you often don't want to make changes directly to the infrastructure before your infrastructure code has been tested, peer reviewed, and merged into main. In these situations, [use workspaces to develop and test your infrastructure changes in isolation](./develop-and-test-infrastructure-in-isolation-using-workspaces.md).
diff --git a/docs/infra/module-architecture.md b/docs/infra/module-architecture.md
index 3ca4eb67..07a165bf 100644
--- a/docs/infra/module-architecture.md
+++ b/docs/infra/module-architecture.md
@@ -75,15 +75,15 @@ app/database --> accounts

When deciding which layer to put an infrastructure resource in, follow the following guidelines.

-* **Default to the service layer** By default, consider putting application resources as part of the service layer. This way the resource is managed together with everything else in the environment, and spinning up new application environments automatically spins up the resource.
+- **Default to the service layer** By default, consider putting application resources as part of the service layer. This way the resource is managed together with everything else in the environment, and spinning up new application environments automatically spins up the resource.

-* **Consider variations in the number and types of environments of each layer:** If the resource does not or might not map one-to-one with application environments, consider putting the resource in a different layer. For example, the number of AWS accounts may or may not match the number of VPCs, which may or may not match the number of application environments. As another example, each application only has one instance of a build repository, which is shared across all environments. As a final example, an application may or may not need a database layer at all, so by putting database-related resources in the database layer, and application can skip those resources by skipping the entire layer rather than by needing to change the behavior of an existing layer. Choose the layer for the resource that maps most closely with that resource's purpose.
+- **Consider variations in the number and types of environments of each layer:** If the resource does not or might not map one-to-one with application environments, consider putting the resource in a different layer. For example, the number of AWS accounts may or may not match the number of VPCs, which may or may not match the number of application environments. As another example, each application only has one instance of a build repository, which is shared across all environments. As a final example, an application may or may not need a database layer at all, so by putting database-related resources in the database layer, an application can skip those resources by skipping the entire layer rather than by needing to change the behavior of an existing layer. Choose the layer for the resource that maps most closely with that resource's purpose.

-* **Consider AWS uniqueness constraints on resources:** This is a special case of the previous consideration: resources that AWS requires to be unique should be managed by a layer that creates only one of that resource per instance of that layer.
For example, there can only be one OIDC provider for GitHub actions per AWS account (see [Creating OIDC identity providers](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_providers_create_oidc.html)), so the OIDC provider should go in the account layer. As another example, there can only be one VPC endpoint per VPC per AWS service (see [Fix conflicting DNS domain errors for interface VPC endpoints](https://repost.aws/knowledge-center/vpc-interface-endpoint-domain-conflict)). Therefore, if multiple application environments share a VPC, they can't each create a VPC endpoint for the same AWS service. As such, the VPC endpoint logically belongs to the network layer and VPC endpoints should be created and managed per network instance rather than per application environment. +- **Consider AWS uniqueness constraints on resources:** This is a special case of the previous consideration: resources that AWS requires to be unique should be managed by a layer that creates only one of that resource per instance of that layer. For example, there can only be one OIDC provider for GitHub actions per AWS account (see [Creating OIDC identity providers](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_providers_create_oidc.html)), so the OIDC provider should go in the account layer. As another example, there can only be one VPC endpoint per VPC per AWS service (see [Fix conflicting DNS domain errors for interface VPC endpoints](https://repost.aws/knowledge-center/vpc-interface-endpoint-domain-conflict)). Therefore, if multiple application environments share a VPC, they can't each create a VPC endpoint for the same AWS service. As such, the VPC endpoint logically belongs to the network layer and VPC endpoints should be created and managed per network instance rather than per application environment. -* **Consider policy constraints on what resources the project team is authorized to manage:** Different categories of resources may have different requirements on who is allowed to create and manage those resources. Resources that the project team are not allowed to manage directly should not be mixed with resources that the project team needs to manage directly. +- **Consider policy constraints on what resources the project team is authorized to manage:** Different categories of resources may have different requirements on who is allowed to create and manage those resources. Resources that the project team are not allowed to manage directly should not be mixed with resources that the project team needs to manage directly. -* **Consider out-of-band dependencies:** Put infrastructure resources that require steps outside of terraform to be completed configured in layers that are upstream to resources that depend on those completed resources. For example, after creating a database cluster, the database schemas, roles, and privileges need to be configured before they can be used by a downstream service. Therefore database resources should be separate from the service layer so that the database can be configured fully before attempting to create the service layer resources. +- **Consider out-of-band dependencies:** Put infrastructure resources that require steps outside of terraform to be completed configured in layers that are upstream to resources that depend on those completed resources. For example, after creating a database cluster, the database schemas, roles, and privileges need to be configured before they can be used by a downstream service. 
Therefore database resources should be separate from the service layer so that the database can be configured fully before attempting to create the service layer resources.

## Making changes to infrastructure
diff --git a/docs/infra/pull-request-environments.md b/docs/infra/pull-request-environments.md
new file mode 100644
index 00000000..4953140e
--- /dev/null
+++ b/docs/infra/pull-request-environments.md
@@ -0,0 +1,34 @@
+# Pull request environments
+
+A temporary environment is created for each pull request and stays up while the pull request is open. The endpoint for the pull request and the deployed commit are added to the pull request description, and updated when the environment is updated. Use cases for the temporary pull request environment include:
+
+- Allow other delivery stakeholders—including product managers, designers, and business owners—to review changes before they are merged and deployed
+- Enable automated end-to-end tests on the pull request
+- Enable automated accessibility checks on the pull request
+- Facilitate workspace creation for developing and testing service layer infrastructure changes
+
+## Lifecycle of pull request environments
+
+A pull request environment is created when a pull request is opened or reopened, and destroyed when the pull request is merged or closed. When new commits are pushed up to the pull request, the pull request environment is updated.
+
+## Isolate database migrations into separate pull requests
+
+Database migrations are not reflected in PR environments. In particular, PR environments share the same database as the dev environment, so database migrations that exist in the pull request are not run on that database, to avoid impacting the dev environment.
+
+Therefore, isolate database changes in their own pull request and merge that pull request first before opening pull requests with application changes that depend on those database changes. Note that it is still okay and encouraged to develop database and application changes together during local development.
+
+This guidance is not strict. It is still okay to combine database migrations and application changes in a single pull request. However, when doing so, note that the PR environment may not be fully functional if the application changes rely on the database migrations.
+
+## Implementing pull request environments for each application
+
+Pull request environments are created by GitHub Actions workflows.
There are two reusable callable workflows that manage pull request environments: + +- [pr-environment-checks.yml](/.github/workflows/pr-environment-checks.yml) - creates or updates a temporary environment in a separate Terraform workspace for a given application and pull request +- [pr-environment-destroy.yml](/.github/workflows/pr-environment-destroy.yml) - destroys a temporary environment and workspace for a given application and pull request + +Using these reusable workflows, configure PR environments for each application with application-specific workflows: + +- `ci-[app_name]-pr-environment-checks.yml` + - Based on [ci-app-pr-environment-checks.yml](https://github.com/navapbc/template-infra/blob/main/.github/workflows/ci-app-pr-environment-checks.yml) +- `ci-[app_name]-pr-environment-destroy.yml` + - Based on [ci-app-pr-environment-destroy.yml](https://github.com/navapbc/template-infra/blob/main/.github/workflows/ci-app-pr-environment-destroy.yml) diff --git a/docs/infra/service-command-execution.md b/docs/infra/service-command-execution.md index df2ebdfb..9b5deb16 100644 --- a/docs/infra/service-command-execution.md +++ b/docs/infra/service-command-execution.md @@ -40,7 +40,7 @@ To enable service execution access, the VPC requires an additional VPC endpoint. make infra-update-network NETWORK_NAME= ``` -`ENVIRONMENT` needs to be the name of the network that the application environment is running in. +`NETWORK_NAME` needs to be the name of the network that the application environment is running in. ### 4. Update the application service @@ -67,3 +67,10 @@ aws ecs execute-command --cluster \ ``` To run other commands, modify the `--command` flag to execute the command, rather than starting a shell. + +## Troubleshooting + +If you get an error after running the above steps, these diagnosis steps may be helpful: +1. Verify that `enableExecuteCommand` is `true` on your running task by using `aws ecs describe-tasks --cluster $APP_NAME-$ENVIRONMENT_NAME --task `. If not, run the `infra-update-app-service` command above and/or redeploy your service. +2. Make sure that the SSM Agent is running by checking the `managedAgents` object in the `containers` array of the `aws ecs describe-tasks` command output. If it is `STOPPED`, you may have an issue with your container that is preventing the agent from running. +3. Run the [amazon-ecs-exec-checker](https://github.com/aws-containers/amazon-ecs-exec-checker) script to further pinpoint issues that may prevent ECS Exec from functioning. diff --git a/docs/infra/set-up-app-build-repository.md b/docs/infra/set-up-app-build-repository.md index 01c32a63..eb30bb49 100644 --- a/docs/infra/set-up-app-build-repository.md +++ b/docs/infra/set-up-app-build-repository.md @@ -27,6 +27,14 @@ Now run the following commands to create the resources, making sure to verify th make infra-update-app-build-repository APP_NAME=app ``` +## 3. Check that the build repository was created properly + +Run the [Build and publish GitHub Actions workflow](/.github/workflows/build-and-publish.yml) to build your application's image and publish it to the container image registry you just created. If you have the GitHub CLI installed, you can do this using the following command. 
+ +```bash +gh workflow run build-and-publish.yml --field app_name=app --field ref=main +``` + ## Set up application environments Once you set up the deployment process, you can proceed to [set up application environments](./set-up-app-env.md) diff --git a/docs/infra/set-up-app-env.md b/docs/infra/set-up-app-env.md index 31a588f8..702b9223 100644 --- a/docs/infra/set-up-app-env.md +++ b/docs/infra/set-up-app-env.md @@ -9,12 +9,17 @@ The application environment setup process will: Before setting up the application's environments you'll need to have: 1. [A compatible application in the app folder](https://github.com/navapbc/template-infra/blob/main/template-only-docs/application-requirements.md) -2. [Configure the app](/infra/app/app-config/main.tf). Make sure you update `has_database` to `true` or `false` depending on whether or not your application has a database to integrate with. - 1. If you're configuring your production environment, make sure to update the `service_cpu`, `service_memory`, and `service_desired_instance_count` settings based on the project's needs. If your application is sensitive to performance, consider doing a load test. -3. [Create a nondefault VPC to be used by the application](./set-up-network.md) -4. (If the application has a database) [Set up the database for the application](./set-up-database.md) -5. (If you have an incident management service) [Set up monitoring](./set-up-monitoring-alerts.md) -6. [Set up the application build repository](./set-up-app-build-repository.md) +2. [Set up the AWS account that this environment is going to use](/docs/infra/set-up-aws-account.md). +3. [Configure the app](/infra/app/app-config/main.tf). + 1. Make sure you update `has_database` to `true` or `false` (defaults to `true`) depending on whether or not your application has a database to integrate with. + 2. Make sure you update `has_external_non_aws_service` to `true` or `false` depending on whether your application utilizes any non-AWS services. Other applications within the same git repo count as external services, so if your application makes API calls to another application service in the same git repo, set `has_external_non_aws_service` to `true`. + 3. If you're configuring your production environment, make sure to update the `service_cpu`, `service_memory`, and `service_desired_instance_count` settings based on the project's needs. If your application is sensitive to performance, consider doing a load test. + 4. Make sure your application environment is using the AWS Account you want to use by checking the `account_name` property in the environment configuration and updating it if necessary. +4. [Create a nondefault VPC to be used by the application](./set-up-network.md) +5. (If the application has external non-AWS services) [Set up network access to the public internet](./set-up-public-internet-access.md) +6. (If the application has a database) [Set up the database for the application](./set-up-database.md) +7. (If you have an incident management service) [Set up monitoring](./set-up-monitoring-alerts.md) +8. [Set up the application build repository](./set-up-app-build-repository.md) ## 1. Configure backend @@ -36,7 +41,7 @@ Before creating the application resources, you'll need to first build and publis There are two ways to do this: 1. Trigger the "Build and Publish" workflow from your repo's GitHub Actions tab. This option requires that the `role-to-assume` GitHub workflow variable has already been set up as part of the overall infra account setup process. -1. 
Alternatively, run the following from the root directory. This option can take much longer than the GitHub workflow, depending on your machine's architecture. +2. Alternatively, run the following from the root directory. This option can take much longer than the GitHub workflow, depending on your machine's architecture. ```bash make release-build APP_NAME=app @@ -57,4 +62,3 @@ TF_CLI_ARGS_apply="-var=image_tag=" make infra-update-app-service APP Configure email alerts, external incident management service integration and additional Cloudwatch Alerts. [Configure monitoring module](./set-up-monitoring-alerts.md) - diff --git a/docs/infra/set-up-aws-account.md b/docs/infra/set-up-aws-account.md index fe0040bb..af35a93f 100644 --- a/docs/infra/set-up-aws-account.md +++ b/docs/infra/set-up-aws-account.md @@ -2,7 +2,7 @@ The AWS account setup process will: -1. Create the [Terraform backend](https://www.terraform.io/language/settings/backends/configuration) resources needed to store Terraform's infrastructure state files. The project uses an [S3 backend](https://www.terraform.io/language/settings/backends/s3). +1. Create the [Terraform backend](https://developer.hashicorp.com/terraform/language/backend) resources needed to store Terraform's infrastructure state files. The project uses an [S3 backend](https://www.terraform.io/language/settings/backends/s3). 2. Create the [OpenID connect provider in AWS](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_providers_create_oidc.html) to allow GitHub Actions to access AWS account resources. 3. Create the IAM role and policy that GitHub Actions will use to manage infrastructure resources. @@ -49,9 +49,13 @@ make infra-set-up-account ACCOUNT_NAME= This command will create the S3 tfstate bucket and the GitHub OIDC provider. It will also create a `[account name].[account id].s3.tfbackend` file in the `infra/accounts` directory. -### 3. Update the account names map in app-config +### 3. Check that GitHub actions can authenticate into the AWS account -In [app-config/main.tf](/infra/app/app-config/main.tf), update the `account_names_by_environment` config to reflect the account name you chose. +This step requires [GitHub CLI](https://cli.github.com/) to be installed and [configured to authenticate with your GitHub account](https://cli.github.com/manual/). If you don't have it, you can install on Mac via `brew install gh` + +```bash +make infra-check-github-actions-auth ACCOUNT_NAME= +``` ## Making changes to the account diff --git a/docs/infra/set-up-database.md b/docs/infra/set-up-database.md index 7f07c451..45210cbc 100644 --- a/docs/infra/set-up-database.md +++ b/docs/infra/set-up-database.md @@ -8,6 +8,10 @@ The database setup process will: 4. Create an [AWS Lambda function](https://docs.aws.amazon.com/lambda/latest/dg/welcome.html), the "role manager", for provisioning the [PostgreSQL database users](https://www.postgresql.org/docs/8.0/user-manag.html) that will be used by the application service and by the migrations task. 5. Invoke the role manager function to create the `app` and `migrator` Postgres users. +## Important note + +This is an optional step that can be skipped if the application does not have a database. + ## Requirements Before setting up the database you'll need to have: @@ -26,6 +30,28 @@ make infra-configure-app-database APP_NAME= ENVIRONMENT= `APP_NAME` needs to be the name of the application folder within the `infra` folder. By default, this is `app`. `ENVIRONMENT` needs to be the name of the environment you are creating. 
This will create a file called `<ENVIRONMENT>.s3.tfbackend` in the `infra/app/service` module directory.

+### (Optional) Enable any database extensions that require `rds_superuser`
+
+Enabling some extensions, such as [pgvector](https://github.com/pgvector/pgvector), requires the `rds_superuser` role. You can enable any such extensions via the `superuser_extensions` configuration variable, setting each one to enabled or disabled.
+
+For example, to enable the pgvector extension:
+
+```terraform
+# infra/app/app-config/env-config/main.tf
+
+database_config = {
+  ...
+
+  superuser_extensions = {
+    "vector" : true, # enables the pgvector extension
+  }
+}
+```
+
+Note that this should only be used for extensions that require the `rds_superuser` role in order to be created. For many extensions, you can (and should) instead enable them as part of your application's standard database migrations. This [list of trusted extensions from AWS](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_PostgreSQL.html#PostgreSQL.Concepts.General.Extensions.Trusted) shows which extensions can be enabled via database migrations.
+
+If you're not sure whether you need to do anything here, you can skip this and come back to it later.
+
## 2. Create database resources

Now run the following commands to create the resources. Review the terraform before confirming "yes" to apply the changes. This can take over 5 minutes.
@@ -65,12 +91,16 @@ The Lambda function's response should describe the resulting PostgreSQL roles an
}
```

-### Important note on Postgres table permissions
+### Updating the role manager
+
+To make changes to the role manager, such as updating dependencies or adding functionality, see [database access control](./database-access-control.md#update-the-role-manager).
+
+### Note on Postgres table permissions

-Before creating migrations that create tables, first create a migration that includes the following SQL command (or equivalent if your migrations are written in a general-purpose programming language):
+The role manager executes the following statement as part of database setup:

```sql
-ALTER DEFAULT PRIVILEGES GRANT ALL ON TABLES TO app
+ALTER DEFAULT PRIVILEGES IN SCHEMA app GRANT ALL ON TABLES TO app
```

This will cause all future tables created by the `migrator` user to automatically be accessible by the `app` user. See the [Postgres docs on ALTER DEFAULT PRIVILEGES](https://www.postgresql.org/docs/current/sql-alterdefaultprivileges.html) for more info. As an example, see the example app's migrations file [migrations.sql](https://github.com/navapbc/template-infra/blob/main/app/migrations.sql).
diff --git a/docs/infra/set-up-infrastructure-tools.md b/docs/infra/set-up-infrastructure-tools.md
index 1bf076ad..71bbd983 100644
--- a/docs/infra/set-up-infrastructure-tools.md
+++ b/docs/infra/set-up-infrastructure-tools.md
@@ -19,10 +19,14 @@ brew install tfenv

Then install the version of Terraform you need.

```bash
-tfenv install 1.4.6
+tfenv install 1.8.0
```

-If you are unfamiliar with Terraform, check out this [basic introduction to Terraform](./intro-to-terraform.md).
+You may need to set the Terraform version.
+
+```bash
+tfenv use 1.8.0
+```

### Install AWS CLI

@@ -36,7 +40,7 @@ The [Go programming language](https://go.dev/dl/) is required to run [Terratest]

### Install GitHub CLI

-The [GitHub CLI](https://cli.github.com/) is useful for automating certain operations for GitHub such as with GitHub actions. This is needed to run [check-github-actions-auth.sh](/bin/check-github-actions-auth.sh)
This is needed to run [check-github-actions-auth.sh](/bin/check-github-actions-auth.sh) +The [GitHub CLI](https://cli.github.com/) is useful for automating certain operations for GitHub such as with GitHub actions. This is needed to run [check-github-actions-auth](/bin/check-github-actions-auth) ```bash brew install gh @@ -46,9 +50,9 @@ brew install gh We have several optional utilities for running infrastructure linters locally. These are run as part of the CI pipeline, therefore, it is often simpler to test them locally first. -* [Shellcheck](https://github.com/koalaman/shellcheck) -* [actionlint](https://github.com/rhysd/actionlint) -* [markdown-link-check](https://github.com/tcort/markdown-link-check) +- [Shellcheck](https://github.com/koalaman/shellcheck) +- [actionlint](https://github.com/rhysd/actionlint) +- [markdown-link-check](https://github.com/tcort/markdown-link-check) ```bash brew install shellcheck diff --git a/docs/infra/set-up-network.md b/docs/infra/set-up-network.md index b9a1834e..f7f55726 100644 --- a/docs/infra/set-up-network.md +++ b/docs/infra/set-up-network.md @@ -16,8 +16,8 @@ Before setting up the network you'll need to have: 2. Optionally, [configure HTTPS support](./https-support.md). You can also come back to setting up HTTPS support at a later time. 3. [Configure the app](/infra/app/app-config/main.tf). 1. Update `has_database` to `true` or `false` depending on whether or not your application has a database to integrate with. This setting determines whether or not to create VPC endpoints needed by the database layer. - 2. Update `has_external_non_aws_service` to `true` or `false` depending on whether or not your application makes calls to an external non-AWS service. This setting determines whether or not to create NAT gateways, which allows the service in the private subnet to make requests to the internet. - 3. Update `network_name` for your application environments. This mapping ensures that each network is configured appropriately based on the application(s) in that network (see `local.apps_in_network` in [/infra/networks/main.tf](/infra/networks/main.tf)) Failure to set the network name properly means that the network layer may not receive the correct application configurations for `has_database` and `has_external_non_aws_service`. + 2. Update `has_external_non_aws_service` to `true` or `false` depending on whether or not your application makes calls over the public internet. Set this to `true` (a) if your application makes calls to a SaaS service, or (b) if your application needs to call services from another application in the same git repo. This setting determines whether or not to create NAT gateways, which allows the service in the private subnet to make requests to the internet. For more information, see [set up network access to the public internet](./set-up-public-internet-access.md) + 3. If you made changes to the configuration of the networks in the optional step 2 above and or to the default application environments: Update `network_name` for your application environments. This mapping ensures that each network is configured appropriately based on the application(s) in that network (see `local.apps_in_network` in [/infra/networks/main.tf](/infra/networks/main.tf)) Failure to set the network name properly means that the network layer may not receive the correct application configurations for `has_database` and `has_external_non_aws_service`. ## 1. 
Configure backend diff --git a/docs/infra/set-up-public-internet-access.md b/docs/infra/set-up-public-internet-access.md new file mode 100644 index 00000000..58a9f574 --- /dev/null +++ b/docs/infra/set-up-public-internet-access.md @@ -0,0 +1,32 @@ +# Public internet access + +Some applications depend on external services that are not provided directly by AWS. External services include: + +1. Software as a service (SaaS) providers like New Relic +2. Custom API applications in the same git repository + +Applications that depend on external services need access to the public internet via a NAT (Network Address Translation) gateway. This document describes how to configure public internet access for your application. The setup process will: + +1. Create a [NAT gateway](https://docs.aws.amazon.com/vpc/latest/userguide/vpc-nat-gateway.html) for each availability zone in your virtual network + +Note: To access services that are provided directly by AWS, you can access them over the public internet by enabling public internet access, or you can alternatively use [VPC endpoints](https://docs.aws.amazon.com/whitepapers/latest/aws-privatelink/what-are-vpc-endpoints.html) to keep network traffic entirely within the VPC. + +## 1. Configure `has_external_non_aws_service` property in app-config module + +In the `infra//app-config` module, set `has_external_non_aws_service` to `true`. + +## 2. Create or update the network + +If you are creating new network(s), follow the instructions in [set up network](./set-up-network.md) + +If you are updating existing networks, run the following command for each network used by your application's environments (look at `network_name` for each environment in your application's `app-config` module). + +```bash +make infra-update-network NETWORK_NAME= +``` + +## 3. Check that your application can access the internet + +Check that your application can access the internet. If your application already has an endpoint or background job that calls the internet, you can exercise that code path without needing to re-deploy the application. If not, you can test internet access by introducing a simple endpoint that accesses some public URL (e.g. google.com). + +Repeat this step for each application environment. diff --git a/docs/infra/style-guide.md b/docs/infra/style-guide.md new file mode 100644 index 00000000..d39fd452 --- /dev/null +++ b/docs/infra/style-guide.md @@ -0,0 +1,52 @@ +# Style guide + +## Table of contents + +- [Style guide](#style-guide) + - [Table of contents](#table-of-contents) + - [Terraform code style](#terraform-code-style) + - [Exceptions and additions to Hashicorp's Terraform style guide](#exceptions-and-additions-to-hashicorps-terraform-style-guide) + - [Modules](#modules) + - [Variables](#variables) + - [.gitignore](#gitignore) + - [Integration and unit testing](#integration-and-unit-testing) + - [Policy](#policy) + - [Shell script style](#shell-script-style) + +## Terraform code style + +Follow [Hashicorp's Terraform style guide](https://developer.hashicorp.com/terraform/language/style) when writing Terraform code, with a few exceptions (see below). + +### Exceptions and additions to Hashicorp's Terraform style guide + +Here are some exceptions (and additions) to Hashicorp's Terraform style guide. + +#### Modules + +- Use module names based on the logical function of the module rather than the underlying proprietary service used for implementing the module. For example, use "database" instead of "rds", or "storage" instead of "s3". 
+- Organize resources according to the infrastructure layers described in [module architecture](/docs/infra/module-architecture.md). +- [Use shared configuration](/docs/infra/module-dependencies.md) instead of the [tfe_outputs data source](https://registry.terraform.io/providers/hashicorp/tfe/latest/docs/data-sources/outputs) to share state between two state files. + +#### Variables + +- Include additional type information in string variable names to clarify the value being stored. For example, use `access_policy_arn` instead of `access_policy`. Common examples of suffixes include: `_id`, `_arn`, and `_name`. +- Include units in numerical variable names. For example, use `max_request_seconds` instead of `max_request_time`. +- Use plural nouns for lists. For example, use `subnet_ids` to represent a list of subnet ids. +- Use `values_by_key` for maps that map keys to values. For example, use `account_ids_by_name` to represent a map from account names to account ids. +- For boolean feature flags, use the prefix `enable_`, as in `enable_https`. + +#### .gitignore + +- Do not commit the `.terraform.lock.hcl` dependency lock file. As of Feb 2023, Terraform lock files, while well intentioned, have a tendency to get into a confusing state that requires recreating the lock file, which defeats the purpose. Moreover, lock files are per environment, which can make it difficult for people to upgrade dependencies (e.g. upgrade an AWS provider) across environments if certain environments are locked down (e.g. production). + +#### Integration and unit testing + +- For testing, use [Terratest](https://terratest.gruntwork.io/docs/) instead of the [Terraform test framework](https://developer.hashicorp.com/terraform/language/tests). + +#### Policy + +- For policy enforcement and compliance checks, [Tfsec](https://github.com/aquasecurity/tfsec) is used instead of [Terraform's policy enforcement framework](https://developer.hashicorp.com/terraform/cloud-docs/policy-enforcement). + +## Shell script style + +Follow [Google's Shell Style Guide](https://google.github.io/styleguide/shellguide.html). diff --git a/docs/infra/upgrade-database.md b/docs/infra/upgrade-database.md new file mode 100644 index 00000000..0c99a204 --- /dev/null +++ b/docs/infra/upgrade-database.md @@ -0,0 +1,84 @@ +# Upgrade database + +Upgrading the database between major versions (e.g., from Postgres 15 to 16) is a two-step process. + +1. Create a new DBParameterGroup for the new engine version and upgrade the database. +2. Remove the old DBParameterGroup for the prior engine version. + +These steps are a minimal starting point for the changes you'll need to make. As with any major change to your codebase, you should carefully test the impact of upgrading the database before applying it to a production environment. See also the AWS documentation for [Upgrading the PostgreSQL DB engine for Amazon RDS](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_UpgradeDBInstance.PostgreSQL.html#USER_UpgradeDBInstance.PostgreSQL.MajorVersion.Process). + +## 1. Creating a new DBParameterGroup and upgrading the database + +1. Set `allow_major_version_upgrade = true` + +Set this on the `aws_rds_cluster` resource in [infra/modules/database/main.tf#L20](../../infra/modules/database/main.tf). + +2. (if needed) Update the `serverlessv2_scaling_configuration` + +Set the `min_capacity` to 4.0 (and adjust the `max_capacity` accordingly). +If your minimum is lower than this, the upgrade will fail with `FATAL: shared memory segment sizes are configured too large`. 
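+As a rough sketch (the `max_capacity` value here is only illustrative; keep whatever your workload needs), the scaling block on the `aws_rds_cluster` resource might look like this during the upgrade:
+
+```terraform
+resource "aws_rds_cluster" "db" {
+  # ...
+
+  serverlessv2_scaling_configuration {
+    # Temporarily raise the minimum capacity so the upgrade has enough shared memory.
+    min_capacity = 4.0
+    max_capacity = 8.0
+  }
+}
+```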
+ +3. Create a new DBParameterGroup + +The database will need access to a new parameter group as part of the upgrade, but the old parameter group can't be deleted until the upgrade is complete. + +Make a copy of the `rds_query_logging` resource. +In the original, replace the `${local.engine_major_version}` variable with your current database version. +Then, in the duplicate version, modify the resource name to a new unique value. + +E.g., if you were moving from Postgres 14 to Postgres 15, your configuration would look like: + +```terraform +# This is the original; note that we manually specify the v14 family since, after the changes are applied, the new engine major version will be 15. +resource "aws_rds_cluster_parameter_group" "rds_query_logging" { + family = "aurora-postgresql14" + + ... +} + +# This is the new parameter group; we have given it a new name to distinguish it. +resource "aws_rds_cluster_parameter_group" "rds_query_logging_15" { + family = "aurora-postgresql${local.engine_major_version}" + + ... +} +``` + +Modify the `db_cluster_parameter_group_name` to reference this new parameter group: + +```terraform +resource "aws_rds_cluster" "db" { + ... + db_cluster_parameter_group_name = aws_rds_cluster_parameter_group.rds_query_logging_15.name + ... +} +``` + +4. Set the `engine_version` to your new desired version. + +5. Run `make infra-update-app-database APP_NAME= ENVIRONMENT=` + +Note that the upgrade is not applied immediately; it is queued for the next maintenance window. + +If you wish to apply the upgrade immediately, you can manually change the engine version to match in the AWS Console. See also: + + - https://developer.hashicorp.com/terraform/tutorials/aws/aws-rds + - https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Overview.DBInstance.Modifying.html + + +## 2. Removing the old DBParameterGroup + +Once the upgrade has been applied, you can remove the old parameter group. + +You should also remove `allow_major_version_upgrade = true` (or set it to `false`). + +If you had to increase your autoscaling settings to support the upgrade, you may wish to revert that change now as well. + +Finally, the new DBParameterGroup will have a new resource name (e.g., in the example above, `rds_query_logging_15`). 
You can revert this to the original name (`rds_query_logging`) without modifying the infrastructure by using [Terraform's moved block](https://developer.hashicorp.com/terraform/cli/state/move), e.g.: + +```terraform +moved { + from = aws_rds_cluster_parameter_group.rds_query_logging_15 + to = aws_rds_cluster_parameter_group.rds_query_logging +} +``` \ No newline at end of file diff --git a/e2e/.gitignore b/e2e/.gitignore new file mode 100644 index 00000000..68f91e36 --- /dev/null +++ b/e2e/.gitignore @@ -0,0 +1,6 @@ +node_modules/ +/test-results/ +/playwright-report/ +/blob-report/ +/playwright/.cache/ +*.png* diff --git a/e2e/app/playwright.config.js b/e2e/app/playwright.config.js new file mode 100644 index 00000000..12a24297 --- /dev/null +++ b/e2e/app/playwright.config.js @@ -0,0 +1,12 @@ +import baseConfig from '../playwright.config'; +import { deepMerge } from '../util'; +import { defineConfig } from '@playwright/test'; + +export default defineConfig(deepMerge( + baseConfig, + { + use: { + baseUrl: baseConfig.use.baseUrl || "localhost:3000" + }, + } + )); diff --git a/e2e/app/tests/index.spec.js b/e2e/app/tests/index.spec.js new file mode 100644 index 00000000..51c07df4 --- /dev/null +++ b/e2e/app/tests/index.spec.js @@ -0,0 +1,31 @@ +const { test, expect } = require('@playwright/test'); + +import AxeBuilder from '@axe-core/playwright'; + +test.describe('Generic Webpage Tests', () => { + test('should load the webpage successfully', async ({ page }) => { + const response = await page.goto('/'); + const title = await page.title(); + await expect(response.status()).toBe(200); + }); + + test('should take a screenshot of the webpage', async ({ page }) => { + await page.goto('/'); + await page.screenshot({ path: 'example-screenshot.png', fullPage: true }); + }); + + // https://playwright.dev/docs/accessibility-testing + test('should not have any automatically detectable accessibility issues', async ({ page }) => { + await page.goto('/'); + const accessibilityScanResults = await new AxeBuilder({ page }).analyze(); + expect(accessibilityScanResults.violations).toEqual([]); + }); + + // Example test of finding a an html element on the index/home page + // test('should check for an element to be visible', async ({ page }) => { + // await page.goto('/'); + // const element = page.locator('h1'); + // await expect(element).toBeVisible(); + // }); + +}); diff --git a/e2e/package-lock.json b/e2e/package-lock.json new file mode 100644 index 00000000..a4024e53 --- /dev/null +++ b/e2e/package-lock.json @@ -0,0 +1,188 @@ +{ + "name": "e2e", + "version": "1.0.0", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "version": "1.0.0", + "dependencies": { + "@axe-core/playwright": "^4.9.1", + "dotenv": "^16.4.5" + }, + "devDependencies": { + "@playwright/test": "^1.45.1", + "@types/node": "^20.14.10" + } + }, + "node_modules/@axe-core/playwright": { + "version": "4.9.1", + "resolved": "https://registry.npmjs.org/@axe-core/playwright/-/playwright-4.9.1.tgz", + "integrity": "sha512-8m4WZbZq7/aq7ZY5IG8GqV+ZdvtGn/iJdom+wBg+iv/3BAOBIfNQtIu697a41438DzEEyptXWmC3Xl5Kx/o9/g==", + "dependencies": { + "axe-core": "~4.9.1" + }, + "peerDependencies": { + "playwright-core": ">= 1.0.0" + } + }, + "node_modules/@playwright/test": { + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.45.1.tgz", + "integrity": "sha512-Wo1bWTzQvGA7LyKGIZc8nFSTFf2TkthGIFBR+QVNilvwouGzFd4PYukZe3rvf5PSqjHi1+1NyKSDZKcQWETzaA==", + "dev": true, + "dependencies": { + "playwright": "1.45.1" + 
}, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@types/node": { + "version": "20.14.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.10.tgz", + "integrity": "sha512-MdiXf+nDuMvY0gJKxyfZ7/6UFsETO7mGKF54MVD/ekJS6HdFtpZFBgrh6Pseu64XTb2MLyFPlbW6hj8HYRQNOQ==", + "dev": true, + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/axe-core": { + "version": "4.9.1", + "resolved": "https://registry.npmjs.org/axe-core/-/axe-core-4.9.1.tgz", + "integrity": "sha512-QbUdXJVTpvUTHU7871ppZkdOLBeGUKBQWHkHrvN2V9IQWGMt61zf3B45BtzjxEJzYuj0JBjBZP/hmYS/R9pmAw==", + "engines": { + "node": ">=4" + } + }, + "node_modules/dotenv": { + "version": "16.4.5", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", + "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/playwright": { + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.45.1.tgz", + "integrity": "sha512-Hjrgae4kpSQBr98nhCj3IScxVeVUixqj+5oyif8TdIn2opTCPEzqAqNMeK42i3cWDCVu9MI+ZsGWw+gVR4ISBg==", + "dev": true, + "dependencies": { + "playwright-core": "1.45.1" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.45.1.tgz", + "integrity": "sha512-LF4CUUtrUu2TCpDw4mcrAIuYrEjVDfT1cHbJMfwnE2+1b8PZcFzPNgvZCvq2JfQ4aTjRCCHw5EJ2tmr2NSzdPg==", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true + } + }, + "dependencies": { + "@axe-core/playwright": { + "version": "4.9.1", + "resolved": "https://registry.npmjs.org/@axe-core/playwright/-/playwright-4.9.1.tgz", + "integrity": "sha512-8m4WZbZq7/aq7ZY5IG8GqV+ZdvtGn/iJdom+wBg+iv/3BAOBIfNQtIu697a41438DzEEyptXWmC3Xl5Kx/o9/g==", + "requires": { + "axe-core": "~4.9.1" + } + }, + "@playwright/test": { + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.45.1.tgz", + "integrity": "sha512-Wo1bWTzQvGA7LyKGIZc8nFSTFf2TkthGIFBR+QVNilvwouGzFd4PYukZe3rvf5PSqjHi1+1NyKSDZKcQWETzaA==", + "dev": true, + "requires": { + "playwright": "1.45.1" + } + }, + "@types/node": { + "version": "20.14.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.10.tgz", + "integrity": "sha512-MdiXf+nDuMvY0gJKxyfZ7/6UFsETO7mGKF54MVD/ekJS6HdFtpZFBgrh6Pseu64XTb2MLyFPlbW6hj8HYRQNOQ==", + "dev": true, + "requires": { + "undici-types": "~5.26.4" + } + }, + "axe-core": { + "version": "4.9.1", + "resolved": "https://registry.npmjs.org/axe-core/-/axe-core-4.9.1.tgz", + "integrity": 
"sha512-QbUdXJVTpvUTHU7871ppZkdOLBeGUKBQWHkHrvN2V9IQWGMt61zf3B45BtzjxEJzYuj0JBjBZP/hmYS/R9pmAw==" + }, + "dotenv": { + "version": "16.4.5", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", + "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==" + }, + "fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "optional": true + }, + "playwright": { + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.45.1.tgz", + "integrity": "sha512-Hjrgae4kpSQBr98nhCj3IScxVeVUixqj+5oyif8TdIn2opTCPEzqAqNMeK42i3cWDCVu9MI+ZsGWw+gVR4ISBg==", + "dev": true, + "requires": { + "fsevents": "2.3.2", + "playwright-core": "1.45.1" + } + }, + "playwright-core": { + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.45.1.tgz", + "integrity": "sha512-LF4CUUtrUu2TCpDw4mcrAIuYrEjVDfT1cHbJMfwnE2+1b8PZcFzPNgvZCvq2JfQ4aTjRCCHw5EJ2tmr2NSzdPg==" + }, + "undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true + } + } +} diff --git a/e2e/package.json b/e2e/package.json new file mode 100644 index 00000000..145148b8 --- /dev/null +++ b/e2e/package.json @@ -0,0 +1,17 @@ +{ + "name": "e2e", + "version": "1.0.0", + "scripts": { + "e2e-setup": "npx playwright install", + "e2e-test": "./run-e2e-test", + "e2e-test:ui": "npx playwright test --ui" + }, + "devDependencies": { + "@playwright/test": "^1.45.1", + "@types/node": "^20.14.10" + }, + "dependencies": { + "@axe-core/playwright": "^4.9.1", + "dotenv": "^16.4.5" + } +} diff --git a/e2e/playwright.config.js b/e2e/playwright.config.js new file mode 100644 index 00000000..1a2b30f3 --- /dev/null +++ b/e2e/playwright.config.js @@ -0,0 +1,51 @@ +// Load environment variables from .env file if it exists +import * as dotenv from 'dotenv'; + +import { defineConfig, devices } from "@playwright/test"; + +dotenv.config(); + +/** + * See https://playwright.dev/docs/test-configuration. + */ +export default defineConfig({ + // Timeout for each test in milliseconds + timeout: 20000, + testDir: "./tests", // Ensure this points to the correct test directory + // Run tests in files in parallel + fullyParallel: true, + // Fail the build on CI if you accidentally left test.only in the source code. + forbidOnly: !!process.env.CI, + // Retry on CI only + retries: process.env.CI ? 2 : 0, + // Opt out of parallel tests on CI. + workers: process.env.CI ? 1 : undefined, + // Reporter to use. See https://playwright.dev/docs/test-reporters + reporter: "html", + // Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. + use: { + // Base URL to use in actions like `await page.goto('/')`. + baseURL: process.env.BASE_URL, + + // Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer + trace: "on-first-retry", + screenshot: "on", + video: "on-first-retry", + }, + + // Configure projects for major browsers + // Supported browsers: https://playwright.dev/docs/browsers#:~:text=Configure%20Browsers%E2%80%8B,Google%20Chrome%20and%20Microsoft%20Edge. 
+ projects: [ + { + name: "chromium", + use: { ...devices["Desktop Chrome"] }, + }, + + // Test against mobile viewports. + { + name: "Mobile Chrome", + use: { ...devices["Pixel 7"] }, + }, + ], + +}); diff --git a/e2e/run-e2e-test b/e2e/run-e2e-test new file mode 100755 index 00000000..7aedf547 --- /dev/null +++ b/e2e/run-e2e-test @@ -0,0 +1,13 @@ +#!/bin/bash +# +# Script to run Playwright tests with a specified app name. +# Requires the APP_NAME environment variable to be set. + +# Ensure APP_NAME is provided +if [[ -z "${APP_NAME}" ]]; then + echo "You must pass in a specific APP_NAME. IE: APP_NAME=app npm run e2e-test" >&2 + exit 1 +fi + +# Run Playwright tests with the specified app name. +npx playwright test --config "${APP_NAME}/playwright.config.js" diff --git a/e2e/util.js b/e2e/util.js new file mode 100644 index 00000000..c1d2e62a --- /dev/null +++ b/e2e/util.js @@ -0,0 +1,16 @@ +// Merge a base and derived config +export function deepMerge(obj1, obj2) { + const result = { ...obj1 }; + + for (let key in obj2) { + if (obj2.hasOwnProperty(key)) { + if (obj2[key] instanceof Object && obj1[key] instanceof Object) { + result[key] = deepMerge(obj1[key], obj2[key]); + } else { + result[key] = obj2[key]; + } + } + } + + return result; + } diff --git a/infra/.gitignore b/infra/.gitignore index 2cafcb4d..47c0fb89 100644 --- a/infra/.gitignore +++ b/infra/.gitignore @@ -23,6 +23,3 @@ override.tf.json # example: *tfplan* *.plan *.tfstate - -# Ignore archives used for deploying lambdas -*.zip diff --git a/infra/README.md b/infra/README.md index 35d93d2d..5387e22f 100644 --- a/infra/README.md +++ b/infra/README.md @@ -8,6 +8,7 @@ The structure for the infrastructure code looks like this: ```text infra/ Infrastructure code + project-config/ Project-level configuration for account-level resources and resource tags accounts/ [Root module] IaC and IAM resources [app_name]/ Application directory: infrastructure for the main application modules/ Reusable child modules @@ -27,9 +28,18 @@ Details about terraform root modules and child modules are documented in [module ## 🏗️ Project architecture +### ⚙️ Configuration + +The infrastructure derives all of its configuration from static configuration modules: + +- Project config +- App config (per application) + +The configuration modules contain only statically known information and do not have any side effects such as creating infrastructure resources. As such, they are used as both (a) root modules by shell scripts and CI/CD workflows and (b) child modules called by root modules across the infrastructure layers. See [infrastructure configuration](/docs/infra/infrastructure-configuration.md) for more info. + ### 🧅 Infrastructure layers -The infrastructure template is designed to operate on different layers: +The infrastructure is designed to operate on different layers: - Account layer - Network layer @@ -45,7 +55,7 @@ This project has the following AWS environments: - `staging` - `prod` -The environments share the same root modules but will have different configurations. Backend configuration is saved as [`.tfbackend`](https://developer.hashicorp.com/terraform/language/settings/backends/configuration#file) files. Most `.tfbackend` files are named after the environment. For example, the `[app_name]/service` infrastructure resources for the `dev` environment are configured via `dev.s3.tfbackend`. Resources for a module that are shared across environments, such as the build-repository, use `shared.s3.tfbackend`. 
Resources that are shared across the entire account (e.g. /infra/accounts) use `..s3.tfbackend`. +The environments share the same root modules but will have different configurations. Backend configuration is saved as [`.tfbackend`](https://developer.hashicorp.com/terraform/language/backend#partial-configuration) files. Most `.tfbackend` files are named after the environment. For example, the `[app_name]/service` infrastructure resources for the `dev` environment are configured via `dev.s3.tfbackend`. Resources for a module that are shared across environments, such as the build-repository, use `shared.s3.tfbackend`. Resources that are shared across the entire account (e.g. /infra/accounts) use `..s3.tfbackend`. ### 🔀 Project workflow @@ -57,8 +67,6 @@ Generally, you should use the Make targets or the underlying bin scripts, but yo ### 1️⃣ First time initialization - - To set up this project for the first time (i.e., it has never been deployed to the target AWS account): 1. [Install this template](/README.md#installation) into an application that meets the [Application Requirements](/README.md#application-requirements) @@ -79,7 +87,8 @@ To get set up as a new developer on a project that has already been deployed to 1. [Set up infrastructure developer tools](/docs/infra/set-up-infrastructure-tools.md) 2. [Review how to make changes to infrastructure](/docs/infra/making-infra-changes.md) -3. (Optional) Set up a [terraform workspace](/docs/infra/intro-to-terraform-workspaces.md) +3. [Review how to develop and test infrastructure changes](/docs/infra/develop-and-test-infrastructure-in-isolation-using-workspaces.md) +4. [Review the infrastructure style guide](/docs/infra/style-guide.md) ## 📇 Additional reading diff --git a/infra/accounts/main.tf b/infra/accounts/main.tf index 48f4f618..94f00ba4 100644 --- a/infra/accounts/main.tf +++ b/infra/accounts/main.tf @@ -2,13 +2,13 @@ data "aws_caller_identity" "current" {} data "aws_region" "current" {} locals { - # This must match the name of the bucket created while bootstrapping the account in set-up-current-account.sh + # This must match the name of the bucket created while bootstrapping the account in set-up-current-account tf_state_bucket_name = "${module.project_config.project_name}-${data.aws_caller_identity.current.account_id}-${data.aws_region.current.name}-tf" # Choose the region where this infrastructure should be deployed. region = module.project_config.default_region - # Set project tags that will be used to tag all resources. + # Set project tags that will be used to tag all resources. tags = merge(module.project_config.default_tags, { description = "Backend resources required for terraform state management and GitHub authentication with AWS." 
}) diff --git a/infra/accounts/outputs.tf b/infra/accounts/outputs.tf index 2ec8af03..86fa2745 100644 --- a/infra/accounts/outputs.tf +++ b/infra/accounts/outputs.tf @@ -1,17 +1,13 @@ -output "project_name" { - value = module.project_config.project_name -} - output "account_id" { value = data.aws_caller_identity.current.account_id } -output "region" { - value = data.aws_region.current.name +output "project_name" { + value = module.project_config.project_name } -output "tf_state_bucket_name" { - value = module.backend.tf_state_bucket_name +output "region" { + value = data.aws_region.current.name } output "tf_log_bucket_name" { @@ -21,3 +17,7 @@ output "tf_log_bucket_name" { output "tf_locks_table_name" { value = module.backend.tf_locks_table_name } + +output "tf_state_bucket_name" { + value = module.backend.tf_state_bucket_name +} diff --git a/infra/app/app-config/build-repository.tf b/infra/app/app-config/build-repository.tf new file mode 100644 index 00000000..43889688 --- /dev/null +++ b/infra/app/app-config/build-repository.tf @@ -0,0 +1,20 @@ +data "external" "account_ids_by_name" { + program = ["${path.module}/../../../bin/account-ids-by-name"] +} + +locals { + image_repository_name = "${local.project_name}-${local.app_name}" + image_repository_region = module.project_config.default_region + image_repository_account_name = module.project_config.network_configs[local.shared_network_name].account_name + image_repository_account_id = data.external.account_ids_by_name.result[local.image_repository_account_name] + + build_repository_config = { + name = local.image_repository_name + region = local.image_repository_region + network_name = local.shared_network_name + account_name = local.image_repository_account_name + account_id = local.image_repository_account_id + repository_arn = "arn:aws:ecr:${local.image_repository_region}:${local.image_repository_account_id}:repository/${local.image_repository_name}" + repository_url = "${local.image_repository_account_id}.dkr.ecr.${local.image_repository_region}.amazonaws.com/${local.image_repository_name}" + } +} diff --git a/infra/app/app-config/dev.tf b/infra/app/app-config/dev.tf index bcb20d09..28f5a82c 100644 --- a/infra/app/app-config/dev.tf +++ b/infra/app/app-config/dev.tf @@ -17,6 +17,13 @@ module "dev_config" { service_memory = 4096 service_desired_instance_count = 3 + # Enable and configure identity provider. + enable_identity_provider = local.enable_identity_provider + + # Support local development against the dev instance. + extra_identity_provider_callback_urls = ["http://localhost"] + extra_identity_provider_logout_urls = ["http://localhost"] + # Enables ECS Exec access for debugging or jump access. # See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-exec.html # Defaults to `false`. Uncomment the next line to enable. diff --git a/infra/app/app-config/env-config/database.tf b/infra/app/app-config/env-config/database.tf new file mode 100644 index 00000000..a8dd0409 --- /dev/null +++ b/infra/app/app-config/env-config/database.tf @@ -0,0 +1,15 @@ +locals { + database_config = var.has_database ? 
{ + region = var.default_region + cluster_name = "${var.app_name}-${var.environment}" + app_username = "app" + migrator_username = "migrator" + schema_name = "app" + app_access_policy_name = "${var.app_name}-${var.environment}-app-access" + migrator_access_policy_name = "${var.app_name}-${var.environment}-migrator-access" + + # Enable extensions that require the rds_superuser role to be created here + # See docs/infra/set-up-database.md for more information + superuser_extensions = {} + } : null +} diff --git a/infra/app/app-config/env-config/environment-variables.tf b/infra/app/app-config/env-config/environment-variables.tf index f306b730..7f9a8a5c 100644 --- a/infra/app/app-config/env-config/environment-variables.tf +++ b/infra/app/app-config/env-config/environment-variables.tf @@ -14,126 +14,138 @@ locals { } # Configuration for secrets - # List of configurations for defining environment variables that pull from SSM parameter + # Map of configurations for defining environment variables that pull from SSM parameter # store. Configurations are of the format - # { name = "ENV_VAR_NAME", ssm_param_name = "/ssm/param/name" } + # + # { + # ENV_VAR_NAME = { + # manage_method = "generated" # or "manual" for a secret that was created and stored in SSM manually + # secret_store_name = "/ssm/param/name" + # } + # } # # Manage the secret values of them using AWS Systems Manager: # https://us-east-1.console.aws.amazon.com/systems-manager/parameters/ - secrets = [ - { - name = "SECRET_KEY_BASE" - ssm_param_name = "/service/${var.app_name}-${var.environment}/rails-secret-key-base" + secrets = { + SECRET_KEY_BASE = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/rails-secret-key-base" + }, + RAILS_MASTER_KEY = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/rails-master-key" }, - { - name = "RAILS_MASTER_KEY" - ssm_param_name = "/service/${var.app_name}-${var.environment}/rails-master-key" + CBV_INVITE_SECRET = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/cbv-invite-secret" }, - { - name = "CBV_INVITE_SECRET" - ssm_param_name = "/service/${var.app_name}-${var.environment}/cbv-invite-secret" + SLACK_TEST_EMAIL = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/slack-test-email" }, - { - name = "SLACK_TEST_EMAIL" - ssm_param_name = "/service/${var.app_name}-${var.environment}/slack-test-email" + NEWRELIC_KEY = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/newrelic-key" }, - { - name = "NEWRELIC_KEY" - ssm_param_name = "/service/${var.app_name}-${var.environment}/newrelic-key" + NEW_RELIC_ENV = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/new-relic-env" }, - { - name = "NEW_RELIC_ENV" - ssm_param_name = "/service/${var.app_name}-${var.environment}/new-relic-env" + MAINTENANCE_MODE = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/maintenance-mode" }, # Transmission Configuration: - { - name = "NYC_HRA_EMAIL" - ssm_param_name = "/service/${var.app_name}-${var.environment}/nyc-hra-email" - }, + NYC_HRA_EMAIL = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/nyc-hra-email" + }, + MA_DTA_S3_BUCKET = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/ma-dta-s3-bucket" + }, + 
MA_DTA_S3_PUBLIC_KEY = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/ma-dta-s3-public-key" + } # Pinwheel Configuration: - { - name = "PINWHEEL_API_TOKEN_PRODUCTION" - ssm_param_name = "/service/${var.app_name}-${var.environment}/pinwheel-api-token-production" + PINWHEEL_API_TOKEN_PRODUCTION = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/pinwheel-api-token-production" }, - { - name = "PINWHEEL_API_TOKEN_DEVELOPMENT" - ssm_param_name = "/service/${var.app_name}-${var.environment}/pinwheel-api-token-development" + PINWHEEL_API_TOKEN_DEVELOPMENT = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/pinwheel-api-token-development" }, - { - name = "PINWHEEL_API_TOKEN_SANDBOX" - ssm_param_name = "/service/${var.app_name}-${var.environment}/pinwheel-api-token-sandbox" + PINWHEEL_API_TOKEN_SANDBOX = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/pinwheel-api-token-sandbox" }, - { - name = "NYC_PINWHEEL_ENVIRONMENT" - ssm_param_name = "/service/${var.app_name}-${var.environment}/nyc-pinwheel-environment" + NYC_PINWHEEL_ENVIRONMENT = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/nyc-pinwheel-environment" }, - { - name = "MA_PINWHEEL_ENVIRONMENT" - ssm_param_name = "/service/${var.app_name}-${var.environment}/ma-pinwheel-environment" + MA_PINWHEEL_ENVIRONMENT = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/ma-pinwheel-environment" }, - { - name = "SANDBOX_PINWHEEL_ENVIRONMENT" - ssm_param_name = "/service/${var.app_name}-${var.environment}/sandbox-pinwheel-environment" + SANDBOX_PINWHEEL_ENVIRONMENT = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/sandbox-pinwheel-environment" }, # SSO Configuration: - { - name = "AZURE_NYC_DSS_CLIENT_ID" - ssm_param_name = "/service/${var.app_name}-${var.environment}/azure-nyc-dss-client-id" + AZURE_NYC_DSS_CLIENT_ID = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/azure-nyc-dss-client-id" }, - { - name = "AZURE_NYC_DSS_CLIENT_SECRET" - ssm_param_name = "/service/${var.app_name}-${var.environment}/azure-nyc-dss-client-secret" + AZURE_NYC_DSS_CLIENT_SECRET = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/azure-nyc-dss-client-secret" }, - { - name = "AZURE_NYC_DSS_TENANT_ID" - ssm_param_name = "/service/${var.app_name}-${var.environment}/azure-nyc-dss-tenant-id" + AZURE_NYC_DSS_TENANT_ID = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/azure-nyc-dss-tenant-id" }, - { - name = "AZURE_MA_DTA_CLIENT_ID" - ssm_param_name = "/service/${var.app_name}-${var.environment}/azure-ma-dta-client-id" + AZURE_MA_DTA_CLIENT_ID = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/azure-ma-dta-client-id" }, - { - name = "AZURE_MA_DTA_CLIENT_SECRET" - ssm_param_name = "/service/${var.app_name}-${var.environment}/azure-ma-dta-client-secret" + AZURE_MA_DTA_CLIENT_SECRET = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/azure-ma-dta-client-secret" }, - { - name = "AZURE_MA_DTA_TENANT_ID" - ssm_param_name = "/service/${var.app_name}-${var.environment}/azure-ma-dta-tenant-id" + AZURE_MA_DTA_TENANT_ID = { + manage_method 
= "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/azure-ma-dta-tenant-id" }, - { - name = "AZURE_SANDBOX_CLIENT_ID" - ssm_param_name = "/service/${var.app_name}-${var.environment}/azure-sandbox-client-id" + AZURE_SANDBOX_CLIENT_ID = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/azure-sandbox-client-id" }, - { - name = "AZURE_SANDBOX_CLIENT_SECRET" - ssm_param_name = "/service/${var.app_name}-${var.environment}/azure-sandbox-client-secret" + AZURE_SANDBOX_CLIENT_SECRET = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/azure-sandbox-client-secret" }, - { - name = "AZURE_SANDBOX_TENANT_ID" - ssm_param_name = "/service/${var.app_name}-${var.environment}/azure-sandbox-tenant-id" + AZURE_SANDBOX_TENANT_ID = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/azure-sandbox-tenant-id" }, # Other site-specific Configuration: - { - name = "MA_DTA_ALLOWED_CASEWORKER_EMAILS" - ssm_param_name = "/service/${var.app_name}-${var.environment}/ma-dta-allowed-caseworker-emails" + MA_DTA_ALLOWED_CASEWORKER_EMAILS = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/ma-dta-allowed-caseworker-emails" }, - { - name = "MA_DTA_S3_BUCKET" - ssm_param_name = "/service/${var.app_name}-${var.environment}/ma-dta-s3-bucket" + MA_DTA_S3_BUCKET = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/ma-dta-s3-bucket" }, - { - name = "MA_DTA_S3_PUBLIC_KEY" - ssm_param_name = "/service/${var.app_name}-${var.environment}/ma-dta-s3-public-key" + MA_DTA_S3_PUBLIC_KEY = { + manage_method = "manual" + secret_store_name = "/service/${var.app_name}-${var.environment}/ma-dta-s3-public-key" }, - - # MAINTENANCE MODE - { - name = "MAINTENANCE_MODE" - ssm_param_name = "/service/${var.app_name}-${var.environment}/maintenance-mode" - }, - ] + } } diff --git a/infra/app/app-config/env-config/identity-provider.tf b/infra/app/app-config/env-config/identity-provider.tf new file mode 100644 index 00000000..0151d4cc --- /dev/null +++ b/infra/app/app-config/env-config/identity-provider.tf @@ -0,0 +1,45 @@ +# Identity provider configuration. +# If the notification service is configured, the identity provider will use the +# SES-verified email to send notifications. +locals { + # If your application should redirect users, after successful authentication, to a + # page other than the homepage, specify the path fragment here. + # Example: "profile" + # Docs: https://docs.aws.amazon.com/cognito/latest/developerguide/user-pool-settings-client-apps.html + callback_url_path = "" + + # If your application should redirect users, after signing out, to a page other than + # the homepage, specify the path fragment here. + # Example: "logout" + # Docs: https://docs.aws.amazon.com/cognito/latest/developerguide/user-pool-settings-client-apps.html + logout_url_path = "" + + identity_provider_config = var.enable_identity_provider ? { + identity_provider_name = "${var.app_name}-${var.environment}" + + password_policy = { + password_minimum_length = 12 + temporary_password_validity_days = 7 + } + + # Optionally configure email template for resetting a password. + # Set any attribute to a non-null value to override AWS Cognito defaults. 
+ # Docs: https://docs.aws.amazon.com/cognito/latest/developerguide/cognito-user-pool-settings-message-customizations.html + verification_email = { + verification_email_message = null + verification_email_subject = null + } + + # Do not modify this block directly. + client = { + callback_urls = concat( + var.domain_name != null ? ["https://${var.domain_name}/${local.callback_url_path}"] : [], + var.extra_identity_provider_callback_urls + ) + logout_urls = concat( + var.domain_name != null ? ["https://${var.domain_name}/${local.logout_url_path}"] : [], + var.extra_identity_provider_logout_urls + ) + } + } : null +} diff --git a/infra/app/app-config/env-config/main.tf b/infra/app/app-config/env-config/main.tf index a1854caf..6b77e8f3 100644 --- a/infra/app/app-config/env-config/main.tf +++ b/infra/app/app-config/env-config/main.tf @@ -1,7 +1,9 @@ locals { - # The prefix key/value pair is used for Terraform Workspaces, which is useful for projects with multiple infrastructure developers. - # By default, Terraform creates a workspace named “default.” If a non-default workspace is not created this prefix will equal “default”, - # if you choose not to use workspaces set this value to "dev" + # The prefix is used to create uniquely named resources per terraform workspace, which + # are needed in CI/CD for preview environments and tests. + # + # To isolate changes during infrastructure development by using manually created + # terraform workspaces, see: /docs/infra/develop-and-test-infrastructure-in-isolation-using-workspaces.md prefix = terraform.workspace == "default" ? "" : "${terraform.workspace}-" bucket_name = "${local.prefix}${var.project_name}-${var.app_name}-${var.environment}" diff --git a/infra/app/app-config/env-config/notifications.tf b/infra/app/app-config/env-config/notifications.tf new file mode 100644 index 00000000..d4f2bb42 --- /dev/null +++ b/infra/app/app-config/env-config/notifications.tf @@ -0,0 +1,16 @@ +# Notifications configuration +locals { + notifications_config = var.enable_notifications ? { + # Set to an SES-verified email address to be used when sending emails. + # Docs: https://docs.aws.amazon.com/cognito/latest/developerguide/user-pool-email.html + sender_email = null + + # Configure the name that users see in the "From" section of their inbox, so that it's + # clearer who the email is from. + sender_display_name = null + + # Configure the REPLY-TO email address if it should be different from the sender. + # Note: Only used by the identity-provider service. + reply_to_email = null + } : null +} diff --git a/infra/app/app-config/env-config/outputs.tf b/infra/app/app-config/env-config/outputs.tf index 5c874c1e..e95a6842 100644 --- a/infra/app/app-config/env-config/outputs.tf +++ b/infra/app/app-config/env-config/outputs.tf @@ -1,12 +1,14 @@ output "database_config" { - value = var.has_database ? { - region = var.default_region - cluster_name = "${var.app_name}-${var.environment}" - app_username = "app" - migrator_username = "migrator" - schema_name = var.app_name - app_access_policy_name = "${var.app_name}-${var.environment}-app-access" - migrator_access_policy_name = "${var.app_name}-${var.environment}-migrator-access" + value = local.database_config +} + +output "scheduled_jobs" { + value = local.scheduled_jobs +} + +output "incident_management_service_integration" { + value = var.has_incident_management_service ? 
{ + integration_url_param_name = "/monitoring/${var.app_name}/${var.environment}/incident-management-integration-url" } : null } @@ -30,7 +32,7 @@ output "service_config" { var.service_override_extra_environment_variables ) - secrets = toset(local.secrets) + secrets = local.secrets file_upload_jobs = { for job_name, job_config in local.file_upload_jobs : @@ -40,15 +42,17 @@ output "service_config" { } } +output "identity_provider_config" { + value = local.identity_provider_config +} + +output "notifications_config" { + value = local.notifications_config +} + output "storage_config" { value = { bucket_name = local.bucket_name massachusetts_moveit_bucket_name = local.massachusetts_moveit_bucket_name } } - -output "incident_management_service_integration" { - value = var.has_incident_management_service ? { - integration_url_param_name = "/monitoring/${var.app_name}/${var.environment}/incident-management-integration-url" - } : null -} diff --git a/infra/app/app-config/env-config/scheduled_jobs.tf b/infra/app/app-config/env-config/scheduled_jobs.tf new file mode 100644 index 00000000..2c71b23d --- /dev/null +++ b/infra/app/app-config/env-config/scheduled_jobs.tf @@ -0,0 +1,14 @@ +locals { + # The `cron` here is the literal name of the scheduled job. It can be anything you want. + # For example "file_upload_jobs" or "daily_report". Whatever makes sense for your use case. + # The `task_command` is what you want your scheduled job to run, for example: ["poetry", "run", "flask"]. + # Schedule expression defines the frequency at which the job should run. + # The syntax for `schedule_expression` is explained in the following documentation: + # https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-scheduled-rule-pattern.html + scheduled_jobs = { + # cron = { + # task_command = ["python", "-m", "flask", "--app", "app.py", "cron"] + # schedule_expression = "cron(0 * ? * * *)" + # } + } +} diff --git a/infra/app/app-config/env-config/variables.tf b/infra/app/app-config/env-config/variables.tf index 1241f087..bdeb1c08 100644 --- a/infra/app/app-config/env-config/variables.tf +++ b/infra/app/app-config/env-config/variables.tf @@ -1,19 +1,11 @@ -variable "project_name" { - type = string -} - variable "app_name" { type = string } -variable "environment" { - description = "name of the application environment (e.g. dev, staging, prod)" - type = string -} - -variable "network_name" { - description = "Human readable identifier of the network / VPC" +variable "certificate_arn" { type = string + description = "The ARN of the certificate to use for the application" + default = null } variable "default_region" { @@ -27,16 +19,45 @@ variable "domain_name" { default = null } +variable "enable_command_execution" { + type = bool + description = "Enables the ability to manually execute commands on running service containers using AWS ECS Exec" + default = false +} + variable "enable_https" { type = bool description = "Whether to enable HTTPS for the application" default = false } -variable "certificate_arn" { +variable "enable_identity_provider" { + type = bool + description = "Enables identity provider" + default = false +} + +variable "enable_notifications" { + type = bool + description = "Enables notifications" + default = false +} + +variable "environment" { + description = "name of the application environment (e.g. 
dev, staging, prod)" type = string - description = "The ARN of the certificate to use for the application" - default = null +} + +variable "extra_identity_provider_callback_urls" { + type = list(string) + description = "List of additional URLs that the identity provider will redirect the user to after a successful sign-in. Used for local development." + default = [] +} + +variable "extra_identity_provider_logout_urls" { + type = list(string) + description = "List of additional URLs that the identity provider will redirect the user to after signing out. Used for local development." + default = [] } variable "has_database" { @@ -47,19 +68,28 @@ variable "has_incident_management_service" { type = bool } +variable "network_name" { + description = "Human readable identifier of the network / VPC" + type = string +} + +variable "project_name" { + type = string +} + variable "service_cpu" { type = number default = 256 } -variable "service_memory" { +variable "service_desired_instance_count" { type = number - default = 512 + default = 1 } -variable "service_desired_instance_count" { +variable "service_memory" { type = number - default = 1 + default = 512 } variable "service_override_extra_environment_variables" { @@ -67,12 +97,6 @@ variable "service_override_extra_environment_variables" { description = <..s3.tfbackend - # - # Projects/applications that use the same AWS account for all environments - # will refer to the same account for all environments. For example, if the - # project has a single account named "myaccount", then infra/accounts will - # have one tfbackend file myaccount.XXXXX.s3.tfbackend, and the - # account_names_by_environment map will look like: - # - # account_names_by_environment = { - # shared = "myaccount" - # dev = "myaccount" - # staging = "myaccount" - # prod = "myaccount" - # } - # - # Projects/applications that have separate AWS accounts for each environment - # might have a map that looks more like this: - # - # account_names_by_environment = { - # shared = "dev" - # dev = "dev" - # staging = "staging" - # prod = "prod" - # } - account_names_by_environment = { - dev = "nava-ffs" - # staging = "nava-ffs" - shared = "nava-ffs-prod" - prod = "nava-ffs-prod" - } + # .s3.tfbackend + shared_network_name = "prod" } module "project_config" { diff --git a/infra/app/app-config/outputs.tf b/infra/app/app-config/outputs.tf index 0b5456ac..16a35792 100644 --- a/infra/app/app-config/outputs.tf +++ b/infra/app/app-config/outputs.tf @@ -2,8 +2,12 @@ output "app_name" { value = local.app_name } -output "account_names_by_environment" { - value = local.account_names_by_environment +output "build_repository_config" { + value = local.build_repository_config +} + +output "environment_configs" { + value = local.environment_configs } output "environments" { @@ -26,10 +30,10 @@ output "has_incident_management_service" { value = local.has_incident_management_service } -output "build_repository_config" { - value = local.build_repository_config +output "enable_identity_provider" { + value = local.enable_identity_provider } -output "environment_configs" { - value = local.environment_configs +output "shared_network_name" { + value = local.shared_network_name } diff --git a/infra/app/app-config/prod.tf b/infra/app/app-config/prod.tf index e1843f53..37a803b0 100644 --- a/infra/app/app-config/prod.tf +++ b/infra/app/app-config/prod.tf @@ -9,6 +9,7 @@ module "prod_config" { enable_https = true has_database = local.has_database has_incident_management_service = local.has_incident_management_service + 
enable_identity_provider = local.enable_identity_provider # These numbers are a starting point based on this article # Update the desired instance size and counts based on the project's specific needs diff --git a/infra/app/app-config/staging.tf b/infra/app/app-config/staging.tf index afbdc878..f629fb80 100644 --- a/infra/app/app-config/staging.tf +++ b/infra/app/app-config/staging.tf @@ -9,6 +9,7 @@ # enable_https = false # has_database = local.has_database # has_incident_management_service = local.has_incident_management_service +# enable_identity_provider = local.enable_identity_provider # # # Enables ECS Exec access for debugging or jump access. # # See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-exec.html diff --git a/infra/app/build-repository/main.tf b/infra/app/build-repository/main.tf index bdba2327..a6c01c6f 100644 --- a/infra/app/build-repository/main.tf +++ b/infra/app/build-repository/main.tf @@ -10,9 +10,12 @@ locals { description = "Backend resources required for storing built release candidate artifacts to be used for deploying to environments." }) + build_repository_config = module.app_config.build_repository_config + # Get list of AWS account ids for the application environments that # will need access to the build repository - app_account_names = values(module.app_config.account_names_by_environment) + network_names = toset([for environment_config in values(module.app_config.environment_configs) : environment_config.network_name]) + app_account_names = [for network_name in local.network_names : module.project_config.network_configs[network_name].account_name] account_ids_by_name = data.external.account_ids_by_name.result app_account_ids = [for account_name in local.app_account_names : local.account_ids_by_name[account_name] if contains(keys(local.account_ids_by_name), account_name)] } @@ -33,7 +36,7 @@ terraform { } provider "aws" { - region = module.app_config.build_repository_config.region + region = local.build_repository_config.region default_tags { tags = local.tags } @@ -48,12 +51,12 @@ module "app_config" { } data "external" "account_ids_by_name" { - program = ["../../../bin/account-ids-by-name.sh"] + program = ["${path.module}/../../../bin/account-ids-by-name"] } module "container_image_repository" { source = "../../modules/container-image-repository" - name = module.app_config.image_repository_name + name = local.build_repository_config.name push_access_role_arn = data.aws_iam_role.github_actions.arn app_account_ids = local.app_account_ids } diff --git a/infra/app/database/main.tf b/infra/app/database/main.tf index faf4e917..df864f5f 100644 --- a/infra/app/database/main.tf +++ b/infra/app/database/main.tf @@ -25,6 +25,8 @@ locals { description = "Database resources for the ${var.environment_name} environment" }) + is_temporary = terraform.workspace != "default" + environment_config = module.app_config.environment_configs[var.environment_name] database_config = local.environment_config.database_config network_config = module.project_config.network_configs[local.environment_config.network_name] @@ -89,4 +91,5 @@ module "database" { database_subnet_group_name = local.network_config.database_subnet_group_name private_subnet_ids = data.aws_subnets.database.ids aws_services_security_group_id = data.aws_security_groups.aws_services.ids[0] + is_temporary = local.is_temporary } diff --git a/infra/app/service/main.tf b/infra/app/service/main.tf index 00bc9d5a..2003afc8 100644 --- a/infra/app/service/main.tf +++ b/infra/app/service/main.tf @@ -24,21 
+24,45 @@ data "aws_subnets" "private" { } locals { + # The prefix is used to create uniquely named resources per terraform workspace, which + # are needed in CI/CD for preview environments and tests. + # + # To isolate changes during infrastructure development by using manually created + # terraform workspaces, see: /docs/infra/develop-and-test-infrastructure-in-isolation-using-workspaces.md + prefix = terraform.workspace == "default" ? "" : "${terraform.workspace}-" + # Add environment specific tags tags = merge(module.project_config.default_tags, { environment = var.environment_name description = "Application resources created in ${var.environment_name} environment" }) - is_temporary = startswith(terraform.workspace, "t-") + # All non-default terraform workspaces are considered temporary. + # Temporary environments do not have deletion protection enabled. + # Examples: pull request preview environments are temporary. + is_temporary = terraform.workspace != "default" + build_repository_config = module.app_config.build_repository_config environment_config = module.app_config.environment_configs[var.environment_name] service_config = local.environment_config.service_config database_config = local.environment_config.database_config storage_config = local.environment_config.storage_config incident_management_service_integration_config = local.environment_config.incident_management_service_integration + identity_provider_config = local.environment_config.identity_provider_config + notifications_config = local.environment_config.notifications_config network_config = module.project_config.network_configs[local.environment_config.network_name] + + # Identity provider locals. + # If this is a temporary environment, re-use an existing Cognito user pool. + # Otherwise, create a new one. + identity_provider_user_pool_id = module.app_config.enable_identity_provider ? ( + local.is_temporary ? module.existing_identity_provider[0].user_pool_id : module.identity_provider[0].user_pool_id + ) : null + identity_provider_environment_variables = module.app_config.enable_identity_provider ? { + COGNITO_USER_POOL_ID = local.identity_provider_user_pool_id, + COGNITO_CLIENT_ID = module.identity_provider_client[0].client_id + } : {} } terraform { @@ -47,7 +71,7 @@ terraform { required_providers { aws = { source = "hashicorp/aws" - version = ">= 4.56.0, < 5.0.0" + version = ">= 5.35.0, < 6.0.0" } } @@ -135,9 +159,10 @@ module "service" { source = "../../modules/service" service_name = local.service_config.service_name - image_repository_name = module.app_config.build_repository_config.name - image_repository_account_id = module.app_config.build_repository_config.account_id - image_tag = local.image_tag + image_repository_arn = local.build_repository_config.repository_arn + image_repository_url = local.build_repository_config.repository_url + + image_tag = local.image_tag vpc_id = data.aws_vpc.network.id public_subnet_ids = data.aws_subnets.public.ids @@ -155,6 +180,7 @@ module "service" { aws_services_security_group_id = data.aws_security_groups.aws_services.ids[0] file_upload_jobs = local.service_config.file_upload_jobs + scheduled_jobs = local.environment_config.scheduled_jobs db_vars = module.app_config.has_database ? 
{ security_group_ids = data.aws_rds_cluster.db_cluster[0].vpc_security_group_ids @@ -169,20 +195,38 @@ module "service" { } } : null - extra_environment_variables = merge({ - FEATURE_FLAGS_PROJECT = module.feature_flags.evidently_project_name - BUCKET_NAME = local.storage_config.bucket_name - MA_MOVEIT_BUCKET_NAME = local.storage_config.massachusetts_moveit_bucket_name - }, local.service_config.extra_environment_variables) - - secrets = local.service_config.secrets - - extra_policies = { - feature_flags_access = module.feature_flags.access_policy_arn, - storage_access = module.storage.access_policy_arn, - ma_moveit_access = module.storage_ma_moveit.access_policy_arn, - email_access = aws_iam_policy.email_access_policy.arn - } + extra_environment_variables = merge( + { + FEATURE_FLAGS_PROJECT = module.feature_flags.evidently_project_name + BUCKET_NAME = local.storage_config.bucket_name + MA_MOVEIT_BUCKET_NAME = local.storage_config.massachusetts_moveit_bucket_name + }, + local.identity_provider_environment_variables, + local.service_config.extra_environment_variables + ) + + secrets = concat( + [for secret_name in keys(local.service_config.secrets) : { + name = secret_name + valueFrom = module.secrets[secret_name].secret_arn + }], + module.app_config.enable_identity_provider ? [{ + name = "COGNITO_CLIENT_SECRET" + valueFrom = module.identity_provider_client[0].client_secret_arn + }] : [] + ) + + extra_policies = merge( + { + feature_flags_access = module.feature_flags.access_policy_arn, + storage_access = module.storage.access_policy_arn, + ma_moveit_access = module.storage_ma_moveit.access_policy_arn, + email_access = aws_iam_policy.email_access_policy.arn, + }, + module.app_config.enable_identity_provider ? { + identity_provider_access = module.identity_provider_client[0].access_policy_arn, + } : {} + ) is_temporary = local.is_temporary } @@ -205,8 +249,9 @@ module "feature_flags" { } module "storage" { - source = "../../modules/storage" - name = local.storage_config.bucket_name + source = "../../modules/storage" + name = local.storage_config.bucket_name + is_temporary = local.is_temporary } module "email" { @@ -214,3 +259,45 @@ module "email" { hosted_zone_domain = local.network_config.domain_config.hosted_zone domain = local.service_config.domain_name } + +# If the app has `enable_identity_provider` set to true AND this is not a temporary +# environment, then create a new identity provider. +module "identity_provider" { + count = module.app_config.enable_identity_provider && !local.is_temporary ? 1 : 0 + source = "../../modules/identity-provider/resources" + + is_temporary = local.is_temporary + + name = local.identity_provider_config.identity_provider_name + password_minimum_length = local.identity_provider_config.password_policy.password_minimum_length + temporary_password_validity_days = local.identity_provider_config.password_policy.temporary_password_validity_days + verification_email_message = local.identity_provider_config.verification_email.verification_email_message + verification_email_subject = local.identity_provider_config.verification_email.verification_email_subject + + sender_email = local.notifications_config == null ? null : local.notifications_config.sender_email + sender_display_name = local.notifications_config == null ? null : local.notifications_config.sender_display_name + reply_to_email = local.notifications_config == null ? 
null : local.notifications_config.reply_to_email +} + +# If the app has `enable_identity_provider` set to true AND this *is* a temporary +# environment, then use an existing identity provider. +module "existing_identity_provider" { + count = module.app_config.enable_identity_provider && local.is_temporary ? 1 : 0 + source = "../../modules/identity-provider/data" + + name = local.identity_provider_config.identity_provider_name +} + +# If the app has `enable_identity_provider` set to true, create a new identity provider +# client for the service. A new client is created for all environments, including +# temporary environments. +module "identity_provider_client" { + count = module.app_config.enable_identity_provider ? 1 : 0 + source = "../../modules/identity-provider-client/resources" + + callback_urls = local.identity_provider_config.client.callback_urls + logout_urls = local.identity_provider_config.client.logout_urls + name = "${local.prefix}${local.identity_provider_config.identity_provider_name}" + + user_pool_id = local.identity_provider_user_pool_id +} diff --git a/infra/app/service/outputs.tf b/infra/app/service/outputs.tf index 03919c84..ea92375d 100644 --- a/infra/app/service/outputs.tf +++ b/infra/app/service/outputs.tf @@ -1,16 +1,3 @@ -output "service_endpoint" { - description = "The public endpoint for the service." - value = module.service.public_endpoint -} - -output "service_cluster_name" { - value = module.service.cluster_name -} - -output "service_name" { - value = local.service_config.service_name -} - output "application_log_group" { value = module.service.application_log_group } @@ -19,10 +6,6 @@ output "application_log_stream_prefix" { value = module.service.application_log_stream_prefix } -output "migrator_role_arn" { - value = module.service.migrator_role_arn -} - output "ma_moveit_access_key_id" { value = aws_iam_access_key.ma_moveit.id } @@ -31,3 +14,20 @@ output "ma_moveit_secret_access_key" { value = aws_iam_access_key.ma_moveit.secret sensitive = true } + +output "migrator_role_arn" { + value = module.service.migrator_role_arn +} + +output "service_cluster_name" { + value = module.service.cluster_name +} + +output "service_endpoint" { + description = "The public endpoint for the service." + value = module.service.public_endpoint +} + +output "service_name" { + value = local.service_config.service_name +} diff --git a/infra/app/service/secrets.tf b/infra/app/service/secrets.tf new file mode 100644 index 00000000..e65eaa0c --- /dev/null +++ b/infra/app/service/secrets.tf @@ -0,0 +1,16 @@ +module "secrets" { + for_each = local.service_config.secrets + + source = "../../modules/secret" + + # When generating secrets and storing them in parameter store, append the + # terraform workspace to the secret store path if the environment is temporary + # to avoid conflicts with existing environments. + # Don't do this for secrets that are managed manually since the temporary + # environments will need to share those secrets. + secret_store_name = (each.value.manage_method == "generated" && local.is_temporary ? 
+ "${each.value.secret_store_name}/${terraform.workspace}" : + each.value.secret_store_name + ) + manage_method = each.value.manage_method +} diff --git a/infra/modules/auth-github-actions/variables.tf b/infra/modules/auth-github-actions/variables.tf index dae3d0a4..34adee07 100644 --- a/infra/modules/auth-github-actions/variables.tf +++ b/infra/modules/auth-github-actions/variables.tf @@ -1,3 +1,8 @@ +variable "allowed_actions" { + type = list(string) + description = "List of IAM actions to allow GitHub Actions to perform" +} + variable "github_actions_role_name" { type = string description = "The name to use for the IAM role GitHub actions will assume." @@ -7,8 +12,3 @@ variable "github_repository" { type = string description = "The GitHub repository in 'org/repo' format to provide access to AWS account resources. Example: navapbc/template-infra" } - -variable "allowed_actions" { - type = list(string) - description = "List of IAM actions to allow GitHub Actions to perform" -} diff --git a/infra/modules/container-image-repository/variables.tf b/infra/modules/container-image-repository/variables.tf index 1882ca2a..813bd3aa 100644 --- a/infra/modules/container-image-repository/variables.tf +++ b/infra/modules/container-image-repository/variables.tf @@ -1,3 +1,9 @@ +variable "app_account_ids" { + type = list(string) + description = "A list of account ids to grant pull access to the repository. Use this to grant access to the application environment accounts in a multi-account setup." + default = [] +} + variable "name" { type = string description = "The name of image repository." @@ -7,9 +13,3 @@ variable "push_access_role_arn" { type = string description = "The ARN of the role to grant push access to the repository. Use this to grant access to the role that builds and publishes release artifacts." } - -variable "app_account_ids" { - type = list(string) - description = "A list of account ids to grant pull access to the repository. Use this to grant access to the application environment accounts in a multi-account setup." 
- default = [] -} diff --git a/infra/modules/database/.gitignore b/infra/modules/database/.gitignore index ac3e6dba..e5dd1565 100644 --- a/infra/modules/database/.gitignore +++ b/infra/modules/database/.gitignore @@ -1 +1,2 @@ -/role_manager/vendor +__pycache__ +vendor/ diff --git a/infra/modules/database/backups.tf b/infra/modules/database/backups.tf index 9beca0ba..6b125d72 100644 --- a/infra/modules/database/backups.tf +++ b/infra/modules/database/backups.tf @@ -18,6 +18,9 @@ resource "aws_backup_plan" "backup_plan" { resource "aws_backup_vault" "backup_vault" { name = "${var.name}-db-backup-vault" kms_key_arn = data.aws_kms_key.backup_vault_key.arn + + # Use a separate line to support automated terraform destroy commands + force_destroy = var.is_temporary } # KMS Key for the vault diff --git a/infra/modules/database/main.tf b/infra/modules/database/main.tf index bdd2f60d..5e4ef327 100644 --- a/infra/modules/database/main.tf +++ b/infra/modules/database/main.tf @@ -5,12 +5,11 @@ locals { master_username = "postgres" primary_instance_name = "${var.name}-primary" role_manager_name = "${var.name}-role-manager" - role_manager_package = "${path.root}/role_manager.zip" # The ARN that represents the users accessing the database are of the format: "arn:aws:rds-db:::dbuser:/"" # See https://aws.amazon.com/blogs/database/using-iam-authentication-to-connect-with-pgadmin-amazon-aurora-postgresql-or-amazon-rds-for-postgresql/ db_user_arn_prefix = "arn:aws:rds-db:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:dbuser:${aws_rds_cluster.db.cluster_resource_id}" - engine_version = "14.6" + engine_version = "14.9" engine_major_version = regex("^\\d+", local.engine_version) } @@ -34,17 +33,21 @@ resource "aws_rds_cluster" "db" { manage_master_user_password = true storage_encrypted = true kms_key_id = aws_kms_key.db.arn + allow_major_version_upgrade = false db_cluster_parameter_group_name = aws_rds_cluster_parameter_group.rds_query_logging.name # checkov:skip=CKV_AWS_128:Auth decision needs to be ironed out # checkov:skip=CKV_AWS_162:Auth decision needs to be ironed out iam_database_authentication_enabled = true - deletion_protection = true copy_tags_to_snapshot = true # final_snapshot_identifier = "${var.name}-final" skip_final_snapshot = true + # Use a separate line to support automated terraform destroy commands + # checkov:skip=CKV_AWS_139:Allow disabling deletion protection for automated tests + deletion_protection = !var.is_temporary + serverlessv2_scaling_configuration { max_capacity = 1.0 min_capacity = 0.5 @@ -76,7 +79,7 @@ resource "aws_kms_key" "db" { # ------------- resource "aws_rds_cluster_parameter_group" "rds_query_logging" { - name = var.name + name = "${var.name}-${local.engine_major_version}" family = "aurora-postgresql${local.engine_major_version}" description = "Default cluster parameter group" diff --git a/infra/modules/database/role-manager.tf b/infra/modules/database/role-manager.tf index 9ccbd16c..1c07be32 100644 --- a/infra/modules/database/role-manager.tf +++ b/infra/modules/database/role-manager.tf @@ -6,14 +6,15 @@ # as well as viewing existing roles locals { - db_password_param_name = "/aws/reference/secretsmanager/${data.aws_secretsmanager_secret.db_password.name}" + db_password_param_name = "/aws/reference/secretsmanager/${data.aws_secretsmanager_secret.db_password.name}" + role_manager_archive_path = "${path.module}/role_manager.zip" } resource "aws_lambda_function" "role_manager" { function_name = local.role_manager_name - filename = 
local.role_manager_package - source_code_hash = data.archive_file.role_manager.output_base64sha256 + filename = local.role_manager_archive_path + source_code_hash = filebase64sha256(local.role_manager_archive_path) runtime = "python3.9" handler = "role_manager.lambda_handler" role = aws_iam_role.role_manager.arn @@ -38,8 +39,6 @@ resource "aws_lambda_function" "role_manager" { DB_SCHEMA = var.schema_name APP_USER = var.app_username APP_PASSWORD_PARAM_NAME = var.create_app_password ? module.app_user_password[0].ssm_name : null - GRANT_APP_USER_IAM = var.grant_app_user_iam ? "true" : "false" - ALLOW_APP_MANAGE_SCHEMA = var.allow_app_manage_schema MIGRATOR_USER = var.migrator_username PYTHONPATH = "vendor" } @@ -56,27 +55,6 @@ resource "aws_lambda_function" "role_manager" { # checkov:skip=CKV_AWS_116:Dead letter queue (DLQ) configuration is only relevant for asynchronous invocations } -# Installs python packages needed by the role manager lambda function before -# creating the zip archive. -# Runs pip install on every apply so that the role manager archive file that -# is generated locally is guaranteed to have the required dependencies even -# when terraform is run by a developer that did not originally create the -# environment. -# Timestamp is used to always trigger replacement. -resource "terraform_data" "role_manager_python_vendor_packages" { - triggers_replace = timestamp() - provisioner "local-exec" { - command = "pip3 install -r ${path.module}/role_manager/requirements.txt -t ${path.module}/role_manager/vendor --upgrade" - } -} - -data "archive_file" "role_manager" { - type = "zip" - source_dir = "${path.module}/role_manager" - output_path = local.role_manager_package - depends_on = [terraform_data.role_manager_python_vendor_packages] -} - data "aws_kms_key" "default_ssm_key" { key_id = "alias/aws/ssm" } diff --git a/infra/modules/database/role_manager.zip b/infra/modules/database/role_manager.zip new file mode 100644 index 00000000..93a0079b Binary files /dev/null and b/infra/modules/database/role_manager.zip differ diff --git a/infra/modules/database/role_manager/check.py b/infra/modules/database/role_manager/check.py new file mode 100644 index 00000000..32ac3856 --- /dev/null +++ b/infra/modules/database/role_manager/check.py @@ -0,0 +1,63 @@ +import os + +from pg8000.native import Connection, literal + +import db + + +def check(config: dict): + """Check that database roles, schema, and privileges were + properly configured + """ + print("Running command 'check' to check database roles, schema, and privileges") + app_username = os.environ.get("APP_USER") + migrator_username = os.environ.get("MIGRATOR_USER") + schema_name = os.environ.get("DB_SCHEMA") + + with ( + db.connect_using_iam(app_username) as app_conn, + db.connect_using_iam(migrator_username) as migrator_conn, + ): + check_search_path(migrator_conn, schema_name) + check_migrator_create_table(migrator_conn) + check_app_use_table(app_conn) + check_superuser_extensions(app_conn, config["superuser_extensions"]) + cleanup_migrator_drop_table(migrator_conn) + + return {"success": True} + + +def check_search_path(migrator_conn: Connection, schema_name: str): + print(f"-- Check that search path is {schema_name}") + assert db.execute(migrator_conn, "SHOW search_path") == [[schema_name]] + + +def check_migrator_create_table(migrator_conn: Connection): + print("-- Check that migrator is able to create tables") + cleanup_migrator_drop_table(migrator_conn) + db.execute( + migrator_conn, + "CREATE TABLE IF NOT EXISTS 
role_manager_test(created_at TIMESTAMP)", + ) + + +def check_app_use_table(app_conn: Connection): + app_username = app_conn.user.decode("utf-8") + print(f"-- Check that {app_username} is able to read and write from the table") + db.execute(app_conn, "INSERT INTO role_manager_test (created_at) VALUES (NOW())") + db.execute(app_conn, "SELECT * FROM role_manager_test") + + +def check_superuser_extensions(app_conn: Connection, superuser_extensions: dict): + def to_str(enabled): + return "enabled" if enabled else "disabled" + + for extension, should_be_enabled in superuser_extensions.items(): + print(f"-- Check that {extension} extension is {to_str(should_be_enabled)}") + result = db.execute(app_conn, f"SELECT * FROM pg_extension WHERE extname={literal(extension)}") + is_enabled = len(result) > 0 + assert should_be_enabled == is_enabled + +def cleanup_migrator_drop_table(migrator_conn: Connection): + print("-- Clean up role_manager_test table if it exists") + db.execute(migrator_conn, "DROP TABLE IF EXISTS role_manager_test") diff --git a/infra/modules/database/role_manager/db.py b/infra/modules/database/role_manager/db.py new file mode 100644 index 00000000..ea751634 --- /dev/null +++ b/infra/modules/database/role_manager/db.py @@ -0,0 +1,56 @@ +import json +import os + +import boto3 +from pg8000.native import Connection, identifier + + +def connect_as_master_user() -> Connection: + user = os.environ["DB_USER"] + host = os.environ["DB_HOST"] + port = os.environ["DB_PORT"] + database = os.environ["DB_NAME"] + password = get_master_password() + + print(f"Connecting to database: {user=} {host=} {port=} {database=}") + return Connection( + user=user, + host=host, + port=port, + database=database, + password=password, + ssl_context=True, + ) + + +def get_master_password() -> str: + ssm = boto3.client("ssm", region_name=os.environ["AWS_REGION"]) + param_name = os.environ["DB_PASSWORD_PARAM_NAME"] + print(f"Fetching password from parameter store:\n{param_name}") + result = json.loads( + ssm.get_parameter(Name=param_name, WithDecryption=True)["Parameter"]["Value"] + ) + return result["password"] + + +def connect_using_iam(user: str) -> Connection: + client = boto3.client("rds") + host = os.environ["DB_HOST"] + port = os.environ["DB_PORT"] + database = os.environ["DB_NAME"] + token = client.generate_db_auth_token(DBHostname=host, Port=port, DBUsername=user) + print(f"Connecting to database: {user=} {host=} {port=} {database=}") + return Connection( + user=user, + host=host, + port=port, + database=database, + password=token, + ssl_context=True, + ) + + +def execute(conn: Connection, query: str, print_query: bool = True): + if print_query: + print(f"{conn.user.decode('utf-8')}> {query}") + return conn.run(query) diff --git a/infra/modules/database/role_manager/manage.py b/infra/modules/database/role_manager/manage.py new file mode 100644 index 00000000..550fe40c --- /dev/null +++ b/infra/modules/database/role_manager/manage.py @@ -0,0 +1,216 @@ +import itertools +import os +from operator import itemgetter + +from pg8000.native import Connection, identifier + +import db + + +def manage(config: dict): + """Manage database roles, schema, and privileges""" + + print( + "-- Running command 'manage' to manage database roles, schema, and privileges" + ) + with db.connect_as_master_user() as master_conn: + print_current_db_config(master_conn) + configure_database(master_conn, config) + roles, schema_privileges = print_current_db_config(master_conn) + roles_with_groups = get_roles_with_groups(master_conn) + + 
configure_default_privileges() + + return { + "roles": roles, + "roles_with_groups": roles_with_groups, + "schema_privileges": { + schema_name: schema_acl for schema_name, schema_acl in schema_privileges + }, + } + + +def get_roles(conn: Connection) -> list[str]: + return [ + row[0] + for row in db.execute( + conn, + "SELECT rolname " + "FROM pg_roles " + "WHERE rolname NOT LIKE 'pg_%' " + "AND rolname NOT LIKE 'rds%'", + print_query=False, + ) + ] + + +def get_roles_with_groups(conn: Connection) -> dict[str, str]: + roles_groups = db.execute( + conn, + """ + SELECT u.rolname AS user, g.rolname AS group + FROM pg_roles u + INNER JOIN pg_auth_members a ON u.oid = a.member + INNER JOIN pg_roles g ON g.oid = a.roleid + ORDER BY user ASC + """, + print_query=False, + ) + + result = {} + for user, groups in itertools.groupby(roles_groups, itemgetter(0)): + result[user] = ",".join(map(itemgetter(1), groups)) + return result + + +# Get schema access control lists. The format of the ACLs is abbreviated. To interpret +# what the ACLs mean, see the Postgres documentation on Privileges: +# https://www.postgresql.org/docs/current/ddl-priv.html +def get_schema_privileges(conn: Connection) -> list[tuple[str, str]]: + return [ + (row[0], row[1]) + for row in db.execute( + conn, + """ + SELECT nspname, nspacl + FROM pg_namespace + WHERE nspname NOT LIKE 'pg_%' + AND nspname <> 'information_schema' + """, + print_query=False, + ) + ] + + +def configure_database(conn: Connection, config: dict) -> None: + print("-- Configuring database") + app_username = os.environ.get("APP_USER") + migrator_username = os.environ.get("MIGRATOR_USER") + schema_name = os.environ.get("DB_SCHEMA") + database_name = os.environ.get("DB_NAME") + + # In Postgres 15 and higher, the CREATE privilege on the public + # schema is already revoked/removed from all users except the + # database owner. However, we are explicitly revoking access anyway + # for projects that wish to use earlier versions of Postgres.
+ print("---- Revoking default access on public schema") + db.execute(conn, "REVOKE CREATE ON SCHEMA public FROM PUBLIC") + + print("---- Revoking database access from public role") + db.execute(conn, f"REVOKE ALL ON DATABASE {identifier(database_name)} FROM PUBLIC") + print(f"---- Setting default search path to schema {schema_name}") + db.execute( + conn, + f"ALTER DATABASE {identifier(database_name)} SET search_path TO {identifier(schema_name)}", + ) + + configure_roles(conn, [migrator_username, app_username], database_name) + configure_schema(conn, schema_name, migrator_username, app_username) + configure_superuser_extensions(conn, config["superuser_extensions"]) + +def configure_roles(conn: Connection, roles: list[str], database_name: str) -> None: + print("---- Configuring roles") + for role in roles: + configure_role(conn, role, database_name) + + +def configure_role(conn: Connection, username: str, database_name: str) -> None: + print(f"------ Configuring role: {username=}") + role = "rds_iam" + db.execute( + conn, + f""" + DO $$ + BEGIN + CREATE USER {identifier(username)}; + EXCEPTION WHEN DUPLICATE_OBJECT THEN + RAISE NOTICE 'user already exists'; + END + $$; + """, + ) + db.execute(conn, f"GRANT {identifier(role)} TO {identifier(username)}") + db.execute( + conn, + f"GRANT CONNECT ON DATABASE {identifier(database_name)} TO {identifier(username)}", + ) + + +def configure_schema( + conn: Connection, schema_name: str, migrator_username: str, app_username: str +) -> None: + print("---- Configuring schema") + print(f"------ Creating schema: {schema_name=}") + db.execute(conn, f"CREATE SCHEMA IF NOT EXISTS {identifier(schema_name)}") + print(f"------ Changing schema owner: new_owner={migrator_username}") + db.execute( + conn, + f"ALTER SCHEMA {identifier(schema_name)} OWNER TO {identifier(migrator_username)}", + ) + print(f"------ Granting schema usage privileges: grantee={app_username}") + db.execute( + conn, + f"GRANT USAGE ON SCHEMA {identifier(schema_name)} TO {identifier(app_username)}", + ) + + +def configure_default_privileges(): + """ + Configure default privileges so that future tables, sequences, and routines + created by the migrator user can be accessed by the app user. + You can only alter default privileges for the current role, so we need to + run these SQL queries as the migrator user rather than as the master user. 
+ """ + migrator_username = os.environ.get("MIGRATOR_USER") + schema_name = os.environ.get("DB_SCHEMA") + app_username = os.environ.get("APP_USER") + with db.connect_using_iam(migrator_username) as conn: + print( + f"------ Granting privileges for future objects in schema: grantee={app_username}" + ) + db.execute( + conn, + f"ALTER DEFAULT PRIVILEGES IN SCHEMA {identifier(schema_name)} GRANT ALL ON TABLES TO {identifier(app_username)}", + ) + db.execute( + conn, + f"ALTER DEFAULT PRIVILEGES IN SCHEMA {identifier(schema_name)} GRANT ALL ON SEQUENCES TO {identifier(app_username)}", + ) + db.execute( + conn, + f"ALTER DEFAULT PRIVILEGES IN SCHEMA {identifier(schema_name)} GRANT ALL ON ROUTINES TO {identifier(app_username)}", + ) + + +def print_current_db_config( + conn: Connection, +) -> tuple[list[str], list[tuple[str, str]]]: + print("-- Current database configuration") + roles = get_roles(conn) + print_roles(roles) + schema_privileges = get_schema_privileges(conn) + print_schema_privileges(schema_privileges) + return roles, schema_privileges + + +def print_roles(roles: list[str]) -> None: + print("---- Roles") + for role in roles: + print(f"------ Role {role}") + + +def print_schema_privileges(schema_privileges: list[tuple[str, str]]) -> None: + print("---- Schema privileges") + for name, acl in schema_privileges: + print(f"------ Schema {name=} {acl=}") + + +def configure_superuser_extensions(conn: Connection, superuser_extensions: dict): + print("---- Configuring superuser extensions") + for extension, should_be_enabled in superuser_extensions.items(): + if should_be_enabled: + print(f"------ Enabling {extension} extension") + db.execute(conn, f"CREATE EXTENSION IF NOT EXISTS {identifier(extension)} SCHEMA pg_catalog") + else: + print(f"------ Disabling or skipping {extension} extension") + db.execute(conn, f"DROP EXTENSION IF EXISTS {identifier(extension)}") diff --git a/infra/modules/database/role_manager/role_manager.py b/infra/modules/database/role_manager/role_manager.py index e0d3d549..20898ef7 100644 --- a/infra/modules/database/role_manager/role_manager.py +++ b/infra/modules/database/role_manager/role_manager.py @@ -1,300 +1,11 @@ -import itertools -import json -import logging -import os -from operator import itemgetter -from typing import Optional +from check import check +from manage import manage -import boto3 -from pg8000.native import Connection, identifier, literal - -logger = logging.getLogger() -logger.setLevel(logging.INFO) def lambda_handler(event, context): - if event == "check": - return check() - elif event == "drop": - return drop() - else: - return manage() - -def manage(): - """Manage database roles, schema, and privileges""" - - logger.info("Running command 'manage' to manage database roles, schema, and privileges") - conn = connect_as_master_user() - - logger.info("Current database configuration") - prev_roles = get_roles(conn) - print_roles(prev_roles) - - prev_schema_privileges = get_schema_privileges(conn) - print_schema_privileges(prev_schema_privileges) - - configure_database(conn) - - logger.info("New database configuration") - - new_roles = get_roles(conn) - print_roles(new_roles) - - new_schema_privileges = get_schema_privileges(conn) - print_schema_privileges(new_schema_privileges) - - return { - "roles": new_roles, - "roles_with_groups": get_roles_with_groups(conn), - "schema_privileges": { - schema_name: schema_acl - for schema_name, schema_acl - in new_schema_privileges - }, - } - -def check(): - """Check that database roles, schema, and privileges 
were - properly configured - """ - logger.info("Running command 'check' to check database roles, schema, and privileges") - app_username = os.environ.get("APP_USER") - migrator_username = os.environ.get("MIGRATOR_USER") - schema_name = os.environ.get("DB_SCHEMA") - app_conn = connect_using_iam(app_username) - migrator_conn = connect_using_iam(migrator_username) - - check_search_path(migrator_conn, schema_name) - check_migrator_create_table(migrator_conn, app_username) - check_app_use_table(app_conn) - cleanup_migrator_drop_table(migrator_conn) - - return {"success": True} - -def drop(): - """Drop schema""" - logger.info("Running command 'drop_schema' to reset the schema") - migrator_username = os.environ.get("MIGRATOR_USER") - migrator_conn = connect_using_iam(migrator_username) - schema_name = os.environ.get("DB_SCHEMA") - drop_schema(migrator_conn, schema_name) - - return {"success": True} - -def check_search_path(migrator_conn: Connection, schema_name: str): - logger.info("Checking that search path is %s", schema_name) - assert migrator_conn.run("SHOW search_path") == [[schema_name]] - - -def check_migrator_create_table(migrator_conn: Connection, app_username: str): - logger.info("Checking that migrator is able to create tables and grant access to app user: %s", app_username) - migrator_conn.run("CREATE TABLE IF NOT EXISTS temporary(created_at TIMESTAMP)") - migrator_conn.run(f"GRANT ALL PRIVILEGES ON temporary TO {identifier(app_username)}") - - -def check_app_use_table(app_conn: Connection): - logger.info("Checking that app is able to read and write from the table") - app_conn.run("INSERT INTO temporary (created_at) VALUES (NOW())") - app_conn.run("SELECT * FROM temporary") - - -def cleanup_migrator_drop_table(migrator_conn: Connection): - logger.info("Cleaning up the table that migrator created") - migrator_conn.run("DROP TABLE IF EXISTS temporary") - -def drop_schema(migrator_conn: Connection, schema_name: str): - logger.info(f"Dropping schema: {schema_name}") - migrator_conn.run(f"DROP SCHEMA IF EXISTS {identifier(schema_name)} CASCADE") - -def connect_as_master_user() -> Connection: - user = os.environ["DB_USER"] - host = os.environ["DB_HOST"] - port = os.environ["DB_PORT"] - database = os.environ["DB_NAME"] - param_name = os.environ["DB_PASSWORD_PARAM_NAME"] - password = get_password(param_name) - - logger.info("Connecting to database: user=%s host=%s port=%s database=%s", user, host, port, database) - return Connection(user=user, host=host, port=port, database=database, password=password, ssl_context=True) - - -def connect_using_iam(user: str) -> Connection: - client = boto3.client("rds") - host = os.environ["DB_HOST"] - port = os.environ["DB_PORT"] - database = os.environ["DB_NAME"] - token = client.generate_db_auth_token( - DBHostname=host, Port=port, DBUsername=user - ) - logger.info("Connecting to database: user=%s host=%s port=%s database=%s", user, host, port, database) - return Connection(user=user, host=host, port=port, database=database, password=token, ssl_context=True) - -def get_password(param_name: str) -> str: - raw_result = get_ssm_param(param_name) - - # RDS managed secrets via secrets manager will be a JSON payload with - # "username" and "password" keys, so if can parse as JSON, we'll try to - # extract just the password, otherwise return the param value as-is - try: - parsed_result = json.loads(raw_result) - except json.JSONDecodeError: - return raw_result - - if not isinstance(parsed_result, dict): - return raw_result - - return parsed_result.get("password") 
- -def get_ssm_param(param_name: str) -> str: - ssm = boto3.client("ssm", region_name=os.environ["AWS_REGION"]) - logger.info(f"Fetching from parameter store: {param_name}") - result = ssm.get_parameter( - Name=param_name, - WithDecryption=True, - ) - return result["Parameter"]["Value"] - - -def get_roles(conn: Connection) -> list[str]: - return [row[0] for row in conn.run("SELECT rolname " - "FROM pg_roles " - "WHERE rolname NOT LIKE 'pg_%' " - "AND rolname NOT LIKE 'rds%'")] - - -def get_roles_with_groups(conn: Connection) -> dict[str, str]: - roles_groups = conn.run("SELECT u.rolname AS user, g.rolname AS group \ - FROM pg_roles u \ - INNER JOIN pg_auth_members a ON u.oid = a.member \ - INNER JOIN pg_roles g ON g.oid = a.roleid \ - ORDER BY user ASC") - - result = {} - for user, groups in itertools.groupby(roles_groups, itemgetter(0)): - result[user] = ",".join(map(itemgetter(1), groups)) - return result - - -# Get schema access control lists. The format of the ACLs is abbreviated. To interpret -# what the ACLs mean, see the Postgres documentation on Privileges: -# https://www.postgresql.org/docs/current/ddl-priv.html -def get_schema_privileges(conn: Connection) -> list[tuple[str, str]]: - return [(row[0], row[1]) for row in conn.run("SELECT nspname, nspacl \ - FROM pg_namespace \ - WHERE nspname NOT LIKE 'pg_%' \ - AND nspname <> 'information_schema'")] - - -def configure_database(conn: Connection) -> None: - logger.info("Configuring database") - app_username = os.environ.get("APP_USER") - migrator_username = os.environ.get("MIGRATOR_USER") - schema_name = os.environ.get("DB_SCHEMA") - database_name = os.environ.get("DB_NAME") - grant_app_user_iam = os.environ.get("GRANT_APP_USER_IAM") == "true" - allow_app_manage_schema = os.environ.get("ALLOW_APP_MANAGE_SCHEMA") == "true" - - # If an app user password is defined, retrieve it - app_user_password = None - if app_user_password_param_name := os.environ.get("APP_PASSWORD_PARAM_NAME"): - app_user_password = get_password(app_user_password_param_name) - - logger.info("Revoking default access on public schema") - conn.run("REVOKE CREATE ON SCHEMA public FROM PUBLIC") - logger.info("Revoking database access from public role") - conn.run(f"REVOKE ALL ON DATABASE {identifier(database_name)} FROM PUBLIC") - logger.info("Setting default search path to schema=%s", schema_name) - conn.run(f"ALTER DATABASE {identifier(database_name)} SET search_path TO {identifier(schema_name)}") - - logger.info("Configuring roles") - configure_role(conn, migrator_username, database_name) - configure_role(conn, app_username, database_name, app_user_password, grant_app_user_iam) - configure_schema(conn, schema_name, migrator_username, app_username, allow_app_manage_schema) - - -def configure_role(conn: Connection, username: str, database_name: str, password: Optional[str] = None, grant_iam_role: bool = True) -> None: - logger.info(f"Configuring role: username={username}") - - if not password: - logger.info(f"Create role without password: {username=}") - conn.run( - f""" - DO $$ - BEGIN - CREATE USER {identifier(username)}; - EXCEPTION WHEN DUPLICATE_OBJECT THEN - RAISE NOTICE 'user already exists'; - END - $$; - """ - ) - else: - logger.info("Create role with password: username=%s", username) - conn.run( - f""" - DO $$ - BEGIN - CREATE USER {identifier(username)} WITH PASSWORD {literal(password)}; - EXCEPTION WHEN DUPLICATE_OBJECT THEN - RAISE NOTICE 'user already exists'; - ALTER ROLE {identifier(username)} WITH PASSWORD {literal(password)}; - END - $$; - """ - ) - - 
role = "rds_iam" - if grant_iam_role: - logger.info(f"Grant role: {role=}, {username=}") - conn.run(f"GRANT {identifier(role)} TO {identifier(username)}") - else: - logger.info(f"Revoke role: {role=}, {username=}") - conn.run(f"REVOKE {identifier(role)} FROM {identifier(username)}") - - logger.info(f"Grant connect: {database_name=}, {username=}") - conn.run(f"GRANT CONNECT ON DATABASE {identifier(database_name)} TO {identifier(username)}") - -def configure_schema(conn: Connection, schema_name: str, migrator_username: str, app_username: str, allow_app_manage_schema: bool = False) -> None: - logger.info("Configuring schema") - - logger.info("Creating schema: schema_name=%s", schema_name) - conn.run(f"CREATE SCHEMA IF NOT EXISTS {identifier(schema_name)}") - - logger.info("Changing schema owner: schema_name=%s owner=%s", schema_name, migrator_username) - conn.run(f"ALTER SCHEMA {identifier(schema_name)} OWNER TO {identifier(migrator_username)}") - - # Need pgcrypto for uuid generation in Rails - logger.info("Creating pgcrypto extension") - conn.run("CREATE EXTENSION IF NOT EXISTS pgcrypto") - - # In most cases, the `app` role only needs USAGE access. This is the default. - # In some cases, it needs to manage the schema, so we optionally grant ALL access. - schema_privileges = "USAGE" - if allow_app_manage_schema: - schema_privileges = "ALL" - # Grant the `app` role any default privileges for future database objects. - logger.info(f"Granting schema privileges for future objects: {schema_name=} role={app_username} {schema_privileges=}") - conn.run( - f"ALTER DEFAULT PRIVILEGES IN SCHEMA {identifier(schema_name)} GRANT {identifier(schema_privileges)} ON TABLES TO {identifier(app_username)}" - ) - conn.run( - f"ALTER DEFAULT PRIVILEGES IN SCHEMA {identifier(schema_name)} GRANT {identifier(schema_privileges)} ON SEQUENCES TO {identifier(app_username)}" - ) - conn.run( - f"ALTER DEFAULT PRIVILEGES IN SCHEMA {identifier(schema_name)} GRANT {identifier(schema_privileges)} ON ROUTINES TO {identifier(app_username)}" - ) - - # Grant the `app` role privileges. - logger.info(f"Granting schema privileges: {schema_name=} role={app_username} {schema_privileges=}") - conn.run(f"GRANT {schema_privileges} ON SCHEMA {identifier(schema_name)} TO {identifier(app_username)}") - - -def print_roles(roles: list[str]) -> None: - logger.info("Roles") - for role in roles: - logger.info(f"Role info: name={role}") - - -def print_schema_privileges(schema_privileges: list[tuple[str, str]]) -> None: - logger.info("Schema privileges") - for schema_name, schema_acl in schema_privileges: - logger.info(f"Schema info: name={schema_name} acl={schema_acl}") \ No newline at end of file + action = event["action"] + assert action in ("check", "manage") + if action == "check": + return check(event["config"]) + elif action == "manage": + return manage(event["config"]) diff --git a/infra/modules/database/variables.tf b/infra/modules/database/variables.tf index b440bab8..fd775303 100644 --- a/infra/modules/database/variables.tf +++ b/infra/modules/database/variables.tf @@ -1,25 +1,16 @@ -variable "name" { - description = "name of the database cluster. Note that this is not the name of the Postgres database itself, but the name of the cluster in RDS. The name of the Postgres database is set in module and defaults to 'app'." 
- type = string - validation { - condition = can(regex("^[-_\\da-z]+$", var.name)) - error_message = "use only lower case letters, numbers, dashes, and underscores" - } -} - variable "app_access_policy_name" { description = "name of the IAM policy to create that will provide the service the ability to connect to the database as a user that will have read/write access." type = string } -variable "migrator_access_policy_name" { - description = "name of the IAM policy to create that will provide the migration task the ability to connect to the database as a user that will have read/write access." +variable "app_username" { + description = "name of the database user to create that will be for the application." type = string } -variable "app_username" { - description = "name of the database user to create that will be for the application." +variable "aws_services_security_group_id" { type = string + description = "Security group ID for VPC endpoints that access AWS Services" } variable "create_app_password" { @@ -28,26 +19,43 @@ variable "create_app_password" { default = false } -variable "grant_app_user_iam" { - description = "grant application user the rds_iam role. used when authenticating via IAM in an application." - type = bool - default = true +variable "database_name" { + description = "the name of the Postgres database. Defaults to 'app'." + default = "app" + validation { + condition = can(regex("^[_\\da-z]+$", var.database_name)) + error_message = "use only lower case letters, numbers, and underscores (no dashes)" + } +} + +variable "database_subnet_group_name" { + type = string + description = "Name of database subnet group" } -variable "allow_app_manage_schema" { - description = "whether to grant the application user greater permissions to manage the DB schema." +variable "is_temporary" { + description = "Whether the service is meant to be spun up temporarily (e.g. for automated infra tests). This is used to disable deletion protection." type = bool default = false } +variable "migrator_access_policy_name" { + description = "name of the IAM policy to create that will provide the migration task the ability to connect to the database as a user that will have read/write access." + type = string +} + variable "migrator_username" { description = "name of the database user to create that will be for the role that will run database migrations." type = string } -variable "schema_name" { - description = "name of the Postgres schema to create that will be the schema the application will use (rather than using the public schema)" +variable "name" { + description = "name of the database cluster. Note that this is not the name of the Postgres database itself, but the name of the cluster in RDS. The name of the Postgres database is set in module and defaults to 'app'." type = string + validation { + condition = can(regex("^[-_\\da-z]+$", var.name)) + error_message = "use only lower case letters, numbers, dashes, and underscores" + } } variable "port" { @@ -55,31 +63,17 @@ variable "port" { default = 5432 } -variable "database_name" { - description = "the name of the Postgres database. Defaults to 'app'." - default = "app" - validation { - condition = can(regex("^[_\\da-z]+$", var.database_name)) - error_message = "use only lower case letters, numbers, and underscores (no dashes)" - } -} - -variable "vpc_id" { - type = string - description = "Uniquely identifies the VPC." 
-} - -variable "database_subnet_group_name" { - type = string - description = "Name of database subnet group" -} - variable "private_subnet_ids" { type = list(any) description = "list of private subnet IDs to put the role provisioner and role checker lambda functions in" } -variable "aws_services_security_group_id" { +variable "schema_name" { + description = "name of the Postgres schema to create that will be the schema the application will use (rather than using the public schema)" type = string - description = "Security group ID for VPC endpoints that access AWS Services" +} + +variable "vpc_id" { + type = string + description = "Uniquely identifies the VPC." } diff --git a/infra/modules/domain/outputs.tf b/infra/modules/domain/outputs.tf index bf1615ec..7c0b5184 100644 --- a/infra/modules/domain/outputs.tf +++ b/infra/modules/domain/outputs.tf @@ -1,9 +1,9 @@ -output "hosted_zone_name_servers" { - value = length(aws_route53_zone.zone) > 0 ? aws_route53_zone.zone[0].name_servers : [] -} - output "certificate_arns" { value = { for domain in keys(var.certificate_configs) : domain => aws_acm_certificate.issued[domain].arn } } + +output "hosted_zone_name_servers" { + value = length(aws_route53_zone.zone) > 0 ? aws_route53_zone.zone[0].name_servers : [] +} diff --git a/infra/modules/domain/variables.tf b/infra/modules/domain/variables.tf index 5532c852..2aa58955 100644 --- a/infra/modules/domain/variables.tf +++ b/infra/modules/domain/variables.tf @@ -1,13 +1,3 @@ -variable "name" { - type = string - description = "Fully qualified domain name" -} - -variable "manage_dns" { - type = bool - description = "Whether DNS is managed by the project (true) or managed externally (false)" -} - variable "certificate_configs" { type = map(object({ source = string @@ -20,10 +10,10 @@ variable "certificate_configs" { For each domain's certificate: `source` indicates whether the certificate is managed by the project using AWS Certificate Manager (issued) or imported from an external source (imported) - + `private_key` and `certificate_body` describe the certificate information for imported certificates, which is required if `source` is 'imported'. - EOT + EOT validation { condition = alltrue([ @@ -49,3 +39,13 @@ variable "certificate_configs" { error_message = "certificate_config.certificate_body is required if certificate_config.source is 'imported'" } } + +variable "manage_dns" { + type = bool + description = "Whether DNS is managed by the project (true) or managed externally (false)" +} + +variable "name" { + type = string + description = "Fully qualified domain name" +} diff --git a/infra/modules/feature-flags/logs.tf b/infra/modules/feature-flags/logs.tf index dc639b72..8d7dc7ea 100644 --- a/infra/modules/feature-flags/logs.tf +++ b/infra/modules/feature-flags/logs.tf @@ -2,7 +2,16 @@ data "aws_caller_identity" "current" {} data "aws_region" "current" {} resource "aws_cloudwatch_log_group" "logs" { - name = "feature-flags/${local.evidently_project_name}" + # Prefix log group name with /aws/vendedlogs/ to handle situations where the resource policy + # that AWS automatically creates to allow Evidently to send logs to CloudWatch exceeds the + # 5120 character limit. 
+ # see https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/AWS-logs-and-resource-policy.html#AWS-vended-logs-permissions + # see https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_iam-quotas.html#reference_iam-quotas-entity-length + # + # Note that manually creating resource policies is also not ideal, as there is a quota of + # up to 10 CloudWatch Logs resource policies per Region per account, which can't be changed. + # see https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/cloudwatch_limits_cwl.html + name = "/aws/vendedlogs/feature-flags/${local.evidently_project_name}" # checkov:skip=CKV_AWS_158:Feature flag evaluation logs are not sensitive @@ -10,38 +19,3 @@ # Looser requirements may allow shorter retention periods retention_in_days = 1827 } - -# Manually create policy allowing AWS services to deliver logs to this log group -# so that the automatically created one by AWS doesn't exceed the character limit -# see https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/AWS-logs-and-resource-policy.html#AWS-vended-logs-permissions -# see https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_iam-quotas.html#reference_iam-quotas-entity-length -resource "aws_cloudwatch_log_resource_policy" "logs" { - policy_name = "/log-delivery/feature-flags/${local.evidently_project_name}-logs" - policy_document = data.aws_iam_policy_document.logs.json -} - -data "aws_iam_policy_document" "logs" { - statement { - sid = "AWSLogDeliveryWrite" - effect = "Allow" - principals { - type = "Service" - identifiers = ["delivery.logs.amazonaws.com"] - } - actions = [ - "logs:CreateLogStream", - "logs:PutLogEvents", - ] - resources = ["${aws_cloudwatch_log_group.logs.arn}:log-stream:*"] - condition { - test = "StringEquals" - variable = "aws:SourceAccount" - values = [data.aws_caller_identity.current.account_id] - } - condition { - test = "ArnLike" - variable = "aws:SourceArn" - values = ["arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:*"] - } - } -} diff --git a/infra/modules/feature-flags/main.tf b/infra/modules/feature-flags/main.tf index 3cd11f3e..d450c34f 100644 --- a/infra/modules/feature-flags/main.tf +++ b/infra/modules/feature-flags/main.tf @@ -10,9 +10,6 @@ resource "aws_evidently_project" "feature_flags" { log_group = aws_cloudwatch_log_group.logs.name } } - # Make sure the resource policy is created first so that AWS doesn't try to - # automatically create one - depends_on = [aws_cloudwatch_log_resource_policy.logs] } resource "aws_evidently_feature" "feature_flag" { diff --git a/infra/modules/feature-flags/outputs.tf b/infra/modules/feature-flags/outputs.tf index 51bb64f5..5269dd6c 100644 --- a/infra/modules/feature-flags/outputs.tf +++ b/infra/modules/feature-flags/outputs.tf @@ -1,9 +1,9 @@ -output "evidently_project_name" { - description = "Name of AWS Evidently feature flags project" - value = local.evidently_project_name -} - output "access_policy_arn" { description = "Policy that allows access to query feature flag values" value = aws_iam_policy.access_policy.arn } + +output "evidently_project_name" { + description = "Name of AWS Evidently feature flags project" + value = local.evidently_project_name +} diff --git a/infra/modules/feature-flags/variables.tf b/infra/modules/feature-flags/variables.tf index a04f471b..0c395a1f 100644 --- a/infra/modules/feature-flags/variables.tf +++ b/infra/modules/feature-flags/variables.tf @@ -1,9 +1,9 @@ -variable "service_name" { - type = string -
description = "The name of the service that the feature flagging system will be associated with" -} - variable "feature_flags" { type = set(string) description = "A set of feature flag names" } + +variable "service_name" { + type = string + description = "The name of the service that the feature flagging system will be associated with" +} diff --git a/infra/modules/identity-provider-client/resources/access-control.tf b/infra/modules/identity-provider-client/resources/access-control.tf new file mode 100644 index 00000000..3e4bb9e3 --- /dev/null +++ b/infra/modules/identity-provider-client/resources/access-control.tf @@ -0,0 +1,15 @@ +data "aws_caller_identity" "current" {} +data "aws_region" "current" {} + +resource "aws_iam_policy" "identity_access" { + name = "${var.name}-identity-access" + policy = data.aws_iam_policy_document.identity_access.json +} + +data "aws_iam_policy_document" "identity_access" { + statement { + actions = ["cognito-idp:*"] + effect = "Allow" + resources = ["arn:aws:cognito-idp:${data.aws_region.current.name}:${data.aws_caller_identity.current.id}:userpool/${var.user_pool_id}"] + } +} diff --git a/infra/modules/identity-provider-client/resources/main.tf b/infra/modules/identity-provider-client/resources/main.tf new file mode 100644 index 00000000..7c65292d --- /dev/null +++ b/infra/modules/identity-provider-client/resources/main.tf @@ -0,0 +1,37 @@ +resource "aws_cognito_user_pool_client" "client" { + name = var.name + user_pool_id = var.user_pool_id + + callback_urls = var.callback_urls + logout_urls = var.logout_urls + supported_identity_providers = ["COGNITO"] + refresh_token_validity = 1 + access_token_validity = 60 + id_token_validity = 60 + token_validity_units { + refresh_token = "days" + access_token = "minutes" + id_token = "minutes" + } + + generate_secret = true + allowed_oauth_flows_user_pool_client = true + allowed_oauth_flows = ["code"] + allowed_oauth_scopes = ["phone", "email", "openid", "profile"] + explicit_auth_flows = ["ALLOW_ADMIN_USER_PASSWORD_AUTH", "ALLOW_REFRESH_TOKEN_AUTH"] + + # Avoid security issue where error messages indicate when a user doesn't exist + prevent_user_existence_errors = "ENABLED" + + enable_token_revocation = true + enable_propagate_additional_user_context_data = false + + read_attributes = ["email", "email_verified", "phone_number", "phone_number_verified", "updated_at"] + write_attributes = ["email", "updated_at", "phone_number"] +} + +resource "aws_ssm_parameter" "client_secret" { + name = "/${var.name}/identity-provider/client-secret" + type = "SecureString" + value = aws_cognito_user_pool_client.client.client_secret +} diff --git a/infra/modules/identity-provider-client/resources/outputs.tf b/infra/modules/identity-provider-client/resources/outputs.tf new file mode 100644 index 00000000..3f339506 --- /dev/null +++ b/infra/modules/identity-provider-client/resources/outputs.tf @@ -0,0 +1,14 @@ +output "access_policy_arn" { + description = "The arn for the IAM access policy granting access to the user pool" + value = aws_iam_policy.identity_access.arn +} + +output "client_id" { + description = "The ID of the user pool client" + value = aws_cognito_user_pool_client.client.id +} + +output "client_secret_arn" { + description = "The arn for the SSM parameter storing the user pool client secret" + value = aws_ssm_parameter.client_secret.arn +} diff --git a/infra/modules/identity-provider-client/resources/variables.tf b/infra/modules/identity-provider-client/resources/variables.tf new file mode 100644 index 
00000000..5ac6e811 --- /dev/null +++ b/infra/modules/identity-provider-client/resources/variables.tf @@ -0,0 +1,21 @@ +variable "callback_urls" { + type = list(string) + description = "The URL(s) that the identity provider will redirect to after a successful login" + default = [] +} + +variable "user_pool_id" { + type = string + description = "The ID of the user pool that the client will be associated with" +} + +variable "logout_urls" { + type = list(string) + description = "The URL that the identity provider will redirect to after a successful logout" + default = [] +} + +variable "name" { + type = string + description = "Name of the application or service that will act as a client to the identity provider" +} diff --git a/infra/modules/identity-provider/data/main.tf b/infra/modules/identity-provider/data/main.tf new file mode 100644 index 00000000..f3f2ed71 --- /dev/null +++ b/infra/modules/identity-provider/data/main.tf @@ -0,0 +1,6 @@ +############################################################################################ +## A module for retrieving an existing Cognito User Pool +############################################################################################ +data "aws_cognito_user_pools" "existing_user_pools" { + name = var.name +} diff --git a/infra/modules/identity-provider/data/outputs.tf b/infra/modules/identity-provider/data/outputs.tf new file mode 100644 index 00000000..51dd598f --- /dev/null +++ b/infra/modules/identity-provider/data/outputs.tf @@ -0,0 +1,4 @@ +output "user_pool_id" { + description = "The ID of the user pool." + value = tolist(data.aws_cognito_user_pools.existing_user_pools.ids)[0] +} diff --git a/infra/modules/identity-provider/data/variables.tf b/infra/modules/identity-provider/data/variables.tf new file mode 100644 index 00000000..83c24d80 --- /dev/null +++ b/infra/modules/identity-provider/data/variables.tf @@ -0,0 +1,4 @@ +variable "name" { + type = string + description = "The name of an existing cognito user pool" +} diff --git a/infra/modules/identity-provider/resources/main.tf b/infra/modules/identity-provider/resources/main.tf new file mode 100644 index 00000000..70096f4f --- /dev/null +++ b/infra/modules/identity-provider/resources/main.tf @@ -0,0 +1,85 @@ +############################################################################################ +## A module for configuring a Cognito User Pool +## - Configures for email, but not SMS +## - Configures MFA +############################################################################################ + +data "aws_ses_email_identity" "sender" { + count = var.sender_email != null ? 1 : 0 + email = var.sender_email +} + +resource "aws_cognito_user_pool" "main" { + name = var.name + + # Use a separate line to support automated terraform destroy commands + deletion_protection = var.is_temporary ? "INACTIVE" : "ACTIVE" + + username_attributes = ["email"] + auto_verified_attributes = ["email"] + + account_recovery_setting { + recovery_mechanism { + name = "verified_email" + priority = 1 + } + } + + device_configuration { + challenge_required_on_new_device = true + device_only_remembered_on_user_prompt = true + } + + email_configuration { + # Use this SES email to send cognito emails. If we're not using SES for emails then use null. + # Optionally configures the FROM address and the REPLY-TO address. + # Optionally configures using the Cognito default email or using SES. + source_arn = var.sender_email != null ? 
data.aws_ses_email_identity.sender[0].arn : null + email_sending_account = var.sender_email != null ? "DEVELOPER" : "COGNITO_DEFAULT" + # Customize the name that users see in the "From" section of their inbox, so that it's clearer who the email is from. + # This name also needs to be updated manually in the Cognito console for each environment's Advanced Security emails. + from_email_address = var.sender_email != null ? (var.sender_display_name != null ? "${var.sender_display_name} <${var.sender_email}>" : var.sender_email) : null + reply_to_email_address = var.reply_to_email != null ? var.reply_to_email : null + } + + password_policy { + minimum_length = var.password_minimum_length + temporary_password_validity_days = var.temporary_password_validity_days + } + + mfa_configuration = "OPTIONAL" + software_token_mfa_configuration { + enabled = true + } + + user_pool_add_ons { + advanced_security_mode = "AUDIT" + } + + username_configuration { + case_sensitive = false + } + + user_attribute_update_settings { + attributes_require_verification_before_update = ["email"] + } + + schema { + name = "email" + attribute_data_type = "String" + mutable = "true" + required = "true" + + string_attribute_constraints { + max_length = 2048 + min_length = 0 + } + } + + # Optionally configures email template for resetting a password + verification_message_template { + default_email_option = "CONFIRM_WITH_CODE" + email_message = var.verification_email_message != null ? var.verification_email_message : null + email_subject = var.verification_email_subject != null ? var.verification_email_subject : null + } +} diff --git a/infra/modules/identity-provider/resources/outputs.tf b/infra/modules/identity-provider/resources/outputs.tf new file mode 100644 index 00000000..03914275 --- /dev/null +++ b/infra/modules/identity-provider/resources/outputs.tf @@ -0,0 +1,4 @@ +output "user_pool_id" { + description = "The ID of the user pool." + value = aws_cognito_user_pool.main.id +} diff --git a/infra/modules/identity-provider/resources/variables.tf b/infra/modules/identity-provider/resources/variables.tf new file mode 100644 index 00000000..f980c705 --- /dev/null +++ b/infra/modules/identity-provider/resources/variables.tf @@ -0,0 +1,52 @@ +variable "is_temporary" { + description = "Whether the service is meant to be spun up temporarily (e.g. for automated infra tests). This is used to disable deletion protection." + type = bool + default = false +} + +variable "name" { + type = string + description = "The name of the Cognito User Pool" +} + +variable "password_minimum_length" { + type = number + description = "The password minimum length" + default = 12 +} + +variable "reply_to_email" { + type = string + description = "Email address used as the REPLY-TO for identity service emails" + default = null +} + +variable "sender_display_name" { + type = string + description = "The display name for the identity service's emails. Only used if sender_email is provided" + default = null +} + +variable "sender_email" { + type = string + description = "Email address to use to send identity provider emails. If none is provided, the identity service will be configured to use Cognito's default email functionality, which should only be relied on outside of production." 
+ default = null +} + +variable "temporary_password_validity_days" { + type = number + description = "The number of days a temporary password is valid for" + default = 7 +} + +variable "verification_email_message" { + type = string + description = "The email body for a password reset email. Must contain the {####} placeholder." + default = null +} + +variable "verification_email_subject" { + type = string + description = "The email subject for a password reset email" + default = null +} diff --git a/infra/modules/monitoring/variables.tf b/infra/modules/monitoring/variables.tf index 9b3d4228..15395a38 100644 --- a/infra/modules/monitoring/variables.tf +++ b/infra/modules/monitoring/variables.tf @@ -1,18 +1,7 @@ -variable "service_name" { - type = string - description = "Name of the service running within ECS cluster" -} - -variable "load_balancer_arn_suffix" { - type = string - description = "The ARN suffix for use with CloudWatch Metrics." -} - variable "email_alerts_subscription_list" { type = set(string) default = [] description = "List of emails to subscribe to alerts" - } variable "incident_management_service_integration_url" { @@ -20,3 +9,13 @@ variable "incident_management_service_integration_url" { default = null description = "URL for integrating with for external incident management services" } + +variable "load_balancer_arn_suffix" { + type = string + description = "The ARN suffix for use with CloudWatch Metrics." +} + +variable "service_name" { + type = string + description = "Name of the service running within ECS cluster" +} diff --git a/infra/modules/network/variables.tf b/infra/modules/network/variables.tf index 98fded3c..6bbb67b3 100644 --- a/infra/modules/network/variables.tf +++ b/infra/modules/network/variables.tf @@ -1,8 +1,3 @@ -variable "name" { - type = string - description = "Name to give the VPC. Will be added to the VPC under the 'network_name' tag." -} - variable "aws_services_security_group_name_prefix" { type = string description = "Prefix for the name of the security group attached to VPC endpoints" @@ -13,6 +8,12 @@ variable "database_subnet_group_name" { description = "Name of the database subnet group" } +variable "enable_command_execution" { + type = bool + description = "Whether the application(s) in this network need ECS Exec access. Determines whether to create VPC endpoints needed by ECS Exec." + default = false +} + variable "has_database" { type = bool description = "Whether the application(s) in this network have a database. Determines whether to create VPC endpoints needed by the database layer." @@ -31,8 +32,7 @@ variable "single_nat_gateway" { default = false } -variable "enable_command_execution" { - type = bool - description = "Whether the application(s) in this network need ECS Exec access. Determines whether to create VPC endpoints needed by ECS Exec." - default = false +variable "name" { + type = string + description = "Name to give the VPC. Will be added to the VPC under the 'network_name' tag." } diff --git a/infra/modules/secret/main.tf b/infra/modules/secret/main.tf new file mode 100644 index 00000000..8619c86e --- /dev/null +++ b/infra/modules/secret/main.tf @@ -0,0 +1,26 @@ +locals { + secret = var.manage_method == "generated" ? aws_ssm_parameter.secret[0] : data.aws_ssm_parameter.secret[0] + access_policy_name = "${trimprefix(replace(local.secret.name, "/", "-"), "/")}-access" +} + +resource "random_password" "secret" { + count = var.manage_method == "generated" ? 
1 : 0 + + length = 64 + special = true + override_special = "!#$%&*()-_=+[]{}<>:?" +} + +resource "aws_ssm_parameter" "secret" { + count = var.manage_method == "generated" ? 1 : 0 + + name = var.secret_store_name + type = "SecureString" + value = random_password.secret[0].result +} + +data "aws_ssm_parameter" "secret" { + count = var.manage_method == "manual" ? 1 : 0 + + name = var.secret_store_name +} diff --git a/infra/modules/secret/outputs.tf b/infra/modules/secret/outputs.tf new file mode 100644 index 00000000..57ebfcf8 --- /dev/null +++ b/infra/modules/secret/outputs.tf @@ -0,0 +1,3 @@ +output "secret_arn" { + value = local.secret.arn +} diff --git a/infra/modules/secret/variables.tf b/infra/modules/secret/variables.tf new file mode 100644 index 00000000..44d28795 --- /dev/null +++ b/infra/modules/secret/variables.tf @@ -0,0 +1,22 @@ +variable "manage_method" { + type = string + description = < 0 ? aws_iam_role.migrator_task[0].arn : null } + +output "public_endpoint" { + description = "The public endpoint for the service." + value = "http://${aws_lb.alb.dns_name}" +} diff --git a/infra/modules/service/scheduled_jobs.tf b/infra/modules/service/scheduled_jobs.tf new file mode 100644 index 00000000..27c7a352 --- /dev/null +++ b/infra/modules/service/scheduled_jobs.tf @@ -0,0 +1,86 @@ +resource "aws_scheduler_schedule" "scheduled_jobs" { + for_each = var.scheduled_jobs + + # TODO(https://github.com/navapbc/template-infra/issues/164) Encrypt with customer managed KMS key + # checkov:skip=CKV_AWS_297:Encrypt with customer key in future work + + name = "${var.service_name}-${each.key}" + state = "ENABLED" + schedule_expression = each.value.schedule_expression + schedule_expression_timezone = "Etc/UTC" + + flexible_time_window { + mode = "OFF" + } + + # target is the state machine + target { + arn = aws_sfn_state_machine.scheduled_jobs[each.key].arn + role_arn = aws_iam_role.scheduler.arn + + retry_policy { + maximum_retry_attempts = 0 + } + } +} + +resource "aws_sfn_state_machine" "scheduled_jobs" { + for_each = var.scheduled_jobs + + name = "${var.service_name}-${each.key}" + role_arn = aws_iam_role.workflow_orchestrator.arn + + definition = jsonencode({ + "StartAt" : "RunTask", + "States" : { + "RunTask" : { + "Type" : "Task", + # docs: https://docs.aws.amazon.com/step-functions/latest/dg/connect-ecs.html + "Resource" : "arn:aws:states:::ecs:runTask.sync", + "Parameters" : { + "Cluster" : aws_ecs_cluster.cluster.arn, + "TaskDefinition" : aws_ecs_task_definition.app.arn, + "LaunchType" : "FARGATE", + "NetworkConfiguration" : { + "AwsvpcConfiguration" : { + "Subnets" : var.private_subnet_ids, + "SecurityGroups" : [aws_security_group.app.id], + } + }, + "Overrides" : { + "ContainerOverrides" : [ + { + "Name" : var.service_name, + "Command" : each.value.task_command + } + ] + } + }, + "End" : true + } + } + }) + + logging_configuration { + log_destination = "${aws_cloudwatch_log_group.scheduled_jobs[each.key].arn}:*" + include_execution_data = true + level = "ERROR" + } + + tracing_configuration { + enabled = true + } +} + +resource "aws_cloudwatch_log_group" "scheduled_jobs" { + for_each = var.scheduled_jobs + + name_prefix = "/aws/vendedlogs/states/${var.service_name}-${each.key}" + + # Conservatively retain logs for 5 years. 
+ # Looser requirements may allow shorter retention periods + retention_in_days = 1827 + + # TODO(https://github.com/navapbc/template-infra/issues/164) Encrypt with customer managed KMS key + # checkov:skip=CKV_AWS_158:Encrypt service logs with customer key in future work +} diff --git a/infra/modules/service/scheduler_role.tf b/infra/modules/service/scheduler_role.tf new file mode 100644 index 00000000..769d7b36 --- /dev/null +++ b/infra/modules/service/scheduler_role.tf @@ -0,0 +1,61 @@ +#---------------------- +# Schedule Manager Role +#---------------------- +# This role and policy are used by EventBridge to manage the scheduled jobs. + +resource "aws_iam_role" "scheduler" { + name = "${var.service_name}-scheduler" + managed_policy_arns = [aws_iam_policy.scheduler.arn] + assume_role_policy = data.aws_iam_policy_document.scheduler_assume_role.json +} + +data "aws_iam_policy_document" "scheduler_assume_role" { + statement { + actions = ["sts:AssumeRole"] + principals { + type = "Service" + identifiers = ["scheduler.amazonaws.com"] + } + } +} + +resource "aws_iam_policy" "scheduler" { + name = "${var.service_name}-scheduler" + policy = data.aws_iam_policy_document.scheduler.json +} + +data "aws_iam_policy_document" "scheduler" { + + statement { + sid = "StepFunctionsEvents" + actions = [ + "events:PutTargets", + "events:PutRule", + "events:DescribeRule", + ] + resources = ["arn:aws:events:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:rule/StepFunctionsGetEventsForStepFunctionsExecutionRule"] + } + + dynamic "statement" { + for_each = aws_sfn_state_machine.scheduled_jobs + + content { + actions = [ + "states:StartExecution", + ] + resources = [statement.value.arn] + } + } + + dynamic "statement" { + for_each = aws_sfn_state_machine.scheduled_jobs + + content { + actions = [ + "states:DescribeExecution", + "states:StopExecution", + ] + resources = ["${statement.value.arn}:*"] + } + } +} diff --git a/infra/modules/service/secrets.tf b/infra/modules/service/secrets.tf deleted file mode 100644 index 29a276a4..00000000 --- a/infra/modules/service/secrets.tf +++ /dev/null @@ -1,14 +0,0 @@ -locals { - secrets = [ - for secret in var.secrets : - { - name = secret.name, - valueFrom = secret.ssm_param_name - } - ] - - secret_arn_patterns = [ - for secret in var.secrets : - "arn:aws:ssm:*:*:parameter/${trimprefix(secret.ssm_param_name, "/")}" - ] -} diff --git a/infra/modules/service/variables.tf b/infra/modules/service/variables.tf index c77940be..9f68430e 100644 --- a/infra/modules/service/variables.tf +++ b/infra/modules/service/variables.tf @@ -1,15 +1,6 @@ -variable "service_name" { - description = "name of the service, to be used for infra structure resource naming" - validation { - condition = can(regex("^[-_\\da-z]+$", var.service_name)) - error_message = "use only lower case letters, numbers, dashes, and underscores" - } -} - -variable "domain_name" { +variable "aws_services_security_group_id" { type = string - description = "The fully qualified domain name for the application" - default = null + description = "Security group ID for VPC endpoints that access AWS Services" } variable "certificate_arn" { @@ -18,31 +9,10 @@ variable "certificate_arn" { default = null } -variable "hosted_zone_id" { - type = string - description = "The Route53 hosted zone id for the domain" - default = null -} - -variable "image_tag" { - type = string - description = "The tag of the image to deploy" -} - -variable "image_repository_account_id" { - type = string - description = 
"The account ID that contains the container image repository" -} - -variable "image_repository_name" { - type = string - description = "The name of the container image repository" -} - -variable "desired_instance_count" { +variable "container_port" { type = number - description = "Number of instances of the task definition to place and keep running." - default = 1 + description = "The port number on the container that's bound to the user-specified" + default = 8000 } variable "cpu" { @@ -51,59 +21,6 @@ variable "cpu" { description = "Number of cpu units used by the task, expessed as an integer value, e.g 512 " } -variable "memory" { - type = number - default = 512 - description = "Amount (in MiB) of memory used by the task. e.g. 2048" -} - -variable "enable_command_execution" { - type = bool - default = false - description = "Whether the service should enable ECS Exec, such as for debugging" -} - -variable "container_port" { - type = number - description = "The port number on the container that's bound to the user-specified" - default = 8000 -} - -variable "vpc_id" { - type = string - description = "Uniquely identifies the VPC." -} - -variable "public_subnet_ids" { - type = list(any) - description = "Public subnet ids in VPC" -} - -variable "private_subnet_ids" { - type = list(any) - description = "Private subnet ids in VPC" -} - -variable "aws_services_security_group_id" { - type = string - description = "Security group ID for VPC endpoints that access AWS Services" -} - -variable "extra_environment_variables" { - type = map(string) - description = "Additional environment variables to pass to the service container. Map from environment variable name to the value." - default = {} -} - -variable "secrets" { - type = set(object({ - name = string - ssm_param_name = string - })) - description = "List of configurations for defining environment variables that pull from SSM parameter store" - default = [] -} - variable "db_vars" { description = "Variables for integrating the app service with a database" type = object({ @@ -121,6 +38,30 @@ variable "db_vars" { default = null } +variable "desired_instance_count" { + type = number + description = "Number of instances of the task definition to place and keep running." + default = 1 +} + +variable "domain_name" { + type = string + description = "The fully qualified domain name for the application" + default = null +} + +variable "enable_command_execution" { + type = bool + default = false + description = "Whether the service should enable ECS Exec, such as for debugging" +} + +variable "extra_environment_variables" { + type = map(string) + description = "Additional environment variables to pass to the service container. Map from environment variable name to the value." + default = {} +} + variable "extra_policies" { description = "Map of extra IAM policies to attach to the service's task role. The map's keys define the resource name in terraform." type = map(string) @@ -133,22 +74,21 @@ variable "file_upload_jobs" { path_prefix = string task_command = list(string) })) - description = <` - and ``. For example if task_command is: + command can optionally include the placeholder values`` + and``. 
For example if task_command is: ["python", "etl.py", "<object_key>"] - Then if an object was uploaded to s3://somebucket/path/to/file.txt, the + task will execute the command: python etl.py path/to/file.txt @@ -156,7 +96,76 @@ default = {} } +variable "hosted_zone_id" { + type = string + description = "The Route53 hosted zone id for the domain" + default = null +} + +variable "image_repository_arn" { + type = string + description = "The ARN of the container image repository" +} + +variable "image_repository_url" { + type = string + description = "The URL of the container image repository" +} + +variable "image_tag" { + type = string + description = "The tag of the image to deploy" +} + variable "is_temporary" { description = "Whether the service is meant to be spun up temporarily (e.g. for automated infra tests). This is used to disable deletion protection for the load balancer." type = bool + default = false +} + +variable "memory" { + type = number + default = 512 + description = "Amount (in MiB) of memory used by the task. e.g. 2048" +} + +variable "private_subnet_ids" { + type = list(any) + description = "Private subnet ids in VPC" +} + +variable "public_subnet_ids" { + type = list(any) + description = "Public subnet ids in VPC" +} + +variable "scheduled_jobs" { + description = "Configuration for scheduled jobs run via Step Functions" + type = map(object({ + task_command = list(string) + schedule_expression = string + })) + default = {} +} + +variable "secrets" { + type = set(object({ + name = string + valueFrom = string + })) + description = "List of configurations for defining environment variables that pull from SSM parameter store" + default = [] +} + +variable "service_name" { + description = "Name of the service, to be used for infrastructure resource naming" + validation { + condition = can(regex("^[-_\\da-z]+$", var.service_name)) + error_message = "use only lower case letters, numbers, dashes, and underscores" + } +} + +variable "vpc_id" { + type = string + description = "Uniquely identifies the VPC." } diff --git a/infra/modules/service/workflow_orchestrator_role.tf b/infra/modules/service/workflow_orchestrator_role.tf new file mode 100644 index 00000000..f4ea716f --- /dev/null +++ b/infra/modules/service/workflow_orchestrator_role.tf @@ -0,0 +1,109 @@ +#-------------------------------- +# Scheduler Workflow Manager Role +#-------------------------------- +# This role and policy are used by the Step Functions state machine that manages the scheduled jobs workflow.
+ +resource "aws_iam_role" "workflow_orchestrator" { + name = "${var.service_name}-workflow-orchestrator" + managed_policy_arns = [aws_iam_policy.workflow_orchestrator.arn] + assume_role_policy = data.aws_iam_policy_document.workflow_orchestrator_assume_role.json +} + +data "aws_iam_policy_document" "workflow_orchestrator_assume_role" { + statement { + actions = ["sts:AssumeRole"] + principals { + type = "Service" + identifiers = ["states.amazonaws.com"] + } + condition { + test = "ArnLike" + variable = "aws:SourceArn" + values = ["arn:aws:states:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:stateMachine:*"] + } + + condition { + test = "StringLike" + variable = "aws:SourceAccount" + values = [ + data.aws_caller_identity.current.account_id + ] + } + } +} + +resource "aws_iam_policy" "workflow_orchestrator" { + name = "${var.service_name}-workflow-orchestrator" + policy = data.aws_iam_policy_document.workflow_orchestrator.json +} + +#tfsec:ignore:aws-iam-no-policy-wildcards +data "aws_iam_policy_document" "workflow_orchestrator" { + # checkov:skip=CKV_AWS_111:These permissions are scoped just fine + + statement { + sid = "UnscopeLogsPermissions" + actions = [ + "logs:CreateLogDelivery", + "logs:CreateLogStream", + "logs:GetLogDelivery", + "logs:UpdateLogDelivery", + "logs:DeleteLogDelivery", + "logs:ListLogDeliveries", + "logs:PutLogEvents", + "logs:PutResourcePolicy", + "logs:DescribeResourcePolicies", + "logs:DescribeLogGroups", + ] + resources = ["*"] + } + + statement { + sid = "StepFunctionsEvents" + actions = [ + "events:PutTargets", + "events:PutRule", + "events:DescribeRule", + ] + resources = [ + "arn:aws:events:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:rule/StepFunctionsGetEventsForECSTaskRule", + ] + } + + statement { + effect = "Allow" + actions = ["ecs:RunTask"] + resources = ["${aws_ecs_task_definition.app.arn_without_revision}:*"] + condition { + test = "ArnLike" + variable = "ecs:cluster" + values = [aws_ecs_cluster.cluster.arn] + } + } + + statement { + effect = "Allow" + actions = [ + "ecs:StopTask", + "ecs:DescribeTasks", + ] + resources = ["arn:aws:ecs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:task/${var.service_name}/*"] + condition { + test = "ArnLike" + variable = "ecs:cluster" + values = [aws_ecs_cluster.cluster.arn] + } + } + + + statement { + sid = "PassRole" + actions = [ + "iam:PassRole", + ] + resources = [ + aws_iam_role.task_executor.arn, + aws_iam_role.app_service.arn, + ] + } +} diff --git a/infra/modules/storage/main.tf b/infra/modules/storage/main.tf index 48f465af..5fb7f111 100644 --- a/infra/modules/storage/main.tf +++ b/infra/modules/storage/main.tf @@ -1,6 +1,8 @@ resource "aws_s3_bucket" "storage" { - bucket = var.name - force_destroy = false + bucket = var.name + + # Use a separate line to support automated terraform destroy commands + force_destroy = var.is_temporary # checkov:skip=CKV_AWS_18:TODO(https://github.com/navapbc/template-infra/issues/507) Implement access logging diff --git a/infra/modules/storage/variables.tf b/infra/modules/storage/variables.tf index 0a261c96..61358ba5 100644 --- a/infra/modules/storage/variables.tf +++ b/infra/modules/storage/variables.tf @@ -1,3 +1,9 @@ +variable "is_temporary" { + description = "Whether the service is meant to be spun up temporarily (e.g. for automated infra tests). This is used to disable deletion protection." 
+ type = bool + default = false +} + variable "name" { type = string description = "Name of the AWS S3 bucket. Needs to be globally unique across all regions." diff --git a/infra/modules/terraform-backend-s3/main.tf b/infra/modules/terraform-backend-s3/main.tf index 3b95ca3c..cebb177f 100644 --- a/infra/modules/terraform-backend-s3/main.tf +++ b/infra/modules/terraform-backend-s3/main.tf @@ -52,6 +52,7 @@ resource "aws_s3_bucket" "tf_state" { # Prevent accidental destruction a developer executing terraform destory in the wrong directory. Contains terraform state files. lifecycle { + # Use a separate line to support automated terraform destroy commands prevent_destroy = true } } diff --git a/infra/modules/terraform-backend-s3/outputs.tf b/infra/modules/terraform-backend-s3/outputs.tf index 8ce14d4a..5866610b 100644 --- a/infra/modules/terraform-backend-s3/outputs.tf +++ b/infra/modules/terraform-backend-s3/outputs.tf @@ -1,11 +1,11 @@ -output "tf_state_bucket_name" { - value = aws_s3_bucket.tf_state.bucket +output "tf_locks_table_name" { + value = aws_dynamodb_table.terraform_lock.name } output "tf_log_bucket_name" { value = aws_s3_bucket.tf_log.bucket } -output "tf_locks_table_name" { - value = aws_dynamodb_table.terraform_lock.name +output "tf_state_bucket_name" { + value = aws_s3_bucket.tf_state.bucket } diff --git a/infra/networks/outputs.tf b/infra/networks/outputs.tf index 4c916802..95412c5f 100644 --- a/infra/networks/outputs.tf +++ b/infra/networks/outputs.tf @@ -1,11 +1,11 @@ -output "hosted_zone_name_servers" { - value = module.domain.hosted_zone_name_servers +output "certificate_arns" { + value = module.domain.certificate_arns } output "certificate_domains" { value = keys(local.domain_config.certificate_configs) } -output "certificate_arns" { - value = module.domain.certificate_arns +output "hosted_zone_name_servers" { + value = module.domain.hosted_zone_name_servers } diff --git a/infra/project-config/aws-services.tf b/infra/project-config/aws-services.tf index e4a190a1..ee8ccdce 100644 --- a/infra/project-config/aws-services.tf +++ b/infra/project-config/aws-services.tf @@ -4,6 +4,7 @@ locals { "apigateway", "application-autoscaling", "autoscaling", + "backup", "cloudwatch", "cognito-idp", "dynamodb", @@ -30,6 +31,7 @@ locals { "sns", "ses", "ssm", + "states", "waf-regional", "wafv2", ] diff --git a/infra/project-config/networks.tf b/infra/project-config/networks.tf index fe06d010..f440c9cc 100644 --- a/infra/project-config/networks.tf +++ b/infra/project-config/networks.tf @@ -1,6 +1,7 @@ locals { network_configs = { dev = { + account_name = "nava-ffs" database_subnet_group_name = "dev" domain_config = { @@ -30,6 +31,7 @@ locals { } staging = { + account_name = "staging" database_subnet_group_name = "staging" domain_config = { @@ -41,6 +43,7 @@ locals { } prod = { + account_name = "nava-ffs-prod" database_subnet_group_name = "prod" domain_config = { diff --git a/infra/project-config/outputs.tf b/infra/project-config/outputs.tf index d82461cb..d063022e 100644 --- a/infra/project-config/outputs.tf +++ b/infra/project-config/outputs.tf @@ -1,13 +1,10 @@ -output "project_name" { - value = local.project_name -} - -output "owner" { - value = local.owner +output "aws_services" { + description = "AWS services that this project uses" + value = local.aws_services } -output "code_repository_url" { - value = local.code_repository_url +output "aws_services_security_group_name_prefix" { + value = local.aws_services_security_group_name_prefix } output "code_repository" { @@ -15,11 +12,14 @@ 
output "code_repository" { description = "The 'org/repo' string of the repo (e.g. 'navapbc/template-infra'). This is extracted from the repo URL (e.g. 'git@github.com:navapbc/template-infra.git' or 'https://github.com/navapbc/template-infra.git')" } +output "code_repository_url" { + value = local.code_repository_url +} + output "default_region" { value = local.default_region } -# Common tags for all accounts and environments output "default_tags" { value = { project = local.project_name @@ -27,7 +27,7 @@ output "default_tags" { repository = local.code_repository_url terraform = true terraform_workspace = terraform.workspace - # description is set in each environments local use key project_description if required. + # description is set in each environments local use key project_description if required. } } @@ -35,15 +35,14 @@ output "github_actions_role_name" { value = local.github_actions_role_name } -output "aws_services" { - description = "AWS services that this project uses" - value = local.aws_services +output "network_configs" { + value = local.network_configs } -output "aws_services_security_group_name_prefix" { - value = local.aws_services_security_group_name_prefix +output "owner" { + value = local.owner } -output "network_configs" { - value = local.network_configs +output "project_name" { + value = local.project_name } diff --git a/infra/test/infra_test.go b/infra/test/infra_test.go index 8f63ced9..a264fbec 100644 --- a/infra/test/infra_test.go +++ b/infra/test/infra_test.go @@ -10,7 +10,6 @@ import ( "github.com/gruntwork-io/terratest/modules/random" "github.com/gruntwork-io/terratest/modules/shell" "github.com/gruntwork-io/terratest/modules/terraform" - "github.com/stretchr/testify/require" ) var uniqueId = strings.ToLower(random.UniqueId()) @@ -103,49 +102,7 @@ func RunEndToEndTests(t *testing.T, terraformOptions *terraform.Options) { fmt.Println("::endgroup::") } -func EnableDestroyService(t *testing.T, terraformOptions *terraform.Options) { - fmt.Println("::group::Set force_destroy = true and prevent_destroy = false for s3 buckets in service layer") - shell.RunCommand(t, shell.Command{ - Command: "sed", - Args: []string{ - "-i.bak", - "s/force_destroy = false/force_destroy = true/g", - "infra/modules/service/access-logs.tf", - }, - WorkingDir: "../../", - }) - shell.RunCommand(t, shell.Command{ - Command: "sed", - Args: []string{ - "-i.bak", - "s/prevent_destroy = true/prevent_destroy = false/g", - "infra/modules/service/access-logs.tf", - }, - WorkingDir: "../../", - }) - shell.RunCommand(t, shell.Command{ - Command: "sed", - Args: []string{ - "-i.bak", - "s/force_destroy = false/force_destroy = true/g", - "infra/modules/storage/main.tf", - }, - WorkingDir: "../../", - }) - - // Clone the options and set targets to only apply to the buckets - terraformOptions, err := terraformOptions.Clone() - require.NoError(t, err) - terraformOptions.Targets = []string{ - "module.service.aws_s3_bucket.access_logs", - "module.storage.aws_s3_bucket.storage", - } - terraform.Apply(t, terraformOptions) - fmt.Println("::endgroup::") -} - func DestroyService(t *testing.T, terraformOptions *terraform.Options) { - EnableDestroyService(t, terraformOptions) fmt.Println("::group::Destroy service layer") terraform.Destroy(t, terraformOptions) fmt.Println("::endgroup::")