From c5a52bd625d73f651ba233707bbf4d034129d443 Mon Sep 17 00:00:00 2001 From: Rohith Jayawardene Date: Fri, 26 Apr 2024 13:56:48 +0100 Subject: [PATCH] [BUGFIX] - Plan & Apply (#1391) When we moved to passing the terraform plan via secret to the apply, a number of things needed fixing --- cmd/step/main.go | 4 +- pkg/assets/job.yaml.tpl | 18 +++++--- pkg/controller/configuration/delete.go | 2 + pkg/controller/configuration/ensure.go | 45 +++++++++++++------ .../configuration/reconcile_test.go | 13 +++--- pkg/utils/jobs/jobs.go | 2 +- test/e2e/check-suite.sh | 3 +- .../e2e/integration/custom-state-backend.bats | 9 +++- test/e2e/integration/destroy.bats | 10 +++++ test/e2e/integration/infracost.bats | 2 +- test/e2e/integration/plan.bats | 28 ++++++++++++ 11 files changed, 104 insertions(+), 32 deletions(-) diff --git a/cmd/step/main.go b/cmd/step/main.go index 567d5cc05..c3f89c92d 100644 --- a/cmd/step/main.go +++ b/cmd/step/main.go @@ -225,8 +225,6 @@ func uploadSecret(ctx context.Context, cc client.Client, namespace, name, path s if err != nil { return err } - original := secret.DeepCopy() - // @step: define the secret if secret.Data == nil { secret.Data = make(map[string][]byte) @@ -238,5 +236,5 @@ func uploadSecret(ctx context.Context, cc client.Client, namespace, name, path s return cc.Create(ctx, secret) } - return cc.Patch(ctx, secret, client.MergeFrom(original)) + return cc.Update(ctx, secret) } diff --git a/pkg/assets/job.yaml.tpl b/pkg/assets/job.yaml.tpl index cb8d8339a..77526c1f8 100644 --- a/pkg/assets/job.yaml.tpl +++ b/pkg/assets/job.yaml.tpl @@ -160,6 +160,10 @@ spec: mountPath: /run {{- end }} + # + # @step: if policy is defined, an external source is defined and the stage is planout + # then we need to retrieve the external source + # {{- if and (.Policy) (not .Policy.Source) (eq .Stage "plan") }} {{- $image := .Images.Executor }} {{- $imagePullPolicy := .ImagePullPolicy }} @@ -195,8 +199,10 @@ spec: args: - --comment=Executing Terraform {{- if eq .Stage "plan" }} - - --command=/bin/terraform plan {{ .TerraformArguments }} -out=/run/plan.out -lock=false -no-color - - --command=/bin/terraform show -json /run/plan.out > /run/plan.json + - --command=/bin/terraform plan {{ .TerraformArguments }} -out=/run/plan.out -lock=false -no-color -input=false + # We need to retain a uncompressed version, for checkov and infracosts + - --command=/bin/terraform show -json /run/plan.out > /run/tfplan.json + - --command=/bin/cp /run/tfplan.json /run/plan.json - --command=/bin/gzip /run/plan.json - --command=/bin/mv /run/plan.json.gz /run/plan.json - --namespace=$(KUBE_NAMESPACE) @@ -204,7 +210,7 @@ spec: - --upload=$(TERRAFORM_PLAN_OUT_NAME)=/run/plan.out {{- end }} {{- if eq .Stage "apply" }} - - --command=/bin/terraform apply -lock=false -no-color /run/plan.out + - --command=/bin/terraform apply {{ .TerraformArguments }} -lock=false -no-color -input=false -auto-approve {{- if .SaveTerraformState }} - --command=/bin/terraform state pull > /run/tfstate - --command=/bin/gzip /run/tfstate @@ -281,8 +287,8 @@ spec: - /run/bin/step args: - --comment=Evaluating the costs - - --command=/usr/bin/infracost breakdown --path /run/plan.json - - --command=/usr/bin/infracost breakdown --path /run/plan.json --format json > /run/costs.json + - --command=/usr/bin/infracost breakdown --path /run/tfplan.json + - --command=/usr/bin/infracost breakdown --path /run/tfplan.json --format json > /run/costs.json - --namespace=$(KUBE_NAMESPACE) - --upload=$(COST_REPORT_NAME)=/run/costs.json - --is-failure=/run/steps/terraform.failed @@ -319,7 +325,7 @@ spec: {{- if and (.Policy) (eq .Stage "plan") }} {{- $configfile := "/run/checkov/checkov.yaml" }} - {{- $options := "--framework terraform_plan -f /run/plan.json --soft-fail -o json -o cli --output-file-path /run --repo-root-for-plan-enrichment /data --download-external-modules true" }} + {{- $options := "--framework terraform_plan -f /run/tfplan.json --soft-fail -o json -o cli --output-file-path /run --repo-root-for-plan-enrichment /data --download-external-modules true" }} {{- if .Policy.Source }} {{- $configfile = printf "%s/%s" "/run/checkov" .Policy.Source.Configuration }} {{- end }} diff --git a/pkg/controller/configuration/delete.go b/pkg/controller/configuration/delete.go index 67ccb4352..faaf4f582 100644 --- a/pkg/controller/configuration/delete.go +++ b/pkg/controller/configuration/delete.go @@ -177,6 +177,8 @@ func (c *Controller) ensureConfigurationSecretsDeleted(configuration *terraformv configuration.GetTerraformCostSecretName(), configuration.GetTerraformPolicySecretName(), configuration.GetTerraformStateSecretName(), + configuration.GetTerraformPlanOutSecretName(), + configuration.GetTerraformPlanJSONSecretName(), } for _, name := range names { diff --git a/pkg/controller/configuration/ensure.go b/pkg/controller/configuration/ensure.go index 2c5a148f7..ce2684b8a 100644 --- a/pkg/controller/configuration/ensure.go +++ b/pkg/controller/configuration/ensure.go @@ -726,7 +726,9 @@ func (c *Controller) ensureTerraformPlan(configuration *terraformv1alpha1.Config case cond.GetCondition().IsFailed(configuration.GetGeneration()): return reconcile.Result{}, controller.ErrIgnore + // @step: has this generation of the configuration already been planned? case cond.GetCondition().IsComplete(configuration.GetGeneration()): + if !configuration.Spec.EnableDriftDetection || configuration.GetAnnotations()[terraformv1alpha1.DriftAnnotation] == "" { // @note: this is effectively checking the status of plan condition - if the condition is True // for the given generation we can say the plan has already been run and can move on @@ -734,7 +736,8 @@ func (c *Controller) ensureTerraformPlan(configuration *terraformv1alpha1.Config } } - // @step: should we save the terraform state + // @step: should we save the terraform state? - when the backend being used it not kubernetes, we need to + // save the state, as we process this later saveState := c.HasBackendTemplate() if state.provider != nil && state.provider.HasBackendTemplate() { saveState = true @@ -874,7 +877,8 @@ func (c *Controller) ensureTerraformPlan(configuration *terraformv1alpha1.Config } } -// ensureTerraformPlanSecret is responsible for ensuring the plan ran successfully +// ensureTerraformPlanSecret is responsible for retrieving the terraform plan secret, which is produced during the plan +// stage func (c *Controller) ensureTerraformPlanSecret(configuration *terraformv1alpha1.Configuration, state *state) controller.EnsureFunc { cond := controller.ConditionMgr(configuration, corev1alpha1.ConditionReady, c.recorder) @@ -883,6 +887,7 @@ func (c *Controller) ensureTerraformPlanSecret(configuration *terraformv1alpha1. secret.Name = configuration.GetTerraformPlanJSONSecretName() secret.Namespace = c.ControllerNamespace + // @step: check the secret exists found, err := kubernetes.GetIfExists(ctx, c.cc, secret) if err != nil { cond.Failed(err, "Failed to get terraform plan secret (%s/%s)", c.ControllerNamespace, secret.Name) @@ -894,7 +899,10 @@ func (c *Controller) ensureTerraformPlanSecret(configuration *terraformv1alpha1. return reconcile.Result{}, controller.ErrIgnore } + + // @step: store the terraform plan in the state state.tfplan = secret + return reconcile.Result{}, nil } } @@ -999,8 +1007,7 @@ func (c *Controller) ensurePolicyStatus(configuration *terraformv1alpha1.Configu key := "results_json.json" return func(ctx context.Context) (reconcile.Result, error) { - switch { - case state.checkovConstraint == nil: + if state.checkovConstraint == nil { cond.Success("Security policy is not configured") return reconcile.Result{}, nil @@ -1095,12 +1102,14 @@ func (c *Controller) ensureDriftDetection(configuration *terraformv1alpha1.Confi cond := controller.ConditionMgr(configuration, corev1alpha1.ConditionReady, c.recorder) return func(ctx context.Context) (reconcile.Result, error) { - if configuration.Spec.EnableDriftDetection && - configuration.GetAnnotations()[terraformv1alpha1.DriftAnnotation] != configuration.Status.DriftTimestamp { - // @note: everytime we run a drift we update the timestamp on the status, this is used to ensure we don't - // try and rerun the drift. We should remove the annotation from the configuration but that has issues as it - // updates the resourceVersion which make updating the status conflict. - configuration.Status.DriftTimestamp = configuration.GetAnnotations()[terraformv1alpha1.DriftAnnotation] + + if configuration.Spec.EnableDriftDetection { + if configuration.GetAnnotations()[terraformv1alpha1.DriftAnnotation] != configuration.Status.DriftTimestamp { + // @note: everytime we run a drift we update the timestamp on the status, this is used to ensure we don't + // try and rerun the drift. We should remove the annotation from the configuration but that has issues as it + // updates the resourceVersion which make updating the status conflict. + configuration.Status.DriftTimestamp = configuration.GetAnnotations()[terraformv1alpha1.DriftAnnotation] + } } tfplan, err := terraform.DecodePlan(state.tfplan.Data[terraformv1alpha1.TerraformPlanJSONSecretKey]) @@ -1125,18 +1134,27 @@ func (c *Controller) ensureTerraformApply(configuration *terraformv1alpha1.Confi readyCond := controller.ConditionMgr(configuration, corev1alpha1.ConditionReady, c.recorder) return func(ctx context.Context) (reconcile.Result, error) { - _, stateExists, err := kubernetes.GetSecretIfExists(ctx, c.cc, c.ControllerNamespace, configuration.GetTerraformStateSecretName()) + // @step: retrieve the terraform state secret, if it exists + _, found, err := kubernetes.GetSecretIfExists(ctx, c.cc, c.ControllerNamespace, configuration.GetTerraformStateSecretName()) if err != nil { cond.Failed(err, "Failed to get terraform state secret") + return reconcile.Result{}, err } switch { - case !state.hasDrift && stateExists: - // There is nothing to apply, we can skip it. + case !state.hasDrift && found: + log.WithFields(log.Fields{ + "namespace": configuration.Namespace, + "name": configuration.Name, + }).Info("no drift detected, skipping terraform apply") + + // @step: there is nothing to apply, we can skip it. configuration.Status.ResourceStatus = terraformv1alpha1.ResourcesInSync cond.Success("Nothing to apply") + return reconcile.Result{}, nil + case configuration.NeedsApproval() && !configuration.Spec.EnableAutoApproval: cond.ActionRequired("Waiting for terraform apply annotation to be set to true") // update the ready condition to reflect the new state @@ -1147,6 +1165,7 @@ func (c *Controller) ensureTerraformApply(configuration *terraformv1alpha1.Confi return reconcile.Result{}, controller.ErrIgnore } + // @step: decode the terraform plan tfplan, err := terraform.DecodePlan(state.tfplan.Data[terraformv1alpha1.TerraformPlanJSONSecretKey]) if err != nil { cond.Failed(err, "Failed to decode the terraform plan") diff --git a/pkg/controller/configuration/reconcile_test.go b/pkg/controller/configuration/reconcile_test.go index 64fabe94e..1b4515091 100644 --- a/pkg/controller/configuration/reconcile_test.go +++ b/pkg/controller/configuration/reconcile_test.go @@ -757,7 +757,7 @@ var _ = Describe("Configuration Controller", func() { verifyPolicyArguments := []string{ "--comment=Evaluating Against Security Policy", - "--command=/usr/local/bin/checkov --config /run/checkov/checkov.yaml --framework terraform_plan -f /run/plan.json --soft-fail -o json -o cli --output-file-path /run --repo-root-for-plan-enrichment /data --download-external-modules true >/dev/null", + "--command=/usr/local/bin/checkov --config /run/checkov/checkov.yaml --framework terraform_plan -f /run/tfplan.json --soft-fail -o json -o cli --output-file-path /run --repo-root-for-plan-enrichment /data --download-external-modules true >/dev/null", "--command=/bin/cat /run/results_cli.txt", "--namespace=$(KUBE_NAMESPACE)", "--upload=$(POLICY_REPORT_NAME)=/run/results_json.json", @@ -1855,8 +1855,9 @@ terraform { expected := []string{ "--comment=Executing Terraform", - "--command=/bin/terraform plan --var-file variables.tfvars.json -out=/run/plan.out -lock=false -no-color", - "--command=/bin/terraform show -json /run/plan.out > /run/plan.json", + "--command=/bin/terraform plan --var-file variables.tfvars.json -out=/run/plan.out -lock=false -no-color -input=false", + "--command=/bin/terraform show -json /run/plan.out > /run/tfplan.json", + "--command=/bin/cp /run/tfplan.json /run/plan.json", "--command=/bin/gzip /run/plan.json", "--command=/bin/mv /run/plan.json.gz /run/plan.json", "--namespace=$(KUBE_NAMESPACE)", @@ -2459,7 +2460,7 @@ terraform { expected := []string{ "--comment=Evaluating Against Security Policy", - "--command=/usr/local/bin/checkov --config /run/checkov/config.yaml --framework terraform_plan -f /run/plan.json --soft-fail -o json -o cli --output-file-path /run --repo-root-for-plan-enrichment /data --download-external-modules true >/dev/null", + "--command=/usr/local/bin/checkov --config /run/checkov/config.yaml --framework terraform_plan -f /run/tfplan.json --soft-fail -o json -o cli --output-file-path /run --repo-root-for-plan-enrichment /data --download-external-modules true >/dev/null", "--command=/bin/cat /run/results_cli.txt", "--namespace=$(KUBE_NAMESPACE)", "--upload=$(POLICY_REPORT_NAME)=/run/results_json.json", @@ -2665,7 +2666,7 @@ terraform { Expect(job.Spec.Template.Spec.Containers[1].Command).To(Equal([]string{"/run/bin/step"})) Expect(job.Spec.Template.Spec.Containers[1].Args).To(Equal([]string{ "--comment=Evaluating Against Security Policy", - "--command=/usr/local/bin/checkov --config /run/checkov/checkov.yaml --framework terraform_plan -f /run/plan.json --soft-fail -o json -o cli --output-file-path /run --repo-root-for-plan-enrichment /data --download-external-modules true >/dev/null", + "--command=/usr/local/bin/checkov --config /run/checkov/checkov.yaml --framework terraform_plan -f /run/tfplan.json --soft-fail -o json -o cli --output-file-path /run --repo-root-for-plan-enrichment /data --download-external-modules true >/dev/null", "--command=/bin/cat /run/results_cli.txt", "--namespace=$(KUBE_NAMESPACE)", "--upload=$(POLICY_REPORT_NAME)=/run/results_json.json", @@ -3137,7 +3138,7 @@ terraform { expected := []string{ "--comment=Executing Terraform", - "--command=/bin/terraform apply -lock=false -no-color /run/plan.out", + "--command=/bin/terraform apply --var-file variables.tfvars.json -lock=false -no-color -input=false -auto-approve", "--on-error=/run/steps/terraform.failed", "--on-success=/run/steps/terraform.complete", } diff --git a/pkg/utils/jobs/jobs.go b/pkg/utils/jobs/jobs.go index f30083c6b..dbb9f9646 100644 --- a/pkg/utils/jobs/jobs.go +++ b/pkg/utils/jobs/jobs.go @@ -240,9 +240,9 @@ func (r *Render) createTerraformFromTemplate(options Options, stage string) (*ba "Infracosts": options.InfracostsSecret, "InfracostsReport": r.configuration.GetTerraformCostSecretName(), "PolicyReport": r.configuration.GetTerraformPolicySecretName(), - "TerraformState": r.configuration.GetTerraformStateSecretName(), "TerraformPlanJSON": r.configuration.GetTerraformPlanJSONSecretName(), "TerraformPlanOut": r.configuration.GetTerraformPlanOutSecretName(), + "TerraformState": r.configuration.GetTerraformStateSecretName(), }, } diff --git a/test/e2e/check-suite.sh b/test/e2e/check-suite.sh index 82011a0b5..5037c8496 100755 --- a/test/e2e/check-suite.sh +++ b/test/e2e/check-suite.sh @@ -68,8 +68,9 @@ run_bats() { APP_NAMESPACE=${APP_NAMESPACE} \ BUCKET=${BUCKET} \ CLOUD=${CLOUD} \ - RESOURCE_NAME=bucket-${CLOUD:-"test"} \ + INFRACOST_API_KEY=${INFRACOST_API_KEY} \ NAMESPACE="terraform-system" \ + RESOURCE_NAME=bucket-${CLOUD:-"test"} \ USE_CHART=${USE_CHART} \ VERSION=${VERSION} \ bats ${BATS_OPTIONS} ${@} || exit 1 diff --git a/test/e2e/integration/custom-state-backend.bats b/test/e2e/integration/custom-state-backend.bats index e6b303d15..7c058a921 100644 --- a/test/e2e/integration/custom-state-backend.bats +++ b/test/e2e/integration/custom-state-backend.bats @@ -34,6 +34,13 @@ teardown() { [[ "$status" -eq 0 ]] } +@test "We should be able to clear the terraform-system namespace" { + runit "kubectl -n terraform-system delete jobs --all" + [[ "$status" -eq 0 ]] + runit "kubectl -n terraform-system delete pods --all" + [[ "$status" -eq 0 ]] +} + @test "We should be able to create a custom backend configuration secret" { runit "kubectl -n terraform-system delete secret terraform-backend-config || true" [[ "$status" -eq 0 ]] @@ -188,7 +195,7 @@ EOF } @test "We should have a application secret in the configuration namespace" { - runit "kubectl -n ${APP_NAMESPACE} get secret custom-secret" + retry 10 "kubectl -n ${APP_NAMESPACE} get secret custom-secret" [[ "$status" -eq 0 ]] } diff --git a/test/e2e/integration/destroy.bats b/test/e2e/integration/destroy.bats index 51d6eaa04..23c78b1ee 100644 --- a/test/e2e/integration/destroy.bats +++ b/test/e2e/integration/destroy.bats @@ -86,3 +86,13 @@ teardown() { runit "kubectl -n ${NAMESPACE} get secret policy-${UUID} 2>&1" "grep -q NotFound" [[ "$status" -eq 0 ]] } + +@test "We should no longer have secrets related to the terraform plan" { + UUID=$(cat ${BATS_TMPDIR}/resource.uuid) + [[ "$status" -eq 0 ]] + + runit "kubectl -n ${NAMESPACE} get secret tfplan-out-${UUID} 2>&1" "grep -q NotFound" + [[ "$status" -eq 0 ]] + runit "kubectl -n ${NAMESPACE} get secret tfplan-json-${UUID} 2>&1" "grep -q NotFound" + [[ "$status" -eq 0 ]] +} diff --git a/test/e2e/integration/infracost.bats b/test/e2e/integration/infracost.bats index 904c4b626..10f216ea3 100644 --- a/test/e2e/integration/infracost.bats +++ b/test/e2e/integration/infracost.bats @@ -26,7 +26,7 @@ teardown() { } @test "We should skip infracost when not running on aws cloud" { - [[ -z ${INFRACOST_API_KEY} ]] && touch ${BATS_PARENT_TMPNAME}.skip + [[ -z "${INFRACOST_API_KEY}" ]] && touch ${BATS_PARENT_TMPNAME}.skip [[ "${CLOUD}" == "aws" ]] || touch ${BATS_PARENT_TMPNAME}.skip } diff --git a/test/e2e/integration/plan.bats b/test/e2e/integration/plan.bats index 71b0f70b5..d5294519a 100644 --- a/test/e2e/integration/plan.bats +++ b/test/e2e/integration/plan.bats @@ -64,6 +64,26 @@ teardown() { [[ "$status" -eq 0 ]] } +@test "We should have a secret containing the terraform plan" { + UUID=$(kubectl -n ${APP_NAMESPACE} get configuration ${RESOURCE_NAME} -o json | jq -r '.metadata.uid') + [[ "$status" -eq 0 ]] + + runit "kubectl -n ${NAMESPACE} get secret tfplan-out-${UUID}" + [[ "$status" -eq 0 ]] + runit "kubectl -n ${NAMESPACE} get secret tfplan-out-${UUID} -o json" "jq -r '.data[\"plan.out\"]'" + [[ "$status" -eq 0 ]] +} + +@test "We should have a secret containing the terraform plan in json" { + UUID=$(kubectl -n ${APP_NAMESPACE} get configuration ${RESOURCE_NAME} -o json | jq -r '.metadata.uid') + [[ "$status" -eq 0 ]] + + runit "kubectl -n ${NAMESPACE} get secret tfplan-json-${UUID}" + [[ "$status" -eq 0 ]] + runit "kubectl -n ${NAMESPACE} get secret tfplan-json-${UUID} -o json" "jq -r '.data[\"plan.json\"]'" + [[ "$status" -eq 0 ]] +} + @test "We should have a configuration in pending approval" { expected="Waiting for terraform apply annotation to be set to true" @@ -87,6 +107,8 @@ teardown() { } @test "We should have a secret in the terraform namespace containing the report" { + [[ "${INFRACOST_API_KEY}" == "" ]] && skip "INFRACOST_API_KEY is not set" + UUID=$(kubectl -n ${APP_NAMESPACE} get configuration ${RESOURCE_NAME} -o json | jq -r '.metadata.uid') [[ "$status" -eq 0 ]] @@ -97,11 +119,15 @@ teardown() { } @test "We should see the cost integration is enabled" { + [[ "${INFRACOST_API_KEY}" == "" ]] && skip "INFRACOST_API_KEY is not set" + runit "kubectl -n ${APP_NAMESPACE} get configuration ${RESOURCE_NAME} -o json" "jq -r '.status.costs.enabled' | grep -q true" [[ "$status" -eq 0 ]] } @test "We should see the cost associated to the configuration" { + [[ "${INFRACOST_API_KEY}" == "" ]] && skip "INFRACOST_API_KEY is not set" + runit "kubectl -n ${APP_NAMESPACE} get configuration ${RESOURCE_NAME} -o json" "jq -r '.status.costs.monthly' | grep -q '\$0'" [[ "$status" -eq 0 ]] runit "kubectl -n ${APP_NAMESPACE} get configuration ${RESOURCE_NAME} -o json" "jq -r '.status.costs.hourly' | grep -q '\$0'" @@ -109,6 +135,8 @@ teardown() { } @test "We should have a copy of the infracost report in the configuration namespace" { + [[ "${INFRACOST_API_KEY}" == "" ]] && skip "INFRACOST_API_KEY is not set" + UUID=$(kubectl -n ${APP_NAMESPACE} get configuration ${RESOURCE_NAME} -o json | jq -r '.metadata.uid') [[ "$status" -eq 0 ]]