diff --git a/.gitleaks.toml b/.gitleaks.toml new file mode 100644 index 0000000000..19919fef03 --- /dev/null +++ b/.gitleaks.toml @@ -0,0 +1,7 @@ +[allowlist] + description = "Global Allowlist" + + paths = [ + '''hack-kms\/''', + ] + diff --git a/bindata/assets/kms/job-pod.yaml b/bindata/assets/kms/job-pod.yaml new file mode 100644 index 0000000000..511f4f9d74 --- /dev/null +++ b/bindata/assets/kms/job-pod.yaml @@ -0,0 +1,47 @@ +apiVersion: v1 +kind: Pod +metadata: + name: aws-kms-setup + namespace: kube-system + labels: + name: aws-kms-setup +spec: + containers: + - name: aws-kms-setup + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-creds + key: aws_access_key_id + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-creds + key: aws_secret_access_key + - name: AWS_DEFAULT_REGION + value: "{{.AWSRegion}}" + - name: OPENSHIFT_INFRA_NAME + value: "{{.OpenShiftInfraId}}" + image: public.ecr.aws/aws-cli/aws-cli + command: + - "bash" + args: + - "/var/src/kms/setup-kms.sh" + # command: + # - "sleep" + # args: + # - "600" + resources: + limits: + memory: "64Mi" + cpu: "300m" + volumeMounts: + - name: kms-script + mountPath: /var/src/kms + volumes: + - name: kms-script + configMap: + name: kms-script + restartPolicy: OnFailure + serviceAccountName: kms-setup-sa diff --git a/bindata/assets/kms/job-sa-role.yaml b/bindata/assets/kms/job-sa-role.yaml new file mode 100644 index 0000000000..e33926b719 --- /dev/null +++ b/bindata/assets/kms/job-sa-role.yaml @@ -0,0 +1,9 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: kms-setup-role + namespace: kube-system +rules: +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["create", "update"] diff --git a/bindata/assets/kms/job-sa-rolebinding.yaml b/bindata/assets/kms/job-sa-rolebinding.yaml new file mode 100644 index 0000000000..a1d33de920 --- /dev/null +++ b/bindata/assets/kms/job-sa-rolebinding.yaml @@ -0,0 +1,13 @@ +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: kms-setup-rbac + namespace: kube-system +subjects: +- kind: ServiceAccount + name: kms-setup-sa + namespace: kube-system +roleRef: + kind: Role + name: kms-setup-role + apiGroup: rbac.authorization.k8s.io diff --git a/bindata/assets/kms/job-sa.yaml b/bindata/assets/kms/job-sa.yaml new file mode 100644 index 0000000000..02bbfe2999 --- /dev/null +++ b/bindata/assets/kms/job-sa.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kms-setup-sa + namespace: kube-system
diff --git a/bindata/assets/kms/job-sh-cm.yaml b/bindata/assets/kms/job-sh-cm.yaml
new file mode 100644
index 0000000000..90f41e1eaa
--- /dev/null
+++ b/bindata/assets/kms/job-sh-cm.yaml
@@ -0,0 +1,128 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: kms-script
+  namespace: kube-system
+data:
+  # Key policy template. The <AWS_ACCOUNT_ID> and <OPENSHIFT_INFRA_NAME>
+  # placeholders are substituted by setup-kms.sh before the policy is applied.
+  policy.json: |-
+    {
+      "Id": "key-policy-01",
+      "Statement": [
+        {
+          "Sid": "Enable IAM User Permissions",
+          "Effect": "Allow",
+          "Principal": {
+            "AWS": "arn:aws:iam::<AWS_ACCOUNT_ID>:root"
+          },
+          "Action": "kms:*",
+          "Resource": "*"
+        },
+        {
+          "Sid": "Allow use of the key",
+          "Effect": "Allow",
+          "Principal": {
+            "AWS": "arn:aws:iam::<AWS_ACCOUNT_ID>:role/<OPENSHIFT_INFRA_NAME>-master-role"
+          },
+          "Action": [
+            "kms:Encrypt",
+            "kms:Decrypt",
+            "kms:ReEncrypt*",
+            "kms:GenerateDataKey*",
+            "kms:DescribeKey"
+          ],
+          "Resource": "*"
+        },
+        {
+          "Sid": "Allow attachment of persistent resources",
+          "Effect": "Allow",
+          "Principal": {
+            "AWS": "arn:aws:iam::<AWS_ACCOUNT_ID>:role/<OPENSHIFT_INFRA_NAME>-master-role"
+          },
+          "Action": [
+            "kms:CreateGrant",
+            "kms:ListGrants",
+            "kms:RevokeGrant"
+          ],
+          "Resource": "*",
+          "Condition": {
+            "Bool": {
+              "kms:GrantIsForAWSResource": "true"
+            }
+          }
+        }
+      ]
+    }
+  # Creates the KMS key, its alias and key policy, then publishes the key ARN in
+  # the kube-system/kms-key ConfigMap under the "aws_kms_arn" data key.
+  setup-kms.sh: |-
+    #!/bin/bash
+    AWS_REGION=${AWS_DEFAULT_REGION}
+    KMS_KEY_NAME=${OPENSHIFT_INFRA_NAME}-master-kek
+
+    # create an AWS KMS key
+    KMS_KEY_ID=$(aws kms create-key \
+      --region ${AWS_REGION} \
+      --query KeyMetadata.KeyId \
+      --output text \
+      --description "used with k8s encryption provider" \
+      --key-usage ENCRYPT_DECRYPT)
+
+    aws kms describe-key \
+      --region ${AWS_REGION} \
+      --key-id "${KMS_KEY_ID}"
+
+    # shows the KMS key name on the AWS console, and so
+    # key is search-able by name specified
+    aws kms create-alias \
+      --region ${AWS_REGION} \
+      --alias-name "alias/${KMS_KEY_NAME}" \
+      --target-key-id "${KMS_KEY_ID}"
+
+    aws iam list-role-policies \
+      --role-name "${OPENSHIFT_INFRA_NAME}"-master-role \
+      --region ${AWS_REGION}
+
+    AWS_ACCOUNT_ID=$(aws sts get-caller-identity \
+      --region ${AWS_REGION} \
+      --query "Account" \
+      --output text)
+    BASE_DIR=$(dirname $0)
+
+    # render the key policy: fill in the account id and infra name placeholders
+    sed -e "s/<AWS_ACCOUNT_ID>/${AWS_ACCOUNT_ID}/g" -e "s/<OPENSHIFT_INFRA_NAME>/${OPENSHIFT_INFRA_NAME}/g" "${BASE_DIR}/policy.json" > /tmp/policy-rendered.json
+
+    aws kms put-key-policy \
+      --region ${AWS_REGION} \
+      --key-id "${KMS_KEY_ID}" \
+      --policy-name default \
+      --policy file:///tmp/policy-rendered.json
+
+    # prints ARN of the KMS key so it can be
+    # later used with kms plugin
+    KMS_KEY_ARN=$(aws kms describe-key \
+      --region ${AWS_REGION} \
+      --key-id "${KMS_KEY_ID}" \
+      --query KeyMetadata.Arn \
+      --output text)
+    echo ${KMS_KEY_ARN}
+
+    echo '{"kind": "ConfigMap", "metadata": {"name": "kms-key"}, "data": {"aws_kms_arn": "'"${KMS_KEY_ARN}"'"}}' > /tmp/kms-cm.json
+
+    SA=/var/run/secrets/kubernetes.io/serviceaccount
+    for i in {1..10};
+    do
+      echo "writing configmap, attempt ${i}"
+      if curl -sS \
+        --cacert ${SA}/ca.crt \
+        -H "Authorization: Bearer $(cat ${SA}/token)" \
+        -H "Content-Type: application/json" \
+        -X POST \
+        --data-binary "@/tmp/kms-cm.json" \
+        https://$KUBERNETES_SERVICE_HOST/api/v1/namespaces/kube-system/configmaps | grep '"kind": "ConfigMap"';
+      then
+        break
+      fi
+      sleep 5;
+    done
diff --git a/bindata/assets/kube-apiserver/pod.yaml b/bindata/assets/kube-apiserver/pod.yaml index 20a23cf1b8..d5aafb712d 100644 --- a/bindata/assets/kube-apiserver/pod.yaml +++ b/bindata/assets/kube-apiserver/pod.yaml @@ -98,6 +98,8 @@ spec: name: cert-dir - mountPath: /var/log/kube-apiserver name:
audit-dir + - mountPath: /var/kms-plugin + name: var-kms-plugin livenessProbe: httpGet: scheme: HTTPS @@ -262,6 +264,40 @@ spec: requests: memory: 50Mi cpu: 10m + - image: {{.KMSPluginImage}} + name: cloud-kms-plugin + args: + - --debug + - --health-port=:18081 + - --key={{.AWSKMSKeyARN}} + - --listen=/var/kms-plugin/socket.sock + - --region={{.AWSRegion}} + ports: + - containerPort: 18081 + protocol: TCP + name: check-kms + livenessProbe: + httpGet: + scheme: HTTP + port: 18081 + path: livez + initialDelaySeconds: 15 + timeoutSeconds: 10 + periodSeconds: 60 + readinessProbe: + httpGet: + scheme: HTTP + port: 18081 + path: healthz + initialDelaySeconds: 10 + timeoutSeconds: 10 + volumeMounts: + - mountPath: /var/kms-plugin + name: var-kms-plugin + resources: + requests: + memory: 256Mi + cpu: 200m terminationGracePeriodSeconds: {{.GracefulTerminationDuration}} hostNetwork: true priorityClassName: system-node-critical @@ -277,3 +313,7 @@ spec: - hostPath: path: /var/log/kube-apiserver name: audit-dir + - hostPath: + path: /var/kms-plugin + type: DirectoryOrCreate + name: var-kms-plugin diff --git a/hack-kms/desired_config.yaml b/hack-kms/desired_config.yaml new file mode 100644 index 0000000000..2644e50e69 --- /dev/null +++ b/hack-kms/desired_config.yaml @@ -0,0 +1,21 @@ +apiVersion: apiserver.config.k8s.io/v1 +kind: EncryptionConfiguration +resources: + - resources: + - configmaps + providers: + - kms: + name: gcp-kms-encryption + endpoint: unix:///var/kms-plugin/socket.sock + cachesize: 1000 + timeout: 5s + - identity: {} + - resources: + - secrets + providers: + - kms: + name: gcp-kms-encryption + endpoint: unix:///var/kms-plugin/socket.sock + cachesize: 1000 + timeout: 5s + - identity: {} diff --git a/hack-kms/example_decrypted_config.json b/hack-kms/example_decrypted_config.json new file mode 100644 index 0000000000..127df809b7 --- /dev/null +++ b/hack-kms/example_decrypted_config.json @@ -0,0 +1,46 @@ +{ + "kind": "EncryptionConfiguration", + "apiVersion": 
"apiserver.config.k8s.io/v1", + "resources": [ + { + "resources": [ + "configmaps" + ], + "providers": [ + { + "identity": {} + }, + { + "aescbc": { + "keys": [ + { + "name": "1", + "secret": "fttcMWlNpNa7BIP6/P2kMEPJoTW72A5HdCEyOBX92lc=" + } + ] + } + } + ] + }, + { + "resources": [ + "secrets" + ], + "providers": [ + { + "identity": {} + }, + { + "aescbc": { + "keys": [ + { + "name": "1", + "secret": "fttcMWlNpNa7BIP6/P2kMEPJoTW72A5HdCEyOBX92lc=" + } + ] + } + } + ] + } + ] +} \ No newline at end of file diff --git a/hack-kms/example_encrypted_config.json b/hack-kms/example_encrypted_config.json new file mode 100644 index 0000000000..b75126259b --- /dev/null +++ b/hack-kms/example_encrypted_config.json @@ -0,0 +1,46 @@ +{ + "kind": "EncryptionConfiguration", + "apiVersion": "apiserver.config.k8s.io/v1", + "resources": [ + { + "resources": [ + "configmaps" + ], + "providers": [ + { + "aescbc": { + "keys": [ + { + "name": "1", + "secret": "fttcMWlNpNa7BIP6/P2kMEPJoTW72A5HdCEyOBX92lc=" + } + ] + } + }, + { + "identity": {} + } + ] + }, + { + "resources": [ + "secrets" + ], + "providers": [ + { + "aescbc": { + "keys": [ + { + "name": "1", + "secret": "fttcMWlNpNa7BIP6/P2kMEPJoTW72A5HdCEyOBX92lc=" + } + ] + } + }, + { + "identity": {} + } + ] + } + ] +} \ No newline at end of file diff --git a/hack-kms/gcloud-cmd.sh b/hack-kms/gcloud-cmd.sh new file mode 100644 index 0000000000..7257598323 --- /dev/null +++ b/hack-kms/gcloud-cmd.sh @@ -0,0 +1,25 @@ +#!/usr/bin/bash + +PROJECT=$(oc get infrastructure cluster -o json | jq -r '.status.platformStatus.gcp.projectID') +REGION=$(oc get infrastructure cluster -o json | jq -r '.status.platformStatus.gcp.region') +INFRA_ID=$(oc get infrastructure cluster -o json | jq -r '.status.infrastructureName') + +KEYRING_NAME="${INFRA_ID}-kms" +KEY_NAME="kube-encryption" + +gcloud kms keyrings create "${KEYRING_NAME}" --project "${PROJECT}" --location "${REGION}" +gcloud kms keys create "${KEY_NAME}" --project "${PROJECT}" --location 
"${REGION}" --keyring "${INFRA_ID}-kms" --purpose encryption + +MASTER_NODE_NAME=$(oc get nodes -l node-role.kubernetes.io/control-plane -o json | jq -r '.items[0].metadata.name') +MASTER_NODE_ZONE=$(oc get node "${MASTER_NODE_NAME}" -o json | jq -r '.metadata.labels["topology.kubernetes.io/zone"]') + +SERVICE_ACCOUNT=$(gcloud compute instances describe "${MASTER_NODE_NAME}" --zone "${MASTER_NODE_ZONE}" --project "${PROJECT}" | yq '.serviceAccounts[0].email') + +gcloud kms keys add-iam-policy-binding "${KEY_NAME}" \ + --project "${PROJECT}" \ + --location "${REGION}" \ + --keyring "${KEYRING_NAME}" \ + --member "serviceAccount:${SERVICE_ACCOUNT}" \ + --role "roles/cloudkms.cryptoKeyEncrypterDecrypter" + +echo "projects/${PROJECT}/locations/${REGION}/keyRings/${KEYRING_NAME}/cryptoKeys/${KEY_NAME}" diff --git a/hack-kms/operator-config.yaml b/hack-kms/operator-config.yaml new file mode 100644 index 0000000000..ee6bfeb134 --- /dev/null +++ b/hack-kms/operator-config.yaml @@ -0,0 +1,3 @@ +apiVersion: operator.openshift.io/v1 +kind: GenericOperatorConfig + diff --git a/hack-kms/run-operator-locally.sh b/hack-kms/run-operator-locally.sh new file mode 100644 index 0000000000..8e2630e190 --- /dev/null +++ b/hack-kms/run-operator-locally.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Run this script from root of repository +# Usage: bash hack-kms/run-operator-locally.sh + +# disable CVO on cluster +oc scale -n openshift-cluster-version deploy cluster-version-operator --replicas 0 + +# scale down operator running on cluster +oc scale -n openshift-kube-apiserver-operator deploy kube-apiserver-operator --replicas=0 + +# enable encryption on the cluster +# this looks like will enable aesgcm encryption for etcd +# but due to our patched implementation +# this will use KMS instead of AES-GCM +# +# NOTE: This is an important step, without this step only kms plugin +# will be added as container to kube-apiserver pod but never be used!! 
+oc patch --type=merge apiserver/cluster --type=merge -p='{"spec":{"encryption":{"type":"aesgcm"}}}' + +OPERATOR_ENVS=$(oc get deploy kube-apiserver-operator -n openshift-kube-apiserver-operator -o json | jq '.spec.template.spec.containers[0].env') + +export IMAGE=$(echo "${OPERATOR_ENVS[@]}" | jq '.[] | select(.name=="IMAGE") | .value' -r) +export OPERATOR_IMAGE=$(echo "${OPERATOR_ENVS[@]}" | jq '.[] | select(.name=="OPERATOR_IMAGE") | .value' -r) +export OPERAND_IMAGE_VERSION=$(echo "${OPERATOR_ENVS[@]}" | jq '.[] | select(.name=="OPERAND_IMAGE_VERSION") | .value' -r) +export OPERATOR_IMAGE_VERSION=$(echo "${OPERATOR_ENVS[@]}" | jq '.[] | select(.name=="OPERATOR_IMAGE_VERSION") | .value' -r) +export POD_NAME=kube-apiserver-operator + +KUBECONFIG=$HOME/.kube/config + +make build +./cluster-kube-apiserver-operator operator --config=./hack-kms/operator-config.yaml --kubeconfig=$KUBECONFIG --namespace openshift-kube-apiserver-operator diff --git a/pkg/operator/certrotationcontroller/certrotationcontroller.go b/pkg/operator/certrotationcontroller/certrotationcontroller.go index 34a9c0daac..a7ae6abccd 100644 --- a/pkg/operator/certrotationcontroller/certrotationcontroller.go +++ b/pkg/operator/certrotationcontroller/certrotationcontroller.go @@ -122,10 +122,6 @@ func newCertRotationController( rotationDay = day klog.Warningf("!!! UNSUPPORTED VALUE SET !!!") klog.Warningf("Certificate rotation base set to %q", rotationDay) - } else { - // for the development cycle, make the rotation 60 times faster (every twelve hours or so). 
- // This must be reverted before we ship - rotationDay = rotationDay / 60 } certRotator := certrotation.NewCertRotationController( diff --git a/pkg/operator/kms.go b/pkg/operator/kms.go new file mode 100644 index 0000000000..a8a43b2ab4 --- /dev/null +++ b/pkg/operator/kms.go @@ -0,0 +1,76 @@ +package operator + +import ( + "bytes" + "context" + "fmt" + "text/template" + "time" + + "github.com/openshift/cluster-kube-apiserver-operator/bindata" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +type KMSAssetClass struct { + OpenShiftInfraName string + AWSRegion string +} + +func NewKMSAssetClass(openshiftInfraName, awsRegion string) *KMSAssetClass { + return &KMSAssetClass{ + OpenShiftInfraName: openshiftInfraName, + AWSRegion: awsRegion, + } +} + +func (k *KMSAssetClass) Asset(name string) ([]byte, error) { + b, err := bindata.Asset(name) + if err != nil { + return nil, err + } + + // templated values for AWS region and OpenShift infrastructureName + if name == "assets/kms/job-pod.yaml" { + templatedVals := struct { + AWSRegion string + OpenShiftInfraId string + }{ + AWSRegion: k.AWSRegion, + OpenShiftInfraId: k.OpenShiftInfraName, + } + + tmpl, err := template.New("kms-job").Parse(string(b)) + if err != nil { + return nil, err + } + + var buf bytes.Buffer + err = tmpl.Execute(&buf, templatedVals) + if err != nil { + return nil, err + } + return buf.Bytes(), nil + } + return b, nil +} + +func AWSKMSKeyARNGetter(kubeClient *kubernetes.Clientset) func() string { + return func() string { + var err error = fmt.Errorf("to begin with") + var cm *corev1.ConfigMap + var keyExists bool + var AWSKMSKeyARN string + + for err != nil && !keyExists { + cm, err = kubeClient.CoreV1().ConfigMaps("kube-system").Get(context.TODO(), "kms-key", metav1.GetOptions{}) + AWSKMSKeyARN, keyExists = cm.Data["aws_kms_arn"] + + // retry every 5 seconds + time.Sleep(5 * time.Second) + } + + return AWSKMSKeyARN + } +} diff --git 
a/pkg/operator/kmsencstatecontroller/controller.go b/pkg/operator/kmsencstatecontroller/controller.go new file mode 100644 index 0000000000..2fdd9c1297 --- /dev/null +++ b/pkg/operator/kmsencstatecontroller/controller.go @@ -0,0 +1,307 @@ +package kmsencstatecontroller + +// adapted from: https://github.com/openshift/cluster-kube-apiserver-operator/blob/release-4.13/vendor/github.com/openshift/library-go/pkg/operator/encryption/controllers/state_controller.go + +import ( + "context" + "fmt" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + apiserverconfigv1 "k8s.io/apiserver/pkg/apis/config/v1" + corev1client "k8s.io/client-go/kubernetes/typed/core/v1" + "k8s.io/client-go/util/workqueue" + + operatorv1 "github.com/openshift/api/operator/v1" + + configv1informers "github.com/openshift/client-go/config/informers/externalversions/config/v1" + "github.com/openshift/library-go/pkg/controller/factory" + encryptioncontoller "github.com/openshift/library-go/pkg/operator/encryption/controllers" + "github.com/openshift/library-go/pkg/operator/encryption/encryptionconfig" + "github.com/openshift/library-go/pkg/operator/encryption/state" + "github.com/openshift/library-go/pkg/operator/encryption/statemachine" + "github.com/openshift/library-go/pkg/operator/events" + "github.com/openshift/library-go/pkg/operator/management" + "github.com/openshift/library-go/pkg/operator/resource/resourceapply" + operatorv1helpers "github.com/openshift/library-go/pkg/operator/v1helpers" +) + +const stateWorkKey = "key" + +// preconditionsFulfilled a function that indicates whether all prerequisites are met and we can Sync. +type preconditionsFulfilled func() (bool, error) + +// stateController is responsible for creating a single secret in +// openshift-config-managed with the name destName. This single secret +// contains the complete EncryptionConfiguration that is consumed by the API +// server that is performing the encryption. 
Thus this secret represents +// the current state of all resources in encryptedGRs. Every encryption key +// that matches encryptionSecretSelector is included in this final secret. +// This secret is synced into targetNamespace at a static location. This +// indirection allows the cluster to recover from the deletion of targetNamespace. +// See getResourceConfigs for details on how the raw state of all keys +// is converted into a single encryption config. The logic for determining +// the current write key is of special interest. +type stateController struct { + component string + name string + encryptionSecretSelector metav1.ListOptions + + operatorClient operatorv1helpers.OperatorClient + secretClient corev1client.SecretsGetter + deployer statemachine.Deployer + provider encryptioncontoller.Provider + preconditionsFulfilledFn preconditionsFulfilled +} + +func NewStateController( + component string, + provider encryptioncontoller.Provider, + deployer statemachine.Deployer, + preconditionsFulfilledFn preconditionsFulfilled, + operatorClient operatorv1helpers.OperatorClient, + apiServerConfigInformer configv1informers.APIServerInformer, + kubeInformersForNamespaces operatorv1helpers.KubeInformersForNamespaces, + secretClient corev1client.SecretsGetter, + encryptionSecretSelector metav1.ListOptions, + eventRecorder events.Recorder, +) factory.Controller { + c := &stateController{ + operatorClient: operatorClient, + name: "EncryptionStateController", + + component: component, + + encryptionSecretSelector: encryptionSecretSelector, + secretClient: secretClient, + deployer: deployer, + provider: provider, + preconditionsFulfilledFn: preconditionsFulfilledFn, + } + + return factory.New().ResyncEvery(time.Minute).WithSync(c.sync).WithInformers( + operatorClient.Informer(), + kubeInformersForNamespaces.InformersFor("openshift-config-managed").Core().V1().Secrets().Informer(), + apiServerConfigInformer.Informer(), // do not remove, used by the precondition checker + 
deployer, + ).ToController(c.name, eventRecorder.WithComponentSuffix("encryption-state-controller")) + +} + +func (c *stateController) sync(ctx context.Context, syncCtx factory.SyncContext) (err error) { + degradedCondition := &operatorv1.OperatorCondition{Type: "EncryptionStateControllerDegraded", Status: operatorv1.ConditionFalse} + defer func() { + if degradedCondition == nil { + return + } + if _, _, updateError := operatorv1helpers.UpdateStatus(ctx, c.operatorClient, operatorv1helpers.UpdateConditionFn(*degradedCondition)); updateError != nil { + err = updateError + } + }() + + if ready, err := shouldRunEncryptionController(c.operatorClient, c.preconditionsFulfilledFn, c.provider.ShouldRunEncryptionControllers); err != nil || !ready { + if err != nil { + degradedCondition = nil + } + return err // we will get re-kicked when the operator status updates + } + + configError := c.generateAndApplyCurrentEncryptionConfigSecret(ctx, syncCtx.Queue(), syncCtx.Recorder(), c.provider.EncryptedGRs()) + if configError != nil { + degradedCondition.Status = operatorv1.ConditionTrue + degradedCondition.Reason = "Error" + degradedCondition.Message = configError.Error() + } + return configError +} + +type eventWithReason struct { + reason string + message string +} + +func (c *stateController) generateAndApplyCurrentEncryptionConfigSecret(ctx context.Context, queue workqueue.RateLimitingInterface, recorder events.Recorder, encryptedGRs []schema.GroupResource) error { + currentConfig, desiredEncryptionState, encryptionSecrets, transitioningReason, err := statemachine.GetEncryptionConfigAndState(ctx, c.deployer, c.secretClient, c.encryptionSecretSelector, encryptedGRs) + if err != nil { + return err + } + if len(transitioningReason) > 0 { + queue.AddAfter(stateWorkKey, 2*time.Minute) + return nil + } + + if currentConfig == nil && len(encryptionSecrets) == 0 { + // we depend on the key controller to create the first key to bootstrap encryption. 
+ // Later-on either the config exists or there are keys, even in the case of disabled + // encryption via the apiserver config. + return nil + } + + desiredEncryptionConfig := encryptionconfig.FromEncryptionState(desiredEncryptionState) + + // hack: KMS + // switch between Decryption/Encryption here + + // change desiredEncryptionConfig to use kms instead of aescbc/aesgcm + desiredEncryptionConfig = patchEncryptionConfigForKMS(desiredEncryptionConfig) + // desiredEncryptionConfig = patchEncryptionConfigForDecryptingKMS(desiredEncryptionConfig) + + changed, err := c.applyEncryptionConfigSecret(ctx, desiredEncryptionConfig, recorder) + if err != nil { + return err + } + + if changed { + currentEncryptionConfig, _ := encryptionconfig.ToEncryptionState(currentConfig, encryptionSecrets) + if actionEvents := eventsFromEncryptionConfigChanges(currentEncryptionConfig, desiredEncryptionState); len(actionEvents) > 0 { + for _, event := range actionEvents { + recorder.Eventf(event.reason, event.message) + } + } + } + return nil +} + +func (c *stateController) applyEncryptionConfigSecret(ctx context.Context, encryptionConfig *apiserverconfigv1.EncryptionConfiguration, recorder events.Recorder) (bool, error) { + s, err := encryptionconfig.ToSecret("openshift-config-managed", fmt.Sprintf("%s-%s", encryptionconfig.EncryptionConfSecretName, c.component), encryptionConfig) + if err != nil { + return false, err + } + + _, changed, applyErr := resourceapply.ApplySecret(ctx, c.secretClient, recorder, s) + return changed, applyErr +} + +// eventsFromEncryptionConfigChanges return slice of event reasons with messages corresponding to a difference between current and desired encryption state. 
+func eventsFromEncryptionConfigChanges(current, desired map[schema.GroupResource]state.GroupResourceState) []eventWithReason {
+	var result []eventWithReason
+	// handle removals from current first
+	for currentGroupResource := range current {
+		if _, exists := desired[currentGroupResource]; !exists {
+			result = append(result, eventWithReason{
+				reason:  "EncryptionResourceRemoved",
+				message: fmt.Sprintf("Resource %q was removed from encryption config", currentGroupResource),
+			})
+		}
+	}
+	for desiredGroupResource, desiredGroupResourceState := range desired {
+		currentGroupResource, exists := current[desiredGroupResource]
+		if !exists {
+			keyMessage := "without write key"
+			if desiredGroupResourceState.HasWriteKey() {
+				keyMessage = fmt.Sprintf("with write key %q", desiredGroupResourceState.WriteKey.Key.Name)
+			}
+			result = append(result, eventWithReason{
+				reason:  "EncryptionResourceAdded",
+				message: fmt.Sprintf("Resource %q was added to encryption config %s", desiredGroupResource, keyMessage),
+			})
+			continue // newly added resource: there is no current state to compare against
+		}
+		if !currentGroupResource.HasWriteKey() && desiredGroupResourceState.HasWriteKey() {
+			result = append(result, eventWithReason{
+				reason:  "EncryptionKeyPromoted",
+				message: fmt.Sprintf("Promoting key %q for resource %q to write key", desiredGroupResourceState.WriteKey.Key.Name, desiredGroupResource),
+			})
+		}
+		if currentGroupResource.HasWriteKey() && !desiredGroupResourceState.HasWriteKey() {
+			result = append(result, eventWithReason{
+				reason:  "EncryptionKeyRemoved",
+				message: fmt.Sprintf("Removing key %q for resource %q to write key", currentGroupResource.WriteKey.Key.Name, desiredGroupResource),
+			})
+		}
+		if currentGroupResource.HasWriteKey() && desiredGroupResourceState.HasWriteKey() { // both sides have a write key: report what changed about it
+			if currentGroupResource.WriteKey.ExternalReason != desiredGroupResourceState.WriteKey.ExternalReason {
+				result = append(result, eventWithReason{
+					reason:  "EncryptionWriteKeyTriggeredExternal",
+					message: fmt.Sprintf("Triggered key %q for resource %q because %s", currentGroupResource.WriteKey.Key.Name, desiredGroupResource, desiredGroupResourceState.WriteKey.ExternalReason),
+				})
+			}
+			if currentGroupResource.WriteKey.InternalReason != desiredGroupResourceState.WriteKey.InternalReason {
+				result = append(result, eventWithReason{
+					reason:  "EncryptionWriteKeyTriggeredInternal",
+					message: fmt.Sprintf("Triggered key %q for resource %q because %s", currentGroupResource.WriteKey.Key.Name, desiredGroupResource, desiredGroupResourceState.WriteKey.InternalReason),
+				})
+			}
+			if !state.EqualKeyAndEqualID(&currentGroupResource.WriteKey, &desiredGroupResourceState.WriteKey) {
+				result = append(result, eventWithReason{
+					reason:  "EncryptionWriteKeyChanged",
+					message: fmt.Sprintf("Write key %q for resource %q changed", currentGroupResource.WriteKey.Key.Name, desiredGroupResource),
+				})
+			}
+		}
+		if len(currentGroupResource.ReadKeys) != len(desiredGroupResourceState.ReadKeys) {
+			result = append(result, eventWithReason{
+				reason:  "EncryptionReadKeysChanged",
+				message: fmt.Sprintf("Number of read keys for resource %q changed from %d to %d", desiredGroupResource, len(currentGroupResource.ReadKeys), len(desiredGroupResourceState.ReadKeys)),
+			})
+		}
+	}
+	return result
+}
+
+func shouldRunEncryptionController(operatorClient operatorv1helpers.OperatorClient, preconditionsFulfilledFn preconditionsFulfilled, shouldRunFn func() (bool, error)) (bool, error) {
+	if shouldRun, err := shouldRunFn(); !shouldRun || err != nil {
+		return false, err
+	}
+
+	operatorSpec, _, _, err := operatorClient.GetOperatorState()
+	if err != nil {
+		return false, err
+	}
+
+	if !management.IsOperatorManaged(operatorSpec.ManagementState) {
+		return false, nil // operator is not in a managed state: report "not ready" without an error
+	}
+
+	return preconditionsFulfilledFn() // the precondition check has the final say
+}
+
+// func patchEncryptionConfigForDecryptingKMS(existingConfig *apiserverconfigv1.EncryptionConfiguration) *apiserverconfigv1.EncryptionConfiguration {
+// newConfig := existingConfig.DeepCopy()
+// for i := range newConfig.Resources {
+// kmsProvider :=
apiserverconfigv1.ProviderConfiguration{ +// KMS: &apiserverconfigv1.KMSConfiguration{ +// APIVersion: "v2", +// Name: "cloud-kms-plugin", +// Endpoint: "unix:///var/kms-plugin/socket.sock", +// Timeout: &metav1.Duration{ +// Duration: 5 * time.Second, +// }, +// }, +// } + +// newProviders := []apiserverconfigv1.ProviderConfiguration{} +// remainingProviders := []apiserverconfigv1.ProviderConfiguration{} +// for _, provider := range newConfig.Resources[i].Providers { +// if provider.Identity != nil { +// newProviders = append(newProviders, provider) +// } else { +// remainingProviders = append(remainingProviders, provider) +// } +// } +// newProviders = append(newProviders, kmsProvider) +// newConfig.Resources[i].Providers = append(newProviders, remainingProviders...) +// } +// return newConfig +// } + +func patchEncryptionConfigForKMS(existingConfig *apiserverconfigv1.EncryptionConfiguration) *apiserverconfigv1.EncryptionConfiguration { + newConfig := existingConfig.DeepCopy() + for i := range newConfig.Resources { + kmsProvider := apiserverconfigv1.ProviderConfiguration{ + KMS: &apiserverconfigv1.KMSConfiguration{ + APIVersion: "v2", + Name: fmt.Sprintf("cloud-kms-%03d", i+1), + Endpoint: "unix:///var/kms-plugin/socket.sock", + Timeout: &metav1.Duration{ + Duration: 5 * time.Second, + }, + }, + } + + newProviders := []apiserverconfigv1.ProviderConfiguration{kmsProvider} + newConfig.Resources[i].Providers = append(newProviders, newConfig.Resources[i].Providers...) 
+ } + return newConfig +} diff --git a/pkg/operator/kmsencstatecontroller/preconditions.go b/pkg/operator/kmsencstatecontroller/preconditions.go new file mode 100644 index 0000000000..b6994dd4b5 --- /dev/null +++ b/pkg/operator/kmsencstatecontroller/preconditions.go @@ -0,0 +1,94 @@ +package kmsencstatecontroller + +// copied from: https://github.com/openshift/cluster-kube-apiserver-operator/blob/release-4.13/vendor/github.com/openshift/library-go/pkg/operator/encryption/preconditions.go + +import ( + "fmt" + + configv1listers "github.com/openshift/client-go/config/listers/config/v1" + "github.com/openshift/library-go/pkg/operator/encryption/encryptionconfig" + "github.com/openshift/library-go/pkg/operator/encryption/state" + operatorv1helpers "github.com/openshift/library-go/pkg/operator/v1helpers" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + corev1listers "k8s.io/client-go/listers/core/v1" +) + +type preconditionChecker struct { + component string + encryptionSecretSelector labels.Selector + + secretLister corev1listers.SecretNamespaceLister + apiServerConfigLister configv1listers.APIServerLister +} + +// NewEncryptionEnabledPrecondition determines if encryption controllers should synchronise. +// It uses the cache for gathering data to avoid sending requests to the API servers. 
+func NewEncryptionEnabledPrecondition(apiServerConfigLister configv1listers.APIServerLister, kubeInformersForNamespaces operatorv1helpers.KubeInformersForNamespaces, encryptionSecretSelectorString, component string) (*preconditionChecker, error) { + encryptionSecretSelector, err := labels.Parse(encryptionSecretSelectorString) + if err != nil { + return nil, err + } + return &preconditionChecker{ + component: component, + encryptionSecretSelector: encryptionSecretSelector, + secretLister: kubeInformersForNamespaces.SecretLister().Secrets("openshift-config-managed"), + apiServerConfigLister: apiServerConfigLister, + }, nil +} + +// PreconditionFulfilled a method that indicates whether all prerequisites are met and we can Sync. +// This method MUST be call after the informers synced +func (pc *preconditionChecker) PreconditionFulfilled() (bool, error) { + encryptionWasEnabled, err := pc.encryptionWasEnabled() + if err != nil { + return false, err // got an error, report it and run the sync loops + } + if !encryptionWasEnabled { + return false, nil // encryption hasn't been enabled - no work to do + } + + // TODO: add a step that would determine if encryption is disabled on previously encrypted clusters that would require: + // having the current mode set to Identity + // having all servers on the same revision + // having desired and actual encryption configuration aligned + // having all resources migrated + + return true, nil // we might have work to do +} + +// encryptionWasEnabled checks whether encryption was enabled on a cluster. 
It wasn't enabled when: +// +// a server configuration doesn't exist +// the current encryption mode is empty or set to identity mode and +// a secret with encryption configuration doesn't exist in the managed namespace and +// secrets with encryption keys don't exist in the managed namespace +func (pc *preconditionChecker) encryptionWasEnabled() (bool, error) { + apiServerConfig, err := pc.apiServerConfigLister.Get("cluster") + if errors.IsNotFound(err) { + return false, nil + } else if err != nil { + return false, err // unknown error + } + + if currentMode := state.Mode(apiServerConfig.Spec.Encryption.Type); len(currentMode) > 0 && currentMode != state.Identity { + return true, nil // encryption might be actually in progress + } + + encryptionConfiguration, err := pc.secretLister.Get(fmt.Sprintf("%s-%s", encryptionconfig.EncryptionConfSecretName, pc.component)) + if err != nil && !errors.IsNotFound(err) { + return false, err // unknown error + } + if encryptionConfiguration != nil { + return true, nil + } + + // very unlikely - encryption config doesn't exists but we have some encryption keys + // but since this is coming from a cache just double check + + encryptionSecrets, err := pc.secretLister.List(pc.encryptionSecretSelector) + if err != nil && !errors.IsNotFound(err) { + return false, err // unknown error + } + return len(encryptionSecrets) > 0, nil +} diff --git a/pkg/operator/starter.go b/pkg/operator/starter.go index 0ff0c3c69e..ba5f3f8554 100644 --- a/pkg/operator/starter.go +++ b/pkg/operator/starter.go @@ -23,6 +23,7 @@ import ( "github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/configobservation/configobservercontroller" "github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/configobservation/node" "github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/connectivitycheckcontroller" + kmse "github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/kmsencstatecontroller" 
"github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/kubeletversionskewcontroller" "github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/nodekubeconfigcontroller" "github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/operatorclient" @@ -40,6 +41,7 @@ import ( "github.com/openshift/library-go/pkg/operator/encryption" "github.com/openshift/library-go/pkg/operator/encryption/controllers/migrators" encryptiondeployer "github.com/openshift/library-go/pkg/operator/encryption/deployer" + "github.com/openshift/library-go/pkg/operator/encryption/secrets" "github.com/openshift/library-go/pkg/operator/eventwatch" "github.com/openshift/library-go/pkg/operator/genericoperatorclient" "github.com/openshift/library-go/pkg/operator/latencyprofilecontroller" @@ -217,6 +219,33 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle WithConditionalResources(bindata.Asset, []string{"assets/alerts/kube-apiserver-slos.yaml"}, never, nil). // TODO remove in 4.13 AddKubeInformers(kubeInformersForNamespaces) + // hack: KMS + kmsAssets := NewKMSAssetClass(infrastructure.Status.InfrastructureName, infrastructure.Status.PlatformStatus.AWS.Region) + kmsJobStaticResourceController := staticresourcecontroller.NewStaticResourceController( + "KMSJobStaticResources", + kmsAssets.Asset, + []string{ + "assets/kms/job-sh-cm.yaml", + "assets/kms/job-sa.yaml", + "assets/kms/job-sa-role.yaml", + "assets/kms/job-sa-rolebinding.yaml", + + "assets/kms/job-pod.yaml", + }, + (&resourceapply.ClientHolder{}). + WithKubernetes(kubeClient). 
+ WithDynamicClient(dynamicClient), + operatorClient, + controllerContext.EventRecorder, + ) + go kmsJobStaticResourceController.Run(ctx, 1) + // ^ you don't need this extra kmsJobStaticResourceController + // if you wish to create the AWS KMS key yourself; once it is created + // manually, please store the AWS KMS key ARN under the key "aws_kms_arn" + // in the kms-key ConfigMap in the kube-system namespace, + // and everything else should work as expected. + // e.g. '{"kind": "ConfigMap", "metadata": {"name": "kms-key", "namespace": "kube-system"}, "data": {"aws_kms_arn": "arn:aws:kms:eu-west-3::key/"}}' + targetConfigReconciler := targetconfigcontroller.NewTargetConfigController( os.Getenv("IMAGE"), os.Getenv("OPERATOR_IMAGE"), @@ -226,6 +255,10 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle kubeClient, startupmonitorreadiness.IsStartupMonitorEnabledFunction(configInformers.Config().V1().Infrastructures().Lister(), operatorClient), controllerContext.EventRecorder, + + // hack: kms + AWSKMSKeyARNGetter(kubeClient), + infrastructure.Status.PlatformStatus.AWS.Region, ) nodeKubeconfigController := nodekubeconfigcontroller.NewNodeKubeconfigController( @@ -357,6 +390,32 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle return err } + // --- KMS enc --- + encryptionConfigLabelSelector := metav1.ListOptions{LabelSelector: secrets.EncryptionKeySecretsLabel + "=" + operatorclient.TargetNamespace} + encryptionPreconditionChecker, err := kmse.NewEncryptionEnabledPrecondition( + configInformers.Config().V1().APIServers().Lister(), + kubeInformersForNamespaces, encryptionConfigLabelSelector.LabelSelector, + operatorclient.TargetNamespace) + if err != nil { + return err + } + encryptionStateController := kmse.NewStateController( + operatorclient.TargetNamespace, + encryption.StaticEncryptionProvider{ + schema.GroupResource{Group: "", Resource: "secrets"}, + schema.GroupResource{Group: "", Resource:
"configmaps"}, + }, + deployer, + encryptionPreconditionChecker.PreconditionFulfilled, + operatorClient, + configInformers.Config().V1().APIServers(), + kubeInformersForNamespaces, + kubeClient.CoreV1(), + encryptionConfigLabelSelector, + controllerContext.EventRecorder, + ) + // --- + certRotationTimeUpgradeableController := certrotationtimeupgradeablecontroller.NewCertRotationTimeUpgradeableController( operatorClient, kubeInformersForNamespaces.InformersFor(operatorclient.GlobalUserSpecifiedConfigNamespace).Core().V1().ConfigMaps(), @@ -459,7 +518,12 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle go configObserver.Run(ctx, 1) go clusterOperatorStatus.Run(ctx, 1) go certRotationController.Run(ctx, 1) + + // --- KMS enc --- go encryptionControllers.Run(ctx, 1) + go encryptionStateController.Run(ctx, 1) + // --- + go certRotationTimeUpgradeableController.Run(ctx, 1) go terminationObserver.Run(ctx, 1) go eventWatcher.Run(ctx, 1) diff --git a/pkg/operator/targetconfigcontroller/targetconfigcontroller.go b/pkg/operator/targetconfigcontroller/targetconfigcontroller.go index d9b7171266..51e1c783b8 100644 --- a/pkg/operator/targetconfigcontroller/targetconfigcontroller.go +++ b/pkg/operator/targetconfigcontroller/targetconfigcontroller.go @@ -48,6 +48,10 @@ type TargetConfigController struct { configMapLister corev1listers.ConfigMapLister isStartupMonitorEnabledFn func() (bool, error) + + // hack: KMS + awsKMSKeyARNGetter func() string + awsRegion string } func NewTargetConfigController( @@ -58,6 +62,8 @@ func NewTargetConfigController( kubeClient kubernetes.Interface, isStartupMonitorEnabledFn func() (bool, error), eventRecorder events.Recorder, + awsKMSKeyARNGetter func() string, + awsRegion string, ) factory.Controller { c := &TargetConfigController{ targetImagePullSpec: targetImagePullSpec, @@ -66,6 +72,10 @@ func NewTargetConfigController( kubeClient: kubeClient, configMapLister: kubeInformersForNamespaces.ConfigMapLister(), 
isStartupMonitorEnabledFn: isStartupMonitorEnabledFn, + + // hack: KMS + awsKMSKeyARNGetter: awsKMSKeyARNGetter, + awsRegion: awsRegion, } return factory.New().WithInformers( @@ -160,7 +170,7 @@ func createTargetConfig(ctx context.Context, c TargetConfigController, recorder if err != nil { errors = append(errors, fmt.Errorf("%q: %v", "configmap/config", err)) } - _, _, err = managePods(ctx, c.kubeClient.CoreV1(), c.isStartupMonitorEnabledFn, recorder, operatorSpec, c.targetImagePullSpec, c.operatorImagePullSpec) + _, _, err = managePods(ctx, c.kubeClient.CoreV1(), c.isStartupMonitorEnabledFn, recorder, operatorSpec, c.targetImagePullSpec, c.operatorImagePullSpec, c.awsRegion, c.awsKMSKeyARNGetter) if err != nil { errors = append(errors, fmt.Errorf("%q: %v", "configmap/kube-apiserver-pod", err)) } @@ -229,8 +239,8 @@ func manageKubeAPIServerConfig(ctx context.Context, client coreclientv1.ConfigMa return resourceapply.ApplyConfigMap(ctx, client, recorder, requiredConfigMap) } -func managePods(ctx context.Context, client coreclientv1.ConfigMapsGetter, isStartupMonitorEnabledFn func() (bool, error), recorder events.Recorder, operatorSpec *operatorv1.StaticPodOperatorSpec, imagePullSpec, operatorImagePullSpec string) (*corev1.ConfigMap, bool, error) { - appliedPodTemplate, err := manageTemplate(string(bindata.MustAsset("assets/kube-apiserver/pod.yaml")), imagePullSpec, operatorImagePullSpec, operatorSpec) +func managePods(ctx context.Context, client coreclientv1.ConfigMapsGetter, isStartupMonitorEnabledFn func() (bool, error), recorder events.Recorder, operatorSpec *operatorv1.StaticPodOperatorSpec, imagePullSpec, operatorImagePullSpec, awsRegion string, awsKMSKeyARNGetter func() string) (*corev1.ConfigMap, bool, error) { + appliedPodTemplate, err := manageTemplate(string(bindata.MustAsset("assets/kube-apiserver/pod.yaml")), imagePullSpec, operatorImagePullSpec, awsKMSKeyARNGetter(), awsRegion, operatorSpec) if err != nil { return nil, false, err } @@ -485,6 +495,11 @@ 
type kasTemplate struct { GracefulTerminationDuration int SetupContainerTimeoutDuration int GOGC int + + // hack: KMS + KMSPluginImage string + AWSKMSKeyARN string + AWSRegion string } func effectiveConfiguration(spec *operatorv1.StaticPodOperatorSpec) (map[string]interface{}, error) { @@ -501,7 +516,7 @@ func effectiveConfiguration(spec *operatorv1.StaticPodOperatorSpec) (map[string] return effectiveConfig, nil } -func manageTemplate(rawTemplate string, imagePullSpec string, operatorImagePullSpec string, operatorSpec *operatorv1.StaticPodOperatorSpec) (string, error) { +func manageTemplate(rawTemplate string, imagePullSpec string, operatorImagePullSpec string, awsKMSKeyARN, awsRegion string, operatorSpec *operatorv1.StaticPodOperatorSpec) (string, error) { var verbosity string switch operatorSpec.LogLevel { case operatorv1.Normal: @@ -539,6 +554,11 @@ func manageTemplate(rawTemplate string, imagePullSpec string, operatorImagePullS // 80s for minimum-termination-duration (10s port wait, 65s to let pending requests finish after port has been freed) + 5s extra cri-o's graceful termination period SetupContainerTimeoutDuration: gracefulTerminationDuration + 80 + 5, GOGC: gogc, + + // hack: KMS + KMSPluginImage: "quay.io/swghosh/aws-cloud-kms", + AWSKMSKeyARN: awsKMSKeyARN, + AWSRegion: awsRegion, } tmpl, err := template.New("kas").Parse(rawTemplate) if err != nil { diff --git a/pkg/operator/targetconfigcontroller/targetconfigcontroller_test.go b/pkg/operator/targetconfigcontroller/targetconfigcontroller_test.go index 1d136467ca..2f2ed4d5d3 100644 --- a/pkg/operator/targetconfigcontroller/targetconfigcontroller_test.go +++ b/pkg/operator/targetconfigcontroller/targetconfigcontroller_test.go @@ -224,6 +224,8 @@ func TestManageTemplate(t *testing.T) { scenario.template, "CaptainAmerica", "Piper", + "", + "", scenario.operatorSpec) // validate diff --git a/vendor/github.com/openshift/library-go/pkg/operator/encryption/controllers.go 
b/vendor/github.com/openshift/library-go/pkg/operator/encryption/controllers.go index 88baef229a..8352bfbc72 100644 --- a/vendor/github.com/openshift/library-go/pkg/operator/encryption/controllers.go +++ b/vendor/github.com/openshift/library-go/pkg/operator/encryption/controllers.go @@ -78,18 +78,19 @@ func NewControllers( encryptionSecretSelector, eventRecorder, ), - controllers.NewStateController( - component, - provider, - deployer, - encryptionEnabledChecker.PreconditionFulfilled, - operatorClient, - apiServerInformer, - kubeInformersForNamespaces, - secretsClient, - encryptionSecretSelector, - eventRecorder, - ), + // // Removing state controller from here as it is added in operator with mods + // controllers.NewStateController( + // component, + // provider, + // deployer, + // encryptionEnabledChecker.PreconditionFulfilled, + // operatorClient, + // apiServerInformer, + // kubeInformersForNamespaces, + // secretsClient, + // encryptionSecretSelector, + // eventRecorder, + // ), controllers.NewPruneController( provider, deployer,