diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index 5eaed40f7..e1c419106 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -45,6 +45,9 @@ runs: - name: az set sub shell: bash run: az account set --subscription ${{ inputs.subscription-id }} + - name: create workload msi + shell: bash + run: AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-msi - name: create cluster shell: bash run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} AZURE_LOCATION=${{ inputs.location }} make az-mkaks-cilium @@ -54,6 +57,11 @@ runs: client-id: ${{ inputs.client-id }} tenant-id: ${{ inputs.tenant-id }} subscription-id: ${{ inputs.subscription-id }} + - name: create federated cred + shell: bash + run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} make az-create-federated-cred - name: update azure perms shell: bash - run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm + run: | + AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm + AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr diff --git a/Makefile-az.mk b/Makefile-az.mk index dd3b6b369..033fc7154 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -10,8 +10,12 @@ endif AZURE_CLUSTER_NAME ?= karpenter AZURE_RESOURCE_GROUP_MC = MC_$(AZURE_RESOURCE_GROUP)_$(AZURE_CLUSTER_NAME)_$(AZURE_LOCATION) -az-all: az-login az-mkaks-cilium az-perm az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample 
Provisioner and workload -az-all-savm: az-login az-mkaks-savm az-perm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines +KARPENTER_SERVICE_ACCOUNT_NAME ?= karpenter-sa +AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpentermsi +KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID + +az-all: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload +az-all-savm: az-login az-mkaks-savm az-perm-savm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines az-login: ## Login into Azure az login @@ -32,10 +36,21 @@ az-mkaks: az-mkacr ## Create test AKS cluster (with --vm-set-type AvailabilitySe az-mkaks-cilium: az-mkacr ## Create test AKS cluster (with --network-dataplane cilium, --network-plugin cilium, and --network-plugin-mode overlay) az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \ - --enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay + --enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay \ + --enable-oidc-issuer --enable-workload-identity az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter +az-create-workload-msi: + # create the workload MSI that is the backing for the karpenter pod auth + az identity create --name 
"${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --location "${AZURE_LOCATION}" + +az-create-federated-cred: + $(eval AKS_OIDC_ISSUER=$(shell az aks show -n "${AZURE_CLUSTER_NAME}" -g "${AZURE_RESOURCE_GROUP}" --query "oidcIssuerProfile.issuerUrl" -otsv)) + + # create federated credential linked to the karpenter service account for auth usage + az identity federated-credential create --name ${KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME} --identity-name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"${SYSTEM_NAMESPACE}":"${KARPENTER_SERVICE_ACCOUNT_NAME}" --audience api://AzureADTokenExchange + az-mkaks-savm: az-mkrg ## Create experimental cluster with standalone VMs (+ ACR) az deployment group create --resource-group $(AZURE_RESOURCE_GROUP) --template-file hack/azure/aks-savm.bicep --parameters aksname=$(AZURE_CLUSTER_NAME) acrname=$(AZURE_ACR_NAME) az aks get-credentials --resource-group $(AZURE_RESOURCE_GROUP) --name $(AZURE_CLUSTER_NAME) --overwrite-existing @@ -78,6 +93,18 @@ az-patch-skaffold-azureoverlay: az-patch-skaffold az-fetch-network-info yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.networkPlugin = "azure"' skaffold.yaml + # old identity path is still the default, so need to override the values with new logic. + # TODO (chmcbrid): update the new logic path as the default. 
+ $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USE_CREDENTIAL_FROM_ENVIRONMENT")) .value = "true"' skaffold.yaml + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USE_MANAGED_IDENTITY_EXTENSION")) .value = "false"' skaffold.yaml + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USER_ASSIGNED_IDENTITY_ID")) .value = ""' skaffold.yaml + + yq -i '.manifests.helm.releases[0].overrides.serviceAccount.annotations."azure.workload.identity/client-id" = "$(KARPENTER_USER_ASSIGNED_CLIENT_ID)"' skaffold.yaml + yq -i '.manifests.helm.releases[0].overrides.serviceAccount.name = "$(KARPENTER_SERVICE_ACCOUNT_NAME)"' skaffold.yaml + + yq -i '.manifests.helm.releases[0].overrides.podLabels ."azure.workload.identity/use" = "true"' skaffold.yaml + az-fetch-network-info: $(eval AZURE_VNET_NAME=$(shell az network vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].name")) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_VNET_NAME")) .value = "$(AZURE_VNET_NAME)"' skaffold.yaml @@ -92,7 +119,16 @@ az-rmvmss-vms: ## Delete all VMs in VMSS Flex (use with care!) 
az vmss delete-instances --name $(AZURE_CLUSTER_NAME)-vmss --resource-group $(AZURE_RESOURCE_GROUP_MC) --instance-ids '*' az-perm: ## Create role assignments to let Karpenter manage VMs and Network - # Note (charliedmcb): need to be objectId for E2E workflow as the pipeline identity doesn't have permissions to "query Graph API" + # Note: need to be principalId for E2E workflow as the pipeline identity doesn't have permissions to "query Graph API" + $(eval KARPENTER_USER_ASSIGNED_PRINCIPAL_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_PRINCIPAL_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Virtual Machine Contributor" + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_PRINCIPAL_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor" + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_PRINCIPAL_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Managed Identity Operator" + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_PRINCIPAL_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Network Contributor" # in some case we create vnet here + @echo Consider "make az-patch-skaffold"! 
+ +az-perm-savm: ## Create role assignments to let Karpenter manage VMs and Network + # Note: savm has not been converted over to use a workload identity $(eval AZURE_OBJECT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".identityProfile.kubeletidentity.objectId")) az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Virtual Machine Contributor" az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor" @@ -101,9 +137,9 @@ az-perm: ## Create role assignments to let Karpenter manage VMs and Network @echo Consider "make az-patch-skaffold"! az-perm-acr: - $(eval AZURE_CLIENT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".identityProfile.kubeletidentity.clientId")) + $(eval KARPENTER_USER_ASSIGNED_PRINCIPAL_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) $(eval AZURE_ACR_ID=$(shell az acr show --name $(AZURE_ACR_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".id")) - az role assignment create --assignee $(AZURE_CLIENT_ID) --scope $(AZURE_ACR_ID) --role "AcrPull" + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_PRINCIPAL_ID) --scope $(AZURE_ACR_ID) --role "AcrPull" az-aks-check-acr: az aks check-acr --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --acr $(AZURE_ACR_NAME) diff --git a/go.mod b/go.mod index 748a238b0..5dbed1716 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,7 @@ require ( github.com/go-logr/zapr v1.3.0 github.com/go-playground/validator/v10 v10.13.0 github.com/imdario/mergo v0.3.16 + github.com/jongio/azidext/go/azidext v0.5.0 github.com/mitchellh/hashstructure/v2 v2.0.2 
github.com/onsi/ginkgo/v2 v2.15.0 github.com/onsi/gomega v1.31.1 diff --git a/go.sum b/go.sum index 5c96be624..4d7566719 100644 --- a/go.sum +++ b/go.sum @@ -285,6 +285,10 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc= +github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= +github.com/jongio/azidext/go/azidext v0.5.0 h1:uPInXD4NZ3J0k79FPwIA0YXknFn+WcqZqSgs3/jPgvQ= +github.com/jongio/azidext/go/azidext v0.5.0/go.mod h1:TVRX/hJhzbsCKaOIzicH6a8IvOH0hpjWk/JwZZgtXeU= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= diff --git a/pkg/auth/autorest_auth.go b/pkg/auth/autorest_auth.go index 28e8fb398..133e631a7 100644 --- a/pkg/auth/autorest_auth.go +++ b/pkg/auth/autorest_auth.go @@ -23,10 +23,23 @@ import ( "github.com/Azure/go-autorest/autorest" "github.com/Azure/go-autorest/autorest/adal" "github.com/Azure/go-autorest/autorest/azure" - klog "k8s.io/klog/v2" + "k8s.io/klog/v2" + + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "github.com/jongio/azidext/go/azidext" ) func NewAuthorizer(config *Config, env *azure.Environment) (autorest.Authorizer, error) { + // TODO (charliedmcb): need to get track 2 support for the skewer API, and align all auth under workload identity in the same way within cred.go + if config.UseCredentialFromEnvironment { + klog.V(2).Infoln("auth: using workload identity for new authorizer") + cred, err := 
azidentity.NewDefaultAzureCredential(nil) + if err != nil { + return nil, fmt.Errorf("default cred: %w", err) + } + return azidext.NewTokenCredentialAdapter(cred, []string{azidext.DefaultManagementScope}), nil + } + token, err := newServicePrincipalTokenFromCredentials(config, env) if err != nil { return nil, fmt.Errorf("retrieve service principal token: %w", err) diff --git a/pkg/auth/config.go b/pkg/auth/config.go index d27ae9307..b2db025a3 100644 --- a/pkg/auth/config.go +++ b/pkg/auth/config.go @@ -68,12 +68,13 @@ type Config struct { AuthMethod string `json:"authMethod" yaml:"authMethod"` // Settings for a service principal. - AADClientID string `json:"aadClientId" yaml:"aadClientId"` - AADClientSecret string `json:"aadClientSecret" yaml:"aadClientSecret"` - AADClientCertPath string `json:"aadClientCertPath" yaml:"aadClientCertPath"` - AADClientCertPassword string `json:"aadClientCertPassword" yaml:"aadClientCertPassword"` - UseManagedIdentityExtension bool `json:"useManagedIdentityExtension" yaml:"useManagedIdentityExtension"` - UserAssignedIdentityID string `json:"userAssignedIdentityID" yaml:"userAssignedIdentityID"` + AADClientID string `json:"aadClientId" yaml:"aadClientId"` + AADClientSecret string `json:"aadClientSecret" yaml:"aadClientSecret"` + AADClientCertPath string `json:"aadClientCertPath" yaml:"aadClientCertPath"` + AADClientCertPassword string `json:"aadClientCertPassword" yaml:"aadClientCertPassword"` + UseCredentialFromEnvironment bool `json:"useCredentialFromEnvironment" yaml:"useCredentialFromEnvironment"` + UseManagedIdentityExtension bool `json:"useManagedIdentityExtension" yaml:"useManagedIdentityExtension"` + UserAssignedIdentityID string `json:"userAssignedIdentityID" yaml:"userAssignedIdentityID"` //Configs only for AKS ClusterName string `json:"clusterName" yaml:"clusterName"` @@ -87,7 +88,7 @@ type Config struct { func (cfg *Config) PrepareConfig() error { cfg.BaseVars() - err := cfg.prepareMSI() + err := cfg.prepareID() if err != 
nil { return err } @@ -113,7 +114,15 @@ func (cfg *Config) BaseVars() { // cfg.VnetGuid = os.Getenv("AZURE_VNET_GUID") // This field needs to be resolved inside of karpenter, so we will get it in the azClient initialization } -func (cfg *Config) prepareMSI() error { +func (cfg *Config) prepareID() error { + useCredentialFromEnvironmentFromEnv := os.Getenv("ARM_USE_CREDENTIAL_FROM_ENVIRONMENT") + if len(useCredentialFromEnvironmentFromEnv) > 0 { + shouldUse, err := strconv.ParseBool(useCredentialFromEnvironmentFromEnv) + if err != nil { + return err + } + cfg.UseCredentialFromEnvironment = shouldUse + } useManagedIdentityExtensionFromEnv := os.Getenv("ARM_USE_MANAGED_IDENTITY_EXTENSION") if len(useManagedIdentityExtensionFromEnv) > 0 { shouldUse, err := strconv.ParseBool(useManagedIdentityExtensionFromEnv) diff --git a/pkg/auth/cred.go b/pkg/auth/cred.go index f0f49f819..bd1df61fd 100644 --- a/pkg/auth/cred.go +++ b/pkg/auth/cred.go @@ -21,6 +21,7 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "k8s.io/klog/v2" ) // NewCredential provides a token credential for msi and service principal auth @@ -29,7 +30,13 @@ func NewCredential(cfg *Config) (azcore.TokenCredential, error) { return nil, fmt.Errorf("failed to create credential, nil config provided") } + if cfg.UseCredentialFromEnvironment { + klog.V(2).Infoln("cred: using workload identity for new credential") + return azidentity.NewDefaultAzureCredential(nil) + } + if cfg.UseManagedIdentityExtension || cfg.AADClientID == "msi" { + klog.V(2).Infoln("cred: using msi for new credential") msiCred, err := azidentity.NewManagedIdentityCredential(&azidentity.ManagedIdentityCredentialOptions{ ID: azidentity.ClientID(cfg.UserAssignedIdentityID), }) @@ -39,6 +46,7 @@ func NewCredential(cfg *Config) (azcore.TokenCredential, error) { return msiCred, nil } // service principal case + klog.V(2).Infoln("cred: using sp for new credential") cred, err := 
azidentity.NewClientSecretCredential(cfg.TenantID, cfg.AADClientID, cfg.AADClientSecret, nil) if err != nil { return nil, err diff --git a/skaffold.yaml b/skaffold.yaml index 8ddaf2b53..808f93fbe 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -49,6 +49,8 @@ manifests: value: "Please run make az-all" - name: LOCATION value: westus2 + - name: ARM_USE_CREDENTIAL_FROM_ENVIRONMENT + value: "false" - name: ARM_USE_MANAGED_IDENTITY_EXTENSION value: "true" - name: ARM_USER_ASSIGNED_IDENTITY_ID