From c7c84d7c0612ed5f080eb63f1d23828ecf4706b6 Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Thu, 4 Jan 2024 14:21:18 -0800 Subject: [PATCH 01/27] add in new make targets for workload id --- Makefile-az.mk | 24 +++++++++++++++++++++++- karpenter-service-account.yaml | 7 +++++++ 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 karpenter-service-account.yaml diff --git a/Makefile-az.mk b/Makefile-az.mk index dd3b6b369..a967ecb45 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -10,6 +10,11 @@ endif AZURE_CLUSTER_NAME ?= karpenter AZURE_RESOURCE_GROUP_MC = MC_$(AZURE_RESOURCE_GROUP)_$(AZURE_CLUSTER_NAME)_$(AZURE_LOCATION) +KARPENTER_NAMESPACE ?= karpenter +KARPENTER_SERVICE_ACCOUNT_NAME ?= karpenter-sa +AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpentermsi +KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID + az-all: az-login az-mkaks-cilium az-perm az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload az-all-savm: az-login az-mkaks-savm az-perm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines @@ -32,10 +37,27 @@ az-mkaks: az-mkacr ## Create test AKS cluster (with --vm-set-type AvailabilitySe az-mkaks-cilium: az-mkacr ## Create test AKS cluster (with --network-dataplane cilium, --network-plugin cilium, and --network-plugin-mode overlay) az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \ - --enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay + --enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay \ + --enable-oidc-issuer --enable-workload-identity az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter +az-create-karpenter-msi: + az identity create --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --location "${AZURE_LOCATION}" --subscription "${AZURE_SUBSCRIPTION_ID}" + +az-create-service-account: + $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) + yq -i '.metadata.annotations."azure.workload.identity/client-id" = "$(KARPENTER_USER_ASSIGNED_CLIENT_ID)"' karpenter-service-account.yaml + yq -i '.metadata.name = "$(KARPENTER_SERVICE_ACCOUNT_NAME)"' karpenter-service-account.yaml + yq -i '.metadata.namespace = "$(KARPENTER_NAMESPACE)"' karpenter-service-account.yaml + + kubectl apply -f karpenter-service-account.yaml + +az-create-federate-creds: + $(eval AKS_OIDC_ISSUER=$(shell az aks show -n "${AZURE_CLUSTER_NAME}"-g "${AZURE_RESOURCE_GROUP}" --query "oidcIssuerProfile.issuerUrl" -otsv)) + + az identity federated-credential create --name ${KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME} --identity-name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"${KARPENTER_NAMESPACE}":"${KARPENTER_SERVICE_ACCOUNT_NAME}" --audience api://AzureADTokenExchange + az-mkaks-savm: az-mkrg ## Create experimental cluster with standalone VMs (+ ACR) az deployment group create --resource-group $(AZURE_RESOURCE_GROUP) --template-file hack/azure/aks-savm.bicep --parameters aksname=$(AZURE_CLUSTER_NAME) acrname=$(AZURE_ACR_NAME) az aks get-credentials --resource-group $(AZURE_RESOURCE_GROUP) --name $(AZURE_CLUSTER_NAME) --overwrite-existing diff --git a/karpenter-service-account.yaml b/karpenter-service-account.yaml new file mode 100644 index 000000000..bf2ffccf6 --- /dev/null +++ b/karpenter-service-account.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + azure.workload.identity/client-id: "" + name: "" + namespace: "" \ No newline at end of file From f04c8c66f9d56a013935f957be4c70799a420aca Mon Sep 17 00:00:00 2001 From: Charlie McBride <33269602+charliedmcb@users.noreply.github.com> Date: Fri, 5 Jan 2024 21:12:39 +0000 Subject: [PATCH 02/27] update working code --- Makefile-az.mk | 7 ++++++- pkg/auth/config.go | 13 +++++++++++-- pkg/auth/cred.go | 4 ++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index a967ecb45..9c67cf997 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -51,10 +51,11 @@ az-create-service-account: yq -i '.metadata.name = "$(KARPENTER_SERVICE_ACCOUNT_NAME)"' karpenter-service-account.yaml yq -i '.metadata.namespace = "$(KARPENTER_NAMESPACE)"' karpenter-service-account.yaml + kubectl create namespace $(KARPENTER_NAMESPACE) --dry-run=client -o yaml | kubectl apply -f - kubectl apply -f karpenter-service-account.yaml az-create-federate-creds: - $(eval AKS_OIDC_ISSUER=$(shell az aks show -n "${AZURE_CLUSTER_NAME}"-g "${AZURE_RESOURCE_GROUP}" --query "oidcIssuerProfile.issuerUrl" -otsv)) + $(eval AKS_OIDC_ISSUER=$(shell az aks show -n "${AZURE_CLUSTER_NAME}" -g "${AZURE_RESOURCE_GROUP}" --query "oidcIssuerProfile.issuerUrl" -otsv)) az identity federated-credential create --name ${KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME} --identity-name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"${KARPENTER_NAMESPACE}":"${KARPENTER_SERVICE_ACCOUNT_NAME}" --audience api://AzureADTokenExchange @@ -86,6 +87,10 @@ az-patch-skaffold: ## Update Azure client env vars and settings in skaffold con yq -i '.manifests.helm.releases[0].overrides.settings.azure.kubeletClientTLSBootstrapToken = "$(BOOTSTRAP_TOKEN)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.sshPublicKey = "$(SSH_PUBLIC_KEY)"' skaffold.yaml + yq -i '.manifests.helm.releases[0].overrides.podLabels ."azure.workload.identity/use" = "true"' skaffold.yaml + yq -i '.manifests.helm.releases[0].overrides.serviceAccount.name = "$(KARPENTER_SERVICE_ACCOUNT_NAME)"' skaffold.yaml + yq -i '.manifests.helm.releases[0].overrides.serviceAccount.create = false' skaffold.yaml + az-patch-skaffold-kubenet: az-patch-skaffold az-fetch-network-info $(eval AZURE_SUBNET_ID=$(shell az network vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].subnets[0].id")) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml diff --git a/pkg/auth/config.go b/pkg/auth/config.go index d27ae9307..57014107a 100644 --- a/pkg/auth/config.go +++ b/pkg/auth/config.go @@ -72,6 +72,7 @@ type Config struct { AADClientSecret string `json:"aadClientSecret" yaml:"aadClientSecret"` AADClientCertPath string `json:"aadClientCertPath" yaml:"aadClientCertPath"` AADClientCertPassword string `json:"aadClientCertPassword" yaml:"aadClientCertPassword"` + UseNewCredWorkflow bool `json:"useNewCredWorkflow" yaml:"useNewCredWorkflow"` UseManagedIdentityExtension bool `json:"useManagedIdentityExtension" yaml:"useManagedIdentityExtension"` UserAssignedIdentityID string `json:"userAssignedIdentityID" yaml:"userAssignedIdentityID"` @@ -87,7 +88,7 @@ type Config struct { func (cfg *Config) PrepareConfig() error { cfg.BaseVars() - err := cfg.prepareMSI() + err := cfg.prepareID() if err != nil { return err } @@ -113,7 +114,15 @@ func (cfg *Config) BaseVars() { // cfg.VnetGuid = os.Getenv("AZURE_VNET_GUID") // This field needs to be resolved inside of karpenter, so we will get it in the azClient initialization } -func (cfg *Config) prepareMSI() error { +func (cfg *Config) prepareID() error { + useNewCredWorkflowFromEnv := os.Getenv("ARM_USE_NEW_CRED_WORKFLOW") + if len(useNewCredWorkflowFromEnv) > 0 { + shouldUse, err := strconv.ParseBool(useNewCredWorkflowFromEnv) + if err != nil { + return err + } + cfg.UseNewCredWorkflow = shouldUse + } useManagedIdentityExtensionFromEnv := os.Getenv("ARM_USE_MANAGED_IDENTITY_EXTENSION") if len(useManagedIdentityExtensionFromEnv) > 0 { shouldUse, err := strconv.ParseBool(useManagedIdentityExtensionFromEnv) diff --git a/pkg/auth/cred.go b/pkg/auth/cred.go index f0f49f819..10e3beaae 100644 --- a/pkg/auth/cred.go +++ b/pkg/auth/cred.go @@ -29,6 +29,10 @@ func NewCredential(cfg *Config) (azcore.TokenCredential, error) { return nil, fmt.Errorf("failed to create credential, nil config provided") } + if cfg.UseNewCredWorkflow { + return azidentity.NewDefaultAzureCredential(nil) + } + if cfg.UseManagedIdentityExtension || cfg.AADClientID == "msi" { msiCred, err := azidentity.NewManagedIdentityCredential(&azidentity.ManagedIdentityCredentialOptions{ ID: azidentity.ClientID(cfg.UserAssignedIdentityID), From 3a7ae87ca84c5366d05ae0ae608c2f6a1b2b43a8 Mon Sep 17 00:00:00 2001 From: Charlie McBride <33269602+charliedmcb@users.noreply.github.com> Date: Fri, 5 Jan 2024 21:41:04 +0000 Subject: [PATCH 03/27] saving possible working version --- Makefile-az.mk | 27 +++++++++------------------ karpenter-service-account.yaml | 7 ------- skaffold.yaml | 2 +- 3 files changed, 10 insertions(+), 26 deletions(-) delete mode 100644 karpenter-service-account.yaml diff --git a/Makefile-az.mk b/Makefile-az.mk index 9c67cf997..81441f211 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -45,15 +45,6 @@ az-mkaks-cilium: az-mkacr ## Create test AKS cluster (with --network-dataplane c az-create-karpenter-msi: az identity create --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --location "${AZURE_LOCATION}" --subscription "${AZURE_SUBSCRIPTION_ID}" -az-create-service-account: - $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) - yq -i '.metadata.annotations."azure.workload.identity/client-id" = "$(KARPENTER_USER_ASSIGNED_CLIENT_ID)"' karpenter-service-account.yaml - yq -i '.metadata.name = "$(KARPENTER_SERVICE_ACCOUNT_NAME)"' karpenter-service-account.yaml - yq -i '.metadata.namespace = "$(KARPENTER_NAMESPACE)"' karpenter-service-account.yaml - - kubectl create namespace $(KARPENTER_NAMESPACE) --dry-run=client -o yaml | kubectl apply -f - - kubectl apply -f karpenter-service-account.yaml - az-create-federate-creds: $(eval AKS_OIDC_ISSUER=$(shell az aks show -n "${AZURE_CLUSTER_NAME}" -g "${AZURE_RESOURCE_GROUP}" --query "oidcIssuerProfile.issuerUrl" -otsv)) @@ -68,7 +59,7 @@ az-rmrg: ## Destroy test ACR and AKS cluster by deleting the resource group (use az group delete --name $(AZURE_RESOURCE_GROUP) az-patch-skaffold: ## Update Azure client env vars and settings in skaffold config - $(eval AZURE_CLIENT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".identityProfile.kubeletidentity.clientId")) + $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) $(eval CLUSTER_ENDPOINT=$(shell kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')) # bootstrap token $(eval TOKEN_SECRET_NAME=$(shell kubectl get -n kube-system secrets --field-selector=type=bootstrap.kubernetes.io/token -o jsonpath='{.items[0].metadata.name}')) @@ -79,7 +70,6 @@ az-patch-skaffold: ## Update Azure client env vars and settings in skaffold con $(eval SSH_PUBLIC_KEY=$(shell cat ~/.ssh/id_rsa.pub) azureuser) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_SUBSCRIPTION_ID")) .value = "$(AZURE_SUBSCRIPTION_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="LOCATION")) .value = "$(AZURE_LOCATION)"' skaffold.yaml - yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USER_ASSIGNED_IDENTITY_ID")) .value = "$(AZURE_CLIENT_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_NODE_RESOURCE_GROUP")) .value = "$(AZURE_RESOURCE_GROUP_MC)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.clusterName = "$(AZURE_CLUSTER_NAME)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.clusterEndpoint = "$(CLUSTER_ENDPOINT)"' skaffold.yaml @@ -87,9 +77,10 @@ az-patch-skaffold: ## Update Azure client env vars and settings in skaffold con yq -i '.manifests.helm.releases[0].overrides.settings.azure.kubeletClientTLSBootstrapToken = "$(BOOTSTRAP_TOKEN)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.sshPublicKey = "$(SSH_PUBLIC_KEY)"' skaffold.yaml - yq -i '.manifests.helm.releases[0].overrides.podLabels ."azure.workload.identity/use" = "true"' skaffold.yaml + yq -i '.manifests.helm.releases[0].overrides.serviceAccount.annotations."azure.workload.identity/client-id" = "$(KARPENTER_USER_ASSIGNED_CLIENT_ID)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.serviceAccount.name = "$(KARPENTER_SERVICE_ACCOUNT_NAME)"' skaffold.yaml - yq -i '.manifests.helm.releases[0].overrides.serviceAccount.create = false' skaffold.yaml + + yq -i '.manifests.helm.releases[0].overrides.podLabels ."azure.workload.identity/use" = "true"' skaffold.yaml az-patch-skaffold-kubenet: az-patch-skaffold az-fetch-network-info $(eval AZURE_SUBNET_ID=$(shell az network vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].subnets[0].id")) @@ -120,11 +111,11 @@ az-rmvmss-vms: ## Delete all VMs in VMSS Flex (use with care!) az-perm: ## Create role assignments to let Karpenter manage VMs and Network # Note (charliedmcb): need to be objectId for E2E workflow as the pipeline identity doesn't have permissions to "query Graph API" - $(eval AZURE_OBJECT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".identityProfile.kubeletidentity.objectId")) - az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Virtual Machine Contributor" - az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor" - az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Managed Identity Operator" - az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Network Contributor" # in some case we create vnet here + $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Virtual Machine Contributor" + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor" + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Managed Identity Operator" + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Network Contributor" # in some case we create vnet here @echo Consider "make az-patch-skaffold"! az-perm-acr: diff --git a/karpenter-service-account.yaml b/karpenter-service-account.yaml deleted file mode 100644 index bf2ffccf6..000000000 --- a/karpenter-service-account.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - annotations: - azure.workload.identity/client-id: "" - name: "" - namespace: "" \ No newline at end of file diff --git a/skaffold.yaml b/skaffold.yaml index cc0eb94c4..681ec44dd 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -49,7 +49,7 @@ manifests: value: "" - name: LOCATION value: westus2 - - name: ARM_USE_MANAGED_IDENTITY_EXTENSION + - name: ARM_USE_NEW_CRED_WORKFLOW value: "true" - name: ARM_USER_ASSIGNED_IDENTITY_ID value: "" From 455adeac7633f9e369886afdae9b44bb87bc10fe Mon Sep 17 00:00:00 2001 From: Charlie McBride <33269602+charliedmcb@users.noreply.github.com> Date: Fri, 5 Jan 2024 23:28:54 +0000 Subject: [PATCH 04/27] plug in the needed make commands and update acr perms for workload id --- Makefile-az.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 81441f211..c1e2dcf6b 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -15,7 +15,7 @@ KARPENTER_SERVICE_ACCOUNT_NAME ?= karpenter-sa AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpentermsi KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID -az-all: az-login az-mkaks-cilium az-perm az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload +az-all: az-login az-mkaks-cilium az-create-karpenter-msi az-create-federate-creds az-perm az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload az-all-savm: az-login az-mkaks-savm az-perm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines az-login: ## Login into Azure @@ -119,9 +119,9 @@ az-perm: ## Create role assignments to let Karpenter manage VMs and Network @echo Consider "make az-patch-skaffold"! az-perm-acr: - $(eval AZURE_CLIENT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".identityProfile.kubeletidentity.clientId")) + $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) $(eval AZURE_ACR_ID=$(shell az acr show --name $(AZURE_ACR_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".id")) - az role assignment create --assignee $(AZURE_CLIENT_ID) --scope $(AZURE_ACR_ID) --role "AcrPull" + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope $(AZURE_ACR_ID) --role "AcrPull" az-aks-check-acr: az aks check-acr --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --acr $(AZURE_ACR_NAME) From 4cd29e0f316b499e53f4833cc3373ebb838934af Mon Sep 17 00:00:00 2001 From: Charlie McBride <33269602+charliedmcb@users.noreply.github.com> Date: Fri, 5 Jan 2024 23:53:10 +0000 Subject: [PATCH 05/27] update to allow sku client to still use SP based off of the workload id --- Makefile-az.mk | 1 + pkg/auth/autorest_auth.go | 1 + skaffold.yaml | 2 ++ 3 files changed, 4 insertions(+) diff --git a/Makefile-az.mk b/Makefile-az.mk index c1e2dcf6b..86b77be6b 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -70,6 +70,7 @@ az-patch-skaffold: ## Update Azure client env vars and settings in skaffold con $(eval SSH_PUBLIC_KEY=$(shell cat ~/.ssh/id_rsa.pub) azureuser) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_SUBSCRIPTION_ID")) .value = "$(AZURE_SUBSCRIPTION_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="LOCATION")) .value = "$(AZURE_LOCATION)"' skaffold.yaml + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USER_ASSIGNED_IDENTITY_ID")) .value = "$(KARPENTER_USER_ASSIGNED_CLIENT_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_NODE_RESOURCE_GROUP")) .value = "$(AZURE_RESOURCE_GROUP_MC)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.clusterName = "$(AZURE_CLUSTER_NAME)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.clusterEndpoint = "$(CLUSTER_ENDPOINT)"' skaffold.yaml diff --git a/pkg/auth/autorest_auth.go b/pkg/auth/autorest_auth.go index 28e8fb398..aa653b926 100644 --- a/pkg/auth/autorest_auth.go +++ b/pkg/auth/autorest_auth.go @@ -42,6 +42,7 @@ func newServicePrincipalTokenFromCredentials(config *Config, env *azure.Environm return nil, fmt.Errorf("creating the OAuth config: %w", err) } + // TODO (charliedmcb): look at updating this with the new workload identity logic. Would be nice if we could align all the auth. if config.UseManagedIdentityExtension { klog.V(2).Infoln("azure: using managed identity extension to retrieve access token") msiEndpoint, err := adal.GetMSIVMEndpoint() diff --git a/skaffold.yaml b/skaffold.yaml index 681ec44dd..715c69256 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -51,6 +51,8 @@ manifests: value: westus2 - name: ARM_USE_NEW_CRED_WORKFLOW value: "true" + - name: ARM_USE_MANAGED_IDENTITY_EXTENSION + value: "true" - name: ARM_USER_ASSIGNED_IDENTITY_ID value: "" - name: AZURE_NODE_RESOURCE_GROUP From da40a1c6437632bc1b709be7831d0511e57ad150 Mon Sep 17 00:00:00 2001 From: Charlie McBride <33269602+charliedmcb@users.noreply.github.com> Date: Mon, 8 Jan 2024 21:28:32 +0000 Subject: [PATCH 06/27] update to principalId for e2e flow --- Makefile-az.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 86b77be6b..4f75a0671 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -111,8 +111,8 @@ az-rmvmss-vms: ## Delete all VMs in VMSS Flex (use with care!) az vmss delete-instances --name $(AZURE_CLUSTER_NAME)-vmss --resource-group $(AZURE_RESOURCE_GROUP_MC) --instance-ids '*' az-perm: ## Create role assignments to let Karpenter manage VMs and Network - # Note (charliedmcb): need to be objectId for E2E workflow as the pipeline identity doesn't have permissions to "query Graph API" - $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) + # Note (charliedmcb): need to be principalId for E2E workflow as the pipeline identity doesn't have permissions to "query Graph API" + $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Virtual Machine Contributor" az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor" az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Managed Identity Operator" @@ -120,7 +120,7 @@ az-perm: ## Create role assignments to let Karpenter manage VMs and Network @echo Consider "make az-patch-skaffold"! az-perm-acr: - $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) + $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) $(eval AZURE_ACR_ID=$(shell az acr show --name $(AZURE_ACR_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".id")) az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope $(AZURE_ACR_ID) --role "AcrPull" From 48bdef217ba87abbd61cb22723c95116b5770540 Mon Sep 17 00:00:00 2001 From: Charlie McBride <33269602+charliedmcb@users.noreply.github.com> Date: Tue, 9 Jan 2024 18:49:17 +0000 Subject: [PATCH 07/27] update sku to use new workload id auth patterning as the backing with wrapper --- Makefile-az.mk | 1 - go.mod | 1 + go.sum | 2 ++ pkg/auth/autorest_auth.go | 13 ++++++++++++- skaffold.yaml | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 4f75a0671..f3f84c6d6 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -70,7 +70,6 @@ az-patch-skaffold: ## Update Azure client env vars and settings in skaffold con $(eval SSH_PUBLIC_KEY=$(shell cat ~/.ssh/id_rsa.pub) azureuser) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_SUBSCRIPTION_ID")) .value = "$(AZURE_SUBSCRIPTION_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="LOCATION")) .value = "$(AZURE_LOCATION)"' skaffold.yaml - yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USER_ASSIGNED_IDENTITY_ID")) .value = "$(KARPENTER_USER_ASSIGNED_CLIENT_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_NODE_RESOURCE_GROUP")) .value = "$(AZURE_RESOURCE_GROUP_MC)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.clusterName = "$(AZURE_CLUSTER_NAME)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.clusterEndpoint = "$(CLUSTER_ENDPOINT)"' skaffold.yaml diff --git a/go.mod b/go.mod index 1a2e57bc5..1d68aa1a7 100644 --- a/go.mod +++ b/go.mod @@ -92,6 +92,7 @@ require ( github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0 // indirect github.com/hashicorp/golang-lru v1.0.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/jongio/azidext/go/azidext v0.5.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/kelseyhightower/envconfig v1.4.0 // indirect diff --git a/go.sum b/go.sum index a4d781a82..874354921 100644 --- a/go.sum +++ b/go.sum @@ -283,6 +283,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jongio/azidext/go/azidext v0.5.0 h1:uPInXD4NZ3J0k79FPwIA0YXknFn+WcqZqSgs3/jPgvQ= +github.com/jongio/azidext/go/azidext v0.5.0/go.mod h1:TVRX/hJhzbsCKaOIzicH6a8IvOH0hpjWk/JwZZgtXeU= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= diff --git a/pkg/auth/autorest_auth.go b/pkg/auth/autorest_auth.go index aa653b926..8554caaf4 100644 --- a/pkg/auth/autorest_auth.go +++ b/pkg/auth/autorest_auth.go @@ -23,10 +23,21 @@ import ( "github.com/Azure/go-autorest/autorest" "github.com/Azure/go-autorest/autorest/adal" "github.com/Azure/go-autorest/autorest/azure" - klog "k8s.io/klog/v2" + "k8s.io/klog/v2" + + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "github.com/jongio/azidext/go/azidext" ) func NewAuthorizer(config *Config, env *azure.Environment) (autorest.Authorizer, error) { + if config.UseNewCredWorkflow { + cred, err := azidentity.NewDefaultAzureCredential(nil) + if err != nil { + return nil, fmt.Errorf("default cred: %w", err) + } + return azidext.NewTokenCredentialAdapter(cred, []string{azidext.DefaultManagementScope}), nil + } + token, err := newServicePrincipalTokenFromCredentials(config, env) if err != nil { return nil, fmt.Errorf("retrieve service principal token: %w", err) diff --git a/skaffold.yaml b/skaffold.yaml index 715c69256..b98930a94 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -52,7 +52,7 @@ manifests: - name: ARM_USE_NEW_CRED_WORKFLOW value: "true" - name: ARM_USE_MANAGED_IDENTITY_EXTENSION - value: "true" + value: "false" - name: ARM_USER_ASSIGNED_IDENTITY_ID value: "" - name: AZURE_NODE_RESOURCE_GROUP From f89ddc02fce82b1d9956ef32d859632ac86b18ac Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 05:54:17 -0800 Subject: [PATCH 08/27] support savm path --- Makefile-az.mk | 15 +++++++-------- hack/azure/aks-savm.bicep | 8 +++++++- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index f3f84c6d6..0be71694f 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -10,13 +10,12 @@ endif AZURE_CLUSTER_NAME ?= karpenter AZURE_RESOURCE_GROUP_MC = MC_$(AZURE_RESOURCE_GROUP)_$(AZURE_CLUSTER_NAME)_$(AZURE_LOCATION) -KARPENTER_NAMESPACE ?= karpenter KARPENTER_SERVICE_ACCOUNT_NAME ?= karpenter-sa AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpentermsi KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID -az-all: az-login az-mkaks-cilium az-create-karpenter-msi az-create-federate-creds az-perm az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload -az-all-savm: az-login az-mkaks-savm az-perm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines +az-all: az-login az-mkaks-cilium az-create-workload-id-msi az-perm az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload +az-all-savm: az-login az-mkaks-savm az-create-workload-id-msi az-perm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines az-login: ## Login into Azure az login @@ -42,13 +41,13 @@ az-mkaks-cilium: az-mkacr ## Create test AKS cluster (with --network-dataplane c az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter -az-create-karpenter-msi: - az identity create --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --location "${AZURE_LOCATION}" --subscription "${AZURE_SUBSCRIPTION_ID}" - -az-create-federate-creds: +az-create-workload-id-msi: $(eval AKS_OIDC_ISSUER=$(shell az aks show -n "${AZURE_CLUSTER_NAME}" -g "${AZURE_RESOURCE_GROUP}" --query "oidcIssuerProfile.issuerUrl" -otsv)) - az identity federated-credential create --name ${KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME} --identity-name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"${KARPENTER_NAMESPACE}":"${KARPENTER_SERVICE_ACCOUNT_NAME}" --audience api://AzureADTokenExchange + # create the workload MSI that is the backing for the karpenter pod auth + az identity create --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --location "${AZURE_LOCATION}" --subscription "${AZURE_SUBSCRIPTION_ID}" + # create federated credential linked to the karpenter service account for auth usage + az identity federated-credential create --name ${KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME} --identity-name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"${SYSTEM_NAMESPACE}":"${KARPENTER_SERVICE_ACCOUNT_NAME}" --audience api://AzureADTokenExchange az-mkaks-savm: az-mkrg ## Create experimental cluster with standalone VMs (+ ACR) az deployment group create --resource-group $(AZURE_RESOURCE_GROUP) --template-file hack/azure/aks-savm.bicep --parameters aksname=$(AZURE_CLUSTER_NAME) acrname=$(AZURE_ACR_NAME) diff --git a/hack/azure/aks-savm.bicep b/hack/azure/aks-savm.bicep index 6ad992d2a..84458980b 100644 --- a/hack/azure/aks-savm.bicep +++ b/hack/azure/aks-savm.bicep @@ -38,7 +38,7 @@ resource vnet 'Microsoft.Network/virtualNetworks@2022-05-01' = { //resource podsubnet 'subnets' existing = { name: 'podsubnet' } } -resource aks 'Microsoft.ContainerService/managedClusters@2022-07-01' = { +resource aks 'Microsoft.ContainerService/managedClusters@2023-01-02-preview' = { location: location name: aksname identity: { @@ -68,6 +68,12 @@ resource aks 'Microsoft.ContainerService/managedClusters@2022-07-01' = { dnsServiceIP: '10.0.0.10' dockerBridgeCidr: '172.17.0.1/16' } + "oidcIssuerProfile": { + "enabled": true + } + "workloadIdentity": { + "enabled": true + } } } From 854ea8443f565ef03ce72370022485fdb9107a69 Mon Sep 17 00:00:00 2001 From: Charlie McBride <33269602+charliedmcb@users.noreply.github.com> Date: Tue, 9 Jan 2024 20:37:18 +0000 Subject: [PATCH 09/27] fix bicept format --- hack/azure/aks-savm.bicep | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hack/azure/aks-savm.bicep b/hack/azure/aks-savm.bicep index 84458980b..c45b5511f 100644 --- a/hack/azure/aks-savm.bicep +++ b/hack/azure/aks-savm.bicep @@ -68,11 +68,11 @@ resource aks 'Microsoft.ContainerService/managedClusters@2023-01-02-preview' = { dnsServiceIP: '10.0.0.10' dockerBridgeCidr: '172.17.0.1/16' } - "oidcIssuerProfile": { - "enabled": true + oidcIssuerProfile: { + enabled: true } - "workloadIdentity": { - "enabled": true + workloadIdentity: { + enabled: true } } } From e05e108eecc365a0042b81415c96120fd9a63a0a Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 07:19:51 -0800 Subject: [PATCH 10/27] update so only azureoverlay path is on the new workload id for now --- Makefile-az.mk | 22 +++++++++++++++------- hack/azure/aks-savm.bicep | 6 ------ skaffold.yaml | 4 ++-- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 0be71694f..40db19c48 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -15,7 +15,7 @@ AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpentermsi KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID az-all: az-login az-mkaks-cilium az-create-workload-id-msi az-perm az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload -az-all-savm: az-login az-mkaks-savm az-create-workload-id-msi az-perm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines +az-all-savm: az-login az-mkaks-savm az-perm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines az-login: ## Login into Azure az login @@ -58,7 +58,7 @@ az-rmrg: ## Destroy test ACR and AKS cluster by deleting the resource group (use az group delete --name $(AZURE_RESOURCE_GROUP) az-patch-skaffold: ## Update Azure client env vars and settings in skaffold config - $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) + $(eval AZURE_CLIENT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".identityProfile.kubeletidentity.clientId")) $(eval CLUSTER_ENDPOINT=$(shell kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')) # bootstrap token $(eval TOKEN_SECRET_NAME=$(shell kubectl get -n kube-system secrets --field-selector=type=bootstrap.kubernetes.io/token -o jsonpath='{.items[0].metadata.name}')) @@ -69,6 +69,7 @@ az-patch-skaffold: ## Update Azure client env vars and settings in skaffold con $(eval SSH_PUBLIC_KEY=$(shell cat ~/.ssh/id_rsa.pub) azureuser) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_SUBSCRIPTION_ID")) .value = "$(AZURE_SUBSCRIPTION_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="LOCATION")) .value = "$(AZURE_LOCATION)"' skaffold.yaml + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USER_ASSIGNED_IDENTITY_ID")) .value = "$(AZURE_CLIENT_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_NODE_RESOURCE_GROUP")) .value = "$(AZURE_RESOURCE_GROUP_MC)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.clusterName = "$(AZURE_CLUSTER_NAME)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.clusterEndpoint = "$(CLUSTER_ENDPOINT)"' skaffold.yaml @@ -76,11 +77,6 @@ az-patch-skaffold: ## Update Azure client env vars and settings in skaffold con yq -i '.manifests.helm.releases[0].overrides.settings.azure.kubeletClientTLSBootstrapToken = "$(BOOTSTRAP_TOKEN)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.sshPublicKey = "$(SSH_PUBLIC_KEY)"' skaffold.yaml - yq -i '.manifests.helm.releases[0].overrides.serviceAccount.annotations."azure.workload.identity/client-id" = "$(KARPENTER_USER_ASSIGNED_CLIENT_ID)"' skaffold.yaml - yq -i '.manifests.helm.releases[0].overrides.serviceAccount.name = "$(KARPENTER_SERVICE_ACCOUNT_NAME)"' skaffold.yaml - - yq -i '.manifests.helm.releases[0].overrides.podLabels ."azure.workload.identity/use" = "true"' skaffold.yaml - az-patch-skaffold-kubenet: az-patch-skaffold az-fetch-network-info $(eval AZURE_SUBNET_ID=$(shell az network vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].subnets[0].id")) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml @@ -95,6 +91,18 @@ az-patch-skaffold-azureoverlay: az-patch-skaffold az-fetch-network-info yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml yq -i '.manifests.helm.releases[0].overrides.settings.azure.networkPlugin = "azure"' skaffold.yaml + # old identity path is still the default, so need to override the values values with new logic. + # TODO (chmcbrid): update the new logic path as the default. + $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USE_NEW_CRED_WORKFLOW")) .value = "true"' skaffold.yaml + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USE_MANAGED_IDENTITY_EXTENSION")) .value = "false"' skaffold.yaml + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USER_ASSIGNED_IDENTITY_ID")) .value = ""' skaffold.yaml + + yq -i '.manifests.helm.releases[0].overrides.serviceAccount.annotations."azure.workload.identity/client-id" = "$(KARPENTER_USER_ASSIGNED_CLIENT_ID)"' skaffold.yaml + yq -i '.manifests.helm.releases[0].overrides.serviceAccount.name = "$(KARPENTER_SERVICE_ACCOUNT_NAME)"' skaffold.yaml + + yq -i '.manifests.helm.releases[0].overrides.podLabels ."azure.workload.identity/use" = "true"' skaffold.yaml + az-fetch-network-info: $(eval AZURE_VNET_NAME=$(shell az network vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].name")) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_VNET_NAME")) .value = "$(AZURE_VNET_NAME)"' skaffold.yaml diff --git a/hack/azure/aks-savm.bicep b/hack/azure/aks-savm.bicep index c45b5511f..4db440099 100644 --- a/hack/azure/aks-savm.bicep +++ b/hack/azure/aks-savm.bicep @@ -67,12 +67,6 @@ resource aks 'Microsoft.ContainerService/managedClusters@2023-01-02-preview' = { serviceCidr: '10.0.0.0/16' dnsServiceIP: '10.0.0.10' dockerBridgeCidr: '172.17.0.1/16' - } - oidcIssuerProfile: { - enabled: true - } - workloadIdentity: { - enabled: true } } } diff --git a/skaffold.yaml b/skaffold.yaml index b98930a94..d9e4fd1ed 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -50,9 +50,9 @@ manifests: - name: LOCATION value: westus2 - name: ARM_USE_NEW_CRED_WORKFLOW - value: "true" - - name: ARM_USE_MANAGED_IDENTITY_EXTENSION value: "false" + - name: ARM_USE_MANAGED_IDENTITY_EXTENSION + value: "true" - name: ARM_USER_ASSIGNED_IDENTITY_ID value: "" - name: AZURE_NODE_RESOURCE_GROUP From 74d0074ddc484462c122f678f04a2868a39fa5b5 Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 07:25:43 -0800 Subject: [PATCH 11/27] adding in more logging --- pkg/auth/autorest_auth.go | 1 + pkg/auth/cred.go | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/pkg/auth/autorest_auth.go b/pkg/auth/autorest_auth.go index 8554caaf4..3013a242f 100644 --- a/pkg/auth/autorest_auth.go +++ b/pkg/auth/autorest_auth.go @@ -31,6 +31,7 @@ import ( func NewAuthorizer(config *Config, env *azure.Environment) (autorest.Authorizer, error) { if config.UseNewCredWorkflow { + klog.V(2).Infoln("auth: using workload identity for new authorizer") cred, err := azidentity.NewDefaultAzureCredential(nil) if err != nil { return nil, fmt.Errorf("default cred: %w", err) diff --git a/pkg/auth/cred.go b/pkg/auth/cred.go index 10e3beaae..bb96b7ed5 100644 --- a/pkg/auth/cred.go +++ b/pkg/auth/cred.go @@ -21,6 +21,7 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "k8s.io/klog/v2" ) // NewCredential provides a token credential for msi and service principal auth @@ -30,10 +31,12 @@ func NewCredential(cfg *Config) (azcore.TokenCredential, error) { } if cfg.UseNewCredWorkflow { + klog.V(2).Infoln("cred: using workload identity for new credential") return azidentity.NewDefaultAzureCredential(nil) } if cfg.UseManagedIdentityExtension || cfg.AADClientID == "msi" { + klog.V(2).Infoln("cred: using msi for new credential") msiCred, err := azidentity.NewManagedIdentityCredential(&azidentity.ManagedIdentityCredentialOptions{ ID: azidentity.ClientID(cfg.UserAssignedIdentityID), }) @@ -43,6 +46,7 @@ func NewCredential(cfg *Config) (azcore.TokenCredential, error) { return msiCred, nil } // service principal case + klog.V(2).Infoln("cred: using sp for new credential") cred, err := azidentity.NewClientSecretCredential(cfg.TenantID, cfg.AADClientID, cfg.AADClientSecret, nil) if err != nil { return nil, err From 33b7d7f2e0c19cc3179173e0c8f81fd6e3407169 Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 07:38:10 -0800 Subject: [PATCH 12/27] update github actions --- .github/actions/e2e/create-cluster/action.yaml | 7 ++++++- Makefile-az.mk | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index 2b0400bca..5f0e01acf 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -54,6 +54,11 @@ runs: client-id: ${{ inputs.client-id }} tenant-id: ${{ inputs.tenant-id }} subscription-id: ${{ inputs.subscription-id }} + - name: create workload id + shell: bash + run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-id-msi - name: update azure perms shell: bash - run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm + run: | + AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm + AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr diff --git a/Makefile-az.mk b/Makefile-az.mk index 40db19c48..b6832439e 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -45,7 +45,7 @@ az-create-workload-id-msi: $(eval AKS_OIDC_ISSUER=$(shell az aks show -n "${AZURE_CLUSTER_NAME}" -g "${AZURE_RESOURCE_GROUP}" --query "oidcIssuerProfile.issuerUrl" -otsv)) # create the workload MSI that is the backing for the karpenter pod auth - az identity create --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --location "${AZURE_LOCATION}" --subscription "${AZURE_SUBSCRIPTION_ID}" + az identity create --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --location "${AZURE_LOCATION}" # create federated credential linked to the karpenter service account for auth usage az identity federated-credential create --name ${KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME} --identity-name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"${SYSTEM_NAMESPACE}":"${KARPENTER_SERVICE_ACCOUNT_NAME}" --audience api://AzureADTokenExchange From 847324f80d2510e9577f704bb8a65edfd84463d0 Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 07:57:00 -0800 Subject: [PATCH 13/27] revert bicep file --- hack/azure/aks-savm.bicep | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hack/azure/aks-savm.bicep b/hack/azure/aks-savm.bicep index 4db440099..6ad992d2a 100644 --- a/hack/azure/aks-savm.bicep +++ b/hack/azure/aks-savm.bicep @@ -38,7 +38,7 @@ resource vnet 'Microsoft.Network/virtualNetworks@2022-05-01' = { //resource podsubnet 'subnets' existing = { name: 'podsubnet' } } -resource aks 'Microsoft.ContainerService/managedClusters@2023-01-02-preview' = { +resource aks 'Microsoft.ContainerService/managedClusters@2022-07-01' = { location: location name: aksname identity: { @@ -67,7 +67,7 @@ resource aks 'Microsoft.ContainerService/managedClusters@2023-01-02-preview' = { serviceCidr: '10.0.0.0/16' dnsServiceIP: '10.0.0.10' dockerBridgeCidr: '172.17.0.1/16' - } + } } } From a4661f52c56ac89b178f37149cd34ee069dd227e Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 07:58:52 -0800 Subject: [PATCH 14/27] update TODO comment --- pkg/auth/autorest_auth.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/auth/autorest_auth.go b/pkg/auth/autorest_auth.go index 3013a242f..95a32d895 100644 --- a/pkg/auth/autorest_auth.go +++ b/pkg/auth/autorest_auth.go @@ -30,6 +30,7 @@ import ( ) func NewAuthorizer(config *Config, env *azure.Environment) (autorest.Authorizer, error) { + // TODO (charliedmcb): need to get track 2 support for the skewer API, and align all auth under workload identity in the same way within cred.go if config.UseNewCredWorkflow { klog.V(2).Infoln("auth: using workload identity for new authorizer") cred, err := azidentity.NewDefaultAzureCredential(nil) @@ -54,7 +55,6 @@ func newServicePrincipalTokenFromCredentials(config *Config, env *azure.Environm return nil, fmt.Errorf("creating the OAuth config: %w", err) } - // TODO (charliedmcb): look at updating this with the new workload identity logic. Would be nice if we could align all the auth. if config.UseManagedIdentityExtension { klog.V(2).Infoln("azure: using managed identity extension to retrieve access token") msiEndpoint, err := adal.GetMSIVMEndpoint() From d25eae549c400df2a3d5ab164455592911c603eb Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 08:04:41 -0800 Subject: [PATCH 15/27] fix github action for now by making two commands --- .github/actions/e2e/create-cluster/action.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index 5f0e01acf..a85e2b758 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -57,8 +57,9 @@ runs: - name: create workload id shell: bash run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-id-msi - - name: update azure perms + - name: update perms shell: bash - run: | - AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm - AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr + run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-per + - name: update perms for acr + shell: bash + run: AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr From 119e02c7ce3d1875e2aacffc8a2fa094ef53e978 Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 08:09:34 -0800 Subject: [PATCH 16/27] fix action --- .github/actions/e2e/create-cluster/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index a85e2b758..7b8083aaa 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -59,7 +59,7 @@ runs: run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-id-msi - name: update perms shell: bash - run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-per + run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm - name: update perms for acr shell: bash run: AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr From 63a32e60beed6262cf8720901e40743e287c4a5a Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 08:11:12 -0800 Subject: [PATCH 17/27] Revert "fix action" This reverts commit 119e02c7ce3d1875e2aacffc8a2fa094ef53e978. --- .github/actions/e2e/create-cluster/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index 7b8083aaa..a85e2b758 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -59,7 +59,7 @@ runs: run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-id-msi - name: update perms shell: bash - run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm + run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-per - name: update perms for acr shell: bash run: AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr From e5a5ea3cad6f755d68eea8d76be80b1bf962d760 Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 08:11:24 -0800 Subject: [PATCH 18/27] Revert "fix github action for now by making two commands" This reverts commit d25eae549c400df2a3d5ab164455592911c603eb. --- .github/actions/e2e/create-cluster/action.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index a85e2b758..5f0e01acf 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -57,9 +57,8 @@ runs: - name: create workload id shell: bash run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-id-msi - - name: update perms + - name: update azure perms shell: bash - run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-per - - name: update perms for acr - shell: bash - run: AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr + run: | + AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm + AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr From d16dbfdb9138884e5e1e59542cda6b2d44465c37 Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 14:53:46 -0800 Subject: [PATCH 19/27] updating multi-line action --- .github/actions/e2e/create-cluster/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index 5f0e01acf..ddfae1f41 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -59,6 +59,6 @@ runs: run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-id-msi - name: update azure perms shell: bash - run: | + run: | AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr From 443d530ef0e0f24fba13c42c88f53d859b62e1b1 Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 14:57:55 -0800 Subject: [PATCH 20/27] get correct indentation --- .github/actions/e2e/create-cluster/action.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index ddfae1f41..ab812ce62 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -60,5 +60,5 @@ runs: - name: update azure perms shell: bash run: | - AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm - AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr + AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm + AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr From 8181727e66791b416513efb35fd6ee1daf1efd97 Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 15:10:51 -0800 Subject: [PATCH 21/27] add in sleep --- .github/actions/e2e/create-cluster/action.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index ab812ce62..5e37f6d61 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -57,6 +57,9 @@ runs: - name: create workload id shell: bash run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-id-msi + - name: wait for id to settle + shell: bash + run: sleep 10 - name: update azure perms shell: bash run: | From e8651880d2d6946afae7db0be4ef38a495b18f6f Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 9 Jan 2024 15:23:55 -0800 Subject: [PATCH 22/27] update msi wait action --- .github/actions/e2e/create-cluster/action.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index 5e37f6d61..40211d0c9 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -57,9 +57,9 @@ runs: - name: create workload id shell: bash run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-id-msi - - name: wait for id to settle + - name: pause for msi to become available shell: bash - run: sleep 10 + run: sleep 5 - name: update azure perms shell: bash run: | From 76182a748fe88a68e7beafc7beb1a23cb7a0107f Mon Sep 17 00:00:00 2001 From: Charlie McBride <33269602+charliedmcb@users.noreply.github.com> Date: Wed, 10 Jan 2024 18:36:58 +0000 Subject: [PATCH 23/27] make presubmit --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 1d68aa1a7..e3426509c 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,7 @@ require ( github.com/go-logr/zapr v1.3.0 github.com/go-playground/validator/v10 v10.13.0 github.com/imdario/mergo v0.3.16 + github.com/jongio/azidext/go/azidext v0.5.0 github.com/mitchellh/hashstructure/v2 v2.0.2 github.com/onsi/ginkgo/v2 v2.13.2 github.com/onsi/gomega v1.29.0 @@ -92,7 +93,6 @@ require ( github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0 // indirect github.com/hashicorp/golang-lru v1.0.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/jongio/azidext/go/azidext v0.5.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/kelseyhightower/envconfig v1.4.0 // indirect diff --git a/go.sum b/go.sum index 874354921..ea0dd7147 100644 --- a/go.sum +++ b/go.sum @@ -283,6 +283,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc= +github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= github.com/jongio/azidext/go/azidext v0.5.0 h1:uPInXD4NZ3J0k79FPwIA0YXknFn+WcqZqSgs3/jPgvQ= github.com/jongio/azidext/go/azidext v0.5.0/go.mod h1:TVRX/hJhzbsCKaOIzicH6a8IvOH0hpjWk/JwZZgtXeU= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= From 2ef690123b585f2e3112fa2c8f2839ade454443e Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Wed, 10 Jan 2024 10:53:49 -0800 Subject: [PATCH 24/27] remove alias from note, and fix action --- .github/actions/e2e/create-cluster/action.yaml | 2 +- Makefile-az.mk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index 40211d0c9..dc1752a09 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -59,7 +59,7 @@ runs: run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-id-msi - name: pause for msi to become available shell: bash - run: sleep 5 + run: sleep 10 - name: update azure perms shell: bash run: | diff --git a/Makefile-az.mk b/Makefile-az.mk index b6832439e..3f131a188 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -117,7 +117,7 @@ az-rmvmss-vms: ## Delete all VMs in VMSS Flex (use with care!) az vmss delete-instances --name $(AZURE_CLUSTER_NAME)-vmss --resource-group $(AZURE_RESOURCE_GROUP_MC) --instance-ids '*' az-perm: ## Create role assignments to let Karpenter manage VMs and Network - # Note (charliedmcb): need to be principalId for E2E workflow as the pipeline identity doesn't have permissions to "query Graph API" + # Note: need to be principalId for E2E workflow as the pipeline identity doesn't have permissions to "query Graph API" $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Virtual Machine Contributor" az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor" From 13b41a380f3690890bde086a1e7c636b3e95d1bc Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Wed, 17 Jan 2024 11:45:04 -0800 Subject: [PATCH 25/27] change to have create MSI happen before create cluster --- .github/actions/e2e/create-cluster/action.yaml | 10 +++++----- Makefile-az.mk | 10 ++++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index dc1752a09..1ebae2d06 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -45,6 +45,9 @@ runs: - name: az set sub shell: bash run: az account set --subscription ${{ inputs.subscription-id }} + - name: create workload msi + shell: bash + run: AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-msi - name: create cluster shell: bash run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} AZURE_LOCATION=${{ inputs.location }} make az-mkaks-cilium @@ -54,12 +57,9 @@ runs: client-id: ${{ inputs.client-id }} tenant-id: ${{ inputs.tenant-id }} subscription-id: ${{ inputs.subscription-id }} - - name: create workload id - shell: bash - run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-id-msi - - name: pause for msi to become available + - name: create federated cred shell: bash - run: sleep 10 + run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} make az-create-federated-cred - name: update azure perms shell: bash run: | diff --git a/Makefile-az.mk b/Makefile-az.mk index 3f131a188..2f59d7580 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -14,7 +14,7 @@ KARPENTER_SERVICE_ACCOUNT_NAME ?= karpenter-sa AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpentermsi KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID -az-all: az-login az-mkaks-cilium az-create-workload-id-msi az-perm az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload +az-all: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload az-all-savm: az-login az-mkaks-savm az-perm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines az-login: ## Login into Azure @@ -41,11 +41,13 @@ az-mkaks-cilium: az-mkacr ## Create test AKS cluster (with --network-dataplane c az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter -az-create-workload-id-msi: - $(eval AKS_OIDC_ISSUER=$(shell az aks show -n "${AZURE_CLUSTER_NAME}" -g "${AZURE_RESOURCE_GROUP}" --query "oidcIssuerProfile.issuerUrl" -otsv)) - +az-create-workload-msi: # create the workload MSI that is the backing for the karpenter pod auth az identity create --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --location "${AZURE_LOCATION}" + +az-create-federated-cred: + $(eval AKS_OIDC_ISSUER=$(shell az aks show -n "${AZURE_CLUSTER_NAME}" -g "${AZURE_RESOURCE_GROUP}" --query "oidcIssuerProfile.issuerUrl" -otsv)) + # create federated credential linked to the karpenter service account for auth usage az identity federated-credential create --name ${KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME} --identity-name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"${SYSTEM_NAMESPACE}":"${KARPENTER_SERVICE_ACCOUNT_NAME}" --audience api://AzureADTokenExchange From 77f28fb985d1723f068a1fb66b57a795730b8745 Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 23 Jan 2024 12:46:01 -0800 Subject: [PATCH 26/27] update naming to UseCredentialFromEnvironment --- Makefile-az.mk | 2 +- pkg/auth/autorest_auth.go | 2 +- pkg/auth/config.go | 22 +++++++++++----------- pkg/auth/cred.go | 2 +- skaffold.yaml | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 2f59d7580..b6247c037 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -96,7 +96,7 @@ az-patch-skaffold-azureoverlay: az-patch-skaffold az-fetch-network-info # old identity path is still the default, so need to override the values values with new logic. # TODO (chmcbrid): update the new logic path as the default. $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) - yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USE_NEW_CRED_WORKFLOW")) .value = "true"' skaffold.yaml + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USE_CREDENTIAL_FROM_ENVIRONMENT")) .value = "true"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USE_MANAGED_IDENTITY_EXTENSION")) .value = "false"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USER_ASSIGNED_IDENTITY_ID")) .value = ""' skaffold.yaml diff --git a/pkg/auth/autorest_auth.go b/pkg/auth/autorest_auth.go index 95a32d895..133e631a7 100644 --- a/pkg/auth/autorest_auth.go +++ b/pkg/auth/autorest_auth.go @@ -31,7 +31,7 @@ import ( func NewAuthorizer(config *Config, env *azure.Environment) (autorest.Authorizer, error) { // TODO (charliedmcb): need to get track 2 support for the skewer API, and align all auth under workload identity in the same way within cred.go - if config.UseNewCredWorkflow { + if config.UseCredentialFromEnvironment { klog.V(2).Infoln("auth: using workload identity for new authorizer") cred, err := azidentity.NewDefaultAzureCredential(nil) if err != nil { diff --git a/pkg/auth/config.go b/pkg/auth/config.go index 57014107a..b2db025a3 100644 --- a/pkg/auth/config.go +++ b/pkg/auth/config.go @@ -68,13 +68,13 @@ type Config struct { AuthMethod string `json:"authMethod" yaml:"authMethod"` // Settings for a service principal. - AADClientID string `json:"aadClientId" yaml:"aadClientId"` - AADClientSecret string `json:"aadClientSecret" yaml:"aadClientSecret"` - AADClientCertPath string `json:"aadClientCertPath" yaml:"aadClientCertPath"` - AADClientCertPassword string `json:"aadClientCertPassword" yaml:"aadClientCertPassword"` - UseNewCredWorkflow bool `json:"useNewCredWorkflow" yaml:"useNewCredWorkflow"` - UseManagedIdentityExtension bool `json:"useManagedIdentityExtension" yaml:"useManagedIdentityExtension"` - UserAssignedIdentityID string `json:"userAssignedIdentityID" yaml:"userAssignedIdentityID"` + AADClientID string `json:"aadClientId" yaml:"aadClientId"` + AADClientSecret string `json:"aadClientSecret" yaml:"aadClientSecret"` + AADClientCertPath string `json:"aadClientCertPath" yaml:"aadClientCertPath"` + AADClientCertPassword string `json:"aadClientCertPassword" yaml:"aadClientCertPassword"` + UseCredentialFromEnvironment bool `json:"useCredentialFromEnvironment" yaml:"useCredentialFromEnvironment"` + UseManagedIdentityExtension bool `json:"useManagedIdentityExtension" yaml:"useManagedIdentityExtension"` + UserAssignedIdentityID string `json:"userAssignedIdentityID" yaml:"userAssignedIdentityID"` //Configs only for AKS ClusterName string `json:"clusterName" yaml:"clusterName"` @@ -115,13 +115,13 @@ func (cfg *Config) BaseVars() { } func (cfg *Config) prepareID() error { - useNewCredWorkflowFromEnv := os.Getenv("ARM_USE_NEW_CRED_WORKFLOW") - if len(useNewCredWorkflowFromEnv) > 0 { - shouldUse, err := strconv.ParseBool(useNewCredWorkflowFromEnv) + useCredentialFromEnvironmentFromEnv := os.Getenv("ARM_USE_CREDENTIAL_FROM_ENVIRONMENT") + if len(useCredentialFromEnvironmentFromEnv) > 0 { + shouldUse, err := strconv.ParseBool(useCredentialFromEnvironmentFromEnv) if err != nil { return err } - cfg.UseNewCredWorkflow = shouldUse + cfg.UseCredentialFromEnvironment = shouldUse } useManagedIdentityExtensionFromEnv := os.Getenv("ARM_USE_MANAGED_IDENTITY_EXTENSION") if len(useManagedIdentityExtensionFromEnv) > 0 { diff --git a/pkg/auth/cred.go b/pkg/auth/cred.go index bb96b7ed5..bd1df61fd 100644 --- a/pkg/auth/cred.go +++ b/pkg/auth/cred.go @@ -30,7 +30,7 @@ func NewCredential(cfg *Config) (azcore.TokenCredential, error) { return nil, fmt.Errorf("failed to create credential, nil config provided") } - if cfg.UseNewCredWorkflow { + if cfg.UseCredentialFromEnvironment { klog.V(2).Infoln("cred: using workload identity for new credential") return azidentity.NewDefaultAzureCredential(nil) } diff --git a/skaffold.yaml b/skaffold.yaml index a1225907e..1d4105ff9 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -49,7 +49,7 @@ manifests: value: "Please run make az-all" - name: LOCATION value: westus2 - - name: ARM_USE_NEW_CRED_WORKFLOW + - name: ARM_USE_CREDENTIAL_FROM_ENVIRONMENT value: "false" - name: ARM_USE_MANAGED_IDENTITY_EXTENSION value: "true" From d7a4608b17223be11a12f39604288f3877aa4cad Mon Sep 17 00:00:00 2001 From: Charlie McBride Date: Tue, 23 Jan 2024 12:55:53 -0800 Subject: [PATCH 27/27] add in old logic for savm --- Makefile-az.mk | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index b6247c037..033fc7154 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -15,7 +15,7 @@ AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpentermsi KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID az-all: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload -az-all-savm: az-login az-mkaks-savm az-perm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines +az-all-savm: az-login az-mkaks-savm az-perm-savm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines az-login: ## Login into Azure az login @@ -127,6 +127,15 @@ az-perm: ## Create role assignments to let Karpenter manage VMs and Network az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Network Contributor" # in some case we create vnet here @echo Consider "make az-patch-skaffold"! +az-perm-savm: ## Create role assignments to let Karpenter manage VMs and Network + # Note: savm has not been converted over to use a workload identity + $(eval AZURE_OBJECT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".identityProfile.kubeletidentity.objectId")) + az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Virtual Machine Contributor" + az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor" + az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Managed Identity Operator" + az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Network Contributor" # in some case we create vnet here + @echo Consider "make az-patch-skaffold"! + az-perm-acr: $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) $(eval AZURE_ACR_ID=$(shell az acr show --name $(AZURE_ACR_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".id"))