Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(identity): add support for using workload id for karpenter pod #84

Merged
merged 34 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
c7c84d7
add in new make targets for workload id
Jan 4, 2024
f04c8c6
update working code
charliedmcb Jan 5, 2024
3a7ae87
saving possible working version
charliedmcb Jan 5, 2024
455adea
plug in the needed make commands and update acr perms for workload id
charliedmcb Jan 5, 2024
4cd29e0
update to allow sku client to still use SP based off of the workload id
charliedmcb Jan 5, 2024
da40a1c
update to principalId for e2e flow
charliedmcb Jan 8, 2024
74d0074
adding in more logging
Jan 9, 2024
33b7d7f
update github actions
Jan 9, 2024
847324f
revert bicep file
Jan 9, 2024
a4661f5
update TODO comment
Jan 9, 2024
d25eae5
fix github action for now by making two commands
Jan 9, 2024
119e02c
fix action
Jan 9, 2024
63a32e6
Revert "fix action"
Jan 9, 2024
e5a5ea3
Revert "fix github action for now by making two commands"
Jan 9, 2024
48bdef2
update sku to use new workload id auth patterning as the backing with…
charliedmcb Jan 9, 2024
1bb045c
Merge branch 'main' into charliedmcb/useWorkloadIdForKarpenter
charliedmcb Jan 9, 2024
f89ddc0
support savm path
Jan 9, 2024
854ea84
fix bicept format
charliedmcb Jan 9, 2024
e05e108
update so only azureoverlay path is on the new workload id for now
Jan 9, 2024
d16dbfd
updating multi-line action
Jan 9, 2024
443d530
get correct indentation
Jan 9, 2024
8181727
add in sleep
Jan 9, 2024
e865188
update msi wait action
Jan 9, 2024
76182a7
make presubmit
charliedmcb Jan 10, 2024
2ef6901
remove alias from note, and fix action
Jan 10, 2024
13b41a3
change to have create MSI happen before create cluster
Jan 17, 2024
14eae26
Merge branch 'main' into charliedmcb/useWorkloadIdForKarpenter
charliedmcb Jan 17, 2024
04452c9
Merge branch 'main' into charliedmcb/useWorkloadIdForKarpenter
charliedmcb Jan 18, 2024
77f28fb
update naming to UseCredentialFromEnvironment
Jan 23, 2024
d7a4608
add in old logic for savm
Jan 23, 2024
6a36134
Merge branch 'main' into charliedmcb/useWorkloadIdForKarpenter
charliedmcb Jan 25, 2024
a87eb6b
Merge branch 'main' into charliedmcb/useWorkloadIdForKarpenter
charliedmcb Jan 25, 2024
0067105
Merge branch 'main' into charliedmcb/useWorkloadIdForKarpenter
charliedmcb Jan 30, 2024
6b87a35
Merge branch 'main' into charliedmcb/useWorkloadIdForKarpenter
charliedmcb Jan 30, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/actions/e2e/create-cluster/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ runs:
client-id: ${{ inputs.client-id }}
tenant-id: ${{ inputs.tenant-id }}
subscription-id: ${{ inputs.subscription-id }}
- name: create workload id
shell: bash
run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-create-workload-id-msi
- name: update azure perms
shell: bash
run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm
run: |
AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-perm
AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_ACR_NAME=${{ inputs.acr_name }} make az-perm-acr
47 changes: 36 additions & 11 deletions Makefile-az.mk
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@ endif
AZURE_CLUSTER_NAME ?= karpenter
AZURE_RESOURCE_GROUP_MC = MC_$(AZURE_RESOURCE_GROUP)_$(AZURE_CLUSTER_NAME)_$(AZURE_LOCATION)

az-all: az-login az-mkaks-cilium az-perm az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload
az-all-savm: az-login az-mkaks-savm az-perm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines
KARPENTER_SERVICE_ACCOUNT_NAME ?= karpenter-sa
AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpentermsi
KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID

az-all: az-login az-mkaks-cilium az-create-workload-id-msi az-perm az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload
az-all-savm: az-login az-mkaks-savm az-perm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines

az-login: ## Login into Azure
az login
Expand All @@ -32,10 +36,19 @@ az-mkaks: az-mkacr ## Create test AKS cluster (with --vm-set-type AvailabilitySe

az-mkaks-cilium: az-mkacr ## Create test AKS cluster (with --network-dataplane cilium, --network-plugin cilium, and --network-plugin-mode overlay)
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \
--enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay
--enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay \
--enable-oidc-issuer --enable-workload-identity
az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing
skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter

az-create-workload-id-msi:
$(eval AKS_OIDC_ISSUER=$(shell az aks show -n "${AZURE_CLUSTER_NAME}" -g "${AZURE_RESOURCE_GROUP}" --query "oidcIssuerProfile.issuerUrl" -otsv))

# create the workload MSI that is the backing for the karpenter pod auth
az identity create --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --location "${AZURE_LOCATION}"
# create federated credential linked to the karpenter service account for auth usage
az identity federated-credential create --name ${KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME} --identity-name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --issuer "${AKS_OIDC_ISSUER}" --subject system:serviceaccount:"${SYSTEM_NAMESPACE}":"${KARPENTER_SERVICE_ACCOUNT_NAME}" --audience api://AzureADTokenExchange

az-mkaks-savm: az-mkrg ## Create experimental cluster with standalone VMs (+ ACR)
az deployment group create --resource-group $(AZURE_RESOURCE_GROUP) --template-file hack/azure/aks-savm.bicep --parameters aksname=$(AZURE_CLUSTER_NAME) acrname=$(AZURE_ACR_NAME)
az aks get-credentials --resource-group $(AZURE_RESOURCE_GROUP) --name $(AZURE_CLUSTER_NAME) --overwrite-existing
Expand Down Expand Up @@ -78,6 +91,18 @@ az-patch-skaffold-azureoverlay: az-patch-skaffold az-fetch-network-info
yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml
yq -i '.manifests.helm.releases[0].overrides.settings.azure.networkPlugin = "azure"' skaffold.yaml

# old identity path is still the default, so need to override the values values with new logic.
# TODO (chmcbrid): update the new logic path as the default.
$(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv))
yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USE_NEW_CRED_WORKFLOW")) .value = "true"' skaffold.yaml
yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USE_MANAGED_IDENTITY_EXTENSION")) .value = "false"' skaffold.yaml
yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="ARM_USER_ASSIGNED_IDENTITY_ID")) .value = ""' skaffold.yaml

yq -i '.manifests.helm.releases[0].overrides.serviceAccount.annotations."azure.workload.identity/client-id" = "$(KARPENTER_USER_ASSIGNED_CLIENT_ID)"' skaffold.yaml
yq -i '.manifests.helm.releases[0].overrides.serviceAccount.name = "$(KARPENTER_SERVICE_ACCOUNT_NAME)"' skaffold.yaml

yq -i '.manifests.helm.releases[0].overrides.podLabels ."azure.workload.identity/use" = "true"' skaffold.yaml

az-fetch-network-info:
$(eval AZURE_VNET_NAME=$(shell az network vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].name"))
yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_VNET_NAME")) .value = "$(AZURE_VNET_NAME)"' skaffold.yaml
Expand All @@ -92,18 +117,18 @@ az-rmvmss-vms: ## Delete all VMs in VMSS Flex (use with care!)
az vmss delete-instances --name $(AZURE_CLUSTER_NAME)-vmss --resource-group $(AZURE_RESOURCE_GROUP_MC) --instance-ids '*'

az-perm: ## Create role assignments to let Karpenter manage VMs and Network
# Note (charliedmcb): need to be objectId for E2E workflow as the pipeline identity doesn't have permissions to "query Graph API"
$(eval AZURE_OBJECT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".identityProfile.kubeletidentity.objectId"))
az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Virtual Machine Contributor"
az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor"
az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Managed Identity Operator"
az role assignment create --assignee $(AZURE_OBJECT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Network Contributor" # in some case we create vnet here
# Note (charliedmcb): need to be principalId for E2E workflow as the pipeline identity doesn't have permissions to "query Graph API"
$(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv))
az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Virtual Machine Contributor"
az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor"
az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Managed Identity Operator"
az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Network Contributor" # in some case we create vnet here
@echo Consider "make az-patch-skaffold"!

az-perm-acr:
$(eval AZURE_CLIENT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".identityProfile.kubeletidentity.clientId"))
$(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv))
$(eval AZURE_ACR_ID=$(shell az acr show --name $(AZURE_ACR_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".id"))
az role assignment create --assignee $(AZURE_CLIENT_ID) --scope $(AZURE_ACR_ID) --role "AcrPull"
az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope $(AZURE_ACR_ID) --role "AcrPull"

az-aks-check-acr:
az aks check-acr --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --acr $(AZURE_ACR_NAME)
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ require (
github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0 // indirect
github.com/hashicorp/golang-lru v1.0.2 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jongio/azidext/go/azidext v0.5.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kelseyhightower/envconfig v1.4.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jongio/azidext/go/azidext v0.5.0 h1:uPInXD4NZ3J0k79FPwIA0YXknFn+WcqZqSgs3/jPgvQ=
github.com/jongio/azidext/go/azidext v0.5.0/go.mod h1:TVRX/hJhzbsCKaOIzicH6a8IvOH0hpjWk/JwZZgtXeU=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
Expand Down
4 changes: 2 additions & 2 deletions hack/azure/aks-savm.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ resource vnet 'Microsoft.Network/virtualNetworks@2022-05-01' = {
//resource podsubnet 'subnets' existing = { name: 'podsubnet' }
}

resource aks 'Microsoft.ContainerService/managedClusters@2022-07-01' = {
resource aks 'Microsoft.ContainerService/managedClusters@2023-01-02-preview' = {
location: location
name: aksname
identity: {
Expand Down Expand Up @@ -67,7 +67,7 @@ resource aks 'Microsoft.ContainerService/managedClusters@2022-07-01' = {
serviceCidr: '10.0.0.0/16'
dnsServiceIP: '10.0.0.10'
dockerBridgeCidr: '172.17.0.1/16'
}
}
}
}

Expand Down
15 changes: 14 additions & 1 deletion pkg/auth/autorest_auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,22 @@ import (
"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/adal"
"github.com/Azure/go-autorest/autorest/azure"
klog "k8s.io/klog/v2"
"k8s.io/klog/v2"

"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"github.com/jongio/azidext/go/azidext"
)

func NewAuthorizer(config *Config, env *azure.Environment) (autorest.Authorizer, error) {
if config.UseNewCredWorkflow {
klog.V(2).Infoln("auth: using workload identity for new authorizer")
cred, err := azidentity.NewDefaultAzureCredential(nil)
if err != nil {
return nil, fmt.Errorf("default cred: %w", err)
}
return azidext.NewTokenCredentialAdapter(cred, []string{azidext.DefaultManagementScope}), nil
}

token, err := newServicePrincipalTokenFromCredentials(config, env)
if err != nil {
return nil, fmt.Errorf("retrieve service principal token: %w", err)
Expand All @@ -42,6 +54,7 @@ func newServicePrincipalTokenFromCredentials(config *Config, env *azure.Environm
return nil, fmt.Errorf("creating the OAuth config: %w", err)
}

// TODO (charliedmcb): look at updating this with the new workload identity logic. Would be nice if we could align all the auth.
if config.UseManagedIdentityExtension {
klog.V(2).Infoln("azure: using managed identity extension to retrieve access token")
msiEndpoint, err := adal.GetMSIVMEndpoint()
Expand Down
13 changes: 11 additions & 2 deletions pkg/auth/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ type Config struct {
AADClientSecret string `json:"aadClientSecret" yaml:"aadClientSecret"`
AADClientCertPath string `json:"aadClientCertPath" yaml:"aadClientCertPath"`
AADClientCertPassword string `json:"aadClientCertPassword" yaml:"aadClientCertPassword"`
UseNewCredWorkflow bool `json:"useNewCredWorkflow" yaml:"useNewCredWorkflow"`
UseManagedIdentityExtension bool `json:"useManagedIdentityExtension" yaml:"useManagedIdentityExtension"`
UserAssignedIdentityID string `json:"userAssignedIdentityID" yaml:"userAssignedIdentityID"`

Expand All @@ -87,7 +88,7 @@ type Config struct {

func (cfg *Config) PrepareConfig() error {
cfg.BaseVars()
err := cfg.prepareMSI()
err := cfg.prepareID()
if err != nil {
return err
}
Expand All @@ -113,7 +114,15 @@ func (cfg *Config) BaseVars() {
// cfg.VnetGuid = os.Getenv("AZURE_VNET_GUID") // This field needs to be resolved inside of karpenter, so we will get it in the azClient initialization
}

func (cfg *Config) prepareMSI() error {
func (cfg *Config) prepareID() error {
useNewCredWorkflowFromEnv := os.Getenv("ARM_USE_NEW_CRED_WORKFLOW")
if len(useNewCredWorkflowFromEnv) > 0 {
shouldUse, err := strconv.ParseBool(useNewCredWorkflowFromEnv)
if err != nil {
return err
}
cfg.UseNewCredWorkflow = shouldUse
}
useManagedIdentityExtensionFromEnv := os.Getenv("ARM_USE_MANAGED_IDENTITY_EXTENSION")
if len(useManagedIdentityExtensionFromEnv) > 0 {
shouldUse, err := strconv.ParseBool(useManagedIdentityExtensionFromEnv)
Expand Down
8 changes: 8 additions & 0 deletions pkg/auth/cred.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"k8s.io/klog/v2"
)

// NewCredential provides a token credential for msi and service principal auth
Expand All @@ -29,7 +30,13 @@ func NewCredential(cfg *Config) (azcore.TokenCredential, error) {
return nil, fmt.Errorf("failed to create credential, nil config provided")
}

if cfg.UseNewCredWorkflow {
klog.V(2).Infoln("cred: using workload identity for new credential")
return azidentity.NewDefaultAzureCredential(nil)
}

if cfg.UseManagedIdentityExtension || cfg.AADClientID == "msi" {
klog.V(2).Infoln("cred: using msi for new credential")
msiCred, err := azidentity.NewManagedIdentityCredential(&azidentity.ManagedIdentityCredentialOptions{
ID: azidentity.ClientID(cfg.UserAssignedIdentityID),
})
Expand All @@ -39,6 +46,7 @@ func NewCredential(cfg *Config) (azcore.TokenCredential, error) {
return msiCred, nil
}
// service principal case
klog.V(2).Infoln("cred: using sp for new credential")
cred, err := azidentity.NewClientSecretCredential(cfg.TenantID, cfg.AADClientID, cfg.AADClientSecret, nil)
if err != nil {
return nil, err
Expand Down
2 changes: 2 additions & 0 deletions skaffold.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ manifests:
value: ""
- name: LOCATION
value: westus2
- name: ARM_USE_NEW_CRED_WORKFLOW
value: "false"
- name: ARM_USE_MANAGED_IDENTITY_EXTENSION
value: "true"
- name: ARM_USER_ASSIGNED_IDENTITY_ID
Expand Down
Loading