From 5392c8f93cfb567a356fe04b97d043fe77063174 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Thu, 28 Mar 2024 17:11:09 -0700 Subject: [PATCH 01/23] feat: allow karpenter to use a custom vnet --- Makefile-az.mk | 19 ++- pkg/auth/config.go | 10 +- pkg/auth/config_test.go | 6 - .../imagefamily/bootstrap/aksbootstrap.go | 25 +--- pkg/providers/imagefamily/resolver.go | 36 +++++ pkg/providers/instance/azure_client.go | 9 +- pkg/providers/instancetype/suite_test.go | 83 ++++++++---- pkg/test/environment.go | 14 +- pkg/utils/subnet_parser.go | 56 ++++++++ pkg/utils/subnet_parser_test.go | 127 ++++++++++++++++++ skaffold.yaml | 4 - 11 files changed, 300 insertions(+), 89 deletions(-) create mode 100644 pkg/utils/subnet_parser.go create mode 100644 pkg/utils/subnet_parser_test.go diff --git a/Makefile-az.mk b/Makefile-az.mk index 04e14b30e..1c11c68b9 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -82,17 +82,17 @@ az-patch-skaffold: ## Update Azure client env vars and settings in skaffold con yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="KUBELET_BOOTSTRAP_TOKEN")).value = "$(BOOTSTRAP_TOKEN)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="SSH_PUBLIC_KEY")).value = "$(SSH_PUBLIC_KEY)"' skaffold.yaml -az-patch-skaffold-kubenet: az-patch-skaffold az-fetch-network-info - $(eval AZURE_SUBNET_ID=$(shell az network vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].subnets[0].id")) +az-patch-skaffold-kubenet: az-patch-skaffold + $(eval AZURE_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | 
select(.name=="NETWORK_PLUGIN").value) = "kubenet"' skaffold.yaml -az-patch-skaffold-azure: az-patch-skaffold az-fetch-network-info +az-patch-skaffold-azure: az-patch-skaffold $(eval AZURE_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml -az-patch-skaffold-azureoverlay: az-patch-skaffold az-fetch-network-info - $(eval AZURE_SUBNET_ID=$(shell az network vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].subnets[0].id")) +az-patch-skaffold-azureoverlay: az-patch-skaffold + $(eval AZURE_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="NETWORK_PLUGIN").value) = "azure"' skaffold.yaml @@ -108,12 +108,6 @@ az-patch-skaffold-azureoverlay: az-patch-skaffold az-fetch-network-info yq -i '.manifests.helm.releases[0].overrides.podLabels ."azure.workload.identity/use" = "true"' skaffold.yaml -az-fetch-network-info: - $(eval AZURE_VNET_NAME=$(shell az network vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].name")) - yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_VNET_NAME")) .value = "$(AZURE_VNET_NAME)"' skaffold.yaml - $(eval AZURE_SUBNET_NAME=$(shell az network vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].subnets[0].name")) - yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_NAME")) .value = "$(AZURE_SUBNET_NAME)"' skaffold.yaml - az-mkvmssflex: ## Create VMSS Flex (optional, only if 
creating VMs referencing this VMSS) az vmss create --name $(AZURE_CLUSTER_NAME)-vmss --resource-group $(AZURE_RESOURCE_GROUP_MC) --location $(AZURE_LOCATION) \ --instance-count 0 --orchestration-mode Flexible --platform-fault-domain-count 1 --zones 1 2 3 @@ -235,6 +229,9 @@ az-rmnodeclaims: ## kubectl delete all nodeclaims; don't wait for finalizers (us az-taintsystemnodes: ## Taint all system nodepool nodes kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --selector='kubernetes.azure.com/mode=system' --overwrite +az-taintnodes: ## Taint all nodes + kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all --overwrite + az-e2etests: ## Run e2etests kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all --overwrite TEST_SUITE=Utilization make e2etests diff --git a/pkg/auth/config.go b/pkg/auth/config.go index b2db025a3..b9eda69fb 100644 --- a/pkg/auth/config.go +++ b/pkg/auth/config.go @@ -81,9 +81,7 @@ type Config struct { //Config only for AKS NodeResourceGroup string `json:"nodeResourceGroup" yaml:"nodeResourceGroup"` //SubnetId is the resource ID of the subnet that VM network interfaces should use - SubnetID string `json:"subnetId" yaml:"subnetId"` - VnetName string `json:"vnetName" yaml:"vnetName"` - SubnetName string `json:"subnetName" yaml:"subnetName"` + SubnetID string `json:"subnetId" yaml:"subnetId"` } func (cfg *Config) PrepareConfig() error { @@ -109,8 +107,6 @@ func (cfg *Config) BaseVars() { cfg.ClusterName = os.Getenv("AZURE_CLUSTER_NAME") cfg.NodeResourceGroup = os.Getenv("AZURE_NODE_RESOURCE_GROUP") cfg.SubnetID = os.Getenv("AZURE_SUBNET_ID") - cfg.SubnetName = os.Getenv("AZURE_SUBNET_NAME") - cfg.VnetName = os.Getenv("AZURE_VNET_NAME") // cfg.VnetGuid = os.Getenv("AZURE_VNET_GUID") // This field needs to be resolved inside of karpenter, so we will get it in the azClient initialization } @@ -187,8 +183,6 @@ func (cfg *Config) TrimSpace() { cfg.ClusterName = strings.TrimSpace(cfg.ClusterName) cfg.NodeResourceGroup = 
strings.TrimSpace(cfg.NodeResourceGroup) cfg.SubnetID = strings.TrimSpace(cfg.SubnetID) - cfg.SubnetName = strings.TrimSpace(cfg.SubnetName) - cfg.VnetName = strings.TrimSpace(cfg.VnetName) } func (cfg *Config) validate() error { @@ -200,8 +194,6 @@ func (cfg *Config) validate() error { // Even though the config doesnt use some of these, // its good to validate they were set in the environment {cfg.SubnetID, "subnet ID"}, - {cfg.SubnetName, "subnet name"}, - {cfg.VnetName, "vnet name"}, } for _, field := range fields { diff --git a/pkg/auth/config_test.go b/pkg/auth/config_test.go index 3b35790d9..12384adfc 100644 --- a/pkg/auth/config_test.go +++ b/pkg/auth/config_test.go @@ -41,8 +41,6 @@ func TestBuildAzureConfig(t *testing.T) { ResourceGroup: "my-rg", NodeResourceGroup: "my-node-rg", SubnetID: "12345", - SubnetName: "my-subnet", - VnetName: "my-vnet", VMType: "vmss", }, wantErr: false, @@ -62,8 +60,6 @@ func TestBuildAzureConfig(t *testing.T) { ResourceGroup: "my-rg", NodeResourceGroup: "my-node-rg", SubnetID: "12345", - SubnetName: "my-subnet", - VnetName: "my-vnet", VMType: "vm", }, wantErr: false, @@ -98,8 +94,6 @@ func TestBuildAzureConfig(t *testing.T) { ResourceGroup: "my-rg", NodeResourceGroup: "my-node-rg", SubnetID: "12345", - SubnetName: "my-subnet", - VnetName: "my-vnet", VMType: "vmss", UseManagedIdentityExtension: true, UserAssignedIdentityID: "12345", diff --git a/pkg/providers/imagefamily/bootstrap/aksbootstrap.go b/pkg/providers/imagefamily/bootstrap/aksbootstrap.go index 0ba59a481..9de7a02b0 100644 --- a/pkg/providers/imagefamily/bootstrap/aksbootstrap.go +++ b/pkg/providers/imagefamily/bootstrap/aksbootstrap.go @@ -21,7 +21,6 @@ import ( _ "embed" "encoding/base64" "fmt" - "os" "strings" "text/template" @@ -389,16 +388,8 @@ var ( } ) -// Node Labels for Vnet const ( - vnetDataPlaneLabel = "kubernetes.azure.com/ebpf-dataplane" - vnetNetworkNameLabel = "kubernetes.azure.com/network-name" vnetSubnetNameLabel = 
"kubernetes.azure.com/network-subnet" - vnetSubscriptionIDLabel = "kubernetes.azure.com/network-subscription" - vnetGUIDLabel = "kubernetes.azure.com/nodenetwork-vnetguid" - vnetPodNetworkTypeLabel = "kubernetes.azure.com/podnetwork-type" - ciliumDataPlane = "cilium" - overlayNetworkType = "overlay" globalAKSMirror = "https://acs-mirror.azureedge.net" ) @@ -464,21 +455,9 @@ func (a AKS) applyOptions(nbv *NodeBootstrapVariables) { kubeletLabels := lo.Assign(kubeletNodeLabelsBase, a.Labels) getAgentbakerGeneratedLabels(a.ResourceGroup, kubeletLabels) - //Adding vnet-related labels to the nodeLabels. - azureVnetGUID := os.Getenv("AZURE_VNET_GUID") - azureVnetName := os.Getenv("AZURE_VNET_NAME") - azureSubnetName := os.Getenv("AZURE_SUBNET_NAME") - - vnetLabels := map[string]string{ - vnetDataPlaneLabel: ciliumDataPlane, - vnetNetworkNameLabel: azureVnetName, - vnetSubnetNameLabel: azureSubnetName, - vnetSubscriptionIDLabel: a.SubscriptionID, - vnetGUIDLabel: azureVnetGUID, - vnetPodNetworkTypeLabel: overlayNetworkType, - } - kubeletLabels = lo.Assign(kubeletLabels, vnetLabels) + nbv.Subnet = a.Labels[vnetSubnetNameLabel] + nbv.KubeletNodeLabels = strings.Join(lo.MapToSlice(kubeletLabels, func(k, v string) string { return fmt.Sprintf("%s=%s", k, v) }), ",") diff --git a/pkg/providers/imagefamily/resolver.go b/pkg/providers/imagefamily/resolver.go index 2c292e959..6b6766b8f 100644 --- a/pkg/providers/imagefamily/resolver.go +++ b/pkg/providers/imagefamily/resolver.go @@ -18,6 +18,7 @@ package imagefamily import ( "context" + "os" core "k8s.io/api/core/v1" "knative.dev/pkg/logging" @@ -28,6 +29,7 @@ import ( "github.com/Azure/karpenter-provider-azure/pkg/providers/imagefamily/bootstrap" "github.com/Azure/karpenter-provider-azure/pkg/providers/instancetype" template "github.com/Azure/karpenter-provider-azure/pkg/providers/launchtemplate/parameters" + "github.com/Azure/karpenter-provider-azure/pkg/utils" "github.com/samber/lo" corev1beta1 
"sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/cloudprovider" @@ -37,12 +39,24 @@ const ( networkPluginAzure = "azure" networkPluginKubenet = "kubenet" + networkPolicyCilium = "cilium" + // defaultKubernetesMaxPodsAzure is the maximum number of pods to run on a node for Azure CNI Overlay. defaultKubernetesMaxPodsAzure = 250 // defaultKubernetesMaxPodsKubenet is the maximum number of pods to run on a node for Kubenet. defaultKubernetesMaxPodsKubenet = 100 // defaultKubernetesMaxPods is the maximum number of pods on a node. defaultKubernetesMaxPods = 110 + + // AzureCNI VNET Labels + vnetDataPlaneLabel = "kubernetes.azure.com/ebpf-dataplane" + vnetNetworkNameLabel = "kubernetes.azure.com/network-name" + vnetSubnetNameLabel = "kubernetes.azure.com/network-subnet" + vnetSubscriptionIDLabel = "kubernetes.azure.com/network-subscription" + vnetGUIDLabel = "kubernetes.azure.com/nodenetwork-vnetguid" + vnetPodNetworkTypeLabel = "kubernetes.azure.com/podnetwork-type" + + overlayNetworkType = "overlay" ) // Resolver is able to fill-in dynamic launch template parameters @@ -95,6 +109,12 @@ func (r Resolver) Resolve(ctx context.Context, nodeClass *v1alpha2.AKSNodeClass, instancetype.MemoryAvailable: instanceType.Overhead.EvictionThreshold.Memory().String()} kubeletConfig.MaxPods = lo.ToPtr(getMaxPods(staticParameters.NetworkPlugin)) + if staticParameters.NetworkPlugin == networkPluginAzure { + for k, v := range getAzureCNILabels(nodeClass) { + staticParameters.Labels[k] = v + } + } + logging.FromContext(ctx).Infof("Resolved image %s for instance type %s", imageID, instanceType.Name) template := &template.Parameters{ StaticParameters: staticParameters, @@ -130,3 +150,19 @@ func getMaxPods(networkPlugin string) int32 { } return defaultKubernetesMaxPods } + +// getVnetLabelValues returns the labels for AzureCNI for the vnet and subnet. This function assumes we assert in the auth config that AZURE_VNET_GUID and AZURE_SUBNET_ID are set. 
+// See how split logic works here: https://go.dev/play/p/l3l7Zrg_pdd. +func getAzureCNILabels(_ *v1alpha2.AKSNodeClass) map[string]string { + // TODO(bsoghigian): this should be refactored to lo.Ternary(nodeClass.Spec.VnetSubnetID != nil, lo.FromPtr(nodeClass.Spec.VnetSubnetID), os.Getenv("AZURE_SUBNET_ID")) when we add VnetSubnetID to the nodeclass + vnetSubnetComponents, _ := utils.GetVnetSubnetIDComponents(os.Getenv("AZURE_SUBNET_ID")) + vnetLabels := map[string]string{ + vnetDataPlaneLabel: networkPolicyCilium, + vnetNetworkNameLabel: vnetSubnetComponents.VNetName, + vnetSubnetNameLabel: vnetSubnetComponents.SubnetName, + vnetSubscriptionIDLabel: vnetSubnetComponents.SubscriptionID, + vnetGUIDLabel: os.Getenv("AZURE_VNET_GUID"), + vnetPodNetworkTypeLabel: overlayNetworkType, + } + return vnetLabels +} diff --git a/pkg/providers/instance/azure_client.go b/pkg/providers/instance/azure_client.go index 52509c17d..52486ba55 100644 --- a/pkg/providers/instance/azure_client.go +++ b/pkg/providers/instance/azure_client.go @@ -35,6 +35,7 @@ import ( "github.com/Azure/karpenter-provider-azure/pkg/providers/imagefamily" "github.com/Azure/karpenter-provider-azure/pkg/providers/instance/skuclient" "github.com/Azure/karpenter-provider-azure/pkg/providers/loadbalancer" + "github.com/Azure/karpenter-provider-azure/pkg/utils" armopts "github.com/Azure/karpenter-provider-azure/pkg/utils/opts" klog "k8s.io/klog/v2" @@ -114,12 +115,16 @@ func CreateAZClient(ctx context.Context, cfg *auth.Config) (*AZClient, error) { } func handleVNET(cfg *auth.Config, vnetClient *armnetwork.VirtualNetworksClient) error { - vnet, err := vnetClient.Get(context.Background(), cfg.NodeResourceGroup, cfg.VnetName, nil) + subnetParts, err := utils.GetVnetSubnetIDComponents(cfg.SubnetID) + if err != nil { + return err + } + vnet, err := vnetClient.Get(context.Background(), subnetParts.ResourceGroupName, subnetParts.VNetName, nil) if err != nil { return err } if vnet.Properties == nil || 
vnet.Properties.ResourceGUID == nil { - return fmt.Errorf("vnet %s does not have a resource GUID", cfg.VnetName) + return fmt.Errorf("vnet %s does not have a resource GUID", subnetParts.VNetName) } os.Setenv("AZURE_VNET_GUID", lo.FromPtr(vnet.Properties.ResourceGUID)) return nil diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 9de6ac78a..ed9d8ce20 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -110,15 +110,12 @@ var _ = AfterSuite(func() { }) var _ = Describe("InstanceType Provider", func() { - var nodeClass *v1alpha2.AKSNodeClass var nodePool *corev1beta1.NodePool BeforeEach(func() { os.Setenv("AZURE_VNET_GUID", "test-vnet-guid") - os.Setenv("AZURE_VNET_NAME", "aks-vnet-00000000") - os.Setenv("AZURE_SUBNET_NAME", "test-subnet-name") - + os.Setenv("AZURE_SUBNET_ID", test.DefaultVnetSubnetID) nodeClass = test.AKSNodeClass() nodePool = coretest.NodePool(corev1beta1.NodePool{ Spec: corev1beta1.NodePoolSpec{ @@ -142,6 +139,39 @@ var _ = Describe("InstanceType Provider", func() { ExpectCleanedUp(ctx, env.Client) }) + Context("Subnet", func() { + It("should use the AZURE_SUBNET_ID", func() { + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + pod := coretest.UnschedulablePod() + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, coreProvisioner, pod) + ExpectScheduled(ctx, env.Client, pod) + nic := azureEnv.NetworkInterfacesAPI.NetworkInterfacesCreateOrUpdateBehavior.CalledWithInput.Pop() + Expect(nic).NotTo(BeNil()) + Expect(lo.FromPtr(nic.Interface.Properties.IPConfigurations[0].Properties.Subnet.ID)).To(Equal("/subscriptions/12345678-1234-1234-1234-123456789012/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/karpentervnet/subnets/karpentersub")) + }) + It("should produce all required azure cni labels", func() { + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + pod := coretest.UnschedulablePod() + ExpectProvisioned(ctx, 
env.Client, cluster, cloudProvider, coreProvisioner, pod) + ExpectScheduled(ctx, env.Client, pod) + + Expect(azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Len()).To(Equal(1)) + vm := azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Pop().VM + customData := *vm.Properties.OSProfile.CustomData + Expect(customData).ToNot(BeNil()) + decodedBytes, err := base64.StdEncoding.DecodeString(customData) + Expect(err).To(Succeed()) + decodedString := string(decodedBytes[:]) + Expect(decodedString).To(SatisfyAll( + ContainSubstring("kubernetes.azure.com/ebpf-dataplane=cilium"), + ContainSubstring("kubernetes.azure.com/network-name=karpentervnet"), + ContainSubstring("kubernetes.azure.com/network-subnet=karpentersub"), + ContainSubstring("kubernetes.azure.com/network-subscription=12345678-1234-1234-1234-123456789012"), + ContainSubstring("kubernetes.azure.com/nodenetwork-vnetguid=test-vnet-guid"), + ContainSubstring("kubernetes.azure.com/podnetwork-type=overlay"), + )) + }) + }) Context("VM Creation Failures", func() { It("should delete the network interface on failure to create the vm", func() { ErrMsg := "test error" @@ -531,6 +561,26 @@ var _ = Describe("InstanceType Provider", func() { Expect(kubeletFlags).To(ContainSubstring("--image-gc-high-threshold=30")) Expect(kubeletFlags).To(ContainSubstring("--cpu-cfs-quota=true")) }) + It("should not contain the azure cni vnet labels", func() { + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + pod := coretest.UnschedulablePod() + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, coreProvisioner, pod) + ExpectScheduled(ctx, env.Client, pod) + + Expect(azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Len()).To(Equal(1)) + vm := azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Pop().VM + customData := *vm.Properties.OSProfile.CustomData + Expect(customData).ToNot(BeNil()) + decodedBytes, err 
:= base64.StdEncoding.DecodeString(customData) + Expect(err).To(Succeed()) + decodedString := string(decodedBytes[:]) + Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/ebpf-dataplane=")) + Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/network-name=")) + Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/network-subnet=")) + Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/network-subscription=")) + Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/nodenetwork-vnetguid=")) + Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/podnetwork-type=")) + }) It("should support provisioning with kubeletConfig, computeResources and maxPods specified", func() { nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ PodsPerCore: lo.ToPtr(int32(110)), @@ -592,31 +642,6 @@ var _ = Describe("InstanceType Provider", func() { }) }) - Context("Provisioner with VNetNodeLabel", func() { - It("should support provisioning with VNet node labels", func() { - ExpectApplied(ctx, env.Client, nodePool, nodeClass) - pod := coretest.UnschedulablePod() - ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, coreProvisioner, pod) - ExpectScheduled(ctx, env.Client, pod) - - Expect(azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Len()).To(Equal(1)) - vm := azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Pop().VM - customData := *vm.Properties.OSProfile.CustomData - Expect(customData).ToNot(BeNil()) - decodedBytes, err := base64.StdEncoding.DecodeString(customData) - Expect(err).To(Succeed()) - decodedString := string(decodedBytes[:]) - Expect(decodedString).To(SatisfyAll( - ContainSubstring("kubernetes.azure.com/ebpf-dataplane=cilium"), - ContainSubstring("kubernetes.azure.com/network-name=aks-vnet-00000000"), - ContainSubstring("kubernetes.azure.com/network-subnet=test-subnet-name"), - 
ContainSubstring("kubernetes.azure.com/network-subscription=test-subscription"), - ContainSubstring("kubernetes.azure.com/nodenetwork-vnetguid=test-vnet-guid"), - ContainSubstring("kubernetes.azure.com/podnetwork-type=overlay"), - )) - }) - }) - Context("Unavailable Offerings", func() { It("should not allocate a vm in a zone marked as unavailable", func() { azureEnv.UnavailableOfferingsCache.MarkUnavailable(ctx, "ZonalAllocationFailure", "Standard_D2_v2", fmt.Sprintf("%s-1", fake.Region), corev1beta1.CapacityTypeSpot) diff --git a/pkg/test/environment.go b/pkg/test/environment.go index 2cf3fe0c0..83546b535 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -41,7 +41,11 @@ func init() { corev1beta1.NormalizedLabels = lo.Assign(corev1beta1.NormalizedLabels, map[string]string{"topology.disk.csi.azure.com/zone": corev1.LabelTopologyZone}) } -var resourceGroup = "test-resourceGroup" +var ( + resourceGroup = "test-resourceGroup" + + DefaultVnetSubnetID = "/subscriptions/12345678-1234-1234-1234-123456789012/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/karpentervnet/subnets/karpentersub" +) type Environment struct { // API @@ -137,10 +141,10 @@ func NewRegionalEnvironment(ctx context.Context, env *coretest.Environment, regi launchTemplateProvider, loadBalancerProvider, unavailableOfferingsCache, - region, // region - resourceGroup, // resourceGroup - "", // subnet - "", // subscriptionID + region, // region + resourceGroup, // resourceGroup + DefaultVnetSubnetID, // subnet + "", // subscriptionID ) return &Environment{ diff --git a/pkg/utils/subnet_parser.go b/pkg/utils/subnet_parser.go new file mode 100644 index 000000000..b43e1871c --- /dev/null +++ b/pkg/utils/subnet_parser.go @@ -0,0 +1,56 @@ +/* +Portions Copyright (c) Microsoft Corporation. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package utils + +import ( + "fmt" + "strings" +) + +// this parsing function replaces three different functions in different packages that all had bugs. Please don't use a regex to parse these +type vnetSubnetResource struct { + SubscriptionID string + ResourceGroupName string + VNetName string + SubnetName string +} + +// GetSubnetResourceID constructs the subnet resource id +func GetSubnetResourceID(subscriptionID, resourceGroupName, virtualNetworkName, subnetName string) string { + // an example subnet resource: /subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Network/virtualNetworks/{virtualNetworkName}/subnets/{subnetName} + return fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/virtualNetworks/%s/subnets/%s", subscriptionID, resourceGroupName, virtualNetworkName, subnetName) +} + +func GetVnetSubnetIDComponents(vnetSubnetID string) (vnetSubnetResource, error) { + parts := strings.Split(vnetSubnetID, "/") + if len(parts) != 11 { + return vnetSubnetResource{}, fmt.Errorf("invalid vnet subnet id: %s", vnetSubnetID) + } + + vs := vnetSubnetResource{ + SubscriptionID: parts[2], + ResourceGroupName: parts[4], + VNetName: parts[8], + SubnetName: parts[10], + } + + //this is a cheap way of ensuring all the names match + mirror := GetSubnetResourceID(vs.SubscriptionID, vs.ResourceGroupName, vs.VNetName, vs.SubnetName) + if !strings.EqualFold(mirror, vnetSubnetID) { + return vnetSubnetResource{}, fmt.Errorf("invalid vnet subnet id: %s", vnetSubnetID) + } + return vs, nil +} diff --git 
a/pkg/utils/subnet_parser_test.go b/pkg/utils/subnet_parser_test.go new file mode 100644 index 000000000..dd83c7b37 --- /dev/null +++ b/pkg/utils/subnet_parser_test.go @@ -0,0 +1,127 @@ +/* +Portions Copyright (c) Microsoft Corporation. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestCustomvnet(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "GetVnetSubnetIDComponents") +} + +func Benchmark(b *testing.B) { + for i := 0; i < b.N; i++ { + _, err := GetVnetSubnetIDComponents("/subscriptions/00000000-0000-0000-0000-0000000000/resourceGroups/myrg/providers/Microsoft.Network/virtualNetworks/my-vnet/subnets/default1") + if err != nil { + b.Fatal(err) + } + } +} + +var _ = Describe("GetVnetSubnetIDComponents", func() { + It("should return correct subnet id components", func() { + subnetResource, err := GetVnetSubnetIDComponents("/subscriptions/00000000-0000-0000-0000-0000000000/resourceGroups/myrg/providers/Microsoft.Network/virtualNetworks/my-vnet/subnets/default1") + Expect(err).ToNot(HaveOccurred()) + subscriptionID := subnetResource.SubscriptionID + resourceGroupName := subnetResource.ResourceGroupName + vNetName := subnetResource.VNetName + subnetName := subnetResource.SubnetName + + Expect(subscriptionID).To(Equal("00000000-0000-0000-0000-0000000000")) + Expect(resourceGroupName).To(Equal("myrg")) + Expect(vNetName).To(Equal("my-vnet")) + 
Expect(subnetName).To(Equal("default1")) + }) + It("should return error when unable to parse vnet subnet id", func() { + // "/subscriptions/00000000-0000-0000-0000-0000000000/resourceGroups/myrg/providers/Microsoft.Network/virtualNetworks/my-vnet/subnets/default1" + customVnetSubnetID := "someSubnetID" // invalid format + _, err := GetVnetSubnetIDComponents(customVnetSubnetID) + Expect(err).To(HaveOccurred()) + + // "resourceGr" instead of "resourceGroups" in customVnetSubnetID + customVnetSubnetID = "/subscriptions/00000000-0000-0000-0000-0000000000/resourceGr/myrg/providers/Microsoft.Network/virtualNetworks/my-vnet/subnets/default1" + _, err = GetVnetSubnetIDComponents(customVnetSubnetID) + Expect(err).To(HaveOccurred()) + }) + + It("Is reflexive", func() { + vnetsubnetid := GetSubnetResourceID("sam", "red", "violet", "subaru") + vnet, err := GetVnetSubnetIDComponents(vnetsubnetid) + Expect(err).To(BeNil()) + + Expect(vnet.SubscriptionID).To(Equal("sam")) + Expect(vnet.ResourceGroupName).To(Equal("red")) + Expect(vnet.VNetName).To(Equal("violet")) + Expect(vnet.SubnetName).To(Equal("subaru")) + }) + + It("real world wierdness (subnets is repeated broke old regex)", func() { + vnetsubnetid := "/subscriptions/00000000-0000-0000-0000-0000000000/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/sillygeese-VNET/subnets/subnets/AKSMgmtv2-Subnet" + _, err := GetVnetSubnetIDComponents(vnetsubnetid) + Expect(err).ToNot(BeNil()) + }) + + It("Is case insensitive (subnetparser.GetVnetSubnetIDComponents)", func() { + vnetsubnetid := "/SubscRiptionS/mySubscRiption/ResourceGroupS/myResourceGroup/ProviDerS/MicrOsofT.NetWorK/VirtualNetwOrkS/myVirtualNetwork/SubNetS/mySubnet" + vnet, err := GetVnetSubnetIDComponents(vnetsubnetid) + Expect(err).ToNot(HaveOccurred()) + Expect(vnet.SubscriptionID).To(Equal("mySubscRiption")) + Expect(vnet.ResourceGroupName).To(Equal("myResourceGroup")) + Expect(vnet.VNetName).To(Equal("myVirtualNetwork")) + 
Expect(vnet.SubnetName).To(Equal("mySubnet")) + }) + + It("Fails when appropriate", func() { + _, err := GetVnetSubnetIDComponents("what/a/bunch/of/junk") + Expect(err).ToNot(BeNil()) + _, err = GetVnetSubnetIDComponents("/subscriptions/sam/resourceGroups/red/providers/Microsoft.Network/virtualNetworks/soclose") + Expect(err).ToNot(BeNil()) + }) + + It("Test GetVNETSubnetIDComponents", func() { + vnetSubnetID := "/subscriptions/SUB_ID/resourceGroups/RG_NAME/providers/Microsoft.Network/virtualNetworks/VNET_NAME/subnets/SUBNET_NAME" + vs, err := GetVnetSubnetIDComponents(vnetSubnetID) + Expect(err).To(BeNil()) + Expect(vs.SubscriptionID).To(Equal("SUB_ID")) + Expect(vs.ResourceGroupName).To(Equal("RG_NAME")) + Expect(vs.VNetName).To(Equal("VNET_NAME")) + Expect(vs.SubnetName).To(Equal("SUBNET_NAME")) + + // case-insensitive match + vnetSubnetID = "/SubscriPtioNS/SUB_ID/REsourceGroupS/RG_NAME/ProViderS/MicrosoFT.NetWorK/VirtualNetWorKS/VNET_NAME/SubneTS/SUBNET_NAME" + vs, err = GetVnetSubnetIDComponents(vnetSubnetID) + Expect(err).To(BeNil()) + Expect(vs.SubscriptionID).To(Equal("SUB_ID")) + Expect(vs.ResourceGroupName).To(Equal("RG_NAME")) + Expect(vs.VNetName).To(Equal("VNET_NAME")) + Expect(vs.SubnetName).To(Equal("SUBNET_NAME")) + + // two bad ones + vnetSubnetID = "/providers/Microsoft.Network/virtualNetworks/VNET_NAME/subnets/SUBNET_NAME" + _, err = GetVnetSubnetIDComponents(vnetSubnetID) + Expect(err).ToNot(BeNil()) + + vnetSubnetID = "badVnetSubnetID" + _, err = GetVnetSubnetIDComponents(vnetSubnetID) + Expect(err).ToNot(BeNil()) + }) +}) diff --git a/skaffold.yaml b/skaffold.yaml index 10c99a523..efde9c36c 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -63,10 +63,6 @@ manifests: value: "Please run make az-all" - name: LEADER_ELECT # disable leader election for better debugging experience value: "false" - - name: AZURE_VNET_NAME - value: "Please run make az-all" - - name: AZURE_SUBNET_NAME - value: "Please run make az-all" # disable HTTP/2 to reduce ARM 
throttling on large-scale tests; # with this in place write (and read) QPS can be increased too #- name: GODEBUG From 720418e9f1610fef96eb2bcfb49a21944e665d64 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Sat, 30 Mar 2024 15:50:37 -0700 Subject: [PATCH 02/23] Update pkg/utils/subnet_parser_test.go Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- pkg/utils/subnet_parser_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/utils/subnet_parser_test.go b/pkg/utils/subnet_parser_test.go index dd83c7b37..0fb55ab32 100644 --- a/pkg/utils/subnet_parser_test.go +++ b/pkg/utils/subnet_parser_test.go @@ -74,7 +74,7 @@ var _ = Describe("GetVnetSubnetIDComponents", func() { Expect(vnet.SubnetName).To(Equal("subaru")) }) - It("real world wierdness (subnets is repeated broke old regex)", func() { + It("real world weirdness (subnets is repeated broke old regex)", func() { vnetsubnetid := "/subscriptions/00000000-0000-0000-0000-0000000000/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/sillygeese-VNET/subnets/subnets/AKSMgmtv2-Subnet" _, err := GetVnetSubnetIDComponents(vnetsubnetid) Expect(err).ToNot(BeNil()) From c024e58e9b29165f8942101bb626a9c4f77bf1d3 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Sat, 30 Mar 2024 21:00:24 -0700 Subject: [PATCH 03/23] fix: adding back rebased away makefile changes --- Makefile-az.mk | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 1c11c68b9..58bdfe868 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -16,6 +16,9 @@ AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpentermsi KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID az-all: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra 
(ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload + +az-all-custom-vnet: az-login az-create-workload-msi az-mkaks-custom-vnet az-create-federated-cred az-perm az-perm-subnet az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload + az-all-savm: az-login az-mkaks-savm az-perm-savm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines az-login: ## Login into Azure @@ -44,6 +47,20 @@ az-mkaks-cilium: az-mkacr ## Create test AKS cluster (with --network-dataplane c az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter +az-mkvnet: + az group create --name $(AZURE_RESOURCE_GROUP)-vnet --location $(AZURE_LOCATION) + az network vnet create --name $(AZURE_CLUSTER_NAME)-vnet --resource-group $(AZURE_RESOURCE_GROUP)-vnet --location $(AZURE_LOCATION) --address-prefixes "10.1.0.0/16" + +az-mksubnet: + az network vnet subnet create --name $(AZURE_CLUSTER_NAME)-subnet --resource-group $(AZURE_RESOURCE_GROUP)-vnet --vnet-name $(AZURE_CLUSTER_NAME)-vnet --address-prefixes "10.1.0.0/24" + +az-mkaks-custom-vnet: az-mkacr ## Create test AKS cluster with custom VNET + az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \ + --enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay \ + --enable-oidc-issuer --enable-workload-identity --vnet-subnet-id 
"/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP)-vnet/providers/Microsoft.Network/virtualNetworks/$(AZURE_CLUSTER_NAME)-vnet/subnets/$(AZURE_CLUSTER_NAME)-subnet" + az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing + skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter + az-create-workload-msi: az-mkrg # create the workload MSI that is the backing for the karpenter pod auth az identity create --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --resource-group "${AZURE_RESOURCE_GROUP}" --location "${AZURE_LOCATION}" @@ -95,7 +112,7 @@ az-patch-skaffold-azureoverlay: az-patch-skaffold $(eval AZURE_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="NETWORK_PLUGIN").value) = "azure"' skaffold.yaml - + # old identity path is still the default, so need to override the values values with new logic. # TODO (chmcbrid): update the new logic path as the default. 
$(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'clientId' -otsv)) @@ -122,8 +139,16 @@ az-perm: ## Create role assignments to let Karpenter manage VMs and Network az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Network Contributor" az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP_MC) --role "Managed Identity Operator" az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Network Contributor" # in some case we create vnet here + @echo Consider "make az-patch-skaffold"! +az-perm-subnet: + # give Network Contributor permission to the subnet rg for the AKS cluster + $(eval AZURE_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) + $(eval SUBNET_RESOURCE_GROUP=$(shell az network vnet subnet show --id $(AZURE_SUBNET_ID) | jq -r ".resourceGroup")) + $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(SUBNET_RESOURCE_GROUP) --role "Network Contributor" + az-perm-savm: ## Create role assignments to let Karpenter manage VMs and Network # Note: savm has not been converted over to use a workload identity $(eval AZURE_OBJECT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r 
".identityProfile.kubeletidentity.objectId")) From 1cdaa77629fc06d2caa92239855af9d921465457 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Sun, 31 Mar 2024 19:24:15 -0700 Subject: [PATCH 04/23] fix: resolving configuration for SubnetID from options, and removing vnetGUID os.Getenv() values --- pkg/auth/config.go | 6 --- pkg/auth/config_test.go | 3 -- pkg/operator/operator.go | 43 +++++++++++++++++-- pkg/operator/options/options.go | 5 ++- .../imagefamily/bootstrap/aksbootstrap.go | 7 ++- pkg/providers/imagefamily/resolver.go | 22 +++++----- pkg/providers/instance/azure_client.go | 31 ------------- pkg/providers/instancetype/suite_test.go | 23 ---------- pkg/test/environment.go | 2 +- pkg/test/options.go | 2 +- pkg/utils/subnet_parser.go | 3 +- 11 files changed, 62 insertions(+), 85 deletions(-) diff --git a/pkg/auth/config.go b/pkg/auth/config.go index b9eda69fb..83c927663 100644 --- a/pkg/auth/config.go +++ b/pkg/auth/config.go @@ -80,8 +80,6 @@ type Config struct { ClusterName string `json:"clusterName" yaml:"clusterName"` //Config only for AKS NodeResourceGroup string `json:"nodeResourceGroup" yaml:"nodeResourceGroup"` - //SubnetId is the resource ID of the subnet that VM network interfaces should use - SubnetID string `json:"subnetId" yaml:"subnetId"` } func (cfg *Config) PrepareConfig() error { @@ -106,8 +104,6 @@ func (cfg *Config) BaseVars() { cfg.AADClientCertPassword = os.Getenv("ARM_CLIENT_CERT_PASSWORD") cfg.ClusterName = os.Getenv("AZURE_CLUSTER_NAME") cfg.NodeResourceGroup = os.Getenv("AZURE_NODE_RESOURCE_GROUP") - cfg.SubnetID = os.Getenv("AZURE_SUBNET_ID") - // cfg.VnetGuid = os.Getenv("AZURE_VNET_GUID") // This field needs to be resolved inside of karpenter, so we will get it in the azClient initialization } func (cfg *Config) prepareID() error { @@ -182,7 +178,6 @@ func (cfg *Config) TrimSpace() { cfg.AADClientCertPassword = strings.TrimSpace(cfg.AADClientCertPassword) cfg.ClusterName = strings.TrimSpace(cfg.ClusterName) cfg.NodeResourceGroup 
= strings.TrimSpace(cfg.NodeResourceGroup) - cfg.SubnetID = strings.TrimSpace(cfg.SubnetID) } func (cfg *Config) validate() error { @@ -193,7 +188,6 @@ func (cfg *Config) validate() error { {cfg.VMType, "VM type"}, // Even though the config doesnt use some of these, // its good to validate they were set in the environment - {cfg.SubnetID, "subnet ID"}, } for _, field := range fields { diff --git a/pkg/auth/config_test.go b/pkg/auth/config_test.go index 12384adfc..01231bd1b 100644 --- a/pkg/auth/config_test.go +++ b/pkg/auth/config_test.go @@ -40,7 +40,6 @@ func TestBuildAzureConfig(t *testing.T) { SubscriptionID: "12345", ResourceGroup: "my-rg", NodeResourceGroup: "my-node-rg", - SubnetID: "12345", VMType: "vmss", }, wantErr: false, @@ -59,7 +58,6 @@ func TestBuildAzureConfig(t *testing.T) { SubscriptionID: "12345", ResourceGroup: "my-rg", NodeResourceGroup: "my-node-rg", - SubnetID: "12345", VMType: "vm", }, wantErr: false, @@ -93,7 +91,6 @@ func TestBuildAzureConfig(t *testing.T) { SubscriptionID: "12345", ResourceGroup: "my-rg", NodeResourceGroup: "my-node-rg", - SubnetID: "12345", VMType: "vmss", UseManagedIdentityExtension: true, UserAssignedIdentityID: "12345", diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 914ec91d1..874b5ef92 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -30,6 +30,8 @@ import ( corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/operator/scheme" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork" + "github.com/Azure/karpenter-provider-azure/pkg/apis" "github.com/Azure/karpenter-provider-azure/pkg/auth" azurecache "github.com/Azure/karpenter-provider-azure/pkg/cache" @@ -40,6 +42,8 @@ import ( "github.com/Azure/karpenter-provider-azure/pkg/providers/launchtemplate" "github.com/Azure/karpenter-provider-azure/pkg/providers/loadbalancer" "github.com/Azure/karpenter-provider-azure/pkg/providers/pricing" + 
"github.com/Azure/karpenter-provider-azure/pkg/utils" + armopts "github.com/Azure/karpenter-provider-azure/pkg/utils/opts" "sigs.k8s.io/karpenter/pkg/operator" ) @@ -64,11 +68,14 @@ type Operator struct { func NewOperator(ctx context.Context, operator *operator.Operator) (context.Context, *Operator) { azConfig, err := GetAZConfig() - lo.Must0(err, "creating Azure config") // TODO: I assume we prefer this over the cleaner azConfig := lo.Must(GetAzConfig()), as this has a helpful error message? + lo.Must0(err, "creating Azure config") // NOTE: we prefer this over the cleaner azConfig := lo.Must(GetAzConfig()), as when initializing the client there are helpful error messages in initializing clients and the azure config azClient, err := instance.CreateAZClient(ctx, azConfig) lo.Must0(err, "creating Azure client") + vnetGUID, err := getVNETGUID(azConfig, options.FromContext(ctx).SubnetID) + lo.Must0(err, "getting VNET GUID") + unavailableOfferingsCache := azurecache.NewUnavailableOfferings() pricingProvider := pricing.NewProvider( ctx, @@ -83,7 +90,12 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont azClient.ImageVersionsClient, azConfig.Location, ) - imageResolver := imagefamily.New(operator.GetClient(), imageProvider) + imageResolver := imagefamily.New( + operator.GetClient(), + imageProvider, + options.FromContext(ctx).SubnetID, + vnetGUID, + ) launchTemplateProvider := launchtemplate.NewProvider( ctx, imageResolver, @@ -116,7 +128,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont unavailableOfferingsCache, azConfig.Location, azConfig.NodeResourceGroup, - azConfig.SubnetID, + options.FromContext(ctx).SubnetID, azConfig.SubscriptionID, ) @@ -156,3 +168,28 @@ func getCABundle(restConfig *rest.Config) (*string, error) { } return ptr.String(base64.StdEncoding.EncodeToString(transportConfig.TLS.CAData)), nil } + +func getVNETGUID(cfg *auth.Config, subnetID string) (string, error) { + creds, err := 
auth.NewCredential(cfg) + if err != nil { + return "", err + } + opts := armopts.DefaultArmOpts() + vnetClient, err := armnetwork.NewVirtualNetworksClient(cfg.SubscriptionID, creds, opts) + if err != nil { + return "", err + } + + subnetParts, err := utils.GetVnetSubnetIDComponents(subnetID) + if err != nil { + return "", err + } + vnet, err := vnetClient.Get(context.Background(), subnetParts.ResourceGroupName, subnetParts.VNetName, nil) + if err != nil { + return "", err + } + if vnet.Properties == nil || vnet.Properties.ResourceGUID == nil { + return "", fmt.Errorf("vnet %s does not have a resource GUID", subnetParts.VNetName) + } + return *vnet.Properties.ResourceGUID, nil +} diff --git a/pkg/operator/options/options.go b/pkg/operator/options/options.go index 9875d7189..e70787e8b 100644 --- a/pkg/operator/options/options.go +++ b/pkg/operator/options/options.go @@ -63,11 +63,13 @@ type Options struct { VMMemoryOverheadPercent float64 ClusterID string KubeletClientTLSBootstrapToken string // => TLSBootstrapToken in bootstrap (may need to be per node/nodepool) - SSHPublicKey string // ssh.publicKeys.keyData => VM SSH public key // TODO: move to node template? + SSHPublicKey string // ssh.publicKeys.keyData => VM SSH public key // TODO: move to v1alpha2.AKSNodeClass? 
NetworkPlugin string // => NetworkPlugin in bootstrap NetworkPolicy string // => NetworkPolicy in bootstrap NodeIdentities []string // => Applied onto each VM + SubnetID string // => VnetSubnetID set on the systempool that we will fall back to if not specified in the nodeclass + setFlags map[string]bool } @@ -79,6 +81,7 @@ func (o *Options) AddFlags(fs *coreoptions.FlagSet) { fs.StringVar(&o.SSHPublicKey, "ssh-public-key", env.WithDefaultString("SSH_PUBLIC_KEY", ""), "[REQUIRED] VM SSH public key.") fs.StringVar(&o.NetworkPlugin, "network-plugin", env.WithDefaultString("NETWORK_PLUGIN", "azure"), "The network plugin used by the cluster.") fs.StringVar(&o.NetworkPolicy, "network-policy", env.WithDefaultString("NETWORK_POLICY", ""), "The network policy used by the cluster.") + fs.StringVar(&o.SubnetID, "default-subnet-id", env.WithDefaultString("AZURE_SUBNET_ID", ""), "The default subnet ID to use for new nodes.") fs.Var(newNodeIdentitiesValue(env.WithDefaultString("NODE_IDENTITIES", ""), &o.NodeIdentities), "node-identities", "User assigned identities for nodes.") } diff --git a/pkg/providers/imagefamily/bootstrap/aksbootstrap.go b/pkg/providers/imagefamily/bootstrap/aksbootstrap.go index 9de7a02b0..b87892420 100644 --- a/pkg/providers/imagefamily/bootstrap/aksbootstrap.go +++ b/pkg/providers/imagefamily/bootstrap/aksbootstrap.go @@ -389,8 +389,8 @@ var ( ) const ( - vnetSubnetNameLabel = "kubernetes.azure.com/network-subnet" - globalAKSMirror = "https://acs-mirror.azureedge.net" + vnetSubnetNameLabel = "kubernetes.azure.com/network-subnet" + globalAKSMirror = "https://acs-mirror.azureedge.net" ) func (a AKS) aksBootstrapScript() (string, error) { @@ -455,8 +455,7 @@ func (a AKS) applyOptions(nbv *NodeBootstrapVariables) { kubeletLabels := lo.Assign(kubeletNodeLabelsBase, a.Labels) getAgentbakerGeneratedLabels(a.ResourceGroup, kubeletLabels) - - nbv.Subnet = a.Labels[vnetSubnetNameLabel] + nbv.Subnet = a.Labels[vnetSubnetNameLabel] nbv.KubeletNodeLabels = 
strings.Join(lo.MapToSlice(kubeletLabels, func(k, v string) string { return fmt.Sprintf("%s=%s", k, v) diff --git a/pkg/providers/imagefamily/resolver.go b/pkg/providers/imagefamily/resolver.go index 6b6766b8f..b8f87e57d 100644 --- a/pkg/providers/imagefamily/resolver.go +++ b/pkg/providers/imagefamily/resolver.go @@ -18,7 +18,6 @@ package imagefamily import ( "context" - "os" core "k8s.io/api/core/v1" "knative.dev/pkg/logging" @@ -62,6 +61,8 @@ const ( // Resolver is able to fill-in dynamic launch template parameters type Resolver struct { imageProvider *Provider + vnetGUID string + vnetSubnetID string } // ImageFamily can be implemented to override the default logic for generating dynamic launch template parameters @@ -81,9 +82,11 @@ type ImageFamily interface { } // New constructs a new launch template Resolver -func New(_ client.Client, imageProvider *Provider) *Resolver { +func New(_ client.Client, imageProvider *Provider, vnetSubnetID, vnetGUID string) *Resolver { return &Resolver{ imageProvider: imageProvider, + vnetSubnetID: vnetSubnetID, + vnetGUID: vnetGUID, } } @@ -109,10 +112,8 @@ func (r Resolver) Resolve(ctx context.Context, nodeClass *v1alpha2.AKSNodeClass, instancetype.MemoryAvailable: instanceType.Overhead.EvictionThreshold.Memory().String()} kubeletConfig.MaxPods = lo.ToPtr(getMaxPods(staticParameters.NetworkPlugin)) - if staticParameters.NetworkPlugin == networkPluginAzure { - for k, v := range getAzureCNILabels(nodeClass) { - staticParameters.Labels[k] = v - } + for k, v := range r.getAzureCNILabels(nodeClass) { + staticParameters.Labels[k] = v } logging.FromContext(ctx).Infof("Resolved image %s for instance type %s", imageID, instanceType.Name) @@ -151,17 +152,16 @@ func getMaxPods(networkPlugin string) int32 { return defaultKubernetesMaxPods } -// getVnetLabelValues returns the labels for AzureCNI for the vnet and subnet. This function assumes we assert in the auth config that AZURE_VNET_GUID and AZURE_SUBNET_ID are set. 
-// See how split logic works here: https://go.dev/play/p/l3l7Zrg_pdd. -func getAzureCNILabels(_ *v1alpha2.AKSNodeClass) map[string]string { +// getAzureCNILabels returns the labels for Azure CNI overlay +func (r *Resolver) getAzureCNILabels(_ *v1alpha2.AKSNodeClass) map[string]string { // TODO(bsoghigian): this should be refactored to lo.Ternary(nodeClass.Spec.VnetSubnetID != nil, lo.FromPtr(nodeClass.Spec.VnetSubnetID), os.Getenv("AZURE_SUBNET_ID")) when we add VnetSubnetID to the nodeclass - vnetSubnetComponents, _ := utils.GetVnetSubnetIDComponents(os.Getenv("AZURE_SUBNET_ID")) + vnetSubnetComponents, _ := utils.GetVnetSubnetIDComponents(r.vnetSubnetID) vnetLabels := map[string]string{ vnetDataPlaneLabel: networkPolicyCilium, vnetNetworkNameLabel: vnetSubnetComponents.VNetName, vnetSubnetNameLabel: vnetSubnetComponents.SubnetName, vnetSubscriptionIDLabel: vnetSubnetComponents.SubscriptionID, - vnetGUIDLabel: os.Getenv("AZURE_VNET_GUID"), + vnetGUIDLabel: r.vnetGUID, vnetPodNetworkTypeLabel: overlayNetworkType, } return vnetLabels diff --git a/pkg/providers/instance/azure_client.go b/pkg/providers/instance/azure_client.go index 52486ba55..025e02caa 100644 --- a/pkg/providers/instance/azure_client.go +++ b/pkg/providers/instance/azure_client.go @@ -18,12 +18,6 @@ package instance import ( "context" - "fmt" - "os" - - // nolint SA1019 - deprecated package - - "github.com/samber/lo" "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute" @@ -35,7 +29,6 @@ import ( "github.com/Azure/karpenter-provider-azure/pkg/providers/imagefamily" "github.com/Azure/karpenter-provider-azure/pkg/providers/instance/skuclient" "github.com/Azure/karpenter-provider-azure/pkg/providers/loadbalancer" - "github.com/Azure/karpenter-provider-azure/pkg/utils" armopts "github.com/Azure/karpenter-provider-azure/pkg/utils/opts" klog "k8s.io/klog/v2" @@ -114,22 +107,6 @@ func CreateAZClient(ctx context.Context, cfg 
*auth.Config) (*AZClient, error) { return azClient, nil } -func handleVNET(cfg *auth.Config, vnetClient *armnetwork.VirtualNetworksClient) error { - subnetParts, err := utils.GetVnetSubnetIDComponents(cfg.SubnetID) - if err != nil { - return err - } - vnet, err := vnetClient.Get(context.Background(), subnetParts.ResourceGroupName, subnetParts.VNetName, nil) - if err != nil { - return err - } - if vnet.Properties == nil || vnet.Properties.ResourceGUID == nil { - return fmt.Errorf("vnet %s does not have a resource GUID", subnetParts.VNetName) - } - os.Setenv("AZURE_VNET_GUID", lo.FromPtr(vnet.Properties.ResourceGUID)) - return nil -} - func NewAZClient(ctx context.Context, cfg *auth.Config, env *azure.Environment) (*AZClient, error) { cred, err := auth.NewCredential(cfg) if err != nil { @@ -148,14 +125,6 @@ func NewAZClient(ctx context.Context, cfg *auth.Config, env *azure.Environment) } klog.V(5).Infof("Created network interface client %v using token credential", interfacesClient) - vnetClient, err := armnetwork.NewVirtualNetworksClient(cfg.SubscriptionID, cred, opts) - if err != nil { - return nil, err - } - err = handleVNET(cfg, vnetClient) - if err != nil { - return nil, err - } virtualMachinesClient, err := armcompute.NewVirtualMachinesClient(cfg.SubscriptionID, cred, opts) if err != nil { return nil, err diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index ed9d8ce20..aeca3744e 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -23,7 +23,6 @@ import ( "fmt" "io" "net/http" - "os" "strings" "testing" "time" @@ -114,8 +113,6 @@ var _ = Describe("InstanceType Provider", func() { var nodePool *corev1beta1.NodePool BeforeEach(func() { - os.Setenv("AZURE_VNET_GUID", "test-vnet-guid") - os.Setenv("AZURE_SUBNET_ID", test.DefaultVnetSubnetID) nodeClass = test.AKSNodeClass() nodePool = coretest.NodePool(corev1beta1.NodePool{ Spec: corev1beta1.NodePoolSpec{ @@ -561,26 
+558,6 @@ var _ = Describe("InstanceType Provider", func() { Expect(kubeletFlags).To(ContainSubstring("--image-gc-high-threshold=30")) Expect(kubeletFlags).To(ContainSubstring("--cpu-cfs-quota=true")) }) - It("should not contain the azure cni vnet labels", func() { - ExpectApplied(ctx, env.Client, nodePool, nodeClass) - pod := coretest.UnschedulablePod() - ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, coreProvisioner, pod) - ExpectScheduled(ctx, env.Client, pod) - - Expect(azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Len()).To(Equal(1)) - vm := azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Pop().VM - customData := *vm.Properties.OSProfile.CustomData - Expect(customData).ToNot(BeNil()) - decodedBytes, err := base64.StdEncoding.DecodeString(customData) - Expect(err).To(Succeed()) - decodedString := string(decodedBytes[:]) - Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/ebpf-dataplane=")) - Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/network-name=")) - Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/network-subnet=")) - Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/network-subscription=")) - Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/nodenetwork-vnetguid=")) - Expect(decodedString).ToNot(ContainSubstring("kubernetes.azure.com/podnetwork-type=")) - }) It("should support provisioning with kubeletConfig, computeResources and maxPods specified", func() { nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ PodsPerCore: lo.ToPtr(int32(110)), diff --git a/pkg/test/environment.go b/pkg/test/environment.go index 83546b535..4c5b61f11 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -107,7 +107,7 @@ func NewRegionalEnvironment(ctx context.Context, env *coretest.Environment, regi // Providers pricingProvider := pricing.NewProvider(ctx, pricingAPI, 
region, make(chan struct{})) imageFamilyProvider := imagefamily.NewProvider(env.KubernetesInterface, kubernetesVersionCache, communityImageVersionsAPI, region) - imageFamilyResolver := imagefamily.New(env.Client, imageFamilyProvider) + imageFamilyResolver := imagefamily.New(env.Client, imageFamilyProvider, DefaultVnetSubnetID, "test-vnet-guid") instanceTypesProvider := instancetype.NewProvider(region, instanceTypeCache, skuClientSingleton, pricingProvider, unavailableOfferingsCache) launchTemplateProvider := launchtemplate.NewProvider( ctx, diff --git a/pkg/test/options.go b/pkg/test/options.go index 0fcb6a251..d7cc75f18 100644 --- a/pkg/test/options.go +++ b/pkg/test/options.go @@ -51,7 +51,7 @@ func Options(overrides ...OptionsFields) *azoptions.Options { KubeletClientTLSBootstrapToken: lo.FromPtrOr(options.KubeletClientTLSBootstrapToken, "test-token"), SSHPublicKey: lo.FromPtrOr(options.SSHPublicKey, "test-ssh-public-key"), NetworkPlugin: lo.FromPtrOr(options.NetworkPlugin, "azure"), - NetworkPolicy: lo.FromPtrOr(options.NetworkPolicy, ""), + NetworkPolicy: lo.FromPtrOr(options.NetworkPolicy, "cilium"), VMMemoryOverheadPercent: lo.FromPtrOr(options.VMMemoryOverheadPercent, 0.075), NodeIdentities: options.NodeIdentities, } diff --git a/pkg/utils/subnet_parser.go b/pkg/utils/subnet_parser.go index b43e1871c..73c4ea821 100644 --- a/pkg/utils/subnet_parser.go +++ b/pkg/utils/subnet_parser.go @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ + package utils import ( From 7e9eb936bae78d947e5f39383a2cafcb122147cf Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Sun, 31 Mar 2024 19:37:14 -0700 Subject: [PATCH 05/23] chore: removing the comment --- Makefile-az.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 58bdfe868..24cb37e45 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -254,7 +254,7 @@ az-rmnodeclaims: ## kubectl delete all nodeclaims; don't wait for finalizers (us az-taintsystemnodes: ## Taint all system nodepool nodes kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --selector='kubernetes.azure.com/mode=system' --overwrite -az-taintnodes: ## Run e2etests +az-taintnodes: kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all --overwrite az-e2etests: ## Run e2etests From 178453e658011f92b1743b2c6c4f195283d07ea4 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Mon, 1 Apr 2024 17:51:06 -0700 Subject: [PATCH 06/23] Update pkg/operator/options/options.go Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- pkg/operator/options/options.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/operator/options/options.go b/pkg/operator/options/options.go index e70787e8b..a11180cd6 100644 --- a/pkg/operator/options/options.go +++ b/pkg/operator/options/options.go @@ -68,7 +68,7 @@ type Options struct { NetworkPolicy string // => NetworkPolicy in bootstrap NodeIdentities []string // => Applied onto each VM - SubnetID string // => VnetSubnetID set on the systempool that we will fall back to if not specified in the nodeclass + SubnetID string // => VnetSubnetID to use (for nodes in Azure CNI Overlay and Azure CNI + pod subnet; for nodes and pods in Azure CNI), unless overridden via AKSNodeClass setFlags map[string]bool } From
e9571662aabe1db47b202f1c301b37c128497106 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Mon, 1 Apr 2024 18:53:11 -0700 Subject: [PATCH 07/23] refactor: options validation for subnet id and moving everything to launch template from resolver --- Makefile-az.mk | 20 ++++---- pkg/operator/operator.go | 3 +- pkg/operator/options/options.go | 2 +- pkg/operator/options/options_validation.go | 10 ++++ pkg/operator/options/suite_test.go | 2 + pkg/providers/imagefamily/azlinux.go | 1 + .../imagefamily/bootstrap/aksbootstrap.go | 10 ++-- .../imagefamily/bootstrap/bootstrap.go | 1 + pkg/providers/imagefamily/resolver.go | 39 +-------------- pkg/providers/imagefamily/ubuntu_2204.go | 1 + pkg/providers/instancetype/suite_test.go | 2 +- .../launchtemplate/launchtemplate.go | 50 +++++++++++++++++-- .../launchtemplate/parameters/types.go | 3 ++ pkg/test/environment.go | 3 +- pkg/test/options.go | 2 + skaffold.yaml | 2 +- 16 files changed, 89 insertions(+), 62 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 24cb37e45..8fb447f5c 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -52,12 +52,12 @@ az-mkvnet: az network vnet create --name $(AZURE_CLUSTER_NAME)-vnet --resource-group $(AZURE_RESOURCE_GROUP)-vnet --location $(AZURE_LOCATION) --address-prefixes "10.1.0.0/16" az-mksubnet: - az network vnet subnet create --name $(AZURE_CLUSTER_NAME)-subnet --resource-group $(AZURE_RESOURCE_GROUP)-vnet --vnet-name $(AZURE_CLUSTER_NAME)-vnet --address-prefixes "10.1.0.0/24" + az network vnet subnet create --name nodesubnet --resource-group $(AZURE_RESOURCE_GROUP)-vnet --vnet-name $(AZURE_CLUSTER_NAME)-vnet --address-prefixes "10.1.0.0/24" az-mkaks-custom-vnet: az-mkacr ## Create test AKS cluster with custom VNET az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \ --enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode 
overlay \ - --enable-oidc-issuer --enable-workload-identity --vnet-subnet-id "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP)-vnet/providers/Microsoft.Network/virtualNetworks/$(AZURE_CLUSTER_NAME)-vnet/subnets/$(AZURE_CLUSTER_NAME)-subnet" + --enable-oidc-issuer --enable-workload-identity --vnet-subnet-id "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP)-vnet/providers/Microsoft.Network/virtualNetworks/$(AZURE_CLUSTER_NAME)-vnet/subnets/nodesubnet" az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter @@ -100,17 +100,17 @@ az-patch-skaffold: ## Update Azure client env vars and settings in skaffold con yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="SSH_PUBLIC_KEY")).value = "$(SSH_PUBLIC_KEY)"' skaffold.yaml az-patch-skaffold-kubenet: az-patch-skaffold - $(eval AZURE_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) - yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml + $(eval VNET_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="VNET_SUBNET_ID")) .value = "$(VNET_SUBNET_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="NETWORK_PLUGIN").value) = "kubenet"' skaffold.yaml az-patch-skaffold-azure: az-patch-skaffold - $(eval AZURE_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) - yq -i 
'(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml + $(eval VNET_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="VNET_SUBNET_ID")) .value = "$(VNET_SUBNET_ID)"' skaffold.yaml az-patch-skaffold-azureoverlay: az-patch-skaffold - $(eval AZURE_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) - yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="AZURE_SUBNET_ID")) .value = "$(AZURE_SUBNET_ID)"' skaffold.yaml + $(eval VNET_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="VNET_SUBNET_ID")) .value = "$(VNET_SUBNET_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="NETWORK_PLUGIN").value) = "azure"' skaffold.yaml # old identity path is still the default, so need to override the values values with new logic. 
@@ -144,8 +144,8 @@ az-perm: ## Create role assignments to let Karpenter manage VMs and Network az-perm-subnet: # give Network Contributor permission to the subnet rg for the AKS cluster - $(eval AZURE_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) - $(eval SUBNET_RESOURCE_GROUP=$(shell az network vnet subnet show --id $(AZURE_SUBNET_ID) | jq -r ".resourceGroup")) + $(eval VNET_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) + $(eval SUBNET_RESOURCE_GROUP=$(shell az network vnet subnet show --id $(VNET_SUBNET_ID) | jq -r ".resourceGroup")) $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(SUBNET_RESOURCE_GROUP) --role "Network Contributor" diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 874b5ef92..f51e14b34 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -93,8 +93,6 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont imageResolver := imagefamily.New( operator.GetClient(), imageProvider, - options.FromContext(ctx).SubnetID, - vnetGUID, ) launchTemplateProvider := launchtemplate.NewProvider( ctx, @@ -107,6 +105,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont azConfig.UserAssignedIdentityID, azConfig.NodeResourceGroup, azConfig.Location, + vnetGUID, ) instanceTypeProvider := instancetype.NewProvider( azConfig.Location, diff --git a/pkg/operator/options/options.go b/pkg/operator/options/options.go index e70787e8b..f6d2eb6ac 100644 --- a/pkg/operator/options/options.go +++ 
b/pkg/operator/options/options.go @@ -81,7 +81,7 @@ func (o *Options) AddFlags(fs *coreoptions.FlagSet) { fs.StringVar(&o.SSHPublicKey, "ssh-public-key", env.WithDefaultString("SSH_PUBLIC_KEY", ""), "[REQUIRED] VM SSH public key.") fs.StringVar(&o.NetworkPlugin, "network-plugin", env.WithDefaultString("NETWORK_PLUGIN", "azure"), "The network plugin used by the cluster.") fs.StringVar(&o.NetworkPolicy, "network-policy", env.WithDefaultString("NETWORK_POLICY", ""), "The network policy used by the cluster.") - fs.StringVar(&o.SubnetID, "default-subnet-id", env.WithDefaultString("AZURE_SUBNET_ID", ""), "The default subnet ID to use for new nodes.") + fs.StringVar(&o.SubnetID, "default-vnet-subnet-id", env.WithDefaultString("VNET_SUBNET_ID", ""), "The default subnet ID to use for new nodes. This must be a valid subnet arm id that does not overlap with the service cidr or the pod cidr") fs.Var(newNodeIdentitiesValue(env.WithDefaultString("NODE_IDENTITIES", ""), &o.NodeIdentities), "node-identities", "User assigned identities for nodes.") } diff --git a/pkg/operator/options/options_validation.go b/pkg/operator/options/options_validation.go index 7b202d080..09fad242f 100644 --- a/pkg/operator/options/options_validation.go +++ b/pkg/operator/options/options_validation.go @@ -20,6 +20,7 @@ import ( "fmt" "net/url" + "github.com/Azure/karpenter-provider-azure/pkg/utils" "github.com/go-playground/validator/v10" "go.uber.org/multierr" ) @@ -30,10 +31,19 @@ func (o Options) Validate() error { o.validateRequiredFields(), o.validateEndpoint(), o.validateVMMemoryOverheadPercent(), + o.validateVnetSubnetID(), validate.Struct(o), ) } +func (o Options) validateVnetSubnetID() error { + _, err := utils.GetVnetSubnetIDComponents(o.SubnetID) + if err != nil { + return fmt.Errorf("default-vnet-subnet-id is invalid: %w", err) + } + return nil +} + func (o Options) validateEndpoint() error { if o.ClusterEndpoint == "" { return nil diff --git a/pkg/operator/options/suite_test.go 
b/pkg/operator/options/suite_test.go index f5d7ecfd9..45a1cd1cb 100644 --- a/pkg/operator/options/suite_test.go +++ b/pkg/operator/options/suite_test.go @@ -92,6 +92,7 @@ var _ = Describe("Options", func() { os.Setenv("NETWORK_PLUGIN", "env-network-plugin") os.Setenv("NETWORK_POLICY", "env-network-policy") os.Setenv("NODE_IDENTITIES", "/subscriptions/1234/resourceGroups/mcrg/providers/Microsoft.ManagedIdentity/userAssignedIdentities/envid1,/subscriptions/1234/resourceGroups/mcrg/providers/Microsoft.ManagedIdentity/userAssignedIdentities/envid2") + os.Setenv("VNET_SUBNET_ID", "/subscriptions/12345678-1234-1234-1234-123456789012/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/karpentervnet/subnets/karpentersub") fs = &coreoptions.FlagSet{ FlagSet: flag.NewFlagSet("karpenter", flag.ContinueOnError), } @@ -107,6 +108,7 @@ var _ = Describe("Options", func() { SSHPublicKey: lo.ToPtr("env-ssh-public-key"), NetworkPlugin: lo.ToPtr("env-network-plugin"), NetworkPolicy: lo.ToPtr("env-network-policy"), + SubnetID: "/subscriptions/12345678-1234-1234-1234-123456789012/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/karpentervnet/subnets/karpentersub", NodeIdentities: []string{"/subscriptions/1234/resourceGroups/mcrg/providers/Microsoft.ManagedIdentity/userAssignedIdentities/envid1", "/subscriptions/1234/resourceGroups/mcrg/providers/Microsoft.ManagedIdentity/userAssignedIdentities/envid2"}, })) }) diff --git a/pkg/providers/imagefamily/azlinux.go b/pkg/providers/imagefamily/azlinux.go index 5640becf4..c3f6702b7 100644 --- a/pkg/providers/imagefamily/azlinux.go +++ b/pkg/providers/imagefamily/azlinux.go @@ -86,6 +86,7 @@ func (u AzureLinux) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, ta GPUDriverVersion: u.Options.GPUDriverVersion, // GPUImageSHA: u.Options.GPUImageSHA, image sha only applies to ubuntu // SEE: 
https://github.com/Azure/AgentBaker/blob/f393d6e4d689d9204d6000c85623ad9b764e2a29/vhdbuilder/packer/install-dependencies.sh#L201 + SubnetID: u.Options.SubnetID, }, Arch: u.Options.Arch, TenantID: u.Options.TenantID, diff --git a/pkg/providers/imagefamily/bootstrap/aksbootstrap.go b/pkg/providers/imagefamily/bootstrap/aksbootstrap.go index b87892420..807428f6e 100644 --- a/pkg/providers/imagefamily/bootstrap/aksbootstrap.go +++ b/pkg/providers/imagefamily/bootstrap/aksbootstrap.go @@ -24,6 +24,7 @@ import ( "strings" "text/template" + "github.com/Azure/karpenter-provider-azure/pkg/utils" "github.com/samber/lo" v1 "k8s.io/api/core/v1" "knative.dev/pkg/ptr" @@ -389,8 +390,7 @@ var ( ) const ( - vnetSubnetNameLabel = "kubernetes.azure.com/network-subnet" - globalAKSMirror = "https://acs-mirror.azureedge.net" + globalAKSMirror = "https://acs-mirror.azureedge.net" ) func (a AKS) aksBootstrapScript() (string, error) { @@ -441,7 +441,6 @@ func (a AKS) applyOptions(nbv *NodeBootstrapVariables) { // calculated values nbv.EnsureNoDupePromiscuousBridge = nbv.NeedsContainerd && nbv.NetworkPlugin == "kubenet" && nbv.NetworkPolicy != "calico" nbv.NetworkSecurityGroup = fmt.Sprintf("aks-agentpool-%s-nsg", a.ClusterID) - nbv.VirtualNetwork = fmt.Sprintf("aks-vnet-%s", a.ClusterID) nbv.RouteTable = fmt.Sprintf("aks-agentpool-%s-routetable", a.ClusterID) if a.GPUNode { @@ -455,7 +454,10 @@ func (a AKS) applyOptions(nbv *NodeBootstrapVariables) { kubeletLabels := lo.Assign(kubeletNodeLabelsBase, a.Labels) getAgentbakerGeneratedLabels(a.ResourceGroup, kubeletLabels) - nbv.Subnet = a.Labels[vnetSubnetNameLabel] + subnetParts, _ := utils.GetVnetSubnetIDComponents(a.SubnetID) + nbv.Subnet = subnetParts.SubnetName + nbv.VirtualNetworkResourceGroup = subnetParts.ResourceGroupName + nbv.VirtualNetwork = subnetParts.VNetName nbv.KubeletNodeLabels = strings.Join(lo.MapToSlice(kubeletLabels, func(k, v string) string { return fmt.Sprintf("%s=%s", k, v) diff --git 
a/pkg/providers/imagefamily/bootstrap/bootstrap.go b/pkg/providers/imagefamily/bootstrap/bootstrap.go index 8505d1b41..2bf1fab8d 100644 --- a/pkg/providers/imagefamily/bootstrap/bootstrap.go +++ b/pkg/providers/imagefamily/bootstrap/bootstrap.go @@ -32,6 +32,7 @@ type Options struct { GPUNode bool GPUDriverVersion string GPUImageSHA string + SubnetID string } // Bootstrapper can be implemented to generate a bootstrap script diff --git a/pkg/providers/imagefamily/resolver.go b/pkg/providers/imagefamily/resolver.go index b8f87e57d..b318c7d50 100644 --- a/pkg/providers/imagefamily/resolver.go +++ b/pkg/providers/imagefamily/resolver.go @@ -28,7 +28,6 @@ import ( "github.com/Azure/karpenter-provider-azure/pkg/providers/imagefamily/bootstrap" "github.com/Azure/karpenter-provider-azure/pkg/providers/instancetype" template "github.com/Azure/karpenter-provider-azure/pkg/providers/launchtemplate/parameters" - "github.com/Azure/karpenter-provider-azure/pkg/utils" "github.com/samber/lo" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/cloudprovider" @@ -38,31 +37,17 @@ const ( networkPluginAzure = "azure" networkPluginKubenet = "kubenet" - networkPolicyCilium = "cilium" - // defaultKubernetesMaxPodsAzure is the maximum number of pods to run on a node for Azure CNI Overlay. defaultKubernetesMaxPodsAzure = 250 // defaultKubernetesMaxPodsKubenet is the maximum number of pods to run on a node for Kubenet. defaultKubernetesMaxPodsKubenet = 100 // defaultKubernetesMaxPods is the maximum number of pods on a node. 
defaultKubernetesMaxPods = 110 - - // AzureCNI VNET Labels - vnetDataPlaneLabel = "kubernetes.azure.com/ebpf-dataplane" - vnetNetworkNameLabel = "kubernetes.azure.com/network-name" - vnetSubnetNameLabel = "kubernetes.azure.com/network-subnet" - vnetSubscriptionIDLabel = "kubernetes.azure.com/network-subscription" - vnetGUIDLabel = "kubernetes.azure.com/nodenetwork-vnetguid" - vnetPodNetworkTypeLabel = "kubernetes.azure.com/podnetwork-type" - - overlayNetworkType = "overlay" ) // Resolver is able to fill-in dynamic launch template parameters type Resolver struct { imageProvider *Provider - vnetGUID string - vnetSubnetID string } // ImageFamily can be implemented to override the default logic for generating dynamic launch template parameters @@ -82,11 +67,9 @@ type ImageFamily interface { } // New constructs a new launch template Resolver -func New(_ client.Client, imageProvider *Provider, vnetSubnetID, vnetGUID string) *Resolver { +func New(_ client.Client, imageProvider *Provider) *Resolver { return &Resolver{ imageProvider: imageProvider, - vnetSubnetID: vnetSubnetID, - vnetGUID: vnetGUID, } } @@ -111,11 +94,6 @@ func (r Resolver) Resolve(ctx context.Context, nodeClass *v1alpha2.AKSNodeClass, kubeletConfig.EvictionHard = map[string]string{ instancetype.MemoryAvailable: instanceType.Overhead.EvictionThreshold.Memory().String()} kubeletConfig.MaxPods = lo.ToPtr(getMaxPods(staticParameters.NetworkPlugin)) - - for k, v := range r.getAzureCNILabels(nodeClass) { - staticParameters.Labels[k] = v - } - logging.FromContext(ctx).Infof("Resolved image %s for instance type %s", imageID, instanceType.Name) template := &template.Parameters{ StaticParameters: staticParameters, @@ -151,18 +129,3 @@ func getMaxPods(networkPlugin string) int32 { } return defaultKubernetesMaxPods } - -// getAzureCNILabels returns the labels for Azure CNI overlay -func (r *Resolver) getAzureCNILabels(_ *v1alpha2.AKSNodeClass) map[string]string { - // TODO(bsoghigian): this should be refactored to 
lo.Ternary(nodeClass.Spec.VnetSubnetID != nil, lo.FromPtr(nodeClass.Spec.VnetSubnetID), os.Getenv("AZURE_SUBNET_ID")) when we add VnetSubnetID to the nodeclass - vnetSubnetComponents, _ := utils.GetVnetSubnetIDComponents(r.vnetSubnetID) - vnetLabels := map[string]string{ - vnetDataPlaneLabel: networkPolicyCilium, - vnetNetworkNameLabel: vnetSubnetComponents.VNetName, - vnetSubnetNameLabel: vnetSubnetComponents.SubnetName, - vnetSubscriptionIDLabel: vnetSubnetComponents.SubscriptionID, - vnetGUIDLabel: r.vnetGUID, - vnetPodNetworkTypeLabel: overlayNetworkType, - } - return vnetLabels -} diff --git a/pkg/providers/imagefamily/ubuntu_2204.go b/pkg/providers/imagefamily/ubuntu_2204.go index 5c0177206..a7c3b8ee2 100644 --- a/pkg/providers/imagefamily/ubuntu_2204.go +++ b/pkg/providers/imagefamily/ubuntu_2204.go @@ -85,6 +85,7 @@ func (u Ubuntu2204) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, ta GPUNode: u.Options.GPUNode, GPUDriverVersion: u.Options.GPUDriverVersion, GPUImageSHA: u.Options.GPUImageSHA, + SubnetID: u.Options.SubnetID, }, Arch: u.Options.Arch, TenantID: u.Options.TenantID, diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index aeca3744e..c90d4a9c7 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -137,7 +137,7 @@ var _ = Describe("InstanceType Provider", func() { }) Context("Subnet", func() { - It("should use the AZURE_SUBNET_ID", func() { + It("should use the VNET_SUBNET_ID", func() { ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, coreProvisioner, pod) diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index 19725edef..d2657b95a 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -36,6 +36,16 @@ import ( const ( 
karpenterManagedTagKey = "karpenter.azure.com/cluster" + + networkDataplaneCilium = "cilium" + vnetDataPlaneLabel = "kubernetes.azure.com/ebpf-dataplane" + vnetNetworkNameLabel = "kubernetes.azure.com/network-name" + vnetSubnetNameLabel = "kubernetes.azure.com/network-subnet" + vnetSubscriptionIDLabel = "kubernetes.azure.com/network-subscription" + vnetGUIDLabel = "kubernetes.azure.com/nodenetwork-vnetguid" + vnetPodNetworkTypeLabel = "kubernetes.azure.com/podnetwork-type" + + networkModeOverlay = "overlay" ) type Template struct { @@ -54,12 +64,13 @@ type Provider struct { userAssignedIdentityID string resourceGroup string location string + vnetGUID string } // TODO: add caching of launch templates func NewProvider(_ context.Context, imageFamily *imagefamily.Resolver, imageProvider *imagefamily.Provider, caBundle *string, clusterEndpoint string, - tenantID, subscriptionID, userAssignedIdentityID, resourceGroup, location string, + tenantID, subscriptionID, userAssignedIdentityID, resourceGroup, location, vnetGUID string, ) *Provider { return &Provider{ imageFamily: imageFamily, @@ -71,12 +82,17 @@ func NewProvider(_ context.Context, imageFamily *imagefamily.Resolver, imageProv userAssignedIdentityID: userAssignedIdentityID, resourceGroup: resourceGroup, location: location, + vnetGUID: vnetGUID, } } func (p *Provider) GetTemplate(ctx context.Context, nodeClass *v1alpha2.AKSNodeClass, nodeClaim *corev1beta1.NodeClaim, instanceType *cloudprovider.InstanceType, additionalLabels map[string]string) (*Template, error) { - staticParameters := p.getStaticParameters(ctx, instanceType, nodeClass, lo.Assign(nodeClaim.Labels, additionalLabels)) + staticParameters, err := p.getStaticParameters(ctx, instanceType, nodeClass, lo.Assign(nodeClaim.Labels, additionalLabels)) + if err != nil { + return nil, err + } + kubeServerVersion, err := p.imageProvider.KubeServerVersion(ctx) if err != nil { return nil, err @@ -94,11 +110,18 @@ func (p *Provider) GetTemplate(ctx context.Context, 
nodeClass *v1alpha2.AKSNodeC return launchTemplate, nil } -func (p *Provider) getStaticParameters(ctx context.Context, instanceType *cloudprovider.InstanceType, nodeClass *v1alpha2.AKSNodeClass, labels map[string]string) *parameters.StaticParameters { +func (p *Provider) getStaticParameters(ctx context.Context, instanceType *cloudprovider.InstanceType, nodeClass *v1alpha2.AKSNodeClass, labels map[string]string) (*parameters.StaticParameters, error) { var arch string = corev1beta1.ArchitectureAmd64 if err := instanceType.Requirements.Compatible(scheduling.NewRequirements(scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureArm64))); err == nil { arch = corev1beta1.ArchitectureArm64 } + vnetLabels, err := p.getVnetInfoLabels(ctx, nodeClass) + if err != nil { + return nil, err + } + for key, value := range vnetLabels { + labels[key] = value + } return ¶meters.StaticParameters{ ClusterName: options.FromContext(ctx).ClusterName, @@ -120,7 +143,8 @@ func (p *Provider) getStaticParameters(ctx context.Context, instanceType *cloudp KubeletClientTLSBootstrapToken: options.FromContext(ctx).KubeletClientTLSBootstrapToken, NetworkPlugin: options.FromContext(ctx).NetworkPlugin, NetworkPolicy: options.FromContext(ctx).NetworkPolicy, - } + SubnetID: options.FromContext(ctx).SubnetID, + }, nil } func (p *Provider) createLaunchTemplate(_ context.Context, options *parameters.Parameters) (*Template, error) { @@ -147,3 +171,21 @@ func mergeTags(tags ...map[string]string) (result map[string]*string) { return strings.ReplaceAll(key, "/", "_"), to.StringPtr(value) }) } + +// getVnetInfoLabels returns the labels for Azure CNI +func (p *Provider) getVnetInfoLabels(ctx context.Context, _ *v1alpha2.AKSNodeClass) (map[string]string, error) { + // TODO(bsoghigian): this should be refactored to lo.Ternary(nodeClass.Spec.VnetSubnetID != nil, lo.FromPtr(nodeClass.Spec.VnetSubnetID), os.Getenv("AZURE_SUBNET_ID")) when we add VnetSubnetID to the nodeclass + 
vnetSubnetComponents, err := utils.GetVnetSubnetIDComponents(options.FromContext(ctx).SubnetID) + if err != nil { + return nil, err + } + vnetLabels := map[string]string{ + vnetDataPlaneLabel: networkDataplaneCilium, + vnetNetworkNameLabel: vnetSubnetComponents.VNetName, + vnetSubnetNameLabel: vnetSubnetComponents.SubnetName, + vnetSubscriptionIDLabel: vnetSubnetComponents.SubscriptionID, + vnetGUIDLabel: p.vnetGUID, + vnetPodNetworkTypeLabel: networkModeOverlay, + } + return vnetLabels, nil +} diff --git a/pkg/providers/launchtemplate/parameters/types.go b/pkg/providers/launchtemplate/parameters/types.go index b228ef5b7..238ce0710 100644 --- a/pkg/providers/launchtemplate/parameters/types.go +++ b/pkg/providers/launchtemplate/parameters/types.go @@ -41,6 +41,9 @@ type StaticParameters struct { NetworkPolicy string KubernetesVersion string + // VNET + SubnetID string + Tags map[string]string Labels map[string]string } diff --git a/pkg/test/environment.go b/pkg/test/environment.go index 4c5b61f11..2b5be99e8 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -107,7 +107,7 @@ func NewRegionalEnvironment(ctx context.Context, env *coretest.Environment, regi // Providers pricingProvider := pricing.NewProvider(ctx, pricingAPI, region, make(chan struct{})) imageFamilyProvider := imagefamily.NewProvider(env.KubernetesInterface, kubernetesVersionCache, communityImageVersionsAPI, region) - imageFamilyResolver := imagefamily.New(env.Client, imageFamilyProvider, DefaultVnetSubnetID, "test-vnet-guid") + imageFamilyResolver := imagefamily.New(env.Client, imageFamilyProvider) instanceTypesProvider := instancetype.NewProvider(region, instanceTypeCache, skuClientSingleton, pricingProvider, unavailableOfferingsCache) launchTemplateProvider := launchtemplate.NewProvider( ctx, @@ -120,6 +120,7 @@ func NewRegionalEnvironment(ctx context.Context, env *coretest.Environment, regi "test-userAssignedIdentity", resourceGroup, region, + "test-vnet-guid", ) 
loadBalancerProvider := loadbalancer.NewProvider( loadBalancersAPI, diff --git a/pkg/test/options.go b/pkg/test/options.go index d7cc75f18..ac182acd1 100644 --- a/pkg/test/options.go +++ b/pkg/test/options.go @@ -35,6 +35,7 @@ type OptionsFields struct { NetworkPolicy *string VMMemoryOverheadPercent *float64 NodeIdentities []string + SubnetID string } func Options(overrides ...OptionsFields) *azoptions.Options { @@ -54,5 +55,6 @@ func Options(overrides ...OptionsFields) *azoptions.Options { NetworkPolicy: lo.FromPtrOr(options.NetworkPolicy, "cilium"), VMMemoryOverheadPercent: lo.FromPtrOr(options.VMMemoryOverheadPercent, 0.075), NodeIdentities: options.NodeIdentities, + SubnetID: DefaultVnetSubnetID, } } diff --git a/skaffold.yaml b/skaffold.yaml index efde9c36c..6584a5a05 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -59,7 +59,7 @@ manifests: value: "Please run make az-all" - name: AZURE_NODE_RESOURCE_GROUP value: "Please run make az-all" - - name: AZURE_SUBNET_ID # the id of subnet to create network interfaces on + - name: VNET_SUBNET_ID # the id of subnet to create network interfaces on value: "Please run make az-all" - name: LEADER_ELECT # disable leader election for better debugging experience value: "false" From 775d10af29ca1416e48f05be8166754e8199dbc3 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Wed, 3 Apr 2024 11:23:15 -0700 Subject: [PATCH 08/23] fix: should fix e2e tests in theory --- Makefile-az.mk | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 8fb447f5c..bae7559c6 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -1,4 +1,4 @@ -AZURE_LOCATION ?= westus2 +ZURE_LOCATION ?= westus2 COMMON_NAME ?= karpenter ifeq ($(CODESPACES),true) AZURE_RESOURCE_GROUP ?= $(CODESPACE_NAME) @@ -15,9 +15,9 @@ KARPENTER_SERVICE_ACCOUNT_NAME ?= karpenter-sa AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpentermsi KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= 
KARPENTER_FID -az-all: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload +az-all: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-patch-skaffold-azureoverlay az-patch-vnet-subnet-id az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload -az-all-custom-vnet: az-login az-create-workload-msi az-mkaks-custom-vnet az-create-federated-cred az-perm az-perm-subnet az-perm-acr az-patch-skaffold-azureoverlay az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload +az-all-custom-vnet: az-login az-create-workload-msi az-mkaks-custom-vnet az-create-federated-cred az-perm az-perm-acr az-patch-skaffold-azureoverlay az-patch-subnet-custom az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload az-all-savm: az-login az-mkaks-savm az-perm-savm az-patch-skaffold-azure az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - StandaloneVirtualMachines @@ -79,7 +79,7 @@ az-mkaks-savm: az-mkrg ## Create experimental cluster with standalone VMs (+ ACR az-rmrg: ## Destroy test ACR and AKS cluster by deleting the resource group (use with care!) 
az group delete --name $(AZURE_RESOURCE_GROUP) -az-patch-skaffold: ## Update Azure client env vars and settings in skaffold config +az-patch-skaffold: ## Update Azur client env vars and settings in skaffold config $(eval AZURE_CLIENT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".identityProfile.kubeletidentity.clientId")) $(eval CLUSTER_ENDPOINT=$(shell kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')) # bootstrap token @@ -99,18 +99,29 @@ az-patch-skaffold: ## Update Azure client env vars and settings in skaffold con yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="KUBELET_BOOTSTRAP_TOKEN")).value = "$(BOOTSTRAP_TOKEN)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="SSH_PUBLIC_KEY")).value = "$(SSH_PUBLIC_KEY)"' skaffold.yaml -az-patch-skaffold-kubenet: az-patch-skaffold + +az-patch-subnet-custom: $(eval VNET_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) - yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="VNET_SUBNET_ID")) .value = "$(VNET_SUBNET_ID)"' skaffold.yaml + $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) + $(eval SUBNET_RESOURCE_GROUP=$(shell az network vnet subnet show --id $(VNET_SUBNET_ID) | jq -r ".resourceGroup")) + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(SUBNET_RESOURCE_GROUP) --role "Network Contributor" + yq e -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="VNET_SUBNET_ID")).value = "$(VNET_SUBNET_ID)"' skaffold.yaml + +az-patch-vnet-subnet-id: + $(eval VNET_SUBNET_ID=$(shell az network 
vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].subnets[0].id")) + $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) + $(eval SUBNET_RESOURCE_GROUP=$(shell az network vnet subnet show --id $(VNET_SUBNET_ID) | jq -r ".resourceGroup")) + az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(SUBNET_RESOURCE_GROUP) --role "Network Contributor" + yq e -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="VNET_SUBNET_ID")).value = "$(VNET_SUBNET_ID)"' skaffold.yaml + + +az-patch-skaffold-kubenet: az-patch-vnet-subnet-id az-patch-skaffold yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="NETWORK_PLUGIN").value) = "kubenet"' skaffold.yaml -az-patch-skaffold-azure: az-patch-skaffold - $(eval VNET_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) - yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="VNET_SUBNET_ID")) .value = "$(VNET_SUBNET_ID)"' skaffold.yaml +az-patch-skaffold-azure: az-patch-vnet-subnet-id az-patch-skaffold + yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="NETWORK_PLUGIN").value) = "azure"' skaffold.yaml az-patch-skaffold-azureoverlay: az-patch-skaffold - $(eval VNET_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) - yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="VNET_SUBNET_ID")) .value = "$(VNET_SUBNET_ID)"' skaffold.yaml yq -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="NETWORK_PLUGIN").value) = "azure"' skaffold.yaml # old identity path is 
still the default, so need to override the values values with new logic. @@ -142,12 +153,6 @@ az-perm: ## Create role assignments to let Karpenter manage VMs and Network @echo Consider "make az-patch-skaffold"! -az-perm-subnet: - # give Network Contributor permission to the subnet rg for the AKS cluster - $(eval VNET_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId")) - $(eval SUBNET_RESOURCE_GROUP=$(shell az network vnet subnet show --id $(VNET_SUBNET_ID) | jq -r ".resourceGroup")) - $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) - az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(SUBNET_RESOURCE_GROUP) --role "Network Contributor" az-perm-savm: ## Create role assignments to let Karpenter manage VMs and Network # Note: savm has not been converted over to use a workload identity From d3ce13aaf03e4b848f40044bc36f85984ac2cde3 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Wed, 3 Apr 2024 14:18:34 -0700 Subject: [PATCH 09/23] fix: add back the a --- Makefile-az.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index bae7559c6..47d525662 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -1,4 +1,4 @@ -ZURE_LOCATION ?= westus2 +AZURE_LOCATION ?= westus2 COMMON_NAME ?= karpenter ifeq ($(CODESPACES),true) AZURE_RESOURCE_GROUP ?= $(CODESPACE_NAME) From 529e74b7dacc3949cd7241b78f584692c896c6a0 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Wed, 3 Apr 2024 16:36:30 -0700 Subject: [PATCH 10/23] Update pkg/operator/options/options.go Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- 
pkg/operator/options/options.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/operator/options/options.go b/pkg/operator/options/options.go index a7c5a067b..9e8b2a28c 100644 --- a/pkg/operator/options/options.go +++ b/pkg/operator/options/options.go @@ -81,7 +81,7 @@ func (o *Options) AddFlags(fs *coreoptions.FlagSet) { fs.StringVar(&o.SSHPublicKey, "ssh-public-key", env.WithDefaultString("SSH_PUBLIC_KEY", ""), "[REQUIRED] VM SSH public key.") fs.StringVar(&o.NetworkPlugin, "network-plugin", env.WithDefaultString("NETWORK_PLUGIN", "azure"), "The network plugin used by the cluster.") fs.StringVar(&o.NetworkPolicy, "network-policy", env.WithDefaultString("NETWORK_POLICY", ""), "The network policy used by the cluster.") - fs.StringVar(&o.SubnetID, "default-vnet-subnet-id", env.WithDefaultString("VNET_SUBNET_ID", ""), "The default subnet ID to use for new nodes. This must be a valid subnet arm id that does not overlap with the service cidr or the pod cidr") + fs.StringVar(&o.SubnetID, "vnet-subnet-id", env.WithDefaultString("VNET_SUBNET_ID", ""), "The default subnet ID to use for new nodes. 
This must be a valid ARM resource ID for subnet that does not overlap with the service CIDR or the pod CIDR") fs.Var(newNodeIdentitiesValue(env.WithDefaultString("NODE_IDENTITIES", ""), &o.NodeIdentities), "node-identities", "User assigned identities for nodes.") } From f3639c3f89d5c6fa178e86f0192a8853143212a9 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Wed, 3 Apr 2024 16:36:44 -0700 Subject: [PATCH 11/23] Update pkg/providers/imagefamily/azlinux.go Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- pkg/providers/imagefamily/azlinux.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/providers/imagefamily/azlinux.go b/pkg/providers/imagefamily/azlinux.go index c3f6702b7..3d36d8cc2 100644 --- a/pkg/providers/imagefamily/azlinux.go +++ b/pkg/providers/imagefamily/azlinux.go @@ -84,8 +84,8 @@ func (u AzureLinux) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, ta CABundle: caBundle, GPUNode: u.Options.GPUNode, GPUDriverVersion: u.Options.GPUDriverVersion, - // GPUImageSHA: u.Options.GPUImageSHA, image sha only applies to ubuntu - // SEE: https://github.com/Azure/AgentBaker/blob/f393d6e4d689d9204d6000c85623ad9b764e2a29/vhdbuilder/packer/install-dependencies.sh#L201 + // GPUImageSHA: u.Options.GPUImageSHA - GPU image SHA only applies to Ubuntu + // See: https://github.com/Azure/AgentBaker/blob/f393d6e4d689d9204d6000c85623ad9b764e2a29/vhdbuilder/packer/install-dependencies.sh#L201 SubnetID: u.Options.SubnetID, }, Arch: u.Options.Arch, From 888b98f98b1ed6e8b144e2284bb79d3a47a7988c Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Wed, 3 Apr 2024 17:23:23 -0700 Subject: [PATCH 12/23] test: updating e2e to use new subnet patching logic --- .github/actions/e2e/install-karpenter/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/e2e/install-karpenter/action.yaml 
b/.github/actions/e2e/install-karpenter/action.yaml index 12a14cbab..91b49e0a1 100644 --- a/.github/actions/e2e/install-karpenter/action.yaml +++ b/.github/actions/e2e/install-karpenter/action.yaml @@ -45,7 +45,7 @@ runs: run: az account set --subscription ${{ inputs.subscription-id }} - name: patch skaffold and cilium shell: bash - run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-patch-skaffold-azureoverlay + run: AZURE_CLUSTER_NAME=${{ inputs.cluster_name }} AZURE_RESOURCE_GROUP=${{ inputs.resource_group }} AZURE_LOCATION=${{ inputs.location }} make az-patch-skaffold-azureoverlay az-patch-vnet-subnet-id - name: deploy karpenter to cluster shell: bash run: AZURE_ACR_NAME=${{ inputs.acr_name }} make az-run From 4553e4654bd11eba6d991d165dd47a9610984532 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Wed, 3 Apr 2024 20:19:46 -0700 Subject: [PATCH 13/23] fix: addressing makefile comments --- Makefile-az.mk | 16 ++++++++++------ pkg/operator/options/options.go | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 47d525662..cf4d4afd2 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -15,6 +15,10 @@ KARPENTER_SERVICE_ACCOUNT_NAME ?= karpenter-sa AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME ?= karpentermsi KARPENTER_FEDERATED_IDENTITY_CREDENTIAL_NAME ?= KARPENTER_FID +CUSTOM_VNET_NAME ?= $(AZURE_CLUSTER_NAME)-vnet +CUSTOM_SUBNET_NAME ?= nodesubnet + + az-all: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-patch-skaffold-azureoverlay az-patch-vnet-subnet-id az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload az-all-custom-vnet: az-login az-create-workload-msi az-mkaks-custom-vnet az-create-federated-cred az-perm az-perm-acr az-patch-skaffold-azureoverlay az-patch-subnet-custom az-build 
az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload @@ -47,17 +51,17 @@ az-mkaks-cilium: az-mkacr ## Create test AKS cluster (with --network-dataplane c az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter -az-mkvnet: - az group create --name $(AZURE_RESOURCE_GROUP)-vnet --location $(AZURE_LOCATION) - az network vnet create --name $(AZURE_CLUSTER_NAME)-vnet --resource-group $(AZURE_RESOURCE_GROUP)-vnet --location $(AZURE_LOCATION) --address-prefixes "10.1.0.0/16" +az-mkvnet: # Creates a vnet in the addr range of 10.1.0.0/16 + az group create --name $(CUSTOM_VNET_NAME) --location $(AZURE_LOCATION) + az network vnet create --name $(CUSTOM_VNET_NAME) --resource-group $(AZURE_RESOURCE_GROUP)-vnet --location $(AZURE_LOCATION) --address-prefixes "10.1.0.0/16" -az-mksubnet: - az network vnet subnet create --name nodesubnet --resource-group $(AZURE_RESOURCE_GROUP)-vnet --vnet-name $(AZURE_CLUSTER_NAME)-vnet --address-prefixes "10.1.0.0/24" +az-mksubnet: # Creates a subnet with the range of 10.1.0.0/24 + az network vnet subnet create --name $(CUSTOM_SUBNET_NAME) --resource-group $(CUSTOM_VNET_NAME) --vnet-name $(CUSTOM_VNET_NAME) --address-prefixes "10.1.0.0/24" az-mkaks-custom-vnet: az-mkacr ## Create test AKS cluster with custom VNET az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \ --enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay \ - --enable-oidc-issuer --enable-workload-identity --vnet-subnet-id "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP)-vnet/providers/Microsoft.Network/virtualNetworks/$(AZURE_CLUSTER_NAME)-vnet/subnets/nodesubnet" + --enable-oidc-issuer 
--enable-workload-identity --vnet-subnet-id "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(CUSTOM_VNET_NAME)/providers/Microsoft.Network/virtualNetworks/$(CUSTOM_VNET_NAME)/subnets/$(CUSTOM_SUBNET_NAME)" az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter diff --git a/pkg/operator/options/options.go b/pkg/operator/options/options.go index 9e8b2a28c..5ede66943 100644 --- a/pkg/operator/options/options.go +++ b/pkg/operator/options/options.go @@ -68,7 +68,7 @@ type Options struct { NetworkPolicy string // => NetworkPolicy in bootstrap NodeIdentities []string // => Applied onto each VM - SubnetID string // => VnetSubnetID to use (for nodes in Azure CNI Overlay and Azure CNI + pod subnet; for for nodes and pods in Azure CNI), unless overriden via AKSNodeClass + SubnetID string // => VnetSubnetID to use (for nodes in Azure CNI Overlay and Azure CNI + pod subnet; for nodes and pods in Azure CNI), unless overridden via AKSNodeClass setFlags map[string]bool } From 29b0aa7c908544ac8115a601da36a9a46b536c9d Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Fri, 5 Apr 2024 00:54:26 -0700 Subject: [PATCH 14/23] Update pkg/providers/launchtemplate/launchtemplate.go Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- pkg/providers/launchtemplate/launchtemplate.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index d2657b95a..67fc8ef18 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -172,7 +172,7 @@ func mergeTags(tags ...map[string]string) (result map[string]*string) { }) } -// getVnetInfoLabels returns the labels for Azure CNI +// getVnetInfoLabels returns VNet info labels
func (p *Provider) getVnetInfoLabels(ctx context.Context, _ *v1alpha2.AKSNodeClass) (map[string]string, error) { // TODO(bsoghigian): this should be refactored to lo.Ternary(nodeClass.Spec.VnetSubnetID != nil, lo.FromPtr(nodeClass.Spec.VnetSubnetID), os.Getenv("AZURE_SUBNET_ID")) when we add VnetSubnetID to the nodeclass vnetSubnetComponents, err := utils.GetVnetSubnetIDComponents(options.FromContext(ctx).SubnetID) From 4b393a2fc139ba99f8f9841d563829d32148732b Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Fri, 5 Apr 2024 00:54:36 -0700 Subject: [PATCH 15/23] Update Makefile-az.mk Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- Makefile-az.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index cf4d4afd2..57876daa2 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -55,7 +55,7 @@ az-mkvnet: # Creates a vnet in the addr range of 10.1.0.0/16 az group create --name $(CUSTOM_VNET_NAME) --location $(AZURE_LOCATION) az network vnet create --name $(CUSTOM_VNET_NAME) --resource-group $(AZURE_RESOURCE_GROUP)-vnet --location $(AZURE_LOCATION) --address-prefixes "10.1.0.0/16" -az-mksubnet: # Creates a subnet with the range of 10.1.0.0/24 +az-mksubnet: ## Create a subnet with address range of 10.1.0.0/24 az network vnet subnet create --name $(CUSTOM_SUBNET_NAME) --resource-group $(CUSTOM_VNET_NAME) --vnet-name $(CUSTOM_VNET_NAME) --address-prefixes "10.1.0.0/24" az-mkaks-custom-vnet: az-mkacr ## Create test AKS cluster with custom VNET From 20d555d91b96b3c3252fe3737db3fefee4f47d1e Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Fri, 5 Apr 2024 00:54:45 -0700 Subject: [PATCH 16/23] Update Makefile-az.mk Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- Makefile-az.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile-az.mk 
b/Makefile-az.mk index 57876daa2..7a10e25d9 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -83,7 +83,7 @@ az-mkaks-savm: az-mkrg ## Create experimental cluster with standalone VMs (+ ACR az-rmrg: ## Destroy test ACR and AKS cluster by deleting the resource group (use with care!) az group delete --name $(AZURE_RESOURCE_GROUP) -az-patch-skaffold: ## Update Azur client env vars and settings in skaffold config +az-patch-skaffold: ## Update Azure client env vars and settings in skaffold config $(eval AZURE_CLIENT_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".identityProfile.kubeletidentity.clientId")) $(eval CLUSTER_ENDPOINT=$(shell kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')) # bootstrap token From 530c6b8800418e2b0a60f25ff13a026f043067ff Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Fri, 5 Apr 2024 00:54:51 -0700 Subject: [PATCH 17/23] Update Makefile-az.mk Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- Makefile-az.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 7a10e25d9..e690b5f71 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -58,7 +58,7 @@ az-mkvnet: # Creates a vnet in the addr range of 10.1.0.0/16 az-mksubnet: ## Create a subnet with address range of 10.1.0.0/24 az network vnet subnet create --name $(CUSTOM_SUBNET_NAME) --resource-group $(CUSTOM_VNET_NAME) --vnet-name $(CUSTOM_VNET_NAME) --address-prefixes "10.1.0.0/24" -az-mkaks-custom-vnet: az-mkacr ## Create test AKS cluster with custom VNET +az-mkaks-custom-vnet: az-mkacr ## Create test AKS cluster with custom VNet az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \ --enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode 
overlay \ --enable-oidc-issuer --enable-workload-identity --vnet-subnet-id "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(CUSTOM_VNET_NAME)/providers/Microsoft.Network/virtualNetworks/$(CUSTOM_VNET_NAME)/subnets/$(CUSTOM_SUBNET_NAME)" From 6863a375c853a6ac1bf94dbecbbd18ba853d3215 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Fri, 5 Apr 2024 00:55:02 -0700 Subject: [PATCH 18/23] Update Makefile-az.mk Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- Makefile-az.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index e690b5f71..29974fe93 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -51,7 +51,7 @@ az-mkaks-cilium: az-mkacr ## Create test AKS cluster (with --network-dataplane c az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter -az-mkvnet: # Creates a vnet in the addr range of 10.1.0.0/16 +az-mkvnet: ## Create a VNet with address range of 10.1.0.0/16 az group create --name $(CUSTOM_VNET_NAME) --location $(AZURE_LOCATION) az network vnet create --name $(CUSTOM_VNET_NAME) --resource-group $(AZURE_RESOURCE_GROUP)-vnet --location $(AZURE_LOCATION) --address-prefixes "10.1.0.0/16" From 8316a778b6ef1a8d3d119953a4a0870e78628877 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Fri, 5 Apr 2024 01:00:31 -0700 Subject: [PATCH 19/23] Update pkg/operator/options/options_validation.go Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- pkg/operator/options/options_validation.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/operator/options/options_validation.go b/pkg/operator/options/options_validation.go index 09fad242f..b56b3a21e 100644 --- a/pkg/operator/options/options_validation.go +++ 
b/pkg/operator/options/options_validation.go @@ -39,7 +39,7 @@ func (o Options) Validate() error { func (o Options) validateVnetSubnetID() error { _, err := utils.GetVnetSubnetIDComponents(o.SubnetID) if err != nil { - return fmt.Errorf("default-vnet-subnet-id is invalid: %w", err) + return fmt.Errorf("vnet-subnet-id is invalid: %w", err) } return nil } From 18669fc949bc4b8820d99d95fb6f95f55f0d9f18 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Fri, 5 Apr 2024 01:03:45 -0700 Subject: [PATCH 20/23] Update pkg/providers/launchtemplate/launchtemplate.go Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- pkg/providers/launchtemplate/launchtemplate.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index 67fc8ef18..0a9ff240a 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -115,6 +115,7 @@ func (p *Provider) getStaticParameters(ctx context.Context, instanceType *cloudp if err := instanceType.Requirements.Compatible(scheduling.NewRequirements(scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureArm64))); err == nil { arch = corev1beta1.ArchitectureArm64 } + // TODO: make conditional on either Azure CNI Overlay or pod subnet vnetLabels, err := p.getVnetInfoLabels(ctx, nodeClass) if err != nil { return nil, err From f5a2e17aed65f47e0a4f7b8bfb0de13f3de9a1ab Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Fri, 5 Apr 2024 01:36:13 -0700 Subject: [PATCH 21/23] addressing comments --- Makefile-az.mk | 3 ++- pkg/operator/options/options_validation.go | 3 +++ pkg/providers/launchtemplate/launchtemplate.go | 2 +- pkg/test/environment.go | 10 ++++------ pkg/test/options.go | 4 ++-- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 
29974fe93..e7833f235 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -61,7 +61,8 @@ az-mksubnet: ## Create a subnet with address range of 10.1.0.0/24 az-mkaks-custom-vnet: az-mkacr ## Create test AKS cluster with custom VNet az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \ --enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay \ - --enable-oidc-issuer --enable-workload-identity --vnet-subnet-id "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(CUSTOM_VNET_NAME)/providers/Microsoft.Network/virtualNetworks/$(CUSTOM_VNET_NAME)/subnets/$(CUSTOM_SUBNET_NAME)" + --enable-oidc-issuer --enable-workload-identity \ + --vnet-subnet-id "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(CUSTOM_VNET_NAME)/providers/Microsoft.Network/virtualNetworks/$(CUSTOM_VNET_NAME)/subnets/$(CUSTOM_SUBNET_NAME)" az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing skaffold config set default-repo $(AZURE_ACR_NAME).azurecr.io/karpenter diff --git a/pkg/operator/options/options_validation.go b/pkg/operator/options/options_validation.go index b56b3a21e..9ed1bb21c 100644 --- a/pkg/operator/options/options_validation.go +++ b/pkg/operator/options/options_validation.go @@ -77,5 +77,8 @@ func (o Options) validateRequiredFields() error { if o.SSHPublicKey == "" { return fmt.Errorf("missing field, ssh-public-key") } + if o.SubnetID == "" { + return fmt.Errorf("missing field, vnet-subnet-id") + } return nil } diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index 0a9ff240a..83397b2c7 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -115,7 +115,7 @@ func (p *Provider) getStaticParameters(ctx context.Context, instanceType *cloudp if err := 
instanceType.Requirements.Compatible(scheduling.NewRequirements(scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, corev1beta1.ArchitectureArm64))); err == nil { arch = corev1beta1.ArchitectureArm64 } - // TODO: make conditional on either Azure CNI Overlay or pod subnet + // TODO: make conditional on either Azure CNI Overlay or pod subnet vnetLabels, err := p.getVnetInfoLabels(ctx, nodeClass) if err != nil { return nil, err diff --git a/pkg/test/environment.go b/pkg/test/environment.go index 2b5be99e8..87a942ef3 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -43,8 +43,6 @@ func init() { var ( resourceGroup = "test-resourceGroup" - - DefaultVnetSubnetID = "/subscriptions/12345678-1234-1234-1234-123456789012/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/karpentervnet/subnets/karpentersub" ) type Environment struct { @@ -142,10 +140,10 @@ func NewRegionalEnvironment(ctx context.Context, env *coretest.Environment, regi launchTemplateProvider, loadBalancerProvider, unavailableOfferingsCache, - region, // region - resourceGroup, // resourceGroup - DefaultVnetSubnetID, // subnet - "", // subscriptionID + region, + resourceGroup, + testOptions.SubnetID, + "", // subscriptionID ) return &Environment{ diff --git a/pkg/test/options.go b/pkg/test/options.go index ac182acd1..de910df89 100644 --- a/pkg/test/options.go +++ b/pkg/test/options.go @@ -35,7 +35,7 @@ type OptionsFields struct { NetworkPolicy *string VMMemoryOverheadPercent *float64 NodeIdentities []string - SubnetID string + SubnetID *string } func Options(overrides ...OptionsFields) *azoptions.Options { @@ -55,6 +55,6 @@ func Options(overrides ...OptionsFields) *azoptions.Options { NetworkPolicy: lo.FromPtrOr(options.NetworkPolicy, "cilium"), VMMemoryOverheadPercent: lo.FromPtrOr(options.VMMemoryOverheadPercent, 0.075), NodeIdentities: options.NodeIdentities, - SubnetID: DefaultVnetSubnetID, + SubnetID: lo.FromPtrOr(options.SubnetID,
"/subscriptions/12345678-1234-1234-1234-123456789012/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/karpentervnet/subnets/karpentersub"), } } From 83ebb14e574a99fb1f0f7826a8427378105af7fb Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Fri, 5 Apr 2024 10:17:16 -0700 Subject: [PATCH 22/23] fix: removing all vnet labels that are not essential for survival --- Makefile-az.mk | 2 +- pkg/operator/operator.go | 4 ++-- pkg/operator/options/suite_test.go | 2 +- pkg/providers/instancetype/suite_test.go | 3 --- pkg/providers/launchtemplate/launchtemplate.go | 8 +------- 5 files changed, 5 insertions(+), 14 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index e7833f235..80754b3ec 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -112,7 +112,7 @@ az-patch-subnet-custom: az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(SUBNET_RESOURCE_GROUP) --role "Network Contributor" yq e -i '(.manifests.helm.releases[0].overrides.controller.env[] | select(.name=="VNET_SUBNET_ID")).value = "$(VNET_SUBNET_ID)"' skaffold.yaml -az-patch-vnet-subnet-id: +az-patch-vnet-subnet-id: ## Patch VNET_SUBNET_ID in skaffold.yaml $(eval VNET_SUBNET_ID=$(shell az network vnet list --resource-group $(AZURE_RESOURCE_GROUP_MC) | jq -r ".[0].subnets[0].id")) $(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv)) $(eval SUBNET_RESOURCE_GROUP=$(shell az network vnet subnet show --id $(VNET_SUBNET_ID) | jq -r ".resourceGroup")) diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index f51e14b34..4f7e70276 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -73,7 +73,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont azClient, err := instance.CreateAZClient(ctx, azConfig)
lo.Must0(err, "creating Azure client") - vnetGUID, err := getVNETGUID(azConfig, options.FromContext(ctx).SubnetID) + vnetGUID, err := getVnetGUID(azConfig, options.FromContext(ctx).SubnetID) lo.Must0(err, "getting VNET GUID") unavailableOfferingsCache := azurecache.NewUnavailableOfferings() @@ -168,7 +168,7 @@ func getCABundle(restConfig *rest.Config) (*string, error) { return ptr.String(base64.StdEncoding.EncodeToString(transportConfig.TLS.CAData)), nil } -func getVNETGUID(cfg *auth.Config, subnetID string) (string, error) { +func getVnetGUID(cfg *auth.Config, subnetID string) (string, error) { creds, err := auth.NewCredential(cfg) if err != nil { return "", err diff --git a/pkg/operator/options/suite_test.go b/pkg/operator/options/suite_test.go index 45a1cd1cb..b223e75f4 100644 --- a/pkg/operator/options/suite_test.go +++ b/pkg/operator/options/suite_test.go @@ -108,7 +108,7 @@ var _ = Describe("Options", func() { SSHPublicKey: lo.ToPtr("env-ssh-public-key"), NetworkPlugin: lo.ToPtr("env-network-plugin"), NetworkPolicy: lo.ToPtr("env-network-policy"), - SubnetID: "/subscriptions/12345678-1234-1234-1234-123456789012/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/karpentervnet/subnets/karpentersub", + SubnetID: lo.ToPtr("/subscriptions/12345678-1234-1234-1234-123456789012/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/karpentervnet/subnets/karpentersub"), NodeIdentities: []string{"/subscriptions/1234/resourceGroups/mcrg/providers/Microsoft.ManagedIdentity/userAssignedIdentities/envid1", "/subscriptions/1234/resourceGroups/mcrg/providers/Microsoft.ManagedIdentity/userAssignedIdentities/envid2"}, })) }) diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index c90d4a9c7..1c551d120 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -160,10 +160,7 @@ var _ = Describe("InstanceType Provider", func() { Expect(err).To(Succeed()) 
decodedString := string(decodedBytes[:]) Expect(decodedString).To(SatisfyAll( - ContainSubstring("kubernetes.azure.com/ebpf-dataplane=cilium"), - ContainSubstring("kubernetes.azure.com/network-name=karpentervnet"), ContainSubstring("kubernetes.azure.com/network-subnet=karpentersub"), - ContainSubstring("kubernetes.azure.com/network-subscription=12345678-1234-1234-1234-123456789012"), ContainSubstring("kubernetes.azure.com/nodenetwork-vnetguid=test-vnet-guid"), ContainSubstring("kubernetes.azure.com/podnetwork-type=overlay"), )) diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index 83397b2c7..f1f7e5455 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -120,9 +120,7 @@ func (p *Provider) getStaticParameters(ctx context.Context, instanceType *cloudp if err != nil { return nil, err } - for key, value := range vnetLabels { - labels[key] = value - } + labels = lo.Assign(labels, vnetLabels) return &parameters.StaticParameters{ ClusterName: options.FromContext(ctx).ClusterName, @@ -173,7 +171,6 @@ func mergeTags(tags ...map[string]string) (result map[string]*string) { }) } -// getVnetInfoLabels returns VNet info labels func (p *Provider) getVnetInfoLabels(ctx context.Context, _ *v1alpha2.AKSNodeClass) (map[string]string, error) { // TODO(bsoghigian): this should be refactored to lo.Ternary(nodeClass.Spec.VnetSubnetID != nil, lo.FromPtr(nodeClass.Spec.VnetSubnetID), os.Getenv("AZURE_SUBNET_ID")) when we add VnetSubnetID to the nodeclass vnetSubnetComponents, err := utils.GetVnetSubnetIDComponents(options.FromContext(ctx).SubnetID) @@ -181,10 +178,7 @@ func (p *Provider) getVnetInfoLabels(ctx context.Context, _ *v1alpha2.AKSNodeCla return nil, err } vnetLabels := map[string]string{ - vnetDataPlaneLabel: networkDataplaneCilium, - vnetNetworkNameLabel: vnetSubnetComponents.VNetName, vnetSubnetNameLabel: vnetSubnetComponents.SubnetName, -
vnetSubscriptionIDLabel: vnetSubnetComponents.SubscriptionID, vnetGUIDLabel: p.vnetGUID, vnetPodNetworkTypeLabel: networkModeOverlay, } From 6a81ccd2263dc6eed0a4798be5d632b5899892bb Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Fri, 5 Apr 2024 16:32:10 -0700 Subject: [PATCH 23/23] fix: adding cilium label back to vnet labels so cilium agent can be provisioned on the nodes --- pkg/providers/instancetype/suite_test.go | 1 + pkg/providers/launchtemplate/launchtemplate.go | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 1c551d120..6dd94b4a9 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -160,6 +160,7 @@ var _ = Describe("InstanceType Provider", func() { Expect(err).To(Succeed()) decodedString := string(decodedBytes[:]) Expect(decodedString).To(SatisfyAll( + ContainSubstring("kubernetes.azure.com/ebpf-dataplane=cilium"), ContainSubstring("kubernetes.azure.com/network-subnet=karpentersub"), ContainSubstring("kubernetes.azure.com/nodenetwork-vnetguid=test-vnet-guid"), ContainSubstring("kubernetes.azure.com/podnetwork-type=overlay"), diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index f1f7e5455..3c5e5ac95 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -39,9 +39,7 @@ const ( networkDataplaneCilium = "cilium" vnetDataPlaneLabel = "kubernetes.azure.com/ebpf-dataplane" - vnetNetworkNameLabel = "kubernetes.azure.com/network-name" vnetSubnetNameLabel = "kubernetes.azure.com/network-subnet" - vnetSubscriptionIDLabel = "kubernetes.azure.com/network-subscription" vnetGUIDLabel = "kubernetes.azure.com/nodenetwork-vnetguid" vnetPodNetworkTypeLabel = "kubernetes.azure.com/podnetwork-type" @@ -122,6 +120,15 @@ func (p *Provider) getStaticParameters(ctx context.Context, 
instanceType *cloudp } labels = lo.Assign(labels, vnetLabels) + // TODO: Make conditional on eBPF dataplane + // This label is required for the cilium agent daemonset because + // we select the nodes for the daemonset based on this label + // - key: kubernetes.azure.com/ebpf-dataplane + // operator: In + // values: + // - cilium + labels[vnetDataPlaneLabel] = networkDataplaneCilium + return &parameters.StaticParameters{ ClusterName: options.FromContext(ctx).ClusterName, ClusterEndpoint: p.clusterEndpoint,