Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Azure Linux Image Family Support #72

Merged
merged 27 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
fb259de
feat: AzureLinux UserData defined
Bryce-Soghigian Dec 18, 2023
7b5d2a8
fix: adding proper public gallery reference
Bryce-Soghigian Dec 18, 2023
469db16
feat: adding AzureLinux to ImageFamily CRD API Validation
Bryce-Soghigian Dec 18, 2023
8c89e19
test(e2e): added utilization and gpu suites for azure linux
Bryce-Soghigian Dec 18, 2023
ea8348d
test(e2e): added e2e scenarios for arm64 bootstrapping to validate az…
Bryce-Soghigian Dec 18, 2023
f6f7dbc
feat: filtering supported gpus by image family
Bryce-Soghigian Dec 18, 2023
97c33c7
style: gofmt
Bryce-Soghigian Dec 18, 2023
ce209a9
test(codegen): adding additional GPU skus for testing isSupportedGPU
Bryce-Soghigian Dec 18, 2023
77c78f0
test(codegen): adding test to validate we don't regress in codegen
Bryce-Soghigian Dec 18, 2023
b3cbdeb
test(unit): GPU Filtering test, and testing for skugen regressions
Bryce-Soghigian Dec 18, 2023
62da464
test(unit): testing we find GPU for AzureLinux list
Bryce-Soghigian Dec 18, 2023
4d1dc1d
style: renaming suite from arm64 to arm
Bryce-Soghigian Dec 18, 2023
1eceaf0
Merge branch 'main' into bsoghigian/azlinux
Bryce-Soghigian Dec 21, 2023
1aca115
Merge branch 'main' of github.com:Azure/karpenter into bsoghigian/azl…
Bryce-Soghigian Dec 24, 2023
50ba327
Apply suggestions from code review
Bryce-Soghigian Jan 4, 2024
3d3a65e
fix: moving filter for gpu
Bryce-Soghigian Jan 5, 2024
911a75e
style: refactoring configuration for GPU features and Arch to come fr…
Bryce-Soghigian Jan 5, 2024
24594a4
test: refactored tests to use sets
Bryce-Soghigian Jan 8, 2024
b4769b2
ci
Bryce-Soghigian Jan 10, 2024
c25c61a
test: parameterizing the test for sku fake
Bryce-Soghigian Jan 10, 2024
05e1a09
test(e2e): added DescribeTable style tests for parameterization of Ut…
Bryce-Soghigian Jan 10, 2024
e291e4a
test: removing arm suite in favor of DescribeTable counter part
Bryce-Soghigian Jan 10, 2024
ee9e69c
CI
Bryce-Soghigian Jan 10, 2024
2f89e0a
separation of concerns
Bryce-Soghigian Jan 11, 2024
c1c456e
CI
Bryce-Soghigian Jan 11, 2024
3bf8c15
Merge branch 'main' into bsoghigian/azlinux
Bryce-Soghigian Jan 11, 2024
2866794
Merge branch 'main' into bsoghigian/azlinux
Bryce-Soghigian Jan 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hack/codegen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ skugen() {
NO_UPDATE=" pkg/fake/zz_generated.sku.$location.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)"
SUBJECT="SKUGEN"

go run hack/code/instancetype_testdata_gen.go -- "${GENERATED_FILE}" "$location" "Standard_B1s,Standard_A0,Standard_D2_v2,Standard_D2_v3,Standard_DS2_v2,Standard_D2s_v3,Standard_D2_v5,Standard_F16s_v2,Standard_NC24ads_A100_v4,Standard_M8-2ms,Standard_D4s_v3,Standard_D64s_v3,Standard_DC8s_v3"
go run hack/code/instancetype_testdata_gen.go -- "${GENERATED_FILE}" "$location" "Standard_B1s,Standard_A0,Standard_D2_v2,Standard_D2_v3,Standard_DS2_v2,Standard_D2s_v3,Standard_D2_v5,Standard_F16s_v2,Standard_NC6s,Standard_NC6s_v3,Standard_NC16as_T4_v3,Standard_NC24ads_A100_v4,Standard_M8-2ms,Standard_D4s_v3,Standard_D64s_v3,Standard_DC8s_v3"
go fmt "${GENERATED_FILE}"

GIT_DIFF=$(git diff --stat "${GENERATED_FILE}")
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/crds/karpenter.azure.com_aksnodeclasses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ spec:
description: ImageFamily is the image family that instances use.
enum:
- Ubuntu2204
- AzureLinux
type: string
imageVersion:
description: ImageVersion is the image version that instances use.
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/v1alpha2/aksnodeclass.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ type AKSNodeClassSpec struct {
ImageID *string `json:"-"`
// ImageFamily is the image family that instances use.
// +kubebuilder:default=Ubuntu2204
// +kubebuilder:validation:Enum:={Ubuntu2204}
// +kubebuilder:validation:Enum:={Ubuntu2204,AzureLinux}
ImageFamily *string `json:"imageFamily,omitempty"`
// ImageVersion is the image version that instances use.
// +optional
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/v1alpha2/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,8 @@ var (

NodeClaimLinkedAnnotationKey = v1alpha5.MachineLinkedAnnotationKey // still using the one from v1alpha5
)

// Image family names accepted for AKSNodeClassSpec.ImageFamily. The values
// match the entries of the kubebuilder validation enum declared on that field.
const (
// Ubuntu2204ImageFamily is the name of the Ubuntu2204 image family.
Ubuntu2204ImageFamily = "Ubuntu2204"
// AzureLinuxImageFamily is the name of the AzureLinux image family.
AzureLinuxImageFamily = "AzureLinux"
)
71 changes: 71 additions & 0 deletions pkg/fake/skus_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
Portions Copyright (c) Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package fake

import (
"testing"

"k8s.io/apimachinery/pkg/util/sets"
)

func TestSKUExistence(t *testing.T) {
testSKUExistenceForRegion(t, "eastus", sets.New(
"Standard_A0",
"Standard_B1s",
"Standard_D2s_v3",
"Standard_D2_v2",
"Standard_D2_v3",
"Standard_D2_v5",
"Standard_D4s_v3",
"Standard_D64s_v3",
"Standard_DC8s_v3",
"Standard_DS2_v2",
"Standard_F16s_v2",
"Standard_M8-2ms",
"Standard_NC24ads_A100_v4",
"Standard_NC6s_v3",
"Standard_NC16as_T4_v3",
))
testSKUExistenceForRegion(t, "westcentralus", sets.New(
"Standard_A0",
"Standard_B1s",
"Standard_D2s_v3",
"Standard_D2_v2",
"Standard_D2_v3",
"Standard_D2_v5",
"Standard_D4s_v3",
"Standard_D64s_v3",
"Standard_DS2_v2",
"Standard_F16s_v2",
))
}

// testSKUExistenceForRegion reports a test error for every name in
// expectedSKUs that is absent from the generated fake ResourceSkus entry for
// region. It never stops the test early, so all missing SKUs are listed.
func testSKUExistenceForRegion(t *testing.T, region string, expectedSKUs sets.Set[string]) {
	// Attribute failures to the calling test's line rather than to this helper.
	t.Helper()

	generatedSKUs := ResourceSkus[region]

	generatedNames := make(sets.Set[string], len(generatedSKUs))
	for _, sku := range generatedSKUs {
		// Name is a pointer; skip unnamed entries instead of panicking on a nil dereference.
		if sku.Name == nil {
			continue
		}
		generatedNames.Insert(*sku.Name)
	}

	// sets.List returns a sorted slice, so failures appear in a stable order across runs.
	for _, missingSKU := range sets.List(expectedSKUs.Difference(generatedNames)) {
		t.Errorf("SKU not found in %v: %v", region, missingSKU)
	}
}
117 changes: 116 additions & 1 deletion pkg/fake/zz_generated.sku.eastus.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
)

// generated at 2023-12-12T02:02:44Z
// generated at 2023-12-18T15:28:53Z

func init() {
// ResourceSkus is a list of selected VM SKUs for a given region
Expand All @@ -39,6 +39,17 @@ func init() {
APIVersions: &[]string{},
Costs: &[]compute.ResourceSkuCosts{},
Restrictions: &[]compute.ResourceSkuRestrictions{
{
Type: compute.ResourceSkuRestrictionsType("Location"),
Values: &[]string{"eastus"},
RestrictionInfo: &compute.ResourceSkuRestrictionInfo{
Locations: &[]string{
"eastus",
},
Zones: &[]string{},
},
ReasonCode: "NotAvailableForSubscription",
},
{
Type: compute.ResourceSkuRestrictionsType("Zone"),
Values: &[]string{"eastus"},
Expand Down Expand Up @@ -601,6 +612,53 @@ func init() {
},
},
},
{
Name: lo.ToPtr("Standard_NC16as_T4_v3"),
Tier: lo.ToPtr("Standard"),
Kind: lo.ToPtr(""),
Size: lo.ToPtr("NC16as_T4_v3"),
Family: lo.ToPtr("Standard NCASv3_T4 Family"),
ResourceType: lo.ToPtr("virtualMachines"),
APIVersions: &[]string{},
Costs: &[]compute.ResourceSkuCosts{},
Restrictions: &[]compute.ResourceSkuRestrictions{},
Capabilities: &[]compute.ResourceSkuCapabilities{
{Name: lo.ToPtr("MaxResourceVolumeMB"), Value: lo.ToPtr("360448")},
{Name: lo.ToPtr("OSVhdSizeMB"), Value: lo.ToPtr("1047552")},
{Name: lo.ToPtr("vCPUs"), Value: lo.ToPtr("16")},
{Name: lo.ToPtr("MemoryPreservingMaintenanceSupported"), Value: lo.ToPtr("False")},
{Name: lo.ToPtr("HyperVGenerations"), Value: lo.ToPtr("V1,V2")},
{Name: lo.ToPtr("MemoryGB"), Value: lo.ToPtr("110")},
{Name: lo.ToPtr("MaxDataDiskCount"), Value: lo.ToPtr("32")},
{Name: lo.ToPtr("CpuArchitectureType"), Value: lo.ToPtr("x64")},
{Name: lo.ToPtr("LowPriorityCapable"), Value: lo.ToPtr("True")},
{Name: lo.ToPtr("PremiumIO"), Value: lo.ToPtr("True")},
{Name: lo.ToPtr("VMDeploymentTypes"), Value: lo.ToPtr("IaaS")},
{Name: lo.ToPtr("vCPUsAvailable"), Value: lo.ToPtr("16")},
{Name: lo.ToPtr("GPUs"), Value: lo.ToPtr("1")},
{Name: lo.ToPtr("vCPUsPerCore"), Value: lo.ToPtr("1")},
{Name: lo.ToPtr("CombinedTempDiskAndCachedIOPS"), Value: lo.ToPtr("16320")},
{Name: lo.ToPtr("CombinedTempDiskAndCachedReadBytesPerSecond"), Value: lo.ToPtr("251658240")},
{Name: lo.ToPtr("CombinedTempDiskAndCachedWriteBytesPerSecond"), Value: lo.ToPtr("251658240")},
{Name: lo.ToPtr("CachedDiskBytes"), Value: lo.ToPtr("154619000000")},
{Name: lo.ToPtr("UncachedDiskIOPS"), Value: lo.ToPtr("24480")},
{Name: lo.ToPtr("UncachedDiskBytesPerSecond"), Value: lo.ToPtr("368640000")},
{Name: lo.ToPtr("EphemeralOSDiskSupported"), Value: lo.ToPtr("True")},
{Name: lo.ToPtr("EncryptionAtHostSupported"), Value: lo.ToPtr("True")},
{Name: lo.ToPtr("CapacityReservationSupported"), Value: lo.ToPtr("False")},
{Name: lo.ToPtr("AcceleratedNetworkingEnabled"), Value: lo.ToPtr("True")},
{Name: lo.ToPtr("RdmaEnabled"), Value: lo.ToPtr("False")},
{Name: lo.ToPtr("MaxNetworkInterfaces"), Value: lo.ToPtr("8")},
},
Locations: &[]string{"eastus"},
LocationInfo: &[]compute.ResourceSkuLocationInfo{{Location: lo.ToPtr("eastus"), Zones: &[]string{
"1",
"2",
"3",
},
},
},
},
{
Name: lo.ToPtr("Standard_NC24ads_A100_v4"),
Tier: lo.ToPtr("Standard"),
Expand Down Expand Up @@ -647,5 +705,62 @@ func init() {
},
},
},
{
Name: lo.ToPtr("Standard_NC6s_v3"),
Tier: lo.ToPtr("Standard"),
Kind: lo.ToPtr(""),
Size: lo.ToPtr("NC6s_v3"),
Family: lo.ToPtr("standardNCSv3Family"),
ResourceType: lo.ToPtr("virtualMachines"),
APIVersions: &[]string{},
Costs: &[]compute.ResourceSkuCosts{},
Restrictions: &[]compute.ResourceSkuRestrictions{
{
Type: compute.ResourceSkuRestrictionsType("Zone"),
Values: &[]string{"eastus"},
RestrictionInfo: &compute.ResourceSkuRestrictionInfo{
Locations: &[]string{
"eastus",
},
Zones: &[]string{
"1",
"2",
"3",
},
},
ReasonCode: "NotAvailableForSubscription",
},
},
Capabilities: &[]compute.ResourceSkuCapabilities{
{Name: lo.ToPtr("MaxResourceVolumeMB"), Value: lo.ToPtr("344064")},
{Name: lo.ToPtr("OSVhdSizeMB"), Value: lo.ToPtr("1047552")},
{Name: lo.ToPtr("vCPUs"), Value: lo.ToPtr("6")},
{Name: lo.ToPtr("MemoryPreservingMaintenanceSupported"), Value: lo.ToPtr("False")},
{Name: lo.ToPtr("HyperVGenerations"), Value: lo.ToPtr("V1,V2")},
{Name: lo.ToPtr("MemoryGB"), Value: lo.ToPtr("112")},
{Name: lo.ToPtr("MaxDataDiskCount"), Value: lo.ToPtr("12")},
{Name: lo.ToPtr("CpuArchitectureType"), Value: lo.ToPtr("x64")},
{Name: lo.ToPtr("LowPriorityCapable"), Value: lo.ToPtr("True")},
{Name: lo.ToPtr("PremiumIO"), Value: lo.ToPtr("True")},
{Name: lo.ToPtr("VMDeploymentTypes"), Value: lo.ToPtr("IaaS")},
{Name: lo.ToPtr("vCPUsAvailable"), Value: lo.ToPtr("6")},
{Name: lo.ToPtr("GPUs"), Value: lo.ToPtr("1")},
{Name: lo.ToPtr("vCPUsPerCore"), Value: lo.ToPtr("1")},
{Name: lo.ToPtr("EphemeralOSDiskSupported"), Value: lo.ToPtr("True")},
{Name: lo.ToPtr("EncryptionAtHostSupported"), Value: lo.ToPtr("True")},
{Name: lo.ToPtr("CapacityReservationSupported"), Value: lo.ToPtr("False")},
{Name: lo.ToPtr("AcceleratedNetworkingEnabled"), Value: lo.ToPtr("True")},
{Name: lo.ToPtr("RdmaEnabled"), Value: lo.ToPtr("False")},
{Name: lo.ToPtr("MaxNetworkInterfaces"), Value: lo.ToPtr("4")},
},
Locations: &[]string{"eastus"},
LocationInfo: &[]compute.ResourceSkuLocationInfo{{Location: lo.ToPtr("eastus"), Zones: &[]string{
"1",
"2",
"3",
},
},
},
},
}
}
16 changes: 14 additions & 2 deletions pkg/fake/zz_generated.sku.westcentralus.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
)

// generated at 2023-12-12T02:03:06Z
// generated at 2023-12-18T15:29:11Z

func init() {
// ResourceSkus is a list of selected VM SKUs for a given region
Expand All @@ -38,7 +38,19 @@ func init() {
ResourceType: lo.ToPtr("virtualMachines"),
APIVersions: &[]string{},
Costs: &[]compute.ResourceSkuCosts{},
Restrictions: &[]compute.ResourceSkuRestrictions{},
Restrictions: &[]compute.ResourceSkuRestrictions{
{
Type: compute.ResourceSkuRestrictionsType("Location"),
Values: &[]string{"westcentralus"},
RestrictionInfo: &compute.ResourceSkuRestrictionInfo{
Locations: &[]string{
"westcentralus",
},
Zones: &[]string{},
},
ReasonCode: "NotAvailableForSubscription",
},
},
Capabilities: &[]compute.ResourceSkuCapabilities{
{Name: lo.ToPtr("MaxResourceVolumeMB"), Value: lo.ToPtr("20480")},
{Name: lo.ToPtr("OSVhdSizeMB"), Value: lo.ToPtr("1047552")},
Expand Down
103 changes: 103 additions & 0 deletions pkg/providers/imagefamily/azlinux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
Portions Copyright (c) Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package imagefamily

import (
v1 "k8s.io/api/core/v1"

"github.com/Azure/karpenter/pkg/apis/v1alpha2"
"github.com/Azure/karpenter/pkg/providers/imagefamily/bootstrap"
"github.com/Azure/karpenter/pkg/providers/launchtemplate/parameters"

corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1"
"github.com/aws/karpenter-core/pkg/cloudprovider"
"github.com/aws/karpenter-core/pkg/scheduling"
)

// CommunityImage values used by AzureLinux.DefaultImages, one per supported
// combination of CPU architecture and Hyper-V generation.
const (
// AzureLinuxGen2CommunityImage is paired with amd64 + Hyper-V generation 2 requirements.
AzureLinuxGen2CommunityImage = "V2gen2"
// AzureLinuxGen1CommunityImage is paired with amd64 + Hyper-V generation 1 requirements.
AzureLinuxGen1CommunityImage = "V2"
// AzureLinuxGen2ArmCommunityImage is paired with arm64 + Hyper-V generation 2 requirements.
AzureLinuxGen2ArmCommunityImage = "V2gen2arm64"
)

// AzureLinux is the image family implementation for the AzureLinux image
// family. Options holds the static parameters that UserData copies into the
// AKS bootstrapper; UserData dereferences it, so it must be non-nil by then.
type AzureLinux struct {
Options *parameters.StaticParameters
}

// Name returns the image family name, v1alpha2.AzureLinuxImageFamily.
func (AzureLinux) Name() string {
	return v1alpha2.AzureLinuxImageFamily
}

// DefaultImages returns the candidate Azure Linux images together with the
// architecture and Hyper-V generation requirements each one satisfies.
func (u AzureLinux) DefaultImages() []DefaultImageOutput {
	// imageFor describes one gallery image and the arch / Hyper-V generation it serves.
	imageFor := func(communityImage, arch, hyperVGeneration string) DefaultImageOutput {
		return DefaultImageOutput{
			CommunityImage:   communityImage,
			PublicGalleryURL: AKSAzureLinuxPublicGalleryURL,
			Requirements: scheduling.NewRequirements(
				scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, arch),
				scheduling.NewRequirement(v1alpha2.LabelSKUHyperVGeneration, v1.NodeSelectorOpIn, hyperVGeneration),
			),
		}
	}

	// The image provider selects these images in order and the first match wins,
	// which is why AzureLinuxGen2CommunityImage is listed first.
	return []DefaultImageOutput{
		imageFor(AzureLinuxGen2CommunityImage, corev1beta1.ArchitectureAmd64, v1alpha2.HyperVGenerationV2),
		imageFor(AzureLinuxGen1CommunityImage, corev1beta1.ArchitectureAmd64, v1alpha2.HyperVGenerationV1),
		imageFor(AzureLinuxGen2ArmCommunityImage, corev1beta1.ArchitectureArm64, v1alpha2.HyperVGenerationV2),
	}
}

// UserData returns the default userdata bootstrapper for the image family.
// It combines the per-node inputs (kubelet configuration, taints, labels and
// CA bundle) with the static parameters held in u.Options. The instance type
// argument is not used.
func (u AzureLinux) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []v1.Taint, labels map[string]string, caBundle *string, _ *cloudprovider.InstanceType) bootstrap.Bootstrapper {
	static := u.Options

	// Settings shared through bootstrap.Options.
	common := bootstrap.Options{
		ClusterName:      static.ClusterName,
		ClusterEndpoint:  static.ClusterEndpoint,
		KubeletConfig:    kubeletConfig,
		Taints:           taints,
		Labels:           labels,
		CABundle:         caBundle,
		GPUNode:          static.GPUNode,
		GPUDriverVersion: static.GPUDriverVersion,
		// GPUImageSHA: u.Options.GPUImageSHA, image sha only applies to ubuntu
		// SEE: https://github.com/Azure/AgentBaker/blob/f393d6e4d689d9204d6000c85623ad9b764e2a29/vhdbuilder/packer/install-dependencies.sh#L201
	}

	return bootstrap.AKS{
		Options:                        common,
		Arch:                           static.Arch,
		TenantID:                       static.TenantID,
		SubscriptionID:                 static.SubscriptionID,
		Location:                       static.Location,
		UserAssignedIdentityID:         static.UserAssignedIdentityID,
		ResourceGroup:                  static.ResourceGroup,
		ClusterID:                      static.ClusterID,
		APIServerName:                  static.APIServerName,
		KubeletClientTLSBootstrapToken: static.KubeletClientTLSBootstrapToken,
		NetworkPlugin:                  static.NetworkPlugin,
		NetworkPolicy:                  static.NetworkPolicy,
		KubernetesVersion:              static.KubernetesVersion,
	}
}
4 changes: 3 additions & 1 deletion pkg/providers/imagefamily/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,10 @@ func (r Resolver) Resolve(ctx context.Context, nodeClass *v1alpha2.AKSNodeClass,

func getImageFamily(familyName *string, parameters *template.StaticParameters) ImageFamily {
switch lo.FromPtr(familyName) {
case Ubuntu2204ImageFamily:
case v1alpha2.Ubuntu2204ImageFamily:
return &Ubuntu2204{Options: parameters}
case v1alpha2.AzureLinuxImageFamily:
return &AzureLinux{Options: parameters}
default:
return &Ubuntu2204{Options: parameters}
}
Expand Down
Loading