From 69a3ac9dfc1d65b4fef7d0fa399197a2c6d12096 Mon Sep 17 00:00:00 2001 From: elankath Date: Thu, 7 Nov 2024 11:08:18 +0530 Subject: [PATCH 1/3] support extended resouces and ephemeral-storage in scale-from-zero --- .../cloudprovider/mcm/mcm_manager.go | 74 ++++++++++----- .../cloudprovider/mcm/mcm_manager_test.go | 94 +++++++++++++++++++ 2 files changed, 147 insertions(+), 21 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go index ec3e348b5152..b2bdb53e3ca1 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go @@ -29,9 +29,11 @@ import ( "fmt" v1appslister "k8s.io/client-go/listers/apps/v1" "k8s.io/utils/pointer" + "maps" "math/rand" "net/http" "os" + "slices" "strconv" "strings" "time" @@ -111,6 +113,12 @@ var ( machineGVR = schema.GroupVersionResource{Group: machineGroup, Version: machineVersion, Resource: "machines"} machineSetGVR = schema.GroupVersionResource{Group: machineGroup, Version: machineVersion, Resource: "machinesets"} machineDeploymentGVR = schema.GroupVersionResource{Group: machineGroup, Version: machineVersion, Resource: "machinedeployments"} + + // ErrInvalidNodeTemplate is a sentinel error that indicates that the nodeTemplate is invalid. + ErrInvalidNodeTemplate = errors.New("invalid node template") + coreResourceNames = []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory} + gpuResourceNames = []v1.ResourceName{"gpu", gpu.ResourceNvidiaGPU} + knownResourceNames = slices.Concat(coreResourceNames, gpuResourceNames, []v1.ResourceName{v1.ResourcePods, v1.ResourceEphemeralStorage}) ) // McmManager manages the client communication for MachineDeployments. @@ -130,12 +138,13 @@ type McmManager struct { } type instanceType struct { - InstanceType string - VCPU resource.Quantity - Memory resource.Quantity - GPU resource.Quantity - EphemeralStorage resource.Quantity - PodCount resource.Quantity + InstanceType string + VCPU resource.Quantity + Memory resource.Quantity + GPU resource.Quantity + EphemeralStorage resource.Quantity + PodCount resource.Quantity + ExtendedResources apiv1.ResourceList } type nodeTemplate struct { @@ -689,11 +698,9 @@ func generateInstanceStatus(machine *v1alpha1.Machine) *cloudprovider.InstanceSt func validateNodeTemplate(nodeTemplateAttributes *v1alpha1.NodeTemplate) error { var allErrs []error - capacityAttributes := []v1.ResourceName{"cpu", "gpu", "memory"} - - for _, attribute := range capacityAttributes { + for _, attribute := range coreResourceNames { if _, ok := nodeTemplateAttributes.Capacity[attribute]; !ok { - errMessage := fmt.Errorf("CPU, GPU and memory fields are mandatory") + errMessage := fmt.Errorf("the core resource fields %q are mandatory: %w", coreResourceNames, ErrInvalidNodeTemplate) klog.Warning(errMessage) allErrs = append(allErrs, errMessage) break @@ -701,13 +708,14 @@ func validateNodeTemplate(nodeTemplateAttributes *v1alpha1.NodeTemplate) error { } if nodeTemplateAttributes.Region == "" || nodeTemplateAttributes.InstanceType == "" || nodeTemplateAttributes.Zone == "" { - errMessage := fmt.Errorf("InstanceType, Region and Zone attributes are mandatory") + errMessage := fmt.Errorf("InstanceType, Region and Zone attributes are mandatory: %w", ErrInvalidNodeTemplate) klog.Warning(errMessage) allErrs = append(allErrs, errMessage) } if allErrs != nil { - return fmt.Errorf("%s", allErrs) + return errors.Join(allErrs...) + //return fmt.Errorf("%s", allErrs) } return nil @@ -771,21 +779,26 @@ func (m *McmManager) GetMachineDeploymentNodeTemplate(machinedeployment *Machine klog.V(1).Infof("Nodes already existing in the worker pool %s", workerPool) baseNode := filteredNodes[0] klog.V(1).Infof("Worker pool node used to form template is %s and its capacity is cpu: %s, memory:%s", baseNode.Name, baseNode.Status.Capacity.Cpu().String(), baseNode.Status.Capacity.Memory().String()) + extendedResources := filterExtendedResources(baseNode.Status.Capacity) instance = instanceType{ - VCPU: baseNode.Status.Capacity[apiv1.ResourceCPU], - Memory: baseNode.Status.Capacity[apiv1.ResourceMemory], - GPU: baseNode.Status.Capacity[gpu.ResourceNvidiaGPU], - EphemeralStorage: baseNode.Status.Capacity[apiv1.ResourceEphemeralStorage], - PodCount: baseNode.Status.Capacity[apiv1.ResourcePods], + VCPU: baseNode.Status.Capacity[apiv1.ResourceCPU], + Memory: baseNode.Status.Capacity[apiv1.ResourceMemory], + GPU: baseNode.Status.Capacity[gpu.ResourceNvidiaGPU], + EphemeralStorage: baseNode.Status.Capacity[apiv1.ResourceEphemeralStorage], + PodCount: baseNode.Status.Capacity[apiv1.ResourcePods], + ExtendedResources: extendedResources, } } else { klog.V(1).Infof("Generating node template only using nodeTemplate from MachineClass %s: template resources-> cpu: %s,memory: %s", machineClass.Name, nodeTemplateAttributes.Capacity.Cpu().String(), nodeTemplateAttributes.Capacity.Memory().String()) + extendedResources := filterExtendedResources(nodeTemplateAttributes.Capacity) instance = instanceType{ - VCPU: nodeTemplateAttributes.Capacity[apiv1.ResourceCPU], - Memory: nodeTemplateAttributes.Capacity[apiv1.ResourceMemory], - GPU: nodeTemplateAttributes.Capacity["gpu"], + VCPU: nodeTemplateAttributes.Capacity[apiv1.ResourceCPU], + Memory: nodeTemplateAttributes.Capacity[apiv1.ResourceMemory], + GPU: nodeTemplateAttributes.Capacity["gpu"], + EphemeralStorage: nodeTemplateAttributes.Capacity[apiv1.ResourceEphemeralStorage], // Numbers pods per node will depends on the CNI used and the maxPods kubelet config, default is often 110 - PodCount: resource.MustParse("110"), + PodCount: resource.MustParse("110"), + ExtendedResources: extendedResources, } } instance.InstanceType = nodeTemplateAttributes.InstanceType @@ -964,6 +977,12 @@ func (m *McmManager) buildNodeFromTemplate(name string, template *nodeTemplate) node.Status.Capacity["hugepages-1Gi"] = *resource.NewQuantity(0, resource.DecimalSI) node.Status.Capacity["hugepages-2Mi"] = *resource.NewQuantity(0, resource.DecimalSI) + // populate extended resources from nodeTemplate + if len(template.InstanceType.ExtendedResources) > 0 { + klog.V(3).Infof("Copying extended resources %v to template node.Status.Capacity", template.InstanceType.ExtendedResources) + maps.Copy(node.Status.Capacity, template.InstanceType.ExtendedResources) + } + node.Status.Allocatable = node.Status.Capacity // NodeLabels @@ -1011,3 +1030,16 @@ func isMachineFailedOrTerminating(machine *v1alpha1.Machine) bool { } return false } + +func filterExtendedResources(allResources v1.ResourceList) (extendedResources v1.ResourceList) { + extendedResources = make(v1.ResourceList) + for _, n := range knownResourceNames { + r, ok := allResources[n] + if ok { + // don't add known resources to extendedResources + continue + } + extendedResources[n] = r + } + return +} diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go b/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go index 90baac278205..8304ec08e28e 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go @@ -17,6 +17,9 @@ limitations under the License. package mcm import ( + "errors" + "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" + "k8s.io/utils/ptr" "testing" "github.com/stretchr/testify/assert" @@ -134,3 +137,94 @@ func TestFilterNodes(t *testing.T) { assert.EqualValues(t, len(filteredNodes), 1) assert.Equal(t, filteredNodes, []*apiv1.Node{node2}) } + +func TestValidateNodeTemplate(t *testing.T) { + m5LargeType := createSampleInstanceType("m5.large", "sap.com/mana", resource.MustParse("300")) + nt := v1alpha1.NodeTemplate{ + InstanceType: m5LargeType.InstanceType, + Capacity: make(apiv1.ResourceList), + } + err := validateNodeTemplate(&nt) + assert.NotNil(t, err) + assert.True(t, errors.Is(err, ErrInvalidNodeTemplate)) + + nt.Region = "europe-west1" + nt.Zone = nt.Region + "-b" + + err = validateNodeTemplate(&nt) + assert.NotNil(t, err) + assert.True(t, errors.Is(err, ErrInvalidNodeTemplate)) + + if err != nil { + t.Logf("error %s", err) + } +} + +func TestBuildNodeFromTemplate(t *testing.T) { + m := &McmManager{} + namePrefix := "bingo" + m5LargeType := createSampleInstanceType("m5.large", "sap.com/mana", resource.MustParse("300")) + labels := map[string]string{ + "weapon": "light-saber", + } + nt := nodeTemplate{ + InstanceType: m5LargeType, + Architecture: ptr.To("amd64"), + Labels: labels, + } + nt.Region = "europe-west1" + nt.Zone = nt.Region + "-b" + node, err := m.buildNodeFromTemplate(namePrefix, &nt) + assert.Nil(t, err) + if err != nil { + t.Logf("error %s", err) + } + assert.True(t, isSubset(labels, node.Labels), "labels should be a subset of node.Labels") + for _, k := range coreResourceNames { + assert.Contains(t, node.Status.Capacity, k, "node.Status.Capacity should contain the mandatory resource named: %s", k) + } + + // test with gpu resource + gpuQuantity := resource.MustParse("4") + nt.InstanceType.GPU = gpuQuantity + node, err = m.buildNodeFromTemplate(namePrefix, &nt) + assert.Nil(t, err) + if err != nil { + t.Logf("error %s", err) + } + for _, k := range coreResourceNames { + assert.Contains(t, node.Status.Capacity, k, "node.Status.Capacity should contain the mandatory resource named: %s", k) + } + var hasGpuResource bool + for _, k := range gpuResourceNames { + q, ok := node.Status.Capacity[k] + if ok { + hasGpuResource = true + assert.Equal(t, gpuQuantity, q, "node.Status.Capacity has gpu resource named %q with value %s instead of %s", k, q, gpuQuantity) + } + } + assert.True(t, hasGpuResource, "node.Status.Capacity should have a gpu resource with one of the names %q", gpuResourceNames) +} + +func createSampleInstanceType(instanceTypeName string, customResourceName apiv1.ResourceName, customResourceQuantity resource.Quantity) *instanceType { + awsM5Large := AWSInstanceTypes[instanceTypeName] + extendedResources := make(apiv1.ResourceList) + extendedResources[customResourceName] = customResourceQuantity + iType := &instanceType{ + InstanceType: awsM5Large.InstanceType, + VCPU: awsM5Large.VCPU, + Memory: awsM5Large.Memory, + GPU: awsM5Large.GPU, + ExtendedResources: extendedResources, + } + return iType +} + +func isSubset[K comparable, V comparable](map1, map2 map[K]V) bool { + for k, v := range map1 { + if val, ok := map2[k]; !ok || val != v { + return false + } + } + return true +} From f42789a6e2528a434f822c340a553b9b4f0ec05a Mon Sep 17 00:00:00 2001 From: elankath Date: Tue, 12 Nov 2024 12:06:58 +0530 Subject: [PATCH 2/3] corrected resource issues --- .../cloudprovider/mcm/mcm_manager.go | 23 ++++++++----------- .../cloudprovider/mcm/mcm_manager_test.go | 23 +++++++++++++++++-- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go index b2bdb53e3ca1..0b7d59d31641 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go @@ -116,9 +116,9 @@ var ( // ErrInvalidNodeTemplate is a sentinel error that indicates that the nodeTemplate is invalid. ErrInvalidNodeTemplate = errors.New("invalid node template") - coreResourceNames = []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory} - gpuResourceNames = []v1.ResourceName{"gpu", gpu.ResourceNvidiaGPU} - knownResourceNames = slices.Concat(coreResourceNames, gpuResourceNames, []v1.ResourceName{v1.ResourcePods, v1.ResourceEphemeralStorage}) + coreResourceNames = []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, "gpu"} + extraResourceNames = []v1.ResourceName{gpu.ResourceNvidiaGPU, v1.ResourcePods, v1.ResourceEphemeralStorage} + knownResourceNames = slices.Concat(coreResourceNames, extraResourceNames) ) // McmManager manages the client communication for MachineDeployments. @@ -715,7 +715,6 @@ func validateNodeTemplate(nodeTemplateAttributes *v1alpha1.NodeTemplate) error { if allErrs != nil { return errors.Join(allErrs...) - //return fmt.Errorf("%s", allErrs) } return nil @@ -979,7 +978,7 @@ func (m *McmManager) buildNodeFromTemplate(name string, template *nodeTemplate) // populate extended resources from nodeTemplate if len(template.InstanceType.ExtendedResources) > 0 { - klog.V(3).Infof("Copying extended resources %v to template node.Status.Capacity", template.InstanceType.ExtendedResources) + klog.V(2).Infof("Copying extended resources %v to template node.Status.Capacity", template.InstanceType.ExtendedResources) maps.Copy(node.Status.Capacity, template.InstanceType.ExtendedResources) } @@ -1031,15 +1030,11 @@ func isMachineFailedOrTerminating(machine *v1alpha1.Machine) bool { return false } +// filterExtendedResources removes knownResourceNames from allResources and retains only the extendedResources. func filterExtendedResources(allResources v1.ResourceList) (extendedResources v1.ResourceList) { - extendedResources = make(v1.ResourceList) - for _, n := range knownResourceNames { - r, ok := allResources[n] - if ok { - // don't add known resources to extendedResources - continue - } - extendedResources[n] = r - } + extendedResources = allResources.DeepCopy() + maps.DeleteFunc(extendedResources, func(name v1.ResourceName, _ resource.Quantity) bool { + return slices.Contains(knownResourceNames, name) + }) return } diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go b/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go index 8304ec08e28e..c0abfbcb3a59 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go @@ -20,6 +20,8 @@ import ( "errors" "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" "k8s.io/utils/ptr" + "maps" + "math/rand/v2" "testing" "github.com/stretchr/testify/assert" @@ -196,14 +198,31 @@ func TestBuildNodeFromTemplate(t *testing.T) { assert.Contains(t, node.Status.Capacity, k, "node.Status.Capacity should contain the mandatory resource named: %s", k) } var hasGpuResource bool - for _, k := range gpuResourceNames { + for _, k := range extraResourceNames { q, ok := node.Status.Capacity[k] if ok { hasGpuResource = true assert.Equal(t, gpuQuantity, q, "node.Status.Capacity has gpu resource named %q with value %s instead of %s", k, q, gpuQuantity) } } - assert.True(t, hasGpuResource, "node.Status.Capacity should have a gpu resource with one of the names %q", gpuResourceNames) + assert.True(t, hasGpuResource, "node.Status.Capacity should have a gpu resource with one of the names %q", extraResourceNames) +} + +func TestFilterExtendedResources(t *testing.T) { + resources := make(apiv1.ResourceList) + for _, n := range knownResourceNames { + resources[n] = *resource.NewQuantity(rand.Int64(), resource.DecimalSI) + } + customResources := make(apiv1.ResourceList) + customResources["resource.com/dongle"] = resource.MustParse("50") + customResources["quantum.com/memory"] = resource.MustParse("100Gi") + + allResources := resources.DeepCopy() + maps.Copy(allResources, customResources) + + extendedResources := filterExtendedResources(allResources) + t.Logf("TestFilterExtendedResources obtained: %+v", extendedResources) + assert.Equal(t, customResources, extendedResources) } func createSampleInstanceType(instanceTypeName string, customResourceName apiv1.ResourceName, customResourceQuantity resource.Quantity) *instanceType { From f6e287533b970165f077ec5eb9f23c7be5c34221 Mon Sep 17 00:00:00 2001 From: elankath Date: Tue, 12 Nov 2024 12:48:14 +0530 Subject: [PATCH 3/3] adjusted unit test TestBuildNodeFromTemplate for changes --- .../cloudprovider/mcm/mcm_manager_test.go | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go b/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go index c0abfbcb3a59..eff9df3d7ce7 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go @@ -19,6 +19,7 @@ package mcm import ( "errors" "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" + "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" "k8s.io/utils/ptr" "maps" "math/rand/v2" @@ -182,7 +183,7 @@ func TestBuildNodeFromTemplate(t *testing.T) { t.Logf("error %s", err) } assert.True(t, isSubset(labels, node.Labels), "labels should be a subset of node.Labels") - for _, k := range coreResourceNames { + for _, k := range []apiv1.ResourceName{apiv1.ResourceMemory, apiv1.ResourceCPU} { assert.Contains(t, node.Status.Capacity, k, "node.Status.Capacity should contain the mandatory resource named: %s", k) } @@ -194,18 +195,14 @@ func TestBuildNodeFromTemplate(t *testing.T) { if err != nil { t.Logf("error %s", err) } - for _, k := range coreResourceNames { + for _, k := range []apiv1.ResourceName{apiv1.ResourceMemory, apiv1.ResourceCPU, gpu.ResourceNvidiaGPU} { assert.Contains(t, node.Status.Capacity, k, "node.Status.Capacity should contain the mandatory resource named: %s", k) } - var hasGpuResource bool - for _, k := range extraResourceNames { - q, ok := node.Status.Capacity[k] - if ok { - hasGpuResource = true - assert.Equal(t, gpuQuantity, q, "node.Status.Capacity has gpu resource named %q with value %s instead of %s", k, q, gpuQuantity) - } + actualGpuQuantity, hasGpuResource := node.Status.Capacity[gpu.ResourceNvidiaGPU] + assert.True(t, hasGpuResource, "node.Status.Capacity should have a gpu resource named %q", gpu.ResourceNvidiaGPU) + if hasGpuResource { + assert.Equal(t, gpuQuantity, actualGpuQuantity, "node.Status.Capacity should have gpu resource named %q with value %s instead of %s", gpu.ResourceDirectX, gpuQuantity, actualGpuQuantity) } - assert.True(t, hasGpuResource, "node.Status.Capacity should have a gpu resource with one of the names %q", extraResourceNames) } func TestFilterExtendedResources(t *testing.T) {