From fb59c0d262e7d010014304a4fd0c17fac254c219 Mon Sep 17 00:00:00 2001 From: Himanshu Sharma Date: Fri, 15 Sep 2023 00:43:03 +0530 Subject: [PATCH] added basic test case for Nodes() method --- .../cloudprovider/mcm/mcm_cloud_provider.go | 2 +- .../mcm/mcm_cloud_provider_test.go | 134 +++++++++++++++++- .../cloudprovider/mcm/mcm_manager.go | 27 ++-- .../cloudprovider/mcm/test_utils.go | 21 ++- 4 files changed, 154 insertions(+), 30 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go index 87aebb09a6e1..a1569d21635a 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go @@ -371,7 +371,7 @@ func (machinedeployment *MachineDeployment) Belongs(node *apiv1.Node) (bool, err } // DeleteNodes deletes the nodes from the group. It is expected that this method will not be called -// for nodes not part of ANY machine deployment. +// for nodes which are not part of ANY machine deployment. func (machinedeployment *MachineDeployment) DeleteNodes(nodes []*apiv1.Node) error { size, err := machinedeployment.mcmManager.GetMachineDeploymentSize(machinedeployment) if err != nil { diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go index 57431c64aee7..959a9b8b4918 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider_test.go @@ -20,14 +20,19 @@ import ( "context" "errors" "fmt" + "math" + "strings" + "testing" + + machinecodes "github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + customfake "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/mcm/fakeclient" + "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - customfake "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/mcm/fakeclient" - "math" - "testing" ) const ( @@ -375,7 +380,7 @@ func TestRefresh(t *testing.T) { nodeGroups: []string{nodeGroup2}, }, expect{ - machines: []*v1alpha1.Machine{newMachine("machine-1", "fakeID", nil, "machinedeployment-1", "machineset-1", "1", false)}, + machines: []*v1alpha1.Machine{newMachine("machine-1", "fakeID-1", nil, "machinedeployment-1", "machineset-1", "1", false, true)}, err: errors.Join(fmt.Errorf("could not reset priority annotation on machine machine-1, Error: %v", mcUpdateErrorMsg)), }, }, @@ -414,3 +419,124 @@ func TestRefresh(t *testing.T) { }) } } + +// (mobj, mobjPid, nodeobj) -> instance(nodeobj.pid,_) +// (mobj, mobjPid, _) -> instance("requested://",status{'creating'}) +// (mobj, _) -> instance("requested://",status{'creating'}) +// (mobj, _) with quota error -> instance("requested://",status{'creating',{'outofResourcesClass','ResourceExhausted','[ResourceExhausted] [the following errors: ]'}}) +// (mobj, _) with invalid credentials error -> instance("requested://",status{'creating'}) + +// { +// lastOperation: { +// operationType: Creating +// operationState: Failed +// operationError: ResourceExhausted +// description: "Cloud provider message - machine codes error: code = [Internal] message = [Create machine "shoot--ddci--cbc-sys-tests03-pool-c32m256-3b-z1-575b9-hlvj6" failed: The following errors occurred: [{QUOTA_EXCEEDED Quota 'N2_CPUS' exceeded. Limit: 6000.0 in region europe-west3. [] []}]]." +// } +// } +func TestNodes(t *testing.T) { + const ( + out_of_quota_machine_status_error_description = "Cloud provider message - machine codes error: code = [ResourceExhausted] message = [Create machine \"machine-with-vm-create-error-out-of-quota\" failed: The following errors occurred: [{QUOTA_EXCEEDED Quota 'N2_CPUS' exceeded. Limit: 6000.0 in region europe-west3. [] []}]]" + out_of_quota_instance_error_message = "Create machine \"machine-with-vm-create-error-out-of-quota\" failed: The following errors occurred: [{QUOTA_EXCEEDED Quota 'N2_CPUS' exceeded. Limit: 6000.0 in region europe-west3. [] []}]" + invalid_credentials_machine_status_error_description = "Cloud provider message - machine codes error: code = [Internal] message = [user is not authorized to perform this action]" + ) + type expectationPerInstance struct { + expectedProviderID string + expectedInstanceState cloudprovider.InstanceState + expectedInstanceErrorClass cloudprovider.InstanceErrorClass + expectedInstanceErrorCode string + expectedInstanceErrorMessage string + } + type expect struct { + expectationPerInstanceList []expectationPerInstance + } + type data struct { + name string + setup setup + expect expect + } + table := []data{ + { + "Correct instances should be returned for machine objects under the machinedeployment", + setup{ + nodes: []*corev1.Node{newNode("node-1", "fakeID-1", false)}, + machines: func() []*v1alpha1.Machine { + allMachines := make([]*v1alpha1.Machine, 0, 5) + allMachines = append(allMachines, newMachine("machine-with-registered-node", "fakeID-1", nil, "machinedeployment-1", "", "", false, true)) + allMachines = append(allMachines, newMachine("machine-with-vm-but-no-node", "fakeID-2", nil, "machinedeployment-1", "", "", false,false)) + allMachines = append(allMachines, newMachine("machine-with-vm-creating", "", nil, "machinedeployment-1", "", "", false,false)) + allMachines = append(allMachines, newMachine("machine-with-vm-create-error-out-of-quota", "", &v1alpha1.MachineStatus{LastOperation: v1alpha1.LastOperation{Type: v1alpha1.MachineOperationCreate, State: v1alpha1.MachineStateFailed, ErrorCode: machinecodes.ResourceExhausted.String(), Description: out_of_quota_machine_status_error_description}}, "machinedeployment-1", "", "", false, false)) + allMachines = append(allMachines, newMachine("machine-with-vm-create-error-invalid-credentials", "", &v1alpha1.MachineStatus{LastOperation: v1alpha1.LastOperation{Type: v1alpha1.MachineOperationCreate, State: v1alpha1.MachineStateFailed, ErrorCode: machinecodes.Internal.String(), Description: invalid_credentials_machine_status_error_description}}, "machinedeployment-1", "", "", false,false)) + return allMachines + }(), + machineDeployments: newMachineDeployments(1, 2, nil, nil, nil), + nodeGroups: []string{nodeGroup1}, + }, + expect{ + expectationPerInstanceList: []expectationPerInstance{ + {"fakeID-1", cloudprovider.InstanceState(-1), cloudprovider.InstanceErrorClass(-1), "", ""}, + {placeholderInstanceIDForMachineObj("machine-with-vm-but-no-node"), cloudprovider.InstanceCreating, cloudprovider.InstanceErrorClass(-1), "", ""}, + {placeholderInstanceIDForMachineObj("machine-with-vm-creating"), cloudprovider.InstanceCreating, cloudprovider.InstanceErrorClass(-1), "", ""}, + {placeholderInstanceIDForMachineObj("machine-with-vm-create-error-out-of-quota"), cloudprovider.InstanceCreating, cloudprovider.OutOfResourcesErrorClass, machinecodes.ResourceExhausted.String(), out_of_quota_instance_error_message}, + // invalid credentials error is mapped to Internal code as it can't be fixed by trying another zone + {placeholderInstanceIDForMachineObj("machine-with-vm-create-error-invalid-credentials"), cloudprovider.InstanceCreating, cloudprovider.InstanceErrorClass(-1), "", ""}, + }, + }, + }, + } + + for _, entry := range table { + entry := entry // have a shallow copy of the entry for parallelization of tests + t.Run(entry.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + stop := make(chan struct{}) + defer close(stop) + controlMachineObjects, targetCoreObjects := setupEnv(&entry.setup) + m, trackers, hasSyncedCacheFns := createMcmManager(t, stop, testNamespace, nil, controlMachineObjects, targetCoreObjects) + defer trackers.Stop() + waitForCacheSync(t, stop, hasSyncedCacheFns) + + if entry.setup.targetCoreFakeResourceActions != nil { + trackers.TargetCore.SetFailAtFakeResourceActions(entry.setup.targetCoreFakeResourceActions) + } + if entry.setup.controlMachineFakeResourceActions != nil { + trackers.ControlMachine.SetFailAtFakeResourceActions(entry.setup.controlMachineFakeResourceActions) + } + + md, err := buildMachineDeploymentFromSpec(entry.setup.nodeGroups[0], m) + g.Expect(err).To(BeNil()) + + returnedInstances, err := md.Nodes() + g.Expect(err).To(BeNil()) + g.Expect(len(returnedInstances)).To(BeNumerically("==", len(entry.expect.expectationPerInstanceList))) + + for _, expectedInstance := range entry.expect.expectationPerInstanceList { + found := false + for _, gotInstance := range returnedInstances { + g.Expect(gotInstance.Id).ToNot(BeEmpty()) + if expectedInstance.expectedProviderID == gotInstance.Id { + if !strings.Contains(gotInstance.Id, "requested://") { + // must be a machine obj whose node is registered (ready or notReady) + g.Expect(gotInstance.Status).To(BeNil()) + } else { + if int(expectedInstance.expectedInstanceState) != -1 { + g.Expect(gotInstance.Status).ToNot(BeNil()) + g.Expect(gotInstance.Status.State).To(Equal(expectedInstance.expectedInstanceState)) + } + if int(expectedInstance.expectedInstanceErrorClass) != -1 || expectedInstance.expectedInstanceErrorCode != "" || expectedInstance.expectedInstanceErrorMessage != "" { + g.Expect(gotInstance.Status.ErrorInfo).ToNot(BeNil()) + g.Expect(gotInstance.Status.ErrorInfo.ErrorClass).To(Equal(expectedInstance.expectedInstanceErrorClass)) + g.Expect(gotInstance.Status.ErrorInfo.ErrorCode).To(Equal(expectedInstance.expectedInstanceErrorCode)) + g.Expect(gotInstance.Status.ErrorInfo.ErrorMessage).To(Equal(expectedInstance.expectedInstanceErrorMessage)) + } + } + found = true + break + } + } + g.Expect(found).To(BeTrue()) + } + }) + } +} diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go index 09062ac359fa..62efd9cd8281 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go @@ -623,19 +623,6 @@ func (m *McmManager) GetMachineDeploymentInstances(machinedeployment *MachineDep return nil, fmt.Errorf("unable to fetch list of Nodes %v", err) } - // (mobj, mobjPid, nodeobj) -> instance(nodeobj.pid,_) - // (mobj, mobjPid, _) -> instance("requested://",status{'creating'}) - // (mobj, _) -> instance("requested://",status{'creating',{}}) - // (mobj, _) with quota error -> instance("requested://",status{'creating',{'outofResourcesClass','ResourceExhausted','[ResourceExhausted] [the following errors: ]'}}) - - // { - // lastOperation: { - // operationType: Creating - // operationState: Failed - // operationError: ResourceExhausted - // description: "Cloud provider message - machine codes error: code = [Internal] message = [Create machine "shoot--ddci--cbc-sys-tests03-pool-c32m256-3b-z1-575b9-hlvj6" failed: The following errors occurred: [{QUOTA_EXCEEDED Quota 'N2_CPUS' exceeded. Limit: 6000.0 in region europe-west3. [] []}]]." - // } - // } var instances []cloudprovider.Instance // Bearing O(n2) complexity, assuming we will not have lot of nodes/machines, open for optimisations. for _, machine := range machineList { @@ -643,19 +630,17 @@ func (m *McmManager) GetMachineDeploymentInstances(machinedeployment *MachineDep var found bool for _, node := range nodeList { if machine.Labels["node"] == node.Name { - // ensures cloudprovider.Instance is formed only for VM registered as a node + // to ensure machine obj with VM but without node obj are marked as `Creating` cloudprovider.Instance + // NOTE: machine obj with a `notReady` node is not marked `Creating` instance.Id = node.Spec.ProviderID - instance.Status = &cloudprovider.InstanceStatus{ - State: cloudprovider.InstanceRunning, - } found = true break } } if !found { // No k8s node found - either the VM has not registered yet or MCM is unable to fulfill the request. - // Report a special ID so that the autoscaler can track it as an unregistered node. - instance.Id = fmt.Sprintf("requested://%s", machine.Name) + // Report a special placeholder ID so that the autoscaler can track it as an unregistered node. + instance.Id = placeholderInstanceIDForMachineObj(machine.Name) instance.Status = &cloudprovider.InstanceStatus{ State: cloudprovider.InstanceCreating, ErrorInfo: getErrorInfo(machine), @@ -666,6 +651,10 @@ func (m *McmManager) GetMachineDeploymentInstances(machinedeployment *MachineDep return instances, nil } +func placeholderInstanceIDForMachineObj(name string) string{ + return fmt.Sprintf("requested://%s",name) +} + // getErrorInfo returns cloudprovider.InstanceErrorInfo for the machine obj func getErrorInfo(machine *v1alpha1.Machine) *cloudprovider.InstanceErrorInfo { if machine.Status.LastOperation.Type == v1alpha1.MachineOperationCreate && machine.Status.LastOperation.State == v1alpha1.MachineStateFailed && machine.Status.LastOperation.ErrorCode == machinecodes.ResourceExhausted.String() { diff --git a/cluster-autoscaler/cloudprovider/mcm/test_utils.go b/cluster-autoscaler/cloudprovider/mcm/test_utils.go index 05f300d0fc28..9e51dc8ef8c6 100644 --- a/cluster-autoscaler/cloudprovider/mcm/test_utils.go +++ b/cluster-autoscaler/cloudprovider/mcm/test_utils.go @@ -18,11 +18,12 @@ package mcm import ( "fmt" - "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" - "k8s.io/client-go/tools/cache" "testing" "time" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/client-go/tools/cache" + machineinternal "github.com/gardener/machine-controller-manager/pkg/apis/machine" "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" faketyped "github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1/fake" @@ -102,14 +103,19 @@ func newMachineSets( func newMachine( name string, - providerIdGenerateName string, + providerId string, statusTemplate *v1alpha1.MachineStatus, mdName, msName string, priorityAnnotationValue string, - setDeletionTimeStamp bool, + setDeletionTimeStamp, + setNodeLabel bool, ) *v1alpha1.Machine { - m := newMachines(1, providerIdGenerateName, statusTemplate, mdName, msName, []string{priorityAnnotationValue}, []bool{setDeletionTimeStamp})[0] + m := newMachines(1, providerId, statusTemplate, mdName, msName, []string{priorityAnnotationValue}, []bool{setDeletionTimeStamp})[0] m.Name = name + m.Spec.ProviderID = providerId + if !setNodeLabel { + delete(m.Labels, "node") + } return m } @@ -141,7 +147,10 @@ func newMachines( Annotations: map[string]string{priorityAnnotationKey: priorityAnnotationValues[i]}, CreationTimestamp: metav1.Now(), }, - Spec: v1alpha1.MachineSpec{ProviderID: fmt.Sprintf("%s/i%d", providerIdGenerateName, i+1)}, + } + + if providerIdGenerateName != "" { + m.Spec = v1alpha1.MachineSpec{ProviderID: fmt.Sprintf("%s/i%d", providerIdGenerateName, i+1)} } m.Labels["node"] = fmt.Sprintf("node-%d", i+1)