From 77f919d78813a43c274356a5e4df9df635728a20 Mon Sep 17 00:00:00 2001 From: Frame Date: Mon, 9 Oct 2023 16:46:07 +0800 Subject: [PATCH] ci: add more slo controllers e2e test (#1688) Signed-off-by: saintube --- .github/workflows/e2e-k8s-1.22.yaml | 4 +- .github/workflows/e2e-k8s-1.24.yaml | 2 +- .github/workflows/e2e-k8s-latest.yaml | 2 +- config/manager/manager.yaml | 2 +- config/webhook/manifests.yaml | 2 +- hack/deploy_kind.sh | 17 +- pkg/webhook/node/validating/webhooks.go | 2 +- test/e2e/framework/test_context.go | 39 +- test/e2e/scheduling/nodenumaresource.go | 39 +- test/e2e/scheduling/preemption.go | 38 +- test/e2e/slocontroller/batchresource.go | 96 +++-- test/e2e/slocontroller/cpunormalization.go | 346 ++++++++++++++++++ .../slocontroller/be-demo.yaml | 25 ++ 13 files changed, 524 insertions(+), 90 deletions(-) create mode 100644 test/e2e/slocontroller/cpunormalization.go create mode 100644 test/e2e/testing-manifests/slocontroller/be-demo.yaml diff --git a/.github/workflows/e2e-k8s-1.22.yaml b/.github/workflows/e2e-k8s-1.22.yaml index 08120b52e..ea25d4e41 100644 --- a/.github/workflows/e2e-k8s-1.22.yaml +++ b/.github/workflows/e2e-k8s-1.22.yaml @@ -55,7 +55,7 @@ jobs: run: | set -ex kubectl cluster-info - IMG=koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID} ./hack/deploy_kind.sh + IMG=koordinator-sh/koord-manager:e2e-${GITHUB_RUN_ID} KUBERNETES_VERSION="1.22" ./hack/deploy_kind.sh NODES=$(kubectl get node | wc -l) for ((i=1;i<10;i++)); do @@ -88,7 +88,7 @@ jobs: export KUBECONFIG=/home/runner/.kube/config make ginkgo set +e - EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s" + EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s -e2e-verify-service-account=false" ./bin/ginkgo -timeout 60m -v --focus='slo-controller' test/e2e -- ${EXTRA_ARGS} retVal=$? restartCount=$(kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers | head -n 1 | awk '{print $4}') diff --git a/.github/workflows/e2e-k8s-1.24.yaml b/.github/workflows/e2e-k8s-1.24.yaml index 717ec7f84..f0dd2de51 100644 --- a/.github/workflows/e2e-k8s-1.24.yaml +++ b/.github/workflows/e2e-k8s-1.24.yaml @@ -88,7 +88,7 @@ jobs: export KUBECONFIG=/home/runner/.kube/config make ginkgo set +e - EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s" + EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s -e2e-verify-service-account=false" ./bin/ginkgo -timeout 60m -v --focus='slo-controller' test/e2e -- ${EXTRA_ARGS} retVal=$? restartCount=$(kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers | head -n 1 | awk '{print $4}') diff --git a/.github/workflows/e2e-k8s-latest.yaml b/.github/workflows/e2e-k8s-latest.yaml index c4c3c79bd..2f3cf6687 100644 --- a/.github/workflows/e2e-k8s-latest.yaml +++ b/.github/workflows/e2e-k8s-latest.yaml @@ -86,7 +86,7 @@ jobs: export KUBECONFIG=/home/runner/.kube/config make ginkgo set +e - EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s" + EXTRA_ARGS="-koordinator-component-namespace=${COMPONENT_NS} -allowed-not-ready-nodes=1 -system-pods-startup-timeout=10s -e2e-verify-service-account=false" ./bin/ginkgo -timeout 60m -v --focus='slo-controller' test/e2e -- ${EXTRA_ARGS} retVal=$? 
restartCount=$(kubectl get pod -n ${COMPONENT_NS} -l koord-app=koord-manager --no-headers | head -n 1 | awk '{print $4}') diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 857425d55..f4e875024 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -48,7 +48,7 @@ spec: - --leader-election-namespace=koordinator-system - --config-namespace=koordinator-system - --v=4 - - --feature-gates=AllAlpha=false,AllBeta=false + - --feature-gates= - --sync-period=0 command: - /koord-manager diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index 57faf4d28..f7cbf6da9 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -123,7 +123,7 @@ webhooks: name: webhook-service namespace: system path: /validate-node - failurePolicy: Fail + failurePolicy: Ignore name: vnode.koordinator.sh rules: - apiGroups: diff --git a/hack/deploy_kind.sh b/hack/deploy_kind.sh index c1606616d..122cb7ee7 100755 --- a/hack/deploy_kind.sh +++ b/hack/deploy_kind.sh @@ -21,11 +21,24 @@ if [ -z "$IMG" ]; then exit 1 fi +K8S_VERSION="" +if [ -z "$KUBERNETES_VERSION" ]; then + K8S_VERSION="latest" +else + K8S_VERSION=$KUBERNETES_VERSION +fi + set -e make kustomize KUSTOMIZE=$(pwd)/bin/kustomize (cd config/manager && "${KUSTOMIZE}" edit set image manager="${IMG}") -"${KUSTOMIZE}" build config/default | sed -e 's/imagePullPolicy: Always/imagePullPolicy: IfNotPresent/g' > /tmp/koordinator-kustomization.yaml -echo -e "resources:\n- manager.yaml" > config/manager/kustomization.yaml + +if [[ "$K8S_VERSION" == "1.22" ]]; then + sed "s/feature-gates=/feature-gates=CompatibleCSIStorageCapacity=true/g" $(pwd)/config/manager/scheduler.yaml > /tmp/scheduler.yaml && mv /tmp/scheduler.yaml $(pwd)/config/manager/scheduler.yaml + $(pwd)/hack/kustomize.sh "${KUSTOMIZE}" | sed -e 's/imagePullPolicy: Always/imagePullPolicy: IfNotPresent/g' > /tmp/koordinator-kustomization.yaml +else + $(pwd)/hack/kustomize.sh "${KUSTOMIZE}" | sed -e 's/imagePullPolicy: Always/imagePullPolicy: IfNotPresent/g' > /tmp/koordinator-kustomization.yaml +fi + kubectl apply -f /tmp/koordinator-kustomization.yaml diff --git a/pkg/webhook/node/validating/webhooks.go b/pkg/webhook/node/validating/webhooks.go index 5b6cd5b77..529f3a7cc 100644 --- a/pkg/webhook/node/validating/webhooks.go +++ b/pkg/webhook/node/validating/webhooks.go @@ -20,7 +20,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook/admission" ) -// +kubebuilder:webhook:path=/validate-node,mutating=false,failurePolicy=fail,sideEffects=None,groups="",resources=nodes,verbs=create;update,versions=v1,name=vnode.koordinator.sh,admissionReviewVersions=v1;v1beta1 +// +kubebuilder:webhook:path=/validate-node,mutating=false,failurePolicy=ignore,sideEffects=None,groups="",resources=nodes,verbs=create;update,versions=v1,name=vnode.koordinator.sh,admissionReviewVersions=v1;v1beta1 var ( // HandlerMap contains admission webhook handlers diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go index 4d5cce4cb..58c180cd1 100644 --- a/test/e2e/framework/test_context.go +++ b/test/e2e/framework/test_context.go @@ -53,24 +53,24 @@ const ( // into the code which uses the settings. // // The recommendation for those settings is: -// - They are stored in their own context structure or local -// variables. -// - The standard `flag` package is used to register them. -// The flag name should follow the pattern ..... 
-// where the prefix is unlikely to conflict with other tests or -// standard packages and each part is in lower camel case. For -// example, test/e2e/storage/csi/context.go could define -// storage.csi.numIterations. -// - framework/config can be used to simplify the registration of -// multiple options with a single function call: -// var storageCSI { -// NumIterations `default:"1" usage:"number of iterations"` -// } -// _ config.AddOptions(&storageCSI, "storage.csi") -// - The direct use Viper in tests is possible, but discouraged because -// it only works in test suites which use Viper (which is not -// required) and the supported options cannot be -// discovered by a test suite user. +// - They are stored in their own context structure or local +// variables. +// - The standard `flag` package is used to register them. +// The flag name should follow the pattern ..... +// where the prefix is unlikely to conflict with other tests or +// standard packages and each part is in lower camel case. For +// example, test/e2e/storage/csi/context.go could define +// storage.csi.numIterations. +// - framework/config can be used to simplify the registration of +// multiple options with a single function call: +// var storageCSI { +// NumIterations `default:"1" usage:"number of iterations"` +// } +// _ config.AddOptions(&storageCSI, "storage.csi") +// - The direct use Viper in tests is possible, but discouraged because +// it only works in test suites which use Viper (which is not +// required) and the supported options cannot be +// discovered by a test suite user. // // Test suite authors can use framework/viper to make all command line // parameters also configurable via a configuration file. @@ -193,6 +193,8 @@ type TestContextType struct { KoordinatorComponentNamespace string // SLOCtrlConfigMap is the name of the slo-controller configmap. SLOCtrlConfigMap string + // KoordSchedulerName is the SchedulerName of the koord-scheduler. + KoordSchedulerName string } // NodeKillerConfig describes configuration of NodeKiller -- a utility to @@ -335,6 +337,7 @@ func RegisterCommonFlags(flags *flag.FlagSet) { // koordinator configs flags.StringVar(&TestContext.KoordinatorComponentNamespace, "koordinator-component-namespace", "koordinator-system", "The namespace of the koordinator components deployed to.") flags.StringVar(&TestContext.SLOCtrlConfigMap, "slo-config-name", "slo-controller-config", "The name of the slo-controller configmap.") + flags.StringVar(&TestContext.KoordSchedulerName, "koord-scheduler-name", "koord-scheduler", "The SchedulerName of the koord-scheduler.") } // RegisterClusterFlags registers flags specific to the cluster e2e test suite. 
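The test_context.go hunk above adds KoordSchedulerName to TestContextType and registers it as the -koord-scheduler-name flag (default "koord-scheduler"); the scheduling and slo-controller specs changed below consume it through framework.TestContext.KoordSchedulerName instead of hard-coding the scheduler name. The doc comment in that file also recommends grouping suite-specific options, shown there only as pseudocode; a compilable rendition could look like the following sketch, assuming the framework's config package mirrors the upstream AddOptions helper (the import path, package name, and int field type are illustrative and not part of this patch).

package csi // hypothetical suite package using the option-group pattern

import "github.com/koordinator-sh/koordinator/test/e2e/framework/config" // assumed import path

// storageCSI groups suite-specific options under the "storage.csi" prefix,
// e.g. -storage.csi.numIterations=3 on the ginkgo command line.
var storageCSI struct {
	NumIterations int `default:"1" usage:"number of iterations"`
}

// AddOptions registers one flag per exported field and returns a bool, so a
// blank package-level assignment is enough to trigger the registration.
var _ = config.AddOptions(&storageCSI, "storage.csi")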
diff --git a/test/e2e/scheduling/nodenumaresource.go b/test/e2e/scheduling/nodenumaresource.go index ebb3800bc..6f7a01cdf 100644 --- a/test/e2e/scheduling/nodenumaresource.go +++ b/test/e2e/scheduling/nodenumaresource.go @@ -46,9 +46,10 @@ import ( var _ = SIGDescribe("NodeNUMAResource", func() { f := framework.NewDefaultFramework("nodenumaresource") + var koordSchedulerName string ginkgo.BeforeEach(func() { - + koordSchedulerName = framework.TestContext.KoordSchedulerName }) framework.KoordinatorDescribe("NodeNUMAResource CPUBindPolicy", func() { @@ -164,7 +165,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { }, }, PriorityClassName: string(extension.PriorityProd), - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, pod), "unable schedule the lowest priority pod") expectPodBoundReservation(f.ClientSet, f.KoordinatorClientSet, pod.Namespace, pod.Name, reservation.Name) @@ -248,7 +249,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { }, NodeName: reservation.Status.NodeName, PriorityClassName: string(extension.PriorityProd), - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, pod), "unable schedule the lowest priority pod") expectPodBoundReservation(f.ClientSet, f.KoordinatorClientSet, pod.Namespace, pod.Name, reservation.Name) @@ -416,7 +417,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }, } @@ -454,7 +455,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }) ginkgo.By("Wait for Pod schedule failed") @@ -502,7 +503,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }, } @@ -545,7 +546,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }) ginkgo.By("Wait for Pod schedule failed") @@ -590,7 +591,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }, } @@ -633,7 +634,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }) ginkgo.By("Wait for Pod schedule failed") @@ -674,7 +675,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }) } @@ -689,7 +690,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }) } @@ -711,7 +712,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }) ginkgo.By("Wait for Pod schedule failed") @@ -757,7 +758,7 @@ var _ = SIGDescribe("NodeNUMAResource", 
func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }, } @@ -794,7 +795,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }) nodes = sets.NewInt() @@ -836,7 +837,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }) } @@ -851,7 +852,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }) } @@ -873,7 +874,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }) nodes := sets.NewInt() @@ -920,7 +921,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }, } @@ -957,7 +958,7 @@ var _ = SIGDescribe("NodeNUMAResource", func() { Limits: requests, Requests: requests, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, NodeName: node.Name, }) nodes = sets.NewInt() diff --git a/test/e2e/scheduling/preemption.go b/test/e2e/scheduling/preemption.go index 4fec8a6f0..e942d4f75 100644 --- a/test/e2e/scheduling/preemption.go +++ b/test/e2e/scheduling/preemption.go @@ -37,9 +37,11 @@ import ( var _ = SIGDescribe("Preemption", func() { f := framework.NewDefaultFramework("preemption") + var koordSchedulerName string ginkgo.BeforeEach(func() { framework.AllNodesReady(f.ClientSet, time.Minute) + koordSchedulerName = framework.TestContext.KoordSchedulerName }) ginkgo.AfterEach(func() { @@ -68,7 +70,7 @@ var _ = SIGDescribe("Preemption", func() { apiext.ResourceGPU: resource.MustParse("100"), }, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) ginkgo.By("Create low priority Pod requests all GPUs") @@ -86,7 +88,7 @@ var _ = SIGDescribe("Preemption", func() { }, }, NodeName: nodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, lowPriorityPod), "unable schedule the lowest priority pod") @@ -102,7 +104,7 @@ var _ = SIGDescribe("Preemption", func() { }, }, NodeName: nodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, PriorityClassName: "system-cluster-critical", }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, highPriorityPod), "unable preempt lowest priority pod") @@ -119,7 +121,7 @@ var _ = SIGDescribe("Preemption", func() { apiext.ResourceGPU: resource.MustParse("100"), }, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) framework.ExpectNoError(err, fmt.Sprintf("unable to get node %v", nodeName)) @@ -169,7 +171,7 @@ var _ = SIGDescribe("Preemption", func() { }, }, NodeName: nodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, lowPriorityPod), "unable schedule the lowest priority pod") 
expectPodBoundReservation(f.ClientSet, f.KoordinatorClientSet, lowPriorityPod.Namespace, lowPriorityPod.Name, reservation.Name) @@ -186,7 +188,7 @@ var _ = SIGDescribe("Preemption", func() { }, }, NodeName: nodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, PriorityClassName: "system-cluster-critical", }) framework.ExpectNoError(e2epod.WaitForPodCondition(f.ClientSet, highPriorityPod.Namespace, highPriorityPod.Name, "wait for pod schedule failed", 60*time.Second, func(pod *corev1.Pod) (bool, error) { @@ -210,7 +212,7 @@ var _ = SIGDescribe("Preemption", func() { apiext.ResourceGPU: resource.MustParse("100"), }, }, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) framework.ExpectNoError(err, fmt.Sprintf("unable to get node %v", nodeName)) @@ -260,7 +262,7 @@ var _ = SIGDescribe("Preemption", func() { }, }, NodeName: nodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, lowPriorityPod), "unable schedule the lowest priority pod") expectPodBoundReservation(f.ClientSet, f.KoordinatorClientSet, lowPriorityPod.Namespace, lowPriorityPod.Name, reservation.Name) @@ -280,7 +282,7 @@ var _ = SIGDescribe("Preemption", func() { }, }, NodeName: nodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, PriorityClassName: "system-cluster-critical", }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, highPriorityPod), "unable to preempt") @@ -342,7 +344,7 @@ var _ = SIGDescribe("Preemption", func() { nodeName := runPodAndGetNodeName(f, pausePodConfig{ Name: "without-label", Resources: resourceRequirements, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) ginkgo.By("Create low priority Pod requests all fakeResource") @@ -351,7 +353,7 @@ var _ = SIGDescribe("Preemption", func() { Name: "low-priority-pod", Resources: resourceRequirements, NodeName: nodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, lowPriorityPod), "unable schedule the lowest priority pod") @@ -360,7 +362,7 @@ var _ = SIGDescribe("Preemption", func() { Name: "high-priority-pod", Resources: resourceRequirements, NodeName: nodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, PriorityClassName: "system-cluster-critical", }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, highPriorityPod), "unable preempt lowest priority pod") @@ -416,7 +418,7 @@ var _ = SIGDescribe("Preemption", func() { }, Resources: resourceRequirements, NodeName: testNodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, lowPriorityPod), "unable schedule the lowest priority pod") expectPodBoundReservation(f.ClientSet, f.KoordinatorClientSet, lowPriorityPod.Namespace, lowPriorityPod.Name, reservation.Name) @@ -426,7 +428,7 @@ var _ = SIGDescribe("Preemption", func() { Name: "high-priority-pod", Resources: resourceRequirements, NodeName: testNodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, PriorityClassName: "system-cluster-critical", }) framework.ExpectNoError(e2epod.WaitForPodCondition(f.ClientSet, highPriorityPod.Namespace, highPriorityPod.Name, "wait 
for pod schedule failed", 60*time.Second, func(pod *corev1.Pod) (bool, error) { @@ -489,7 +491,7 @@ var _ = SIGDescribe("Preemption", func() { }, Resources: resourceRequirements, NodeName: testNodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, lowPriorityPod), "unable schedule the lowest priority pod") expectPodBoundReservation(f.ClientSet, f.KoordinatorClientSet, lowPriorityPod.Namespace, lowPriorityPod.Name, reservation.Name) @@ -502,7 +504,7 @@ var _ = SIGDescribe("Preemption", func() { }, Resources: resourceRequirements, NodeName: testNodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, PriorityClassName: "system-cluster-critical", }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, highPriorityPod), "unable to preempt") @@ -560,7 +562,7 @@ var _ = SIGDescribe("Preemption", func() { }, Resources: resourceRequirements, NodeName: testNodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, lowPriorityPod), "unable schedule the lowest priority pod") expectPodBoundReservation(f.ClientSet, f.KoordinatorClientSet, lowPriorityPod.Namespace, lowPriorityPod.Name, reservation.Name) @@ -573,7 +575,7 @@ var _ = SIGDescribe("Preemption", func() { }, Resources: resourceRequirements, NodeName: testNodeName, - SchedulerName: "koord-scheduler", + SchedulerName: koordSchedulerName, PriorityClassName: "system-cluster-critical", }) framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(f.ClientSet, highPriorityPod), "unable to preempt") diff --git a/test/e2e/slocontroller/batchresource.go b/test/e2e/slocontroller/batchresource.go index 75d415636..09bb21857 100644 --- a/test/e2e/slocontroller/batchresource.go +++ b/test/e2e/slocontroller/batchresource.go @@ -26,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clientset "k8s.io/client-go/kubernetes" + k8spodutil "k8s.io/kubernetes/pkg/api/v1/pod" "github.com/koordinator-sh/koordinator/apis/configuration" apiext "github.com/koordinator-sh/koordinator/apis/extension" @@ -36,37 +37,37 @@ import ( e2enode "github.com/koordinator-sh/koordinator/test/e2e/framework/node" ) -var ( - colocationEnabledConfigData = `{ +const colocationEnabledConfigData = `{ "enable": true, - "cpuReclaimThresholdPercent": 60, - "memoryReclaimThresholdPercent": 65, + "cpuReclaimThresholdPercent": 80, + "memoryReclaimThresholdPercent": 80, "memoryCalculatePolicy": "usage" }` - cpuReclaimThresholdPercent = 60 - memoryReclaimThresholdPercent = 65 +var ( + cpuReclaimThresholdPercent = 80 + memoryReclaimThresholdPercent = 80 maxNodeBatchCPUDiffPercent = 10 maxNodeBatchMemoryDiffPercent = 5 - minNodesBatchResourceAllocatableRatio = 0.8 + minNodesBatchResourceAllocatableRatio = 0.7 ) var _ = SIGDescribe("BatchResource", func() { var nodeList *corev1.NodeList var c clientset.Interface var koordClient koordinatorclientset.Interface - var koordNamespace, sloConfigName string + var koordNamespace, sloConfigName, koordSchedulerName string var err error f := framework.NewDefaultFramework("batchresource") - f.SkipNamespaceCreation = true ginkgo.BeforeEach(func() { c = f.ClientSet koordClient = f.KoordinatorClientSet koordNamespace = framework.TestContext.KoordinatorComponentNamespace sloConfigName = framework.TestContext.SLOCtrlConfigMap + koordSchedulerName = 
framework.TestContext.KoordSchedulerName framework.Logf("get some nodes which are ready and schedulable") nodeList, err = e2enode.GetReadySchedulableNodes(c) @@ -116,7 +117,7 @@ var _ = SIGDescribe("BatchResource", func() { newConfigMap.Data[configuration.ColocationConfigKey] = colocationEnabledConfigData newConfigMapUpdated, err := c.CoreV1().ConfigMaps(koordNamespace).Update(context.TODO(), newConfigMap, metav1.UpdateOptions{}) framework.ExpectNoError(err) - framework.Logf("update slo-controller-config successfully, data: %v", newConfigMapUpdated.Data) + framework.Logf("update slo-controller-config successfully, data: %+v", newConfigMapUpdated.Data) configMap = newConfigMapUpdated } else { framework.Logf("colocation is already enabled in slo-controller-config, keep the same") @@ -125,11 +126,14 @@ var _ = SIGDescribe("BatchResource", func() { framework.Logf("slo-controller-config does not exist, need create") newConfigMap, err := manifest.ConfigMapFromManifest("slocontroller/slo-controller-config.yaml") framework.ExpectNoError(err) + newConfigMap.SetNamespace(koordNamespace) newConfigMap.SetName(sloConfigName) + newConfigMap.Data[configuration.ColocationConfigKey] = colocationEnabledConfigData + newConfigMapCreated, err := c.CoreV1().ConfigMaps(koordNamespace).Create(context.TODO(), newConfigMap, metav1.CreateOptions{}) framework.ExpectNoError(err) - framework.Logf("create slo-controller-config successfully, data: %v", newConfigMapCreated.Data) + framework.Logf("create slo-controller-config successfully, data: %+v", newConfigMapCreated.Data) configMap = newConfigMapCreated defer rollbackSLOConfigObject(f, koordNamespace, sloConfigName) @@ -149,15 +153,17 @@ var _ = SIGDescribe("BatchResource", func() { // check node allocatable isAllocatable, msg := isNodeBatchResourcesValid(node, nodeMetric) - if isAllocatable { - allocatableCount++ - } else { + if !isAllocatable { framework.Logf("node %s has no allocatable batch resource, msg: %s", node.Name, msg) + continue } + + allocatableCount++ } if float64(allocatableCount) > float64(totalCount)*minNodesBatchResourceAllocatableRatio { - framework.Logf("finish checking node batch resources", totalCount, allocatableCount) + framework.Logf("finish checking node batch resources, total[%v], allocatable[%v]", + totalCount, allocatableCount) return true } @@ -173,10 +179,53 @@ var _ = SIGDescribe("BatchResource", func() { }, 180*time.Second, 5*time.Second).Should(gomega.Equal(true)) framework.Logf("check node batch resources finished, total[%v], allocatable[%v]", totalCount, allocatableCount) + + framework.Logf("start to verify node batch resources scheduling") + + ginkgo.By("Loading Pod from manifest") + pod, err := manifest.PodFromManifest("slocontroller/be-demo.yaml") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + pod.Namespace = f.Namespace.Name + pod.Spec.SchedulerName = koordSchedulerName + gomega.Expect(len(pod.Spec.Containers)).Should(gomega.Equal(1)) + + ginkgo.By("Create a Batch Pod") + f.PodClient().Create(pod) + defer func() { + err = f.PodClient().Delete(context.TODO(), pod.Name, metav1.DeleteOptions{}) + framework.ExpectNoError(err) + }() + + ginkgo.By("Wait for Batch Pod Ready") + gomega.Eventually(func() bool { + p, err := f.PodClient().Get(context.TODO(), pod.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, podReady := k8spodutil.GetPodCondition(&p.Status, corev1.PodReady) + _, containersReady := k8spodutil.GetPodCondition(&p.Status, corev1.ContainersReady) + framework.Logf("created batch pod %s/%s 
and got status: %+v", p.Namespace, p.Name, p.Status) + + return podReady != nil && podReady.Status == corev1.ConditionTrue && + containersReady != nil && containersReady.Status == corev1.ConditionTrue + }, 180*time.Second, 5*time.Second).Should(gomega.Equal(true)) }) }) }) +func isNodeMetricValid(nodeMetric *slov1alpha1.NodeMetric) (bool, string) { + if nodeMetric == nil || nodeMetric.Status.NodeMetric == nil || nodeMetric.Status.NodeMetric.NodeUsage.ResourceList == nil { + return false, "node metric is incomplete" + } + _, ok := nodeMetric.Status.NodeMetric.NodeUsage.ResourceList[corev1.ResourceCPU] + if !ok { + return false, "cpu usage is missing" + } + _, ok = nodeMetric.Status.NodeMetric.NodeUsage.ResourceList[corev1.ResourceMemory] + if !ok { + return false, "memory usage is missing" + } + return true, "" +} + func isNodeBatchResourcesValid(node *corev1.Node, nodeMetric *slov1alpha1.NodeMetric) (bool, string) { // validate the node if node == nil || node.Status.Allocatable == nil { @@ -187,7 +236,8 @@ func isNodeBatchResourcesValid(node *corev1.Node, nodeMetric *slov1alpha1.NodeMe if !ok { return false, "batch cpu is missing" } - if batchMilliCPU.Value() < 0 || batchMilliCPU.Value() > node.Status.Allocatable.Cpu().MilliValue() { + // batch cpu can be larger when cpu normalization ratio > 1.0 + if batchMilliCPU.Value() < 0 { return false, "batch cpu is illegal" } batchMemory, ok := node.Status.Allocatable[apiext.BatchMemory] @@ -198,17 +248,11 @@ func isNodeBatchResourcesValid(node *corev1.Node, nodeMetric *slov1alpha1.NodeMe return false, "batch memory is illegal" } // validate the node metric - if nodeMetric == nil || nodeMetric.Status.NodeMetric == nil || nodeMetric.Status.NodeMetric.NodeUsage.ResourceList == nil { - return false, "node metric is incomplete" - } - cpuUsage, ok := nodeMetric.Status.NodeMetric.NodeUsage.ResourceList[corev1.ResourceCPU] - if !ok { - return false, "cpu usage is missing" - } - memoryUsage, ok := nodeMetric.Status.NodeMetric.NodeUsage.ResourceList[corev1.ResourceMemory] - if !ok { - return false, "memory usage is missing" + if isValid, msg := isNodeMetricValid(nodeMetric); !isValid { + return false, msg } + cpuUsage := nodeMetric.Status.NodeMetric.NodeUsage.ResourceList[corev1.ResourceCPU] + memoryUsage := nodeMetric.Status.NodeMetric.NodeUsage.ResourceList[corev1.ResourceMemory] // roughly check the batch resource results: // batch.total >= node.total - node.total * cpuReclaimRatio - nodeMetric.usage - node.total * maxDiffRatio estimatedBatchMilliCPULower := node.Status.Allocatable.Cpu().MilliValue()*int64(100-cpuReclaimThresholdPercent-maxNodeBatchCPUDiffPercent)/100 - cpuUsage.MilliValue() diff --git a/test/e2e/slocontroller/cpunormalization.go b/test/e2e/slocontroller/cpunormalization.go new file mode 100644 index 000000000..aaf634dac --- /dev/null +++ b/test/e2e/slocontroller/cpunormalization.go @@ -0,0 +1,346 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package slocontroller + +import ( + "context" + "encoding/json" + "math" + "time" + + topov1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1" + nrtclientset "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned" + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/utils/pointer" + + "github.com/koordinator-sh/koordinator/apis/configuration" + "github.com/koordinator-sh/koordinator/apis/extension" + koordinatorclientset "github.com/koordinator-sh/koordinator/pkg/client/clientset/versioned" + "github.com/koordinator-sh/koordinator/test/e2e/framework" + "github.com/koordinator-sh/koordinator/test/e2e/framework/manifest" + e2enode "github.com/koordinator-sh/koordinator/test/e2e/framework/node" +) + +var ( + defaultCPUModelRatioCfg = configuration.ModelRatioCfg{ + BaseRatio: pointer.Float64(1.5), + TurboEnabledRatio: pointer.Float64(1.65), + HyperThreadEnabledRatio: pointer.Float64(1.0), + HyperThreadTurboEnabledRatio: pointer.Float64(1.1), + } + + ratioDiffEpsilon = 0.01 + minNodesCPUBasicInfoReadyRatio = 0.7 + minNodesCPUNormalizationCorrectRatio = 0.7 +) + +var _ = SIGDescribe("CPUNormalization", func() { + var nodeList *corev1.NodeList + var c clientset.Interface + var koordClient koordinatorclientset.Interface + var nrtClient nrtclientset.Interface + var koordNamespace, sloConfigName string + var err error + + f := framework.NewDefaultFramework("cpunormalization") + f.SkipNamespaceCreation = true + + ginkgo.BeforeEach(func() { + c = f.ClientSet + koordClient = f.KoordinatorClientSet + koordNamespace = framework.TestContext.KoordinatorComponentNamespace + sloConfigName = framework.TestContext.SLOCtrlConfigMap + + nrtClient, err = nrtclientset.NewForConfig(f.ClientConfig()) + framework.ExpectNoError(err, "unable to create NRT ClientSet") + + framework.Logf("get some nodes which are ready and schedulable") + nodeList, err = e2enode.GetReadySchedulableNodes(c) + framework.ExpectNoError(err) + + // fail the test when no node available + gomega.Expect(len(nodeList.Items)).NotTo(gomega.BeZero()) + }) + + framework.KoordinatorDescribe("CPUNormalization RatioUpdate", func() { + framework.ConformanceIt("update cpu normalization ratios in the node annotations", func() { + ginkgo.By("Check node cpu basic infos on NodeResourceTopology") + // assert totalCount > 0 + // FIXME: the failures of NUMA topology reporting are ignored + totalCount, validNodeCount, skippedCount := len(nodeList.Items), 0, 0 + var cpuModels []*extension.CPUBasicInfo + gomega.Eventually(func() bool { + for i := range nodeList.Items { + node := &nodeList.Items[i] + + nodeMetric, err := koordClient.SloV1alpha1().NodeMetrics().Get(context.TODO(), node.Name, metav1.GetOptions{}) + if err != nil { + framework.Logf("failed to get node metric for node %s, err: %v", node.Name, err) + continue + } + // validate NodeMetric + isMetricValid, msg := isNodeMetricValid(nodeMetric) + if !isMetricValid { + framework.Logf("node %s has no valid node metric, msg: %s", node.Name, msg) + continue + } + + nrt, err := nrtClient.TopologyV1alpha1().NodeResourceTopologies().Get(context.TODO(), node.Name, metav1.GetOptions{}) + if err != nil { + framework.Logf("failed to get node resource topology for node %s, err: %v", node.Name, err) + continue + } + // validate NodeResourceTopology + isNRTValid, 
msg := isNRTValid(nrt) + if !isNRTValid { + skippedCount++ + framework.Logf("node %s has no valid node resource topology, msg: %s", node.Name, msg) + continue + } + cpuBasicInfo, err := extension.GetCPUBasicInfo(nrt.Annotations) + if err != nil { + framework.Logf("nrt %s has no valid cpu basic info, err: %s", nrt.Name, err) + continue + } + if cpuBasicInfo == nil { + skippedCount++ + framework.Logf("nrt %s has no cpu basic info", nrt.Name) + continue + } + cpuModels = append(cpuModels, cpuBasicInfo) + + validNodeCount++ + } + + // expect enough valid NRTs have cpu basic info + if float64(validNodeCount+skippedCount) <= float64(totalCount)*minNodesCPUBasicInfoReadyRatio { + framework.Logf("there should be enough nodes that have cpu basic info on NRT, but got:"+ + " total[%v], valid[%v], skipped[%v]", totalCount, validNodeCount, skippedCount) + // reset nodes and counters + nodeList, err = e2enode.GetReadySchedulableNodes(c) + framework.ExpectNoError(err) + gomega.Expect(len(nodeList.Items)).NotTo(gomega.BeZero()) + cpuModels = make([]*extension.CPUBasicInfo, 0) + totalCount, validNodeCount, skippedCount = len(nodeList.Items), 0, 0 + return false + } + + framework.Logf("finish checking node cpu basic info, total[%v], valid[%v], skipped[%v]", + totalCount, validNodeCount, skippedCount) + return true + }, 180*time.Second, 5*time.Second).Should(gomega.Equal(true)) + + cpuNormalizationStrategy := makeCPUNormalizationStrategyForModels(cpuModels) + framework.Logf("prepare cpu ratio model: [%+v]", cpuNormalizationStrategy) + cpuNormalizationConfigBytes, err := json.Marshal(cpuNormalizationStrategy) + framework.ExpectNoError(err) + cpuNormalizationData := string(cpuNormalizationConfigBytes) + + ginkgo.By("Loading slo-controller-config in the cluster") + isConfigCreated := false + configMap, err := c.CoreV1().ConfigMaps(koordNamespace).Get(context.TODO(), sloConfigName, metav1.GetOptions{}) + if err == nil { + isConfigCreated = true + framework.Logf("successfully get slo-controller-config %s/%s", koordNamespace, sloConfigName) + } else if errors.IsNotFound(err) { + framework.Logf("cannot get slo-controller-config %s/%s, try to create a new one", + koordNamespace, sloConfigName) + } else { + framework.Failf("failed to get slo-controller-config %s/%s, got unexpected error: %v", + koordNamespace, sloConfigName, err) + } + + // If configmap is created, try to patch it with colocation enabled. + // If not exist, create the slo-controller-config. + // NOTE: slo-controller-config should not be modified by the others during the e2e test. 
+ ginkgo.By("Prepare slo-controller-config to enable cpu normalization") + if isConfigCreated { + needUpdate := false + rollbackData := map[string]string{} + if configMap.Data == nil { + needUpdate = true + } else if configMap.Data[configuration.ColocationConfigKey] != colocationEnabledConfigData || + configMap.Data[configuration.CPUNormalizationConfigKey] != cpuNormalizationData { + rollbackData[configuration.ColocationConfigKey] = configMap.Data[configuration.ColocationConfigKey] + rollbackData[configuration.CPUNormalizationConfigKey] = configMap.Data[configuration.CPUNormalizationConfigKey] + needUpdate = true + } + + if needUpdate { + framework.Logf("cpu normalization is not enabled in slo-controller-config, need update") + defer rollbackSLOConfigData(f, koordNamespace, sloConfigName, rollbackData) + + newConfigMap := configMap.DeepCopy() + newConfigMap.Data[configuration.ColocationConfigKey] = colocationEnabledConfigData + newConfigMap.Data[configuration.CPUNormalizationConfigKey] = cpuNormalizationData + newConfigMapUpdated, err := c.CoreV1().ConfigMaps(koordNamespace).Update(context.TODO(), newConfigMap, metav1.UpdateOptions{}) + framework.ExpectNoError(err) + framework.Logf("update slo-controller-config successfully, data: %+v", newConfigMapUpdated.Data) + configMap = newConfigMapUpdated + } else { + framework.Logf("cpu normalization is already enabled in slo-controller-config, keep the same") + } + } else { + framework.Logf("slo-controller-config does not exist, need create") + newConfigMap, err := manifest.ConfigMapFromManifest("slocontroller/slo-controller-config.yaml") + framework.ExpectNoError(err) + + newConfigMap.SetNamespace(koordNamespace) + newConfigMap.SetName(sloConfigName) + newConfigMap.Data[configuration.ColocationConfigKey] = colocationEnabledConfigData + newConfigMap.Data[configuration.CPUNormalizationConfigKey] = cpuNormalizationData + + newConfigMapCreated, err := c.CoreV1().ConfigMaps(koordNamespace).Create(context.TODO(), newConfigMap, metav1.CreateOptions{}) + framework.ExpectNoError(err) + framework.Logf("create slo-controller-config successfully, data: %+v", newConfigMapCreated.Data) + configMap = newConfigMapCreated + + defer rollbackSLOConfigObject(f, koordNamespace, sloConfigName) + } + + ginkgo.By("Check node cpu normalization ratios") + totalCount, validNodeCount, skippedCount = len(nodeList.Items), 0, 0 + gomega.Eventually(func() bool { + framework.Logf("relist some nodes which are ready and schedulable") + nodeList, err = e2enode.GetReadySchedulableNodes(c) + framework.ExpectNoError(err) + + for i := range nodeList.Items { + node := &nodeList.Items[i] + ratio, err := extension.GetCPUNormalizationRatio(node) + if err != nil { + framework.Logf("failed to get cpu normalization ratio for node %s, err: %v", node.Name, err) + continue + } + if ratio < 1.0 { + skippedCount++ + framework.Logf("failed to get valid normalization ratio for node %s, skipped, ratio: %v", + node.Name, ratio) + continue + } + + nrt, err := nrtClient.TopologyV1alpha1().NodeResourceTopologies().Get(context.TODO(), node.Name, metav1.GetOptions{}) + if err != nil { + framework.Logf("failed to get node resource topology for node %s, err: %v", node.Name, err) + continue + } + // validate NodeResourceTopology + isNRTValid, msg := isNRTValid(nrt) + if !isNRTValid { + skippedCount++ + framework.Logf("node %s has no valid node resource topology, msg: %s", node.Name, msg) + continue + } + cpuBasicInfo, err := extension.GetCPUBasicInfo(nrt.Annotations) + if err != nil { + framework.Logf("nrt %s 
has no valid cpu basic info, err: %s", nrt.Name, err) + continue + } + if cpuBasicInfo == nil { + skippedCount++ + framework.Logf("nrt %s has no cpu basic info", nrt.Name) + continue + } + + if expectedRatio := getCPUNormalizationRatioInDefaultModel(cpuBasicInfo); math.Abs(ratio-expectedRatio) > ratioDiffEpsilon { + framework.Logf("node cpu normalization ratio is different from expected, node %s, "+ + "cpu model [%+v], expected %v, current %v", node.Name, cpuBasicInfo, expectedRatio, ratio) + continue + } + + framework.Logf("check node %s cpu normalization ratio successfully, ratio %v", + node.Name, ratio) + validNodeCount++ + } + + // expect enough correct node ratios + if float64(validNodeCount+skippedCount) <= float64(totalCount)*minNodesCPUNormalizationCorrectRatio { + framework.Logf("there should be enough nodes that have correct cpu normalization ratio, "+ + "but got: total[%v], valid[%v], skipped[%v]", totalCount, validNodeCount, skippedCount) + // reset nodes and counters + nodeList, err = e2enode.GetReadySchedulableNodes(c) + framework.ExpectNoError(err) + gomega.Expect(len(nodeList.Items)).NotTo(gomega.BeZero()) + totalCount, validNodeCount, skippedCount = len(nodeList.Items), 0, 0 + return false + } + + framework.Logf("finish checking node cpu normalization ratio, total[%v], valid[%v], skipped[%v]", + totalCount, validNodeCount, skippedCount) + return true + }, 120*time.Second, 5*time.Second).Should(gomega.Equal(true)) + }) + + // TODO: submit a LS pod allocating cpu-normalized cpu resource + }) +}) + +func isNRTValid(nrt *topov1alpha1.NodeResourceTopology) (bool, string) { + if nrt == nil || nrt.Annotations == nil { + return false, "nrt is incomplete" + } + if len(nrt.Zones) <= 0 { + return false, "nrt has no zone" + } + if len(nrt.TopologyPolicies) <= 0 { + return false, "nrt has no topology policy" + } + return true, "" +} + +func makeCPUNormalizationStrategyForModels(cpuModels []*extension.CPUBasicInfo) *configuration.CPUNormalizationStrategy { + ratioModel := map[string]configuration.ModelRatioCfg{} + for _, cpuModel := range cpuModels { + ratioCfg, ok := ratioModel[cpuModel.CPUModel] + if !ok { + ratioCfg = configuration.ModelRatioCfg{} + } + if cpuModel.HyperThreadEnabled && cpuModel.TurboEnabled { + ratioCfg.HyperThreadTurboEnabledRatio = defaultCPUModelRatioCfg.HyperThreadTurboEnabledRatio + } else if cpuModel.HyperThreadEnabled { + ratioCfg.HyperThreadEnabledRatio = defaultCPUModelRatioCfg.HyperThreadEnabledRatio + } else if cpuModel.TurboEnabled { + ratioCfg.TurboEnabledRatio = defaultCPUModelRatioCfg.TurboEnabledRatio + } else { + ratioCfg.BaseRatio = defaultCPUModelRatioCfg.BaseRatio + } + ratioModel[cpuModel.CPUModel] = ratioCfg + } + + return &configuration.CPUNormalizationStrategy{ + Enable: pointer.Bool(true), + RatioModel: ratioModel, + } +} + +func getCPUNormalizationRatioInDefaultModel(info *extension.CPUBasicInfo) float64 { + if info.HyperThreadEnabled && info.TurboEnabled { + return *defaultCPUModelRatioCfg.HyperThreadTurboEnabledRatio + } + if info.HyperThreadEnabled { + return *defaultCPUModelRatioCfg.HyperThreadEnabledRatio + } + if info.TurboEnabled { + return *defaultCPUModelRatioCfg.TurboEnabledRatio + } + return *defaultCPUModelRatioCfg.BaseRatio +} diff --git a/test/e2e/testing-manifests/slocontroller/be-demo.yaml b/test/e2e/testing-manifests/slocontroller/be-demo.yaml new file mode 100644 index 000000000..5de4fcf61 --- /dev/null +++ b/test/e2e/testing-manifests/slocontroller/be-demo.yaml @@ -0,0 +1,25 @@ +--- +apiVersion: v1 +kind: Pod +metadata: + 
labels: + app: e2e-test + koordinator.sh/qosClass: BE + name: be-demo + name: be-demo +spec: + schedulerName: koord-scheduler + priorityClassName: koord-batch + containers: + - image: {{.BusyBoxImage}} + imagePullPolicy: IfNotPresent + name: pause + command: ["sleep", "2000000000000"] + resources: + limits: + kubernetes.io/batch-cpu: "500" + kubernetes.io/batch-memory: 1Gi + requests: + kubernetes.io/batch-cpu: "50" + kubernetes.io/batch-memory: 100Mi + restartPolicy: Never
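For context on the rough bound asserted in batchresource.go above: with the updated colocation config (cpuReclaimThresholdPercent=80) and the test tolerance maxNodeBatchCPUDiffPercent=10, a node's kubernetes.io/batch-cpu allocatable is expected to be at least node.allocatable*(100-80-10)/100 minus the CPU usage reported in its NodeMetric. The following standalone sketch works that arithmetic through with hypothetical node numbers (8000m allocatable, 500m in use); it is an illustration, not part of the patch.

package main

import "fmt"

func main() {
	// Thresholds from colocationEnabledConfigData and the test constants.
	const (
		cpuReclaimThresholdPercent = 80
		maxNodeBatchCPUDiffPercent = 10
	)

	// Hypothetical node: 8000m allocatable CPU, 500m in use per its NodeMetric.
	nodeAllocatableMilliCPU := int64(8000)
	nodeUsageMilliCPU := int64(500)

	// Mirrors estimatedBatchMilliCPULower in the diff:
	// batch.total >= node.total*(100 - reclaim% - maxDiff%)/100 - node.usage
	lower := nodeAllocatableMilliCPU*int64(100-cpuReclaimThresholdPercent-maxNodeBatchCPUDiffPercent)/100 - nodeUsageMilliCPU
	fmt.Printf("kubernetes.io/batch-cpu should be at least %dm\n", lower) // prints 300m
}

On a mostly idle node this lower bound can go negative, in which case any non-negative batch allocatable satisfies the check; the memory bound is computed the same way from memoryReclaimThresholdPercent=80 and maxNodeBatchMemoryDiffPercent=5.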