*: use warmup as a general-purpose name #137

Merged · 1 commit · Oct 31, 2024
contrib/cmd/runkperf/commands/root.go (4 changes: 2 additions & 2 deletions)

@@ -10,7 +10,7 @@ import (
"strconv"

"github.com/Azure/kperf/contrib/cmd/runkperf/commands/bench"
"github.com/Azure/kperf/contrib/cmd/runkperf/commands/ekswarmup"
"github.com/Azure/kperf/contrib/cmd/runkperf/commands/warmup"

"github.com/urfave/cli"
"k8s.io/klog/v2"
@@ -22,7 +22,7 @@ func App() *cli.App {
Name: "runkperf",
// TODO: add more fields
Commands: []cli.Command{
ekswarmup.Command,
warmup.Command,
bench.Command,
},
Flags: []cli.Flag{
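Because urfave/cli dispatches subcommands purely by `Command.Name`, the rename only has to touch the import path and the registration above. A minimal, self-contained sketch of that dispatch pattern (illustrative only, not kperf code):

```go
package main

import (
	"fmt"
	"os"

	"github.com/urfave/cli"
)

func main() {
	app := cli.NewApp()
	app.Name = "runkperf"
	app.Commands = []cli.Command{
		{
			// The subcommand's CLI name comes solely from this field,
			// which is why renaming "ekswarmup" to "warmup" is enough.
			Name:  "warmup",
			Usage: "Warmup cluster and try best to scale it to 8 cores at least",
			Action: func(c *cli.Context) error {
				fmt.Println("warming up...")
				return nil
			},
		},
	}
	if err := app.Run(os.Args); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```

After this change the invocation becomes `runkperf warmup` instead of `runkperf ekswarmup`.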
contrib/cmd/runkperf/commands/ekswarmup → contrib/cmd/runkperf/commands/warmup (command source, package renamed)
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.

-package ekswarmup
+package warmup

 import (
 	"context"
@@ -10,6 +10,7 @@ import (
"time"

"github.com/Azure/kperf/api/types"
kperfcmdutils "github.com/Azure/kperf/cmd/kperf/commands/utils"
"github.com/Azure/kperf/contrib/internal/utils"

"github.com/urfave/cli"
@@ -21,10 +22,10 @@ import (
"k8s.io/klog/v2"
)

// Command represents ekswarmup subcommand.
// Command represents warmup subcommand.
var Command = cli.Command{
Name: "ekswarmup",
Usage: "Warmup EKS cluster and try best to scale it to 8 cores at least",
Name: "warmup",
Usage: "Warmup cluster and try best to scale it to 8 cores at least",
Flags: []cli.Flag{
cli.StringFlag{
Name: "kubeconfig",
@@ -55,12 +56,27 @@
 			Usage: "Total requests per runner (There are 10 runners totally and runner's rate is 20)",
 			Value: 10000,
 		},
+		cli.StringFlag{
+			Name:  "vc-affinity",
+			Usage: "Deploy virtualnode's controller with a specific labels (FORMAT: KEY=VALUE[,VALUE])",
+			Value: "node.kubernetes.io/instance-type=Standard_D8s_v3,m4.2xlarge",
+		},
+		cli.StringFlag{
+			Name:  "rg-affinity",
+			Usage: "Deploy runner group with a specific labels (FORMAT: KEY=VALUE[,VALUE])",
+			Value: "node.kubernetes.io/instance-type=Standard_D16s_v3,m4.4xlarge",
+		},
+		cli.BoolFlag{
+			Name:   "eks",
+			Usage:  "Indicates the target kubernetes cluster is EKS",
+			Hidden: true,
+		},
 	},
 	Action: func(cliCtx *cli.Context) (retErr error) {
 		ctx := context.Background()

 		rgCfgFile, rgCfgFileDone, err := utils.NewLoadProfileFromEmbed(
-			"loadprofile/ekswarmup.yaml",
+			"loadprofile/warmup.yaml",
 			func(spec *types.RunnerGroupSpec) error {
 				reqs := cliCtx.Int("total")
 				if reqs < 0 {
@@ -72,8 +88,15 @@
 					return fmt.Errorf("invalid rate value: %v", rate)
 				}

+				rgAffinity := cliCtx.String("rg-affinity")
+				affinityLabels, err := kperfcmdutils.KeyValuesMap([]string{rgAffinity})
+				if err != nil {
+					return fmt.Errorf("failed to parse %s affinity: %w", rgAffinity, err)
+				}
+
 				spec.Profile.Spec.Total = reqs
 				spec.Profile.Spec.Rate = rate
+				spec.NodeAffinity = affinityLabels

 				data, _ := yaml.Marshal(spec)
 				klog.V(2).InfoS("Load Profile", "config", string(data))
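The new flags use a KEY=VALUE[,VALUE] format that `kperfcmdutils.KeyValuesMap` turns into a map keyed by label name. Its real implementation is not shown in this diff; below is a hedged sketch of a parser with the same observable shape, for readers unfamiliar with the format:

```go
package main

import (
	"fmt"
	"strings"
)

// keyValuesMap mimics the apparent contract of kperfcmdutils.KeyValuesMap:
// each entry "KEY=VALUE[,VALUE]" becomes one map key with a slice of values.
func keyValuesMap(entries []string) (map[string][]string, error) {
	res := make(map[string][]string, len(entries))
	for _, e := range entries {
		key, values, ok := strings.Cut(e, "=")
		if !ok || key == "" {
			return nil, fmt.Errorf("expected KEY=VALUE[,VALUE] format, got %q", e)
		}
		res[key] = strings.Split(values, ",")
	}
	return res, nil
}

func main() {
	// The default of --rg-affinity from this PR:
	m, err := keyValuesMap([]string{"node.kubernetes.io/instance-type=Standard_D16s_v3,m4.4xlarge"})
	fmt.Println(m, err)
	// map[node.kubernetes.io/instance-type:[Standard_D16s_v3 m4.4xlarge]] <nil>
}
```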
@@ -86,10 +109,14 @@
 		defer func() { _ = rgCfgFileDone() }()

 		kubeCfgPath := cliCtx.String("kubeconfig")
+		isEKS := cliCtx.Bool("eks")
+		virtualNodeAffinity := cliCtx.String("vc-affinity")

-		perr := patchEKSDaemonsetWithoutToleration(ctx, kubeCfgPath)
-		if perr != nil {
-			return perr
+		if isEKS {
+			perr := patchEKSDaemonsetWithoutToleration(ctx, kubeCfgPath)
+			if perr != nil {
+				return perr
+			}
 		}

 		cores, ferr := utils.FetchAPIServerCores(ctx, kubeCfgPath)
@@ -102,7 +129,7 @@
 			klog.V(0).ErrorS(ferr, "failed to fetch apiserver cores")
 		}

-		delNP, err := deployWarmupVirtualNodepool(ctx, kubeCfgPath)
+		delNP, err := deployWarmupVirtualNodepool(ctx, kubeCfgPath, isEKS, virtualNodeAffinity)
 		if err != nil {
 			return err
 		}
@@ -155,24 +182,30 @@ func isReady(cores map[string]int) bool {
 	return n >= 2
 }

-// deployWarmupVirtualNodepool deploys nodepool on m4.2xlarge nodes for warmup.
-func deployWarmupVirtualNodepool(ctx context.Context, kubeCfgPath string) (func() error, error) {
+// deployWarmupVirtualNodepool deploys virtual nodepool.
+func deployWarmupVirtualNodepool(ctx context.Context, kubeCfgPath string, isEKS bool, nodeAffinity string) (func() error, error) {
 	target := "warmup"
-	kr := utils.NewKperfRunner(kubeCfgPath, "")

 	klog.V(0).InfoS("Deploying virtual nodepool", "name", target)
-	sharedProviderID, err := utils.FetchNodeProviderIDByType(ctx, kubeCfgPath, utils.EKSIdleNodepoolInstanceType)
-	if err != nil {
-		return nil, fmt.Errorf("failed to get placeholder providerID: %w", err)
+
+	kr := utils.NewKperfRunner(kubeCfgPath, "")
+
+	sharedProviderID := ""
+	var err error
+
+	if isEKS {
+		sharedProviderID, err = utils.FetchNodeProviderIDByType(ctx, kubeCfgPath, utils.EKSIdleNodepoolInstanceType)
+		if err != nil {
+			return nil, fmt.Errorf("failed to get placeholder providerID: %w", err)
+		}
 	}

 	klog.V(0).InfoS("Trying to delete", "nodepool", target)
 	if err = kr.DeleteNodepool(ctx, 0, target); err != nil {
 		klog.V(0).ErrorS(err, "failed to delete", "nodepool", target)
 	}

-	err = kr.NewNodepool(ctx, 0, target, 100, 32, 96, 110,
-		"node.kubernetes.io/instance-type=m4.2xlarge", sharedProviderID)
+	err = kr.NewNodepool(ctx, 0, target, 100, 32, 96, 110, nodeAffinity, sharedProviderID)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create nodepool %s: %w", target, err)
 	}
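The hard-coded `node.kubernetes.io/instance-type=m4.2xlarge` selector is gone; `NewNodepool` now receives whatever `--vc-affinity` supplied. For intuition, such a label selector corresponds to a standard Kubernetes required node-affinity term. A sketch using client-go's core/v1 types (the helper function is hypothetical; kperf's actual nodepool templating is not shown in this diff):

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// nodeAffinityFor is a hypothetical helper turning parsed affinity labels
// into the required node-affinity term a pod spec would carry.
func nodeAffinityFor(labels map[string][]string) *corev1.NodeAffinity {
	reqs := make([]corev1.NodeSelectorRequirement, 0, len(labels))
	for key, values := range labels {
		reqs = append(reqs, corev1.NodeSelectorRequirement{
			Key:      key,
			Operator: corev1.NodeSelectorOpIn,
			Values:   values,
		})
	}
	return &corev1.NodeAffinity{
		RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
			NodeSelectorTerms: []corev1.NodeSelectorTerm{{MatchExpressions: reqs}},
		},
	}
}

func main() {
	// Default of --vc-affinity from this PR.
	na := nodeAffinityFor(map[string][]string{
		"node.kubernetes.io/instance-type": {"Standard_D8s_v3", "m4.2xlarge"},
	})
	fmt.Printf("%+v\n", na)
}
```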
@@ -185,8 +218,9 @@ func deployWarmupVirtualNodepool(ctx context.Context, kubeCfgPath string) (func() error, error) {
 // patchEKSDaemonsetWithoutToleration removes tolerations to avoid pod scheduled
 // to virtual nodes.
 func patchEKSDaemonsetWithoutToleration(ctx context.Context, kubeCfgPath string) error {
-	clientset := mustClientset(kubeCfgPath)
+	klog.V(0).Info("Trying to removes EKS Daemonset's tolerations to avoid pod scheduled to virtual nodes")

+	clientset := mustClientset(kubeCfgPath)
 	ds := clientset.AppsV1().DaemonSets("kube-system")
 	for _, dn := range []string{"aws-node", "kube-proxy"} {
 		d, err := ds.Get(ctx, dn, metav1.GetOptions{})
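`patchEKSDaemonsetWithoutToleration` now runs only when `--eks` is set. The patch body itself lies outside this hunk; what follows is a minimal client-go sketch of that kind of toleration-stripping update, assuming a plain Get/Update cycle (package and helper names are illustrative, not kperf's actual code):

```go
package warmuputil

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

// stripTolerations removes all tolerations from the named kube-system
// daemonsets so their pods cannot land on tainted virtual nodes.
func stripTolerations(ctx context.Context, kubeCfgPath string) error {
	cfg, err := clientcmd.BuildConfigFromFlags("", kubeCfgPath)
	if err != nil {
		return err
	}
	clientset, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		return err
	}

	ds := clientset.AppsV1().DaemonSets("kube-system")
	for _, dn := range []string{"aws-node", "kube-proxy"} {
		d, err := ds.Get(ctx, dn, metav1.GetOptions{})
		if err != nil {
			return fmt.Errorf("failed to get daemonset %s: %w", dn, err)
		}
		// Dropping all tolerations keeps the daemonset off tainted virtual nodes.
		d.Spec.Template.Spec.Tolerations = nil
		if _, err := ds.Update(ctx, d, metav1.UpdateOptions{}); err != nil {
			return fmt.Errorf("failed to update daemonset %s: %w", dn, err)
		}
	}
	return nil
}
```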
contrib/internal/manifests/loadprofile/warmup.yaml (renamed from ekswarmup.yaml)

@@ -25,6 +25,3 @@ loadProfile:
       resource: events
       limit: 1000
       shares: 100 # chance 100 / (1000 + 100 + 100)
-nodeAffinity:
-  node.kubernetes.io/instance-type:
-  - m4.4xlarge
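The m4.4xlarge default removed from the profile here does not disappear: it is folded into the new `--rg-affinity` flag default (`node.kubernetes.io/instance-type=Standard_D16s_v3,m4.4xlarge`, where Standard_D16s_v3 is the Azure VM size counterpart), so non-EKS clusters can override it at the command line.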
contrib/internal/utils/utils.go (6 changes: 0 additions & 6 deletions)

@@ -39,12 +39,6 @@ var (
 	// provider ID for all the virtual nodes so that EKS cloud provider
 	// won't delete our virtual nodes.
 	EKSIdleNodepoolInstanceType = "m4.large"
-
-	// EKSRunnerNodepoolInstanceType is the instance type of nodes for kperf
-	// runners.
-	//
-	// NOTE: This is default type. Please align it with ../manifests/loadprofile/ekswarmup.yaml.
-	EKSRunnerNodepoolInstanceType = "m4.4xlarge"
 )

 // RepeatJobWithPod repeats to deploy 3k pods.