From 1ffeaaf6eb664c9f2850f9881d7d06776963c18f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Cor=C3=A9?= Date: Tue, 18 Jun 2024 14:24:53 +0200 Subject: [PATCH] Fix Sandbox API scheduling of OcpSandbox (#71) * Fix Sandbox API scheduling of OcpSandbox see GPTEINFRA-9992 The current scheduling algorithm of OcpSandbox selects the cluster with the least memory usage. Plus, the calculation is done by summing pods requests memory, which seems dubious. This is counterintuitive and against the principle of least astonishment. Instead, the algorithm should randomly select a cluster with available memory space. The minimum memory space required for a cluster to be schedulable should be configurable in the OcpSharedClusterConfiguration table. The default value should be 90% of the total memory of the cluster. This change: * Create 2 new columns in the `ocp_shared_cluster_configuration` for max CPU and max Memory percentage for a cluster to be considered healthy. max CPU default: 100 max Memory default: 90 * Upgrade to Go 1.22, required by k8s.io module `k8s.io/api@v0.30.2` * Schedule randomly on the first available cluster. 
* TODO: loop only on schedulable nodes * Filter out unhealthy nodes when calculating usage --- Containerfile.admin | 4 +- Containerfile.api | 4 +- Containerfile.conan | 4 +- Containerfile.metrics | 2 +- db/migrations/010_memory_cpu_usage.down.sql | 6 + db/migrations/010_memory_cpu_usage.up.sql | 10 + docs/api-reference/swagger.yaml | 10 + go.mod | 13 +- go.sum | 26 +-- internal/models/ocp_sandbox.go | 196 +++++++++++++++----- readme.adoc | 2 +- 11 files changed, 209 insertions(+), 68 deletions(-) create mode 100644 db/migrations/010_memory_cpu_usage.down.sql create mode 100644 db/migrations/010_memory_cpu_usage.up.sql diff --git a/Containerfile.admin b/Containerfile.admin index c3b2892f..5512c4c2 100644 --- a/Containerfile.admin +++ b/Containerfile.admin @@ -1,5 +1,5 @@ -ARG GO_VERSION=1.21 -# Switch back to Red Hat go-toolset when it supports go 1.21 +ARG GO_VERSION=1.22 +# Switch back to Red Hat go-toolset when it supports go 1.22 #FROM registry.access.redhat.com/ubi8/go-toolset:latest AS builder FROM docker.io/golang:${GO_VERSION}-bullseye as builder WORKDIR /sandbox/ diff --git a/Containerfile.api b/Containerfile.api index 3cf9f888..d2e99b71 100644 --- a/Containerfile.api +++ b/Containerfile.api @@ -1,5 +1,5 @@ -ARG GO_VERSION=1.21 -# Switch back to Red Hat go-toolset when it supports go 1.20 +ARG GO_VERSION=1.22 +# Switch back to Red Hat go-toolset when it supports go 1.22 #FROM registry.access.redhat.com/ubi8/go-toolset:latest AS builder FROM docker.io/golang:${GO_VERSION}-bullseye as builder WORKDIR /sandbox/ diff --git a/Containerfile.conan b/Containerfile.conan index d504c2b1..d0d6ce2d 100644 --- a/Containerfile.conan +++ b/Containerfile.conan @@ -1,5 +1,5 @@ -ARG GO_VERSION=1.21 -# Switch back to Red Hat go-toolset when it supports go 1.21 +ARG GO_VERSION=1.22 +# Switch back to Red Hat go-toolset when it supports go 1.22 #FROM registry.access.redhat.com/ubi8/go-toolset:latest AS builder FROM docker.io/golang:${GO_VERSION}-bullseye as builder WORKDIR
/sandbox/ diff --git a/Containerfile.metrics b/Containerfile.metrics index b313828c..a5b9de8a 100644 --- a/Containerfile.metrics +++ b/Containerfile.metrics @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.21 +ARG GO_VERSION=1.22 # Switch back to Red Hat go-toolset when it supports go 1.20 #FROM registry.access.redhat.com/ubi8/go-toolset:latest AS builder FROM docker.io/golang:${GO_VERSION} as builder diff --git a/db/migrations/010_memory_cpu_usage.down.sql b/db/migrations/010_memory_cpu_usage.down.sql new file mode 100644 index 00000000..6c7ebf43 --- /dev/null +++ b/db/migrations/010_memory_cpu_usage.down.sql @@ -0,0 +1,6 @@ +BEGIN; + +ALTER TABLE ocp_shared_cluster_configurations DROP COLUMN max_memory_usage_percentage; +ALTER TABLE ocp_shared_cluster_configurations DROP COLUMN max_cpu_usage_percentage; + +COMMIT; diff --git a/db/migrations/010_memory_cpu_usage.up.sql b/db/migrations/010_memory_cpu_usage.up.sql new file mode 100644 index 00000000..6cb859ff --- /dev/null +++ b/db/migrations/010_memory_cpu_usage.up.sql @@ -0,0 +1,10 @@ +BEGIN; +-- Add max_memory_usage_percentage column to the ocp_shared_cluster_configurations table of type REAL +-- default value 90 +ALTER TABLE ocp_shared_cluster_configurations ADD COLUMN max_memory_usage_percentage REAL DEFAULT 90; + +-- Add max_cpu_usage_percentage column to the ocp_shared_cluster_configurations table of type real +-- default value 100 +ALTER TABLE ocp_shared_cluster_configurations ADD COLUMN max_cpu_usage_percentage REAL DEFAULT 100; + +COMMIT; diff --git a/docs/api-reference/swagger.yaml b/docs/api-reference/swagger.yaml index 5144bb77..07a175fe 100644 --- a/docs/api-reference/swagger.yaml +++ b/docs/api-reference/swagger.yaml @@ -2130,6 +2130,16 @@ components: deployer: openshift_cnv_nfs_path: /IBMfoobar/data01 openshift_cnv_nfs_server: fsf-region.domain.com + max_memory_usage_percentage: + type: integer + description: The maximum memory usage percentage for a cluster to be considered healthy + example: 80 + default: 90 + 
max_cpu_usage_percentage: + type: integer + description: The maximum CPU usage percentage for a cluster to be considered healthy + example: 80 + default: 100 example: name: ocp-cluster-1 api_url: https://api.ocp-cluster-1.com:6443 diff --git a/go.mod b/go.mod index 848c117a..7ffde819 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/rhpds/sandbox -go 1.21 +go 1.22.0 + +toolchain go1.22.4 require ( github.com/aws/aws-lambda-go v1.46.0 @@ -22,9 +24,10 @@ require ( github.com/prometheus/client_golang v1.19.0 github.com/sosedoff/ansible-vault-go v0.2.0 golang.org/x/term v0.18.0 - k8s.io/api v0.29.2 - k8s.io/apimachinery v0.29.2 - k8s.io/client-go v0.29.2 + k8s.io/api v0.30.2 + k8s.io/apimachinery v0.30.2 + k8s.io/client-go v0.30.2 + k8s.io/metrics v0.30.2 ) require ( @@ -84,7 +87,7 @@ require ( github.com/segmentio/asm v1.2.0 // indirect github.com/spf13/pflag v1.0.5 // indirect golang.org/x/crypto v0.21.0 // indirect - golang.org/x/net v0.22.0 // indirect + golang.org/x/net v0.23.0 // indirect golang.org/x/oauth2 v0.18.0 // indirect golang.org/x/sys v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect diff --git a/go.sum b/go.sum index c016b497..bbc7da97 100644 --- a/go.sum +++ b/go.sum @@ -212,10 +212,10 @@ github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9 github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4= -github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o= -github.com/onsi/gomega v1.29.0 h1:KIA/t2t5UBzoirT4H9tsML45GEbo3ouUnBHsCfD2tVg= -github.com/onsi/gomega v1.29.0/go.mod 
h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= +github.com/onsi/ginkgo/v2 v2.15.0 h1:79HwNRBAZHOEwrczrgSOPy+eFTTlIGELKy5as+ClttY= +github.com/onsi/ginkgo/v2 v2.15.0/go.mod h1:HlxMHtYF57y6Dpf+mc5529KKmSq9h2FpCF+/ZkwUxKM= +github.com/onsi/gomega v1.31.0 h1:54UJxxj6cPInHS3a35wm6BK/F9nHYueZ1NVujHDrnXE= +github.com/onsi/gomega v1.31.0/go.mod h1:DW9aCi7U6Yi40wNVAvT6kzFnEVEI5n3DloYBiKiT6zk= github.com/perimeterx/marshmallow v1.1.5 h1:a2LALqQ1BlHM8PZblsDdidgv1mWi1DgC2UmX50IvK2s= github.com/perimeterx/marshmallow v1.1.5/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0Vchtcl8we9tYaXw= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -304,8 +304,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= -golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI= golang.org/x/oauth2 v0.18.0/go.mod h1:Wf7knwG0MPoWIMMBgFlEaSUDaKskp0dCfrlJRJXbBi8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -388,16 +388,18 @@ gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= 
-k8s.io/api v0.29.2 h1:hBC7B9+MU+ptchxEqTNW2DkUosJpp1P+Wn6YncZ474A= -k8s.io/api v0.29.2/go.mod h1:sdIaaKuU7P44aoyyLlikSLayT6Vb7bvJNCX105xZXY0= -k8s.io/apimachinery v0.29.2 h1:EWGpfJ856oj11C52NRCHuU7rFDwxev48z+6DSlGNsV8= -k8s.io/apimachinery v0.29.2/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= -k8s.io/client-go v0.29.2 h1:FEg85el1TeZp+/vYJM7hkDlSTFZ+c5nnK44DJ4FyoRg= -k8s.io/client-go v0.29.2/go.mod h1:knlvFZE58VpqbQpJNbCbctTVXcd35mMyAAwBdpt4jrA= +k8s.io/api v0.30.2 h1:+ZhRj+28QT4UOH+BKznu4CBgPWgkXO7XAvMcMl0qKvI= +k8s.io/api v0.30.2/go.mod h1:ULg5g9JvOev2dG0u2hig4Z7tQ2hHIuS+m8MNZ+X6EmI= +k8s.io/apimachinery v0.30.2 h1:fEMcnBj6qkzzPGSVsAZtQThU62SmQ4ZymlXRC5yFSCg= +k8s.io/apimachinery v0.30.2/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= +k8s.io/client-go v0.30.2 h1:sBIVJdojUNPDU/jObC+18tXWcTJVcwyqS9diGdWHk50= +k8s.io/client-go v0.30.2/go.mod h1:JglKSWULm9xlJLx4KCkfLLQ7XwtlbflV6uFFSHTMgVs= k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/metrics v0.30.2 h1:zj4kIPTCfEbY0RHEogpA7QtlItU7xaO11+Gz1zVDxlc= +k8s.io/metrics v0.30.2/go.mod h1:GpoO5XTy/g8CclVLtgA5WTrr2Cy5vCsqr5Xa/0ETWIk= k8s.io/utils v0.0.0-20240310230437-4693a0247e57 h1:gbqbevonBh57eILzModw6mrkbwM0gQBEuevE/AaBsHY= k8s.io/utils v0.0.0-20240310230437-4693a0247e57/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= diff --git a/internal/models/ocp_sandbox.go b/internal/models/ocp_sandbox.go index e4f08f85..c0fd7b6f 100644 --- a/internal/models/ocp_sandbox.go +++ b/internal/models/ocp_sandbox.go @@ -19,6 +19,7 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" 
"k8s.io/client-go/tools/clientcmd" + metricsv "k8s.io/metrics/pkg/client/clientset/versioned" ) type OcpSandboxProvider struct { @@ -27,19 +28,21 @@ type OcpSandboxProvider struct { } type OcpSharedClusterConfiguration struct { - ID int `json:"id"` - Name string `json:"name"` - ApiUrl string `json:"api_url"` - IngressDomain string `json:"ingress_domain"` - Kubeconfig string `json:"kubeconfig"` - Token string `json:"token"` - CreatedAt time.Time `json:"created_at"` - UpdatedAt time.Time `json:"updated_at"` - Annotations map[string]string `json:"annotations"` - Valid bool `json:"valid"` - AdditionalVars map[string]any `json:"additional_vars,omitempty"` - DbPool *pgxpool.Pool `json:"-"` - VaultSecret string `json:"-"` + ID int `json:"id"` + Name string `json:"name"` + ApiUrl string `json:"api_url"` + IngressDomain string `json:"ingress_domain"` + Kubeconfig string `json:"kubeconfig"` + Token string `json:"token"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + Annotations map[string]string `json:"annotations"` + Valid bool `json:"valid"` + AdditionalVars map[string]any `json:"additional_vars,omitempty"` + MaxMemoryUsagePercentage float64 `json:"max_memory_usage_percentage"` + MaxCpuUsagePercentage float64 `json:"max_cpu_usage_percentage"` + DbPool *pgxpool.Pool `json:"-"` + VaultSecret string `json:"-"` } type OcpSharedClusterConfigurations []OcpSharedClusterConfiguration @@ -114,6 +117,22 @@ func (p *OcpSharedClusterConfiguration) Bind(r *http.Request) error { return errors.New("annotations is required") } + if p.MaxMemoryUsagePercentage < 0 || p.MaxMemoryUsagePercentage > 100 { + return errors.New("max_memory_usage_percentage must be between 0 and 100") + } + if p.MaxCpuUsagePercentage < 0 || p.MaxCpuUsagePercentage > 100 { + return errors.New("max_cpu_usage_percentage must be between 0 and 100") + } + + // Set default values for CPU and Memory usage + if p.MaxMemoryUsagePercentage == 0 { + p.MaxMemoryUsagePercentage = 90 + } + 
+ if p.MaxCpuUsagePercentage == 0 { + p.MaxCpuUsagePercentage = 100 + } + p.Valid = true return nil @@ -137,9 +156,29 @@ func (p *OcpSharedClusterConfiguration) Save() error { if err := p.DbPool.QueryRow( context.Background(), `INSERT INTO ocp_shared_cluster_configurations - (name, api_url, ingress_domain, kubeconfig, token, annotations, valid, additional_vars) - VALUES ($1, $2, $3, pgp_sym_encrypt($4::text, $5), pgp_sym_encrypt($6::text, $5), $7, $8, $9) RETURNING id`, - p.Name, p.ApiUrl, p.IngressDomain, p.Kubeconfig, p.VaultSecret, p.Token, p.Annotations, p.Valid, p.AdditionalVars, + (name, + api_url, + ingress_domain, + kubeconfig, + token, + annotations, + valid, + additional_vars, + max_memory_usage_percentage, + max_cpu_usage_percentage) + VALUES ($1, $2, $3, pgp_sym_encrypt($4::text, $5), pgp_sym_encrypt($6::text, $5), $7, $8, $9, $10, $11) + RETURNING id`, + p.Name, + p.ApiUrl, + p.IngressDomain, + p.Kubeconfig, + p.VaultSecret, + p.Token, + p.Annotations, + p.Valid, + p.AdditionalVars, + p.MaxMemoryUsagePercentage, + p.MaxCpuUsagePercentage, ).Scan(&p.ID); err != nil { return err } @@ -162,9 +201,22 @@ func (p *OcpSharedClusterConfiguration) Update() error { token = pgp_sym_encrypt($6::text, $5), annotations = $7, valid = $8, - additional_vars = $9 + additional_vars = $9, + max_memory_usage_percentage = $11, + max_cpu_usage_percentage = $12 WHERE id = $10`, - p.Name, p.ApiUrl, p.IngressDomain, p.Kubeconfig, p.VaultSecret, p.Token, p.Annotations, p.Valid, p.AdditionalVars, p.ID, + p.Name, + p.ApiUrl, + p.IngressDomain, + p.Kubeconfig, + p.VaultSecret, + p.Token, + p.Annotations, + p.Valid, + p.AdditionalVars, + p.ID, + p.MaxMemoryUsagePercentage, + p.MaxCpuUsagePercentage, ); err != nil { return err } @@ -219,7 +271,9 @@ func (p *OcpSandboxProvider) GetOcpSharedClusterConfigurationByName(name string) updated_at, annotations, valid, - additional_vars + additional_vars, + max_memory_usage_percentage, + max_cpu_usage_percentage FROM 
ocp_shared_cluster_configurations WHERE name = $2`, p.VaultSecret, name, ) @@ -237,6 +291,8 @@ func (p *OcpSandboxProvider) GetOcpSharedClusterConfigurationByName(name string) &cluster.Annotations, &cluster.Valid, &cluster.AdditionalVars, + &cluster.MaxMemoryUsagePercentage, + &cluster.MaxCpuUsagePercentage, ); err != nil { return OcpSharedClusterConfiguration{}, err } @@ -253,7 +309,19 @@ func (p *OcpSandboxProvider) GetOcpSharedClusterConfigurations() (OcpSharedClust rows, err := p.DbPool.Query( context.Background(), `SELECT - id, name, api_url, ingress_domain, pgp_sym_decrypt(kubeconfig::bytea, $1), pgp_sym_decrypt(token::bytea, $1), created_at, updated_at, annotations, valid, additional_vars + id, + name, + api_url, + ingress_domain, + pgp_sym_decrypt(kubeconfig::bytea, $1), + pgp_sym_decrypt(token::bytea, $1), + created_at, + updated_at, + annotations, + valid, + additional_vars, + max_memory_usage_percentage, + max_cpu_usage_percentage FROM ocp_shared_cluster_configurations`, p.VaultSecret, ) @@ -280,6 +348,8 @@ func (p *OcpSandboxProvider) GetOcpSharedClusterConfigurations() (OcpSharedClust &cluster.Annotations, &cluster.Valid, &cluster.AdditionalVars, + &cluster.MaxMemoryUsagePercentage, + &cluster.MaxCpuUsagePercentage, ); err != nil { return []OcpSharedClusterConfiguration{}, err } @@ -623,8 +693,24 @@ func (a *OcpSharedClusterConfiguration) CreateRestConfig() (*rest.Config, error) return clientcmd.RESTConfigFromKubeConfig([]byte(a.Kubeconfig)) } +func includeNodeInUsageCalculation(conditions []v1.NodeCondition) bool { + nodeReady := false + for _, condition := range conditions { + if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue { + nodeReady = true + break + } + + // If a condition is not memorypressure and is true, return false + if condition.Type != v1.NodeMemoryPressure && condition.Status == v1.ConditionTrue { + return false + } + } + + return nodeReady +} + func (a *OcpSandboxProvider) Request(serviceUuid string, 
cloud_selector map[string]string, annotations map[string]string, multiple bool, ctx context.Context) (OcpSandboxWithCreds, error) { - var minOcpMemoryUsage float64 var selectedCluster OcpSharedClusterConfiguration // Ensure annotation has guid @@ -695,6 +781,13 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri continue providerLoop } + clientsetMetrics, err := metricsv.NewForConfig(config) + if err != nil { + log.Logger.Error("Error creating OCP metrics client", "error", err) + rnew.SetStatus("error") + continue providerLoop + } + nodes, err := clientset.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{LabelSelector: "node-role.kubernetes.io/worker="}) if err != nil { log.Logger.Error("Error listing OCP nodes", "error", err) @@ -703,47 +796,64 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri } var totalAllocatableCpu, totalAllocatableMemory int64 - var totalRequestedCpu, totalRequestedMemory int64 + var totalUsageCpu, totalUsageMemory int64 for _, node := range nodes.Items { + + if !includeNodeInUsageCalculation(node.Status.Conditions) { + log.Logger.Info("Node not included in calculation", + "node", + node.Name, + "conditions", + node.Status.Conditions, + ) + continue + } + allocatableCpu := node.Status.Allocatable.Cpu().MilliValue() allocatableMemory := node.Status.Allocatable.Memory().Value() - podList, err := clientset.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{FieldSelector: "spec.nodeName=" + node.Name}) + totalAllocatableCpu += allocatableCpu + totalAllocatableMemory += allocatableMemory + + nodeMetric, err := clientsetMetrics.MetricsV1beta1(). + NodeMetricses(). 
+ Get(context.Background(), node.Name, metav1.GetOptions{}) + if err != nil { - log.Logger.Error("Error listing OCP pods", "error", err) + log.Logger.Error("Error Get OCP node metrics v1beta1", "error", err) rnew.SetStatus("error") continue providerLoop } - totalRequestedCpuForNode := int64(0) - totalRequestedMemoryForNode := int64(0) - for _, pod := range podList.Items { - totalRequestedCpuForNode += pod.Spec.Containers[0].Resources.Requests.Cpu().MilliValue() - totalRequestedMemoryForNode += pod.Spec.Containers[0].Resources.Requests.Memory().Value() - } + mem, _ := nodeMetric.Usage.Memory().AsInt64() + cpu := nodeMetric.Usage.Cpu().MilliValue() - totalAllocatableCpu += allocatableCpu - totalAllocatableMemory += allocatableMemory - totalRequestedCpu += totalRequestedCpuForNode - totalRequestedMemory += totalRequestedMemoryForNode + totalUsageCpu += cpu + totalUsageMemory += mem } // Calculate total usage for the cluster - cpuUsage := (float64(totalRequestedCpu) / float64(totalAllocatableCpu)) * 100 - memoryUsage := (float64(totalRequestedMemory) / float64(totalAllocatableMemory)) * 100 - if minOcpMemoryUsage == 0 || memoryUsage < minOcpMemoryUsage { + clusterCpuUsage := (float64(totalUsageCpu) / float64(totalAllocatableCpu)) * 100 + clusterMemoryUsage := (float64(totalUsageMemory) / float64(totalAllocatableMemory)) * 100 + log.Logger.Info( + "Cluster Usage", + "Cluster", cluster.Name, + "CPU% Usage", clusterCpuUsage, + "Memory% Usage", clusterMemoryUsage, + ) + if clusterMemoryUsage < cluster.MaxMemoryUsagePercentage && clusterCpuUsage < cluster.MaxCpuUsagePercentage { selectedCluster = cluster - minOcpMemoryUsage = memoryUsage + log.Logger.Info("selectedCluster", "cluster", selectedCluster.Name) + break providerLoop } - log.Logger.Info("Cluster Usage", - "CPU Usage (Requests)", cpuUsage, - "Memory Usage (Requests)", memoryUsage) } - log.Logger.Info("selectedCluster", "cluster", selectedCluster.Name) if selectedCluster.Name == "" { - log.Logger.Error("Error 
electing cluster", "name", rnew.Name) + log.Logger.Error("Error electing cluster", + "name", rnew.Name, + "serviceUuid", rnew.ServiceUuid, + "reason", "no cluster available") rnew.SetStatus("error") return } diff --git a/readme.adoc b/readme.adoc index e4b715b8..f2942525 100644 --- a/readme.adoc +++ b/readme.adoc @@ -14,7 +14,7 @@ This repository is a consolidated codebase for everything related to sandboxes f .Build all binaries -You will need to have 'go' setup on your machine. Please use version 1.21 onwards +You will need to have 'go' setup on your machine. Please use version 1.22 onwards ---- make ----