From 70a4491b188b14099b218075b3ba54b4d5220857 Mon Sep 17 00:00:00 2001 From: Mikolaj Stefaniak Date: Fri, 7 Jun 2024 09:03:19 +0000 Subject: [PATCH] feat: metrics --- Dockerfile | 1 + cmd/main.go | 4 +- config/certmanager/certificate.yaml | 39 ---- config/certmanager/kustomization.yaml | 5 - config/certmanager/kustomizeconfig.yaml | 8 - config/default/kustomization.yaml | 46 ++--- config/default/manager_auth_proxy_patch.yaml | 50 ----- config/default/manager_config_dev_patch.yaml | 14 ++ config/default/manager_config_patch.yaml | 14 ++ config/default/manager_webhook_patch.yaml | 14 ++ config/default/webhookcainjection_patch.yaml | 29 --- config/prometheus/gmp.yaml | 35 ++++ config/prometheus/kustomization.yaml | 21 +- config/prometheus/monitor.yaml | 20 +- .../autoscaling_v1alpha1_startupcpuboost.yaml | 12 -- config/samples/kustomization.yaml | 4 - go.mod | 10 +- internal/boost/manager.go | 3 + internal/boost/manager_test.go | 11 +- internal/boost/startupcpuboost.go | 6 +- internal/boost/startupcpuboost_test.go | 15 +- internal/metrics/metrics.go | 186 ++++++++++++++++++ internal/metrics/metrics_suite_test.go | 27 +++ internal/metrics/metrics_test.go | 88 +++++++++ 24 files changed, 473 insertions(+), 189 deletions(-) delete mode 100644 config/certmanager/certificate.yaml delete mode 100644 config/certmanager/kustomization.yaml delete mode 100644 config/certmanager/kustomizeconfig.yaml delete mode 100644 config/default/manager_auth_proxy_patch.yaml delete mode 100644 config/default/webhookcainjection_patch.yaml create mode 100644 config/prometheus/gmp.yaml delete mode 100644 config/samples/autoscaling_v1alpha1_startupcpuboost.yaml delete mode 100644 config/samples/kustomization.yaml create mode 100644 internal/metrics/metrics.go create mode 100644 internal/metrics/metrics_suite_test.go create mode 100644 internal/metrics/metrics_test.go diff --git a/Dockerfile b/Dockerfile index d35fc7a..d53a385 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,6 +33,7 @@ COPY 
internal/controller/ internal/controller/ COPY internal/util/ internal/util/ COPY internal/webhook/ internal/webhook/ COPY internal/config/ internal/config/ +COPY internal/metrics/ internal/metrics/ # Build # the GOARCH has not a default value to allow the binary be built according to the host where the command diff --git a/cmd/main.go b/cmd/main.go index f73401c..b2040f1 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -34,6 +34,7 @@ import ( "github.com/google/kube-startup-cpu-boost/internal/boost" "github.com/google/kube-startup-cpu-boost/internal/config" "github.com/google/kube-startup-cpu-boost/internal/controller" + "github.com/google/kube-startup-cpu-boost/internal/metrics" "github.com/google/kube-startup-cpu-boost/internal/util" boostWebhook "github.com/google/kube-startup-cpu-boost/internal/webhook" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" @@ -60,8 +61,9 @@ func main() { os.Exit(1) } ctrl.SetLogger(config.Logger(cfg.ZapDevelopment, cfg.ZapLogLevel)) - tlsOpts := []func(*tls.Config){} + metrics.Register() + tlsOpts := []func(*tls.Config){} webhookServer := webhook.NewServer(webhook.Options{ TLSOpts: tlsOpts, Port: 9443, diff --git a/config/certmanager/certificate.yaml b/config/certmanager/certificate.yaml deleted file mode 100644 index f275e87..0000000 --- a/config/certmanager/certificate.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# The following manifests contain a self-signed issuer CR and a certificate CR. -# More document can be found at https://docs.cert-manager.io -# WARNING: Targets CertManager v1.0. Check https://cert-manager.io/docs/installation/upgrading/ for breaking changes. 
-apiVersion: cert-manager.io/v1 -kind: Issuer -metadata: - labels: - app.kubernetes.io/name: certificate - app.kubernetes.io/instance: serving-cert - app.kubernetes.io/component: certificate - app.kubernetes.io/created-by: kube-startup-cpu-boost - app.kubernetes.io/part-of: kube-startup-cpu-boost - app.kubernetes.io/managed-by: kustomize - name: selfsigned-issuer - namespace: system -spec: - selfSigned: {} ---- -apiVersion: cert-manager.io/v1 -kind: Certificate -metadata: - labels: - app.kubernetes.io/name: certificate - app.kubernetes.io/instance: serving-cert - app.kubernetes.io/component: certificate - app.kubernetes.io/created-by: kube-startup-cpu-boost - app.kubernetes.io/part-of: kube-startup-cpu-boost - app.kubernetes.io/managed-by: kustomize - name: serving-cert # this name should match the one appeared in kustomizeconfig.yaml - namespace: system -spec: - # SERVICE_NAME and SERVICE_NAMESPACE will be substituted by kustomize - dnsNames: - - SERVICE_NAME.SERVICE_NAMESPACE.svc - - SERVICE_NAME.SERVICE_NAMESPACE.svc.cluster.local - issuerRef: - kind: Issuer - name: selfsigned-issuer - secretName: webhook-server-cert # this secret will not be prefixed, since it's not managed by kustomize diff --git a/config/certmanager/kustomization.yaml b/config/certmanager/kustomization.yaml deleted file mode 100644 index bebea5a..0000000 --- a/config/certmanager/kustomization.yaml +++ /dev/null @@ -1,5 +0,0 @@ -resources: -- certificate.yaml - -configurations: -- kustomizeconfig.yaml diff --git a/config/certmanager/kustomizeconfig.yaml b/config/certmanager/kustomizeconfig.yaml deleted file mode 100644 index cf6f89e..0000000 --- a/config/certmanager/kustomizeconfig.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# This configuration is for teaching kustomize how to update name ref substitution -nameReference: -- kind: Issuer - group: cert-manager.io - fieldSpecs: - - kind: Certificate - group: cert-manager.io - path: spec/issuerRef/name diff --git a/config/default/kustomization.yaml 
b/config/default/kustomization.yaml index 63019eb..7ad399c 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -1,19 +1,20 @@ -# Adds namespace to all resources. -namespace: kube-startup-cpu-boost-system +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -# Value of this field is prepended to the -# names of all resources, e.g. a deployment named -# "wordpress" becomes "alices-wordpress". -# Note that it should also match with the prefix (text before '-') of the namespace -# field above. +namespace: kube-startup-cpu-boost-system namePrefix: kube-startup-cpu-boost- -# Labels to add to all resources and selectors. -#labels: -#- includeSelectors: true -# pairs: -# someName: someValue - resources: - ../crd - ../rbac @@ -21,20 +22,13 @@ resources: - ../internalcert - ../webhook -# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. +# [PROMETHEUS] To enable prometheus metrics collection, uncomment the following line +# NOTE: check resources inside prometheus folder to select desired collection method #- ../prometheus -patchesStrategicMerge: -# Protect the /metrics endpoint by putting it behind auth. -# If you want your controller-manager to expose the /metrics -# endpoint w/o any authn/z, please comment the following line. 
-- manager_auth_proxy_patch.yaml - -- manager_config_patch.yaml - -# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in -# crd/kustomization.yaml -- manager_webhook_patch.yaml +patches: +- path: manager_config_patch.yaml +- path: manager_webhook_patch.yaml -# Uncomment below for development +# Uncomment below for local development #- manager_config_dev_patch.yaml diff --git a/config/default/manager_auth_proxy_patch.yaml b/config/default/manager_auth_proxy_patch.yaml deleted file mode 100644 index 7945f64..0000000 --- a/config/default/manager_auth_proxy_patch.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# This patch inject a sidecar container which is a HTTP proxy for the -# controller manager, it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews. -apiVersion: apps/v1 -kind: Deployment -metadata: - name: controller-manager - namespace: system -spec: - template: - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/arch - operator: In - values: - - amd64 - - arm64 - - ppc64le - - s390x - - key: kubernetes.io/os - operator: In - values: - - linux - containers: - - name: kube-rbac-proxy - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - "ALL" - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1 - args: - - "--secure-listen-address=0.0.0.0:8443" - - "--upstream=http://127.0.0.1:8080/" - - "--logtostderr=true" - - "--v=0" - ports: - - containerPort: 8443 - protocol: TCP - name: https - resources: - limits: - cpu: 500m - memory: 128Mi - requests: - cpu: 5m - memory: 64Mi diff --git a/config/default/manager_config_dev_patch.yaml b/config/default/manager_config_dev_patch.yaml index 1438795..9cd3fca 100644 --- a/config/default/manager_config_dev_patch.yaml +++ b/config/default/manager_config_dev_patch.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/config/default/manager_config_patch.yaml b/config/default/manager_config_patch.yaml index 34ffc17..8d7716d 100644 --- a/config/default/manager_config_patch.yaml +++ b/config/default/manager_config_patch.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/config/default/manager_webhook_patch.yaml b/config/default/manager_webhook_patch.yaml index d71dd8f..09f1a61 100644 --- a/config/default/manager_webhook_patch.yaml +++ b/config/default/manager_webhook_patch.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: apps/v1 kind: Deployment metadata: diff --git a/config/default/webhookcainjection_patch.yaml b/config/default/webhookcainjection_patch.yaml deleted file mode 100644 index f4ab2ed..0000000 --- a/config/default/webhookcainjection_patch.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# This patch add annotation to admission webhook config and -# CERTIFICATE_NAMESPACE and CERTIFICATE_NAME will be substituted by kustomize -apiVersion: admissionregistration.k8s.io/v1 -kind: MutatingWebhookConfiguration -metadata: - labels: - app.kubernetes.io/name: mutatingwebhookconfiguration - app.kubernetes.io/instance: mutating-webhook-configuration - app.kubernetes.io/component: webhook - app.kubernetes.io/created-by: kube-startup-cpu-boost - app.kubernetes.io/part-of: kube-startup-cpu-boost - app.kubernetes.io/managed-by: kustomize - name: mutating-webhook-configuration - annotations: - cert-manager.io/inject-ca-from: CERTIFICATE_NAMESPACE/CERTIFICATE_NAME ---- -apiVersion: admissionregistration.k8s.io/v1 -kind: ValidatingWebhookConfiguration -metadata: - labels: - app.kubernetes.io/name: validatingwebhookconfiguration - app.kubernetes.io/instance: validating-webhook-configuration - app.kubernetes.io/component: webhook - app.kubernetes.io/created-by: kube-startup-cpu-boost - app.kubernetes.io/part-of: kube-startup-cpu-boost - app.kubernetes.io/managed-by: kustomize - name: validating-webhook-configuration - annotations: - cert-manager.io/inject-ca-from: CERTIFICATE_NAMESPACE/CERTIFICATE_NAME diff --git a/config/prometheus/gmp.yaml b/config/prometheus/gmp.yaml new file 
mode 100644 index 0000000..176fb94 --- /dev/null +++ b/config/prometheus/gmp.yaml @@ -0,0 +1,35 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Google Managed Prometheus collection +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: + app.kubernetes.io/instance: controller-manager-metrics-monitor + app.kubernetes.io/component: metrics + app.kubernetes.io/created-by: kube-startup-cpu-boost + app.kubernetes.io/part-of: kube-startup-cpu-boost + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-monitor + namespace: system +spec: + selector: + matchLabels: + control-plane: controller-manager + endpoints: + - port: 8080 + interval: 30s diff --git a/config/prometheus/kustomization.yaml b/config/prometheus/kustomization.yaml index ed13716..4b066c1 100644 --- a/config/prometheus/kustomization.yaml +++ b/config/prometheus/kustomization.yaml @@ -1,2 +1,21 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Uncomment the desired metrics collection method resources: -- monitor.yaml +# For Google Managed Prometheus PodMonitoring +- gmp.yaml + +# For Prometheus ServiceMonitor +#- monitor.yaml \ No newline at end of file diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml index aeec2e0..599a4b0 100644 --- a/config/prometheus/monitor.yaml +++ b/config/prometheus/monitor.yaml @@ -1,3 +1,16 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Prometheus Monitor Service (Metrics) apiVersion: monitoring.coreos.com/v1 @@ -15,12 +28,7 @@ metadata: namespace: system spec: endpoints: - - path: /metrics - port: https - scheme: https - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - tlsConfig: - insecureSkipVerify: true + - targetPort: 8080 selector: matchLabels: control-plane: controller-manager diff --git a/config/samples/autoscaling_v1alpha1_startupcpuboost.yaml b/config/samples/autoscaling_v1alpha1_startupcpuboost.yaml deleted file mode 100644 index 7b686e0..0000000 --- a/config/samples/autoscaling_v1alpha1_startupcpuboost.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: autoscaling.x-k8s.io/v1alpha1 -kind: StartupCPUBoost -metadata: - labels: - app.kubernetes.io/name: startupcpuboost - app.kubernetes.io/instance: startupcpuboost-sample - app.kubernetes.io/part-of: kube-startup-cpu-boost - app.kubernetes.io/managed-by: kustomize - app.kubernetes.io/created-by: kube-startup-cpu-boost - name: startupcpuboost-sample -spec: - # TODO(user): Add fields here diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml deleted file mode 100644 index 5161fdf..0000000 --- a/config/samples/kustomization.yaml +++ /dev/null @@ -1,4 +0,0 @@ -## Append samples of your project ## -resources: -- autoscaling_v1alpha1_startupcpuboost.yaml -#+kubebuilder:scaffold:manifestskustomizesamples diff --git a/go.mod b/go.mod index bbcbdc0..b58060d 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,11 @@ require ( github.com/onsi/ginkgo/v2 v2.16.0 github.com/onsi/gomega v1.31.1 github.com/open-policy-agent/cert-controller v0.10.1 + github.com/prometheus/client_golang v1.19.0 + github.com/prometheus/client_model v0.6.0 + go.uber.org/mock v0.4.0 + go.uber.org/zap v1.27.0 + gomodules.xyz/jsonpatch/v2 v2.4.0 gopkg.in/inf.v0 v0.9.1 k8s.io/api v0.29.2 k8s.io/apimachinery v0.29.2 @@ -43,15 +48,11 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg 
v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_golang v1.19.0 // indirect - github.com/prometheus/client_model v0.6.0 // indirect github.com/prometheus/common v0.50.0 // indirect github.com/prometheus/procfs v0.13.0 // indirect github.com/spf13/pflag v1.0.5 // indirect go.uber.org/atomic v1.11.0 // indirect - go.uber.org/mock v0.4.0 go.uber.org/multierr v1.11.0 // indirect - go.uber.org/zap v1.27.0 golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect golang.org/x/net v0.23.0 // indirect golang.org/x/oauth2 v0.18.0 // indirect @@ -60,7 +61,6 @@ require ( golang.org/x/text v0.14.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.19.0 // indirect - gomodules.xyz/jsonpatch/v2 v2.4.0 google.golang.org/appengine v1.6.8 // indirect google.golang.org/protobuf v1.33.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/internal/boost/manager.go b/internal/boost/manager.go index d9b62c1..dbb461c 100644 --- a/internal/boost/manager.go +++ b/internal/boost/manager.go @@ -28,6 +28,7 @@ import ( "github.com/go-logr/logr" "github.com/google/kube-startup-cpu-boost/internal/boost/duration" + "github.com/google/kube-startup-cpu-boost/internal/metrics" ctrl "sigs.k8s.io/controller-runtime" ) @@ -118,6 +119,7 @@ func (m *managerImpl) AddStartupCPUBoost(ctx context.Context, boost StartupCPUBo log := m.loggerFromContext(ctx).WithValues("boost", boost.Name, "namespace", boost.Namespace) log.V(5).Info("handling startup-cpu-boost create") m.addStartupCPUBoost(boost) + metrics.NewBoostConfiguration(boost.Namespace()) return nil } @@ -132,6 +134,7 @@ func (m *managerImpl) RemoveStartupCPUBoost(ctx context.Context, namespace, name } key := boostKey{name: name, namespace: namespace} delete(m.timePolicyBoosts, key) + metrics.DeleteBoostConfiguration(namespace) } // StartupCPUBoost returns a startup-cpu-boost with a given name and namespace diff --git a/internal/boost/manager_test.go 
b/internal/boost/manager_test.go index b141481..fb6aed4 100644 --- a/internal/boost/manager_test.go +++ b/internal/boost/manager_test.go @@ -20,6 +20,7 @@ import ( autoscaling "github.com/google/kube-startup-cpu-boost/api/v1alpha1" cpuboost "github.com/google/kube-startup-cpu-boost/internal/boost" + "github.com/google/kube-startup-cpu-boost/internal/metrics" "github.com/google/kube-startup-cpu-boost/internal/mock" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -32,6 +33,9 @@ import ( var _ = Describe("Manager", func() { var manager cpuboost.Manager + BeforeEach(func() { + metrics.ClearSystemMetrics() + }) Describe("Registers startup-cpu-boost", func() { var ( spec *autoscaling.StartupCPUBoost @@ -69,8 +73,10 @@ var _ = Describe("Manager", func() { Expect(stored.Name()).To(Equal(spec.Name)) Expect(stored.Namespace()).To(Equal(spec.Namespace)) }) + It("updates boost configurations metric", func() { + Expect(metrics.BoostConfigurations(spec.Namespace)).To(Equal(float64(1))) + }) }) - }) Describe("De-registers startup-cpu-boost", func() { var ( @@ -96,6 +102,9 @@ var _ = Describe("Manager", func() { _, ok := manager.StartupCPUBoost(spec.Namespace, spec.Name) Expect(ok).To(BeFalse()) }) + It("updates boost configurations metric", func() { + Expect(metrics.BoostConfigurations(spec.Namespace)).To(Equal(float64(0))) + }) }) }) Describe("retrieves startup-cpu-boost for a POD", func() { diff --git a/internal/boost/startupcpuboost.go b/internal/boost/startupcpuboost.go index a21c204..6e0722c 100644 --- a/internal/boost/startupcpuboost.go +++ b/internal/boost/startupcpuboost.go @@ -26,6 +26,7 @@ import ( "github.com/google/kube-startup-cpu-boost/internal/boost/duration" bpod "github.com/google/kube-startup-cpu-boost/internal/boost/pod" "github.com/google/kube-startup-cpu-boost/internal/boost/resource" + "github.com/google/kube-startup-cpu-boost/internal/metrics" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" ctrl "sigs.k8s.io/controller-runtime" @@ 
-267,10 +268,13 @@ func (b *StartupCPUBoostImpl) updateStats(e StartupCPUBoostStatsEvent) { activeCnt += boostContainersLen(pod) } b.stats.ActiveContainerBoosts = activeCnt + metrics.SetBoostContainersActive(b.namespace, b.name, float64(activeCnt)) switch e.Type { case StartupCPUBoostStatsPodCreateEvent: pod := e.Object.(*corev1.Pod) - b.stats.TotalContainerBoosts += boostContainersLen(pod) + boostContainersLen := boostContainersLen(pod) + b.stats.TotalContainerBoosts += boostContainersLen + metrics.AddBoostContainersTotal(b.namespace, b.name, float64(boostContainersLen)) } } diff --git a/internal/boost/startupcpuboost_test.go b/internal/boost/startupcpuboost_test.go index a4eb889..86c5089 100644 --- a/internal/boost/startupcpuboost_test.go +++ b/internal/boost/startupcpuboost_test.go @@ -22,6 +22,7 @@ import ( cpuboost "github.com/google/kube-startup-cpu-boost/internal/boost" "github.com/google/kube-startup-cpu-boost/internal/boost/duration" "github.com/google/kube-startup-cpu-boost/internal/boost/resource" + "github.com/google/kube-startup-cpu-boost/internal/metrics" "github.com/google/kube-startup-cpu-boost/internal/mock" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -41,6 +42,7 @@ var _ = Describe("StartupCPUBoost", func() { BeforeEach(func() { pod = podTemplate.DeepCopy() spec = specTemplate.DeepCopy() + metrics.ClearBoostMetrics(spec.Namespace, spec.Name) }) Describe("Instantiates from the API specification", func() { JustBeforeEach(func() { @@ -200,6 +202,10 @@ var _ = Describe("StartupCPUBoost", func() { Expect(stats.ActiveContainerBoosts).To(Equal(2)) Expect(stats.TotalContainerBoosts).To(Equal(2)) }) + It("updates metrics", func() { + Expect(metrics.BoostContainersActive(boost.Namespace(), boost.Name())).To(Equal(float64(2))) + Expect(metrics.BoostContainersTotal(boost.Namespace(), boost.Name())).To(Equal(float64(2))) + }) }) When("POD exists", func() { var existingPod *corev1.Pod @@ -228,6 +234,10 @@ var _ = Describe("StartupCPUBoost", func() { Expect(stats.ActiveContainerBoosts).To(Equal(2)) Expect(stats.TotalContainerBoosts).To(Equal(2)) }) + It("updates metrics", func() { + Expect(metrics.BoostContainersActive(boost.Namespace(), boost.Name())).To(Equal(float64(2))) + Expect(metrics.BoostContainersTotal(boost.Namespace(), boost.Name())).To(Equal(float64(2))) + }) When("boost spec has pod condition policy", func() { BeforeEach(func() { spec.Spec.DurationPolicy.PodCondition = &autoscaling.PodConditionDurationPolicy{ @@ -263,7 +273,6 @@ var _ = Describe("StartupCPUBoost", func() { }) }) }) - Describe("Deletes a pod", func() { JustBeforeEach(func() { boost, err = cpuboost.NewStartupCPUBoost(nil, spec) @@ -284,6 +293,10 @@ var _ = Describe("StartupCPUBoost", func() { Expect(stats.ActiveContainerBoosts).To(Equal(0)) Expect(stats.TotalContainerBoosts).To(Equal(2)) }) + It("updates metrics", func() { + Expect(metrics.BoostContainersActive(boost.Namespace(), boost.Name())).To(Equal(float64(0))) + Expect(metrics.BoostContainersTotal(boost.Namespace(), boost.Name())).To(Equal(float64(2))) + }) }) }) }) diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go new file mode 100644 index 
0000000..0112926 --- /dev/null +++ b/internal/metrics/metrics.go @@ -0,0 +1,186 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package metrics provides Kube Startup CPU Boost metrics for Prometheus. +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +const KubeStartupCPUBoostSubsystem = "boost" + +var ( + // boostConfigurations is the number of container boost + // configurations registered in a boost manager, per namespace. + boostConfigurations *prometheus.GaugeVec + // boostContainersTotal is the number of containers whose + // CPU resources were increased. + boostContainersTotal *prometheus.CounterVec + // boostContainersActive is the number of containers whose + // CPU resources are not yet reverted to their original values. + boostContainersActive *prometheus.GaugeVec +) + +// init initializes all of the Kube Startup CPU Boost metrics. 
+func init() { + boostConfigurations = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Subsystem: KubeStartupCPUBoostSubsystem, + Name: "configurations", + Help: "Number of registered Kube Startup CPU Boost configurations", + }, []string{"namespace"}, + ) + boostContainersTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: KubeStartupCPUBoostSubsystem, + Name: "containers_total", + Help: "Number of a containers which CPU resources were increased", + }, []string{"namespace", "boost"}, + ) + boostContainersActive = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Subsystem: KubeStartupCPUBoostSubsystem, + Name: "containers_active", + Help: "Number of a containers which CPU resources and not yet reverted to their original values", + }, []string{"namespace", "boost"}, + ) +} + +// Register registers all of the Kube Startup CPU Boost metrics +// in the Prometheus registry. +func Register() { + metrics.Registry.MustRegister( + boostConfigurations, + boostContainersTotal, + boostContainersActive, + ) +} + +// NewBoostConfiguration updates all of the relevant metrics when +// a new boost configuration is created +func NewBoostConfiguration(namespace string) { + boostConfigurations.With( + prometheus.Labels{"namespace": namespace}). + Inc() +} + +// DeleteBoostConfiguration updates all of the relevant metrics when +// a boost configuration is deleted +func DeleteBoostConfiguration(namespace string) { + boostConfigurations.With( + prometheus.Labels{"namespace": namespace}). + Dec() +} + +// SetBoostContainersActive updates the activeContainerBoosts metric +// for a given namespace and boost name with a given value +func SetBoostContainersActive(namespace string, boost string, value float64) { + boostContainersActive.With( + prometheus.Labels{"namespace": namespace, "boost": boost}). 
+ Set(value) +} + +// AddBoostContainersTotal adds the given value to the boostContainersTotal +// metric for a given namespace and boost name +func AddBoostContainersTotal(namespace string, boost string, value float64) { + boostContainersTotal.With( + prometheus.Labels{"namespace": namespace, "boost": boost}). + Add(value) +} + +// ClearSystemMetrics clears all of the system metrics. +func ClearSystemMetrics() { + boostConfigurations.Reset() +} + +// ClearBoostMetrics clears all of the relevant metrics for a given +// namespace and boost +func ClearBoostMetrics(namespace string, boost string) { + boostContainersTotal.Delete( + prometheus.Labels{"namespace": namespace, "boost": boost}, + ) + boostContainersActive.Delete( + prometheus.Labels{"namespace": namespace, "boost": boost}, + ) +} + +// BoostConfigurations returns the value of the boostConfigurations +// metric for a given namespace. +func BoostConfigurations(namespace string) float64 { + return gaugeVecValue(boostConfigurations, prometheus.Labels{ + "namespace": namespace, + }) +} + +// BoostContainersTotal returns the value of the boostContainersTotal +// metric for a given namespace and boost name. +func BoostContainersTotal(namespace string, boost string) float64 { + return counterVecValue(boostContainersTotal, prometheus.Labels{ + "namespace": namespace, + "boost": boost, + }) +} + +// BoostContainersActive returns the value of the boostContainersActive +// metric for a given namespace and boost name. +func BoostContainersActive(namespace string, boost string) float64 { + return gaugeVecValue(boostContainersActive, prometheus.Labels{ + "namespace": namespace, + "boost": boost, + }) +} + +// counterVecValue collects and returns the value of a CounterVec +// metric for the given labels. Created for the purpose of tests. 
+func counterVecValue(vec *prometheus.CounterVec, labels prometheus.Labels) (value float64) { + cnt, err := vec.GetMetricWith(labels) + if err != nil { + return + } + collect(cnt, func(m *dto.Metric) { + value += m.GetCounter().GetValue() + }) + return +} + +// gaugeVecValue collects and returns the value of a GaugeVec +// metric for the given labels. Created for the purpose of tests. +func gaugeVecValue(vec *prometheus.GaugeVec, labels prometheus.Labels) (value float64) { + cnt, err := vec.GetMetricWith(labels) + if err != nil { + return + } + collect(cnt, func(m *dto.Metric) { + value += m.GetGauge().GetValue() + }) + return +} + +// collect collects the given prometheus collector and writes +// corresponding metric to the DTO object for further processing. +func collect(col prometheus.Collector, do func(*dto.Metric)) { + ch := make(chan prometheus.Metric) + go func() { + col.Collect(ch) + close(ch) + }() + for x := range ch { + m := &dto.Metric{} + x.Write(m) + do(m) + } +} diff --git a/internal/metrics/metrics_suite_test.go b/internal/metrics/metrics_suite_test.go new file mode 100644 index 0000000..e703cb9 --- /dev/null +++ b/internal/metrics/metrics_suite_test.go @@ -0,0 +1,27 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metrics_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestMetrics(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Metrics Suite") +} diff --git a/internal/metrics/metrics_test.go b/internal/metrics/metrics_test.go new file mode 100644 index 0000000..c10858e --- /dev/null +++ b/internal/metrics/metrics_test.go @@ -0,0 +1,88 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metrics_test + +import ( + "github.com/google/kube-startup-cpu-boost/internal/metrics" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("Metrics", func() { + Describe("registers new boost configuration", func() { + var ( + namespace = "default" + ) + BeforeEach(func() { + metrics.ClearSystemMetrics() + }) + JustBeforeEach(func() { + metrics.NewBoostConfiguration(namespace) + metrics.NewBoostConfiguration(namespace) + }) + It("updates the boost configurations metric", func() { + Expect(metrics.BoostConfigurations(namespace)).To(Equal(float64(2))) + }) + }) + Describe("deletes boost configuration", func() { + var ( + namespace = "default" + ) + BeforeEach(func() { + metrics.ClearSystemMetrics() + }) + JustBeforeEach(func() { + metrics.NewBoostConfiguration(namespace) + metrics.NewBoostConfiguration(namespace) + metrics.DeleteBoostConfiguration(namespace) + }) + It("updates the boost configurations metric", func() { + Expect(metrics.BoostConfigurations(namespace)).To(Equal(float64(1))) + }) + }) + Describe("sets active container boost metric", func() { + var ( + namespace = "default" + boost = "boost-01" + value = float64(5) + ) + BeforeEach(func() { + metrics.ClearBoostMetrics(namespace, boost) + }) + JustBeforeEach(func() { + metrics.SetBoostContainersActive(namespace, boost, value) + }) + It("updates the active container boosts metric", func() { + Expect(metrics.BoostContainersActive(namespace, boost)).To(Equal(value)) + }) + }) + Describe("adds total container boost metric", func() { + var ( + namespace = "default" + boost = "boost-01" + value = float64(5) + ) + BeforeEach(func() { + metrics.ClearBoostMetrics(namespace, boost) + }) + JustBeforeEach(func() { + metrics.AddBoostContainersTotal(namespace, boost, 3) + metrics.AddBoostContainersTotal(namespace, boost, value) + }) + It("updates the total container boosts metric", func() { + Expect(metrics.BoostContainersTotal(namespace, boost)).To(Equal(float64(8))) + }) + }) +})