Skip to content

Commit db73121

Browse files
Enable metrics provider
1 parent b6c5897 commit db73121

File tree

10 files changed

+277
-74
lines changed

10 files changed

+277
-74
lines changed

pkg/config/controller.go

+5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"time"
2121

2222
"github.com/go-logr/logr"
23+
"sigs.k8s.io/controller-runtime/pkg/metrics"
2324
)
2425

2526
// Controller contains configuration options for controllers. It only includes options
@@ -66,6 +67,10 @@ type Controller struct {
6667
// Note: This flag is disabled by default until a future version. It's currently in beta.
6768
UsePriorityQueue *bool
6869

70+
// MetricsProvider allows users to override the location where controller metrics are emitted.
71+
// By default, metrics are emitted to a pre-configured Prometheus registry.
72+
MetricsProvider metrics.ControllerMetricsProvider
73+
6974
// Logger is the logger controllers should use.
7075
Logger logr.Logger
7176
}

pkg/controller/controller.go

+14
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"k8s.io/client-go/util/workqueue"
2626
"k8s.io/klog/v2"
2727
"k8s.io/utils/ptr"
28+
"sigs.k8s.io/controller-runtime/pkg/metrics"
2829

2930
"sigs.k8s.io/controller-runtime/pkg/config"
3031
"sigs.k8s.io/controller-runtime/pkg/controller/priorityqueue"
@@ -84,6 +85,10 @@ type TypedOptions[request comparable] struct {
8485
// Logger will be used to build a default LogConstructor if unset.
8586
Logger logr.Logger
8687

88+
// MetricsProvider allows users to override the location where controller metrics are emitted.
89+
// By default, metrics are emitted to a pre-configured Prometheus registry.
90+
MetricsProvider metrics.ControllerMetricsProvider
91+
8792
// LogConstructor is used to construct a logger used for this controller and passed
8893
// to each reconciliation via the context field.
8994
LogConstructor func(request *request) logr.Logger
@@ -101,6 +106,10 @@ func (options *TypedOptions[request]) DefaultFromConfig(config config.Controller
101106
options.Logger = config.Logger
102107
}
103108

109+
if options.MetricsProvider == nil {
110+
options.MetricsProvider = config.MetricsProvider
111+
}
112+
104113
if options.SkipNameValidation == nil {
105114
options.SkipNameValidation = config.SkipNameValidation
106115
}
@@ -196,6 +205,10 @@ func NewTypedUnmanaged[request comparable](name string, options TypedOptions[req
196205
}
197206
}
198207

208+
if options.MetricsProvider == nil {
209+
options.MetricsProvider = metrics.NewPrometheusProvider()
210+
}
211+
199212
if options.LogConstructor == nil {
200213
log := options.Logger.WithValues(
201214
"controller", name,
@@ -250,6 +263,7 @@ func NewTypedUnmanaged[request comparable](name string, options TypedOptions[req
250263
MaxConcurrentReconciles: options.MaxConcurrentReconciles,
251264
CacheSyncTimeout: options.CacheSyncTimeout,
252265
Name: name,
266+
MetricsProvider: options.MetricsProvider,
253267
LogConstructor: options.LogConstructor,
254268
RecoverPanic: options.RecoverPanic,
255269
LeaderElected: options.NeedLeaderElection,

pkg/internal/controller/controller.go

+29-24
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ import (
3030
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
3131
"k8s.io/apimachinery/pkg/util/uuid"
3232
"k8s.io/client-go/util/workqueue"
33+
"sigs.k8s.io/controller-runtime/pkg/metrics"
3334

3435
"sigs.k8s.io/controller-runtime/pkg/controller/priorityqueue"
35-
ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/internal/controller/metrics"
3636
logf "sigs.k8s.io/controller-runtime/pkg/log"
3737
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3838
"sigs.k8s.io/controller-runtime/pkg/source"
@@ -89,6 +89,9 @@ type Controller[request comparable] struct {
8989
// outside the context of a reconciliation.
9090
LogConstructor func(request *request) logr.Logger
9191

92+
// MetricsProvider is used to route metrics that are fired due to controller reconciles.
93+
MetricsProvider metrics.ControllerMetricsProvider
94+
9295
// RecoverPanic indicates whether the panic caused by reconcile should be recovered.
9396
// Defaults to true.
9497
RecoverPanic *bool
@@ -101,7 +104,7 @@ type Controller[request comparable] struct {
101104
func (c *Controller[request]) Reconcile(ctx context.Context, req request) (_ reconcile.Result, err error) {
102105
defer func() {
103106
if r := recover(); r != nil {
104-
ctrlmetrics.ReconcilePanics.WithLabelValues(c.Name).Inc()
107+
c.MetricsProvider.ReconcilePanics().Inc(map[string]string{labelKeyController: c.Name})
105108

106109
if c.RecoverPanic == nil || *c.RecoverPanic {
107110
for _, fn := range utilruntime.PanicHandlers {
@@ -294,30 +297,32 @@ func (c *Controller[request]) processNextWorkItem(ctx context.Context) bool {
294297
// period.
295298
defer c.Queue.Done(obj)
296299

297-
ctrlmetrics.ActiveWorkers.WithLabelValues(c.Name).Add(1)
298-
defer ctrlmetrics.ActiveWorkers.WithLabelValues(c.Name).Add(-1)
300+
c.MetricsProvider.ActiveWorkers().Add(map[string]string{labelKeyController: c.Name}, 1)
301+
defer c.MetricsProvider.ActiveWorkers().Add(map[string]string{labelKeyController: c.Name}, -1)
299302

300303
c.reconcileHandler(ctx, obj, priority)
301304
return true
302305
}
303306

304307
const (
305-
labelError = "error"
306-
labelRequeueAfter = "requeue_after"
307-
labelRequeue = "requeue"
308-
labelSuccess = "success"
308+
labelKeyController = "controller"
309+
labelKeyResult = "result"
310+
labelError = "error"
311+
labelRequeueAfter = "requeue_after"
312+
labelRequeue = "requeue"
313+
labelSuccess = "success"
309314
)
310315

311316
func (c *Controller[request]) initMetrics() {
312-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelError).Add(0)
313-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeueAfter).Add(0)
314-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeue).Add(0)
315-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelSuccess).Add(0)
316-
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Add(0)
317-
ctrlmetrics.TerminalReconcileErrors.WithLabelValues(c.Name).Add(0)
318-
ctrlmetrics.ReconcilePanics.WithLabelValues(c.Name).Add(0)
319-
ctrlmetrics.WorkerCount.WithLabelValues(c.Name).Set(float64(c.MaxConcurrentReconciles))
320-
ctrlmetrics.ActiveWorkers.WithLabelValues(c.Name).Set(0)
317+
c.MetricsProvider.ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelError}, 0)
318+
c.MetricsProvider.ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeueAfter}, 0)
319+
c.MetricsProvider.ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeue}, 0)
320+
c.MetricsProvider.ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelSuccess}, 0)
321+
c.MetricsProvider.ReconcileErrors().Add(map[string]string{labelKeyController: c.Name}, 0)
322+
c.MetricsProvider.TerminalReconcileErrors().Add(map[string]string{labelKeyController: c.Name}, 0)
323+
c.MetricsProvider.ReconcilePanics().Add(map[string]string{labelKeyController: c.Name}, 0)
324+
c.MetricsProvider.WorkerCount().Set(map[string]string{labelKeyController: c.Name}, float64(c.MaxConcurrentReconciles))
325+
c.MetricsProvider.ActiveWorkers().Set(map[string]string{labelKeyController: c.Name}, 0)
321326
}
322327

323328
func (c *Controller[request]) reconcileHandler(ctx context.Context, req request, priority int) {
@@ -341,12 +346,12 @@ func (c *Controller[request]) reconcileHandler(ctx context.Context, req request,
341346
switch {
342347
case err != nil:
343348
if errors.Is(err, reconcile.TerminalError(nil)) {
344-
ctrlmetrics.TerminalReconcileErrors.WithLabelValues(c.Name).Inc()
349+
c.MetricsProvider.TerminalReconcileErrors().Inc(map[string]string{"controller": c.Name})
345350
} else {
346351
c.Queue.AddWithOpts(priorityqueue.AddOpts{RateLimited: true, Priority: priority}, req)
347352
}
348-
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc()
349-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelError).Inc()
353+
c.MetricsProvider.ReconcileErrors().Inc(map[string]string{labelKeyController: c.Name})
354+
c.MetricsProvider.ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelError})
350355
if !result.IsZero() {
351356
log.Info("Warning: Reconciler returned both a non-zero result and a non-nil error. The result will always be ignored if the error is non-nil and the non-nil error causes requeuing with exponential backoff. For more details, see: https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/reconcile#Reconciler")
352357
}
@@ -359,17 +364,17 @@ func (c *Controller[request]) reconcileHandler(ctx context.Context, req request,
359364
// to result.RequeueAfter
360365
c.Queue.Forget(req)
361366
c.Queue.AddWithOpts(priorityqueue.AddOpts{After: result.RequeueAfter, Priority: priority}, req)
362-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeueAfter).Inc()
367+
c.MetricsProvider.ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeueAfter})
363368
case result.Requeue: //nolint: staticcheck // We have to handle it until it is removed
364369
log.V(5).Info("Reconcile done, requeueing")
365370
c.Queue.AddWithOpts(priorityqueue.AddOpts{RateLimited: true, Priority: priority}, req)
366-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeue).Inc()
371+
c.MetricsProvider.ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeue})
367372
default:
368373
log.V(5).Info("Reconcile successful")
369374
// Finally, if no error occurs we Forget this item so it does not
370375
// get queued again until another change happens.
371376
c.Queue.Forget(req)
372-
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelSuccess).Inc()
377+
c.MetricsProvider.ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelSuccess})
373378
}
374379
}
375380

@@ -380,7 +385,7 @@ func (c *Controller[request]) GetLogger() logr.Logger {
380385

381386
// updateMetrics records the reconcile duration through the controller's MetricsProvider.
382387
func (c *Controller[request]) updateMetrics(reconcileTime time.Duration) {
383-
ctrlmetrics.ReconcileTime.WithLabelValues(c.Name).Observe(reconcileTime.Seconds())
388+
c.MetricsProvider.ReconcileTime().Observe(map[string]string{labelKeyController: c.Name}, reconcileTime.Seconds())
384389
}
385390

386391
// ReconcileIDFromContext gets the reconcileID from the current context.

pkg/internal/controller/controller_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
"sigs.k8s.io/controller-runtime/pkg/handler"
4444
ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/internal/controller/metrics"
4545
"sigs.k8s.io/controller-runtime/pkg/internal/log"
46+
"sigs.k8s.io/controller-runtime/pkg/metrics"
4647
"sigs.k8s.io/controller-runtime/pkg/reconcile"
4748
"sigs.k8s.io/controller-runtime/pkg/source"
4849
)
@@ -75,6 +76,7 @@ var _ = Describe("controller", func() {
7576
NewQueue: func(string, workqueue.TypedRateLimiter[reconcile.Request]) workqueue.TypedRateLimitingInterface[reconcile.Request] {
7677
return queue
7778
},
79+
MetricsProvider: metrics.NewPrometheusProvider(),
7880
LogConstructor: func(_ *reconcile.Request) logr.Logger {
7981
return log.RuntimeLog.WithName("controller").WithName("test")
8082
},
@@ -354,6 +356,7 @@ var _ = Describe("controller", func() {
354356
NewQueue: func(string, workqueue.TypedRateLimiter[TestRequest]) workqueue.TypedRateLimitingInterface[TestRequest] {
355357
return queue
356358
},
359+
MetricsProvider: metrics.NewPrometheusProvider(),
357360
LogConstructor: func(*TestRequest) logr.Logger {
358361
return log.RuntimeLog.WithName("controller").WithName("test")
359362
},

pkg/internal/controller/metrics/metrics.go

-18
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ import (
2020
"time"
2121

2222
"github.com/prometheus/client_golang/prometheus"
23-
"github.com/prometheus/client_golang/prometheus/collectors"
24-
"sigs.k8s.io/controller-runtime/pkg/metrics"
2523
)
2624

2725
var (
@@ -81,19 +79,3 @@ var (
8179
Help: "Number of currently used workers per controller",
8280
}, []string{"controller"})
8381
)
84-
85-
func init() {
86-
metrics.Registry.MustRegister(
87-
ReconcileTotal,
88-
ReconcileErrors,
89-
TerminalReconcileErrors,
90-
ReconcilePanics,
91-
ReconcileTime,
92-
WorkerCount,
93-
ActiveWorkers,
94-
// expose process metrics like CPU, Memory, file descriptor usage etc.
95-
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
96-
// expose all Go runtime metrics like GC stats, memory stats etc.
97-
collectors.NewGoCollector(collectors.WithGoCollectorRuntimeMetrics(collectors.MetricsAll)),
98-
)
99-
}

pkg/manager/internal.go

-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ import (
3535
"k8s.io/client-go/tools/leaderelection"
3636
"k8s.io/client-go/tools/leaderelection/resourcelock"
3737
"k8s.io/client-go/tools/record"
38-
3938
"sigs.k8s.io/controller-runtime/pkg/cache"
4039
"sigs.k8s.io/controller-runtime/pkg/client"
4140
"sigs.k8s.io/controller-runtime/pkg/cluster"

pkg/manager/manager.go

+10
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"k8s.io/client-go/tools/leaderelection/resourcelock"
3434
"k8s.io/client-go/tools/record"
3535
"k8s.io/utils/ptr"
36+
"sigs.k8s.io/controller-runtime/pkg/metrics"
3637
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
3738

3839
"sigs.k8s.io/controller-runtime/pkg/cache"
@@ -223,6 +224,10 @@ type Options struct {
223224
// Metrics are the metricsserver.Options that will be used to create the metricsserver.Server.
224225
Metrics metricsserver.Options
225226

227+
// LeaderElectionMetricProvider allows users to override the location where leader election metrics are emitted.
228+
// By default, metrics are emitted to a pre-configured Prometheus registry.
229+
LeaderElectionMetricProvider metrics.LeaderElectionMetricsProvider
230+
226231
// HealthProbeBindAddress is the TCP address that the controller should bind to
227232
// for serving health probes
228233
// It can be set to "0" or "" to disable serving the health probe.
@@ -401,6 +406,11 @@ func New(config *rest.Config, options Options) (Manager, error) {
401406
if err != nil {
402407
return nil, err
403408
}
409+
leaderElectionMetricsProvider := options.LeaderElectionMetricProvider
410+
if leaderElectionMetricsProvider == nil {
411+
leaderElectionMetricsProvider = metrics.NewPrometheusProvider()
412+
}
413+
metrics.SetLeaderElectionProvider(leaderElectionMetricsProvider)
404414

405415
// Create health probes listener. This will throw an error if the bind
406416
// address is invalid or already in use.

pkg/metrics/leaderelection.go

+17-30
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,34 @@
11
package metrics
22

33
import (
4-
"github.com/prometheus/client_golang/prometheus"
54
"k8s.io/client-go/tools/leaderelection"
65
)
76

8-
// This file is copied and adapted from k8s.io/component-base/metrics/prometheus/clientgo/leaderelection
9-
// which registers metrics to the k8s legacy Registry. We require very
10-
// similar functionality, but must register metrics to a different Registry.
11-
12-
var (
13-
leaderGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
14-
Name: "leader_election_master_status",
15-
Help: "Gauge of if the reporting system is master of the relevant lease, 0 indicates backup, 1 indicates master. 'name' is the string used to identify the lease. Please make sure to group by name.",
16-
}, []string{"name"})
17-
18-
leaderSlowpathCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
19-
Name: "leader_election_slowpath_total",
20-
Help: "Total number of slow path exercised in renewing leader leases. 'name' is the string used to identify the lease. Please make sure to group by name.",
21-
}, []string{"name"})
22-
)
23-
24-
func init() {
25-
Registry.MustRegister(leaderGauge)
26-
leaderelection.SetProvider(leaderelectionMetricsProvider{})
7+
// SetLeaderElectionProvider sets the leader election metrics provider leveraged by client-go.
8+
func SetLeaderElectionProvider(provider LeaderElectionMetricsProvider) {
9+
leaderelection.SetProvider(leaderElectionMetricsProvider{provider: provider})
2710
}
2811

29-
type leaderelectionMetricsProvider struct{}
12+
type leaderElectionMetricsProvider struct {
13+
provider LeaderElectionMetricsProvider
14+
}
3015

31-
func (leaderelectionMetricsProvider) NewLeaderMetric() leaderelection.LeaderMetric {
32-
return leaderElectionPrometheusAdapter{}
16+
func (l leaderElectionMetricsProvider) NewLeaderMetric() leaderelection.LeaderMetric {
17+
return leaderElectionMetricAdapter(l)
3318
}
3419

35-
type leaderElectionPrometheusAdapter struct{}
20+
type leaderElectionMetricAdapter struct {
21+
provider LeaderElectionMetricsProvider
22+
}
3623

37-
func (s leaderElectionPrometheusAdapter) On(name string) {
38-
leaderGauge.WithLabelValues(name).Set(1.0)
24+
func (l leaderElectionMetricAdapter) On(name string) {
25+
l.provider.LeaderGauge().Set(map[string]string{"name": name}, 1)
3926
}
4027

41-
func (s leaderElectionPrometheusAdapter) Off(name string) {
42-
leaderGauge.WithLabelValues(name).Set(0.0)
28+
func (l leaderElectionMetricAdapter) Off(name string) {
29+
l.provider.LeaderGauge().Set(map[string]string{"name": name}, 0)
4330
}
4431

45-
func (leaderElectionPrometheusAdapter) SlowpathExercised(name string) {
46-
leaderSlowpathCounter.WithLabelValues(name).Inc()
32+
func (l leaderElectionMetricAdapter) SlowpathExercised(name string) {
33+
l.provider.SlowpathExercised().Inc(map[string]string{"name": name})
4734
}

0 commit comments

Comments
 (0)