Skip to content

Commit 9d9a6f4

Browse files
Use a global setter
1 parent db73121 commit 9d9a6f4

File tree

15 files changed

+473
-503
lines changed

15 files changed

+473
-503
lines changed

pkg/config/controller.go

-5
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"time"
2121

2222
"github.com/go-logr/logr"
23-
"sigs.k8s.io/controller-runtime/pkg/metrics"
2423
)
2524

2625
// Controller contains configuration options for controllers. It only includes options
@@ -67,10 +66,6 @@ type Controller struct {
6766
// Note: This flag is disabled by default until a future version. It's currently in beta.
6867
UsePriorityQueue *bool
6968

70-
// MetricsProvider allows users to override the location where controller metrics are emitted.
71-
// By default, metrics are emitted to a pre-configured Prometheus registry
72-
MetricsProvider metrics.ControllerMetricsProvider
73-
7469
// Logger is the logger controllers should use.
7570
Logger logr.Logger
7671
}

pkg/controller/controller.go

-14
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
"k8s.io/client-go/util/workqueue"
2626
"k8s.io/klog/v2"
2727
"k8s.io/utils/ptr"
28-
"sigs.k8s.io/controller-runtime/pkg/metrics"
2928

3029
"sigs.k8s.io/controller-runtime/pkg/config"
3130
"sigs.k8s.io/controller-runtime/pkg/controller/priorityqueue"
@@ -85,10 +84,6 @@ type TypedOptions[request comparable] struct {
8584
// Logger will be used to build a default LogConstructor if unset.
8685
Logger logr.Logger
8786

88-
// MetricsProvider allows users to override the location where controller metrics are emitted.
89-
// By default, metrics are emitted to a pre-configured Prometheus registry
90-
MetricsProvider metrics.ControllerMetricsProvider
91-
9287
// LogConstructor is used to construct a logger used for this controller and passed
9388
// to each reconciliation via the context field.
9489
LogConstructor func(request *request) logr.Logger
@@ -106,10 +101,6 @@ func (options *TypedOptions[request]) DefaultFromConfig(config config.Controller
106101
options.Logger = config.Logger
107102
}
108103

109-
if options.MetricsProvider == nil {
110-
options.MetricsProvider = config.MetricsProvider
111-
}
112-
113104
if options.SkipNameValidation == nil {
114105
options.SkipNameValidation = config.SkipNameValidation
115106
}
@@ -205,10 +196,6 @@ func NewTypedUnmanaged[request comparable](name string, options TypedOptions[req
205196
}
206197
}
207198

208-
if options.MetricsProvider == nil {
209-
options.MetricsProvider = metrics.NewPrometheusProvider()
210-
}
211-
212199
if options.LogConstructor == nil {
213200
log := options.Logger.WithValues(
214201
"controller", name,
@@ -263,7 +250,6 @@ func NewTypedUnmanaged[request comparable](name string, options TypedOptions[req
263250
MaxConcurrentReconciles: options.MaxConcurrentReconciles,
264251
CacheSyncTimeout: options.CacheSyncTimeout,
265252
Name: name,
266-
MetricsProvider: options.MetricsProvider,
267253
LogConstructor: options.LogConstructor,
268254
RecoverPanic: options.RecoverPanic,
269255
LeaderElected: options.NeedLeaderElection,

pkg/controller/metrics/metrics.go

+179
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
package metrics
2+
3+
import (
4+
"time"
5+
6+
"github.com/prometheus/client_golang/prometheus"
7+
internalmetrics "sigs.k8s.io/controller-runtime/pkg/internal/metrics"
8+
"sigs.k8s.io/controller-runtime/pkg/metrics"
9+
)
10+
11+
var (
12+
// reconcileTotal is a prometheus counter metric which holds the total
13+
// number of reconciliations per controller. It has two labels. controller label refers
14+
// to the controller name and result label refers to the reconcile result, i.e.
15+
// success, error, requeue, requeue_after.
16+
reconcileTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
17+
Name: "controller_runtime_reconcile_total",
18+
Help: "Total number of reconciliations per controller",
19+
}, []string{"controller", "result"})
20+
21+
// reconcileErrors is a prometheus counter metric which holds the total
22+
// number of errors from the Reconciler.
23+
reconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
24+
Name: "controller_runtime_reconcile_errors_total",
25+
Help: "Total number of reconciliation errors per controller",
26+
}, []string{"controller"})
27+
28+
// terminalReconcileErrors is a prometheus counter metric which holds the total
29+
// number of terminal errors from the Reconciler.
30+
terminalReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
31+
Name: "controller_runtime_terminal_reconcile_errors_total",
32+
Help: "Total number of terminal reconciliation errors per controller",
33+
}, []string{"controller"})
34+
35+
// reconcilePanics is a prometheus counter metric which holds the total
36+
// number of panics from the Reconciler.
37+
reconcilePanics = prometheus.NewCounterVec(prometheus.CounterOpts{
38+
Name: "controller_runtime_reconcile_panics_total",
39+
Help: "Total number of reconciliation panics per controller",
40+
}, []string{"controller"})
41+
42+
// reconcileTime is a prometheus metric which keeps track of the duration
43+
// of reconciliations.
44+
reconcileTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
45+
Name: "controller_runtime_reconcile_time_seconds",
46+
Help: "Length of time per reconciliation per controller",
47+
Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
48+
1.25, 1.5, 1.75, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 40, 50, 60},
49+
NativeHistogramBucketFactor: 1.1,
50+
NativeHistogramMaxBucketNumber: 100,
51+
NativeHistogramMinResetDuration: 1 * time.Hour,
52+
}, []string{"controller"})
53+
54+
// workerCount is a prometheus metric which holds the number of
55+
// concurrent reconciles per controller.
56+
workerCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{
57+
Name: "controller_runtime_max_concurrent_reconciles",
58+
Help: "Maximum number of concurrent reconciles per controller",
59+
}, []string{"controller"})
60+
61+
// activeWorkers is a prometheus metric which holds the number
62+
// of active workers per controller.
63+
activeWorkers = prometheus.NewGaugeVec(prometheus.GaugeOpts{
64+
Name: "controller_runtime_active_workers",
65+
Help: "Number of currently used workers per controller",
66+
}, []string{"controller"})
67+
)
68+
69+
// ControllerMetricsProvider is an interface that provides methods for firing controller metrics
70+
type ControllerMetricsProvider interface {
71+
// ReconcileTotal returns the counter metric which holds the total
72+
// number of reconciliations per controller. It has two labels. controller label refers
73+
// to the controller name and result label refers to the reconcile result, i.e.
74+
// success, error, requeue, requeue_after.
75+
ReconcileTotal() internalmetrics.CounterMetric
76+
// ReconcileErrors returns the counter metric which holds the total
77+
// number of errors from the Reconciler.
78+
ReconcileErrors() internalmetrics.CounterMetric
79+
// TerminalReconcileErrors returns the counter metric which holds the total
80+
// number of terminal errors from the Reconciler.
81+
TerminalReconcileErrors() internalmetrics.CounterMetric
82+
// ReconcilePanics returns the counter metric which holds the total
83+
// number of panics from the Reconciler.
84+
ReconcilePanics() internalmetrics.CounterMetric
85+
// ReconcileTime returns the observation metric which keeps track of the duration
86+
// of reconciliations.
87+
ReconcileTime() internalmetrics.ObservationMetric
88+
// WorkerCount returns the gauge metric which holds the number of
89+
// concurrent reconciles per controller.
90+
WorkerCount() internalmetrics.GaugeMetric
91+
// ActiveWorkers returns the gauge metric which holds the number
92+
// of active workers per controller.
93+
ActiveWorkers() internalmetrics.GaugeMetric
94+
}
95+
96+
// PrometheusProvider is a ControllerMetricsProvider backed by the package-level
97+
// Prometheus collectors; it fires prometheus metrics in response to controller events
98+
type PrometheusProvider struct {
99+
reconcileTotal *prometheus.CounterVec
100+
reconcileErrors *prometheus.CounterVec
101+
terminalReconcileErrors *prometheus.CounterVec
102+
reconcilePanics *prometheus.CounterVec
103+
reconcileTime *prometheus.HistogramVec
104+
workerCount *prometheus.GaugeVec
105+
activeWorkers *prometheus.GaugeVec
106+
}
107+
108+
// NewPrometheusProvider creates a PrometheusProvider
109+
func NewPrometheusProvider() *PrometheusProvider {
110+
return &PrometheusProvider{
111+
reconcileTotal: reconcileTotal,
112+
reconcileErrors: reconcileErrors,
113+
terminalReconcileErrors: terminalReconcileErrors,
114+
reconcilePanics: reconcilePanics,
115+
reconcileTime: reconcileTime,
116+
workerCount: workerCount,
117+
activeWorkers: activeWorkers,
118+
}
119+
}
120+
121+
// ReconcileTotal returns a Prometheus counter that fulfills the CounterMetric interface
122+
func (p PrometheusProvider) ReconcileTotal() internalmetrics.CounterMetric {
123+
return &internalmetrics.PrometheusCounterAdapter{CounterVec: p.reconcileTotal}
124+
}
125+
126+
// ReconcileErrors returns a Prometheus counter that fulfills the CounterMetric interface
127+
func (p PrometheusProvider) ReconcileErrors() internalmetrics.CounterMetric {
128+
return &internalmetrics.PrometheusCounterAdapter{CounterVec: p.reconcileErrors}
129+
}
130+
131+
// TerminalReconcileErrors returns a Prometheus counter that fulfills the CounterMetric interface
132+
func (p PrometheusProvider) TerminalReconcileErrors() internalmetrics.CounterMetric {
133+
return &internalmetrics.PrometheusCounterAdapter{CounterVec: p.terminalReconcileErrors}
134+
}
135+
136+
// ReconcilePanics returns a Prometheus counter that fulfills the CounterMetric interface
137+
func (p PrometheusProvider) ReconcilePanics() internalmetrics.CounterMetric {
138+
return &internalmetrics.PrometheusCounterAdapter{CounterVec: p.reconcilePanics}
139+
}
140+
141+
// ReconcileTime returns a Prometheus histogram that fulfills the ObservationMetric interface
142+
func (p PrometheusProvider) ReconcileTime() internalmetrics.ObservationMetric {
143+
return &internalmetrics.PrometheusHistogramAdapter{HistogramVec: p.reconcileTime}
144+
}
145+
146+
// WorkerCount returns a Prometheus gauge that fulfills the GaugeMetric interface
147+
func (p PrometheusProvider) WorkerCount() internalmetrics.GaugeMetric {
148+
return &internalmetrics.PrometheusGaugeAdapter{GaugeVec: p.workerCount}
149+
}
150+
151+
// ActiveWorkers returns a Prometheus gauge that fulfills the GaugeMetric interface
152+
func (p PrometheusProvider) ActiveWorkers() internalmetrics.GaugeMetric {
153+
return &internalmetrics.PrometheusGaugeAdapter{GaugeVec: p.activeWorkers}
154+
}
155+
156+
func init() {
157+
metrics.Registry.MustRegister(
158+
reconcileTotal,
159+
reconcileErrors,
160+
terminalReconcileErrors,
161+
reconcilePanics,
162+
reconcileTime,
163+
workerCount,
164+
activeWorkers,
165+
)
166+
}
167+
168+
var controllerMetricsProvider ControllerMetricsProvider = NewPrometheusProvider()
169+
170+
// SetControllerMetricsProvider assigns a provider to the ControllerMetricsProvider for exposing controller metrics.
171+
// The PrometheusProvider will be used by default if the provider is not overridden
172+
func SetControllerMetricsProvider(provider ControllerMetricsProvider) {
173+
controllerMetricsProvider = provider
174+
}
175+
176+
// GetControllerMetricsProvider returns the controller metrics provider being used by the controller reconciliation
177+
func GetControllerMetricsProvider() ControllerMetricsProvider {
178+
return controllerMetricsProvider
179+
}

pkg/controller/priorityqueue/metrics.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import (
66

77
"k8s.io/client-go/util/workqueue"
88
"k8s.io/utils/clock"
9-
"sigs.k8s.io/controller-runtime/pkg/internal/metrics"
9+
"sigs.k8s.io/controller-runtime/pkg/metrics"
1010
)
1111

1212
// This file is mostly a copy of unexported code from

pkg/controller/priorityqueue/metrics_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import (
44
"sync"
55

66
"k8s.io/client-go/util/workqueue"
7-
"sigs.k8s.io/controller-runtime/pkg/internal/metrics"
7+
"sigs.k8s.io/controller-runtime/pkg/metrics"
88
)
99

1010
func newFakeMetricsProvider() *fakeMetricsProvider {

pkg/controller/priorityqueue/priorityqueue.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212
"k8s.io/utils/clock"
1313
"k8s.io/utils/ptr"
1414

15-
"sigs.k8s.io/controller-runtime/pkg/internal/metrics"
15+
"sigs.k8s.io/controller-runtime/pkg/metrics"
1616
)
1717

1818
// AddOpts describes the options for adding items to the queue.

pkg/internal/controller/controller.go

+20-23
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import (
3030
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
3131
"k8s.io/apimachinery/pkg/util/uuid"
3232
"k8s.io/client-go/util/workqueue"
33-
"sigs.k8s.io/controller-runtime/pkg/metrics"
33+
"sigs.k8s.io/controller-runtime/pkg/controller/metrics"
3434

3535
"sigs.k8s.io/controller-runtime/pkg/controller/priorityqueue"
3636
logf "sigs.k8s.io/controller-runtime/pkg/log"
@@ -89,9 +89,6 @@ type Controller[request comparable] struct {
8989
// outside the context of a reconciliation.
9090
LogConstructor func(request *request) logr.Logger
9191

92-
// MetricsProvider is used to route metrics that are fired due to controller reconciles
93-
MetricsProvider metrics.ControllerMetricsProvider
94-
9592
// RecoverPanic indicates whether the panic caused by reconcile should be recovered.
9693
// Defaults to true.
9794
RecoverPanic *bool
@@ -104,7 +101,7 @@ type Controller[request comparable] struct {
104101
func (c *Controller[request]) Reconcile(ctx context.Context, req request) (_ reconcile.Result, err error) {
105102
defer func() {
106103
if r := recover(); r != nil {
107-
c.MetricsProvider.ReconcilePanics().Inc(map[string]string{labelKeyController: c.Name})
104+
metrics.GetControllerMetricsProvider().ReconcilePanics().Inc(map[string]string{labelKeyController: c.Name})
108105

109106
if c.RecoverPanic == nil || *c.RecoverPanic {
110107
for _, fn := range utilruntime.PanicHandlers {
@@ -297,8 +294,8 @@ func (c *Controller[request]) processNextWorkItem(ctx context.Context) bool {
297294
// period.
298295
defer c.Queue.Done(obj)
299296

300-
c.MetricsProvider.ActiveWorkers().Add(map[string]string{labelKeyController: c.Name}, 1)
301-
defer c.MetricsProvider.ActiveWorkers().Add(map[string]string{labelKeyController: c.Name}, -1)
297+
metrics.GetControllerMetricsProvider().ActiveWorkers().Add(map[string]string{labelKeyController: c.Name}, 1)
298+
defer metrics.GetControllerMetricsProvider().ActiveWorkers().Add(map[string]string{labelKeyController: c.Name}, -1)
302299

303300
c.reconcileHandler(ctx, obj, priority)
304301
return true
@@ -314,15 +311,15 @@ const (
314311
)
315312

316313
func (c *Controller[request]) initMetrics() {
317-
c.MetricsProvider.ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelError}, 0)
318-
c.MetricsProvider.ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeueAfter}, 0)
319-
c.MetricsProvider.ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeue}, 0)
320-
c.MetricsProvider.ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelSuccess}, 0)
321-
c.MetricsProvider.ReconcileErrors().Add(map[string]string{labelKeyController: c.Name}, 0)
322-
c.MetricsProvider.TerminalReconcileErrors().Add(map[string]string{labelKeyController: c.Name}, 0)
323-
c.MetricsProvider.ReconcilePanics().Add(map[string]string{labelKeyController: c.Name}, 0)
324-
c.MetricsProvider.WorkerCount().Set(map[string]string{labelKeyController: c.Name}, float64(c.MaxConcurrentReconciles))
325-
c.MetricsProvider.ActiveWorkers().Set(map[string]string{labelKeyController: c.Name}, 0)
314+
metrics.GetControllerMetricsProvider().ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelError}, 0)
315+
metrics.GetControllerMetricsProvider().ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeueAfter}, 0)
316+
metrics.GetControllerMetricsProvider().ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeue}, 0)
317+
metrics.GetControllerMetricsProvider().ReconcileTotal().Add(map[string]string{labelKeyController: c.Name, labelKeyResult: labelSuccess}, 0)
318+
metrics.GetControllerMetricsProvider().ReconcileErrors().Add(map[string]string{labelKeyController: c.Name}, 0)
319+
metrics.GetControllerMetricsProvider().TerminalReconcileErrors().Add(map[string]string{labelKeyController: c.Name}, 0)
320+
metrics.GetControllerMetricsProvider().ReconcilePanics().Add(map[string]string{labelKeyController: c.Name}, 0)
321+
metrics.GetControllerMetricsProvider().WorkerCount().Set(map[string]string{labelKeyController: c.Name}, float64(c.MaxConcurrentReconciles))
322+
metrics.GetControllerMetricsProvider().ActiveWorkers().Set(map[string]string{labelKeyController: c.Name}, 0)
326323
}
327324

328325
func (c *Controller[request]) reconcileHandler(ctx context.Context, req request, priority int) {
@@ -346,12 +343,12 @@ func (c *Controller[request]) reconcileHandler(ctx context.Context, req request,
346343
switch {
347344
case err != nil:
348345
if errors.Is(err, reconcile.TerminalError(nil)) {
349-
c.MetricsProvider.TerminalReconcileErrors().Inc(map[string]string{"controller": c.Name})
346+
metrics.GetControllerMetricsProvider().TerminalReconcileErrors().Inc(map[string]string{"controller": c.Name})
350347
} else {
351348
c.Queue.AddWithOpts(priorityqueue.AddOpts{RateLimited: true, Priority: priority}, req)
352349
}
353-
c.MetricsProvider.ReconcileErrors().Inc(map[string]string{labelKeyController: c.Name})
354-
c.MetricsProvider.ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelError})
350+
metrics.GetControllerMetricsProvider().ReconcileErrors().Inc(map[string]string{labelKeyController: c.Name})
351+
metrics.GetControllerMetricsProvider().ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelError})
355352
if !result.IsZero() {
356353
log.Info("Warning: Reconciler returned both a non-zero result and a non-nil error. The result will always be ignored if the error is non-nil and the non-nil error causes requeuing with exponential backoff. For more details, see: https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/reconcile#Reconciler")
357354
}
@@ -364,17 +361,17 @@ func (c *Controller[request]) reconcileHandler(ctx context.Context, req request,
364361
// to result.RequeueAfter
365362
c.Queue.Forget(req)
366363
c.Queue.AddWithOpts(priorityqueue.AddOpts{After: result.RequeueAfter, Priority: priority}, req)
367-
c.MetricsProvider.ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeueAfter})
364+
metrics.GetControllerMetricsProvider().ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeueAfter})
368365
case result.Requeue: //nolint: staticcheck // We have to handle it until it is removed
369366
log.V(5).Info("Reconcile done, requeueing")
370367
c.Queue.AddWithOpts(priorityqueue.AddOpts{RateLimited: true, Priority: priority}, req)
371-
c.MetricsProvider.ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeue})
368+
metrics.GetControllerMetricsProvider().ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelRequeue})
372369
default:
373370
log.V(5).Info("Reconcile successful")
374371
// Finally, if no error occurs we Forget this item so it does not
375372
// get queued again until another change happens.
376373
c.Queue.Forget(req)
377-
c.MetricsProvider.ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelSuccess})
374+
metrics.GetControllerMetricsProvider().ReconcileTotal().Inc(map[string]string{labelKeyController: c.Name, labelKeyResult: labelSuccess})
378375
}
379376
}
380377

@@ -385,7 +382,7 @@ func (c *Controller[request]) GetLogger() logr.Logger {
385382

386383
// updateMetrics updates prometheus metrics within the controller.
387384
func (c *Controller[request]) updateMetrics(reconcileTime time.Duration) {
388-
c.MetricsProvider.ReconcileTime().Observe(map[string]string{labelKeyController: c.Name}, reconcileTime.Seconds())
385+
metrics.GetControllerMetricsProvider().ReconcileTime().Observe(map[string]string{labelKeyController: c.Name}, reconcileTime.Seconds())
389386
}
390387

391388
// ReconcileIDFromContext gets the reconcileID from the current context.

0 commit comments

Comments
 (0)