1
1
package machineset
2
2
3
3
import (
4
+ "bytes"
4
5
"context"
5
6
"encoding/json"
6
7
"fmt"
@@ -61,6 +62,7 @@ type Controller struct {
61
62
mapiStats MachineResourceStats
62
63
capiMachineSetStats MachineResourceStats
63
64
capiMachineDeploymentStats MachineResourceStats
65
+ mapiBootImageState map [string ]BootImageState
64
66
conditionMutex sync.Mutex
65
67
mapiSyncMutex sync.Mutex
66
68
@@ -74,6 +76,13 @@ type MachineResourceStats struct {
74
76
totalCount int
75
77
}
76
78
79
// BootImageState is the per-machineset record used for detecting hot loops.
// It pairs the bytes recorded for the most recent boot image update with a
// counter of consecutive updates to that same value. Records are reset when
// the cluster is opted out of boot image updates.
type BootImageState struct {
	// value holds the raw bytes recorded for the last update.
	value []byte
	// hotLoopCount counts consecutive updates that targeted value.
	hotLoopCount int
}
85
+
77
86
// Helper function that checks if all resources have been evaluated
78
87
func (mrs MachineResourceStats ) isFinished () bool {
79
88
return mrs .totalCount == (mrs .inProgress + mrs .erroredCount )
@@ -91,6 +100,9 @@ const (
91
100
92
101
ArchLabelKey = "kubernetes.io/arch="
93
102
OSLabelKey = "machine.openshift.io/os-id"
103
+
104
+ // Threshold for hot loop detection
105
+ HotLoopLimit = 3
94
106
)
95
107
96
108
// New returns a new machine-set-boot-image controller.
@@ -145,6 +157,8 @@ func New(
145
157
146
158
ctrl .featureGateAccess = featureGateAccess
147
159
160
+ ctrl .mapiBootImageState = map [string ]BootImageState {}
161
+
148
162
return ctrl
149
163
}
150
164
@@ -355,6 +369,11 @@ func (ctrl *Controller) syncMAPIMachineSets(reason string) {
355
369
}
356
370
if ! machineManagerFound {
357
371
klog .V (4 ).Infof ("No MAPI machineset manager was found, so no MAPI machinesets will be enrolled." )
372
+ // clear out MAPI boot image history
373
+ for k := range ctrl .mapiBootImageState {
374
+ delete (ctrl .mapiBootImageState , k )
375
+ }
376
+
358
377
}
359
378
360
379
mapiMachineSets , err := ctrl .mapiMachineSetLister .List (machineResourceSelector )
@@ -367,6 +386,10 @@ func (ctrl *Controller) syncMAPIMachineSets(reason string) {
367
386
// If no machine resources were enrolled, exit the enqueue process without errors.
368
387
if len (mapiMachineSets ) == 0 {
369
388
klog .Infof ("No MAPI machinesets were enrolled, so no MAPI machinesets will be enqueued." )
389
+ // clear out MAPI boot image history
390
+ for k := range ctrl .mapiBootImageState {
391
+ delete (ctrl .mapiBootImageState , k )
392
+ }
370
393
}
371
394
372
395
// Reset stats before initiating reconciliation loop
@@ -477,13 +500,44 @@ func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet
477
500
478
501
// Patch the machineset if required
479
502
if patchRequired {
503
+ // First, check if we're hot looping
504
+ if ctrl .checkMAPIMachineSetHotLoop (newMachineSet ) {
505
+ return fmt .Errorf ("refusing to reconcile machineset %s, hot loop detected. Please opt-out of boot image updates, adjust your machine provisioning workflow to prevent hot loops and opt back in to resume boot image updates" , machineSet .Name )
506
+ }
480
507
klog .Infof ("Patching MAPI machineset %s" , machineSet .Name )
481
508
return ctrl .patchMachineSet (machineSet , newMachineSet )
482
509
}
483
510
klog .Infof ("No patching required for MAPI machineset %s" , machineSet .Name )
484
511
return nil
485
512
}
486
513
514
+ // Checks against a local store of boot image updates to detect hot looping
515
+ func (ctrl * Controller ) checkMAPIMachineSetHotLoop (machineSet * machinev1beta1.MachineSet ) bool {
516
+ bis , ok := ctrl .mapiBootImageState [machineSet .Name ]
517
+ if ! ok {
518
+ // If the machineset doesn't currently have a record, create a new one.
519
+ ctrl .mapiBootImageState [machineSet .Name ] = BootImageState {
520
+ value : machineSet .Spec .Template .Spec .ProviderSpec .Value .Raw ,
521
+ hotLoopCount : 1 ,
522
+ }
523
+ } else {
524
+ hotLoopCount := 1
525
+ // If the controller is updating to a value that was previously updated to, increase the hot loop counter
526
+ if bytes .Equal (bis .value , machineSet .Spec .Template .Spec .ProviderSpec .Value .Raw ) {
527
+ hotLoopCount = (bis .hotLoopCount ) + 1
528
+ }
529
+ // Return an error and degrade if the hot loop counter is above threshold
530
+ if hotLoopCount > HotLoopLimit {
531
+ return true
532
+ }
533
+ ctrl .mapiBootImageState [machineSet .Name ] = BootImageState {
534
+ value : machineSet .Spec .Template .Spec .ProviderSpec .Value .Raw ,
535
+ hotLoopCount : hotLoopCount ,
536
+ }
537
+ }
538
+ return false
539
+ }
540
+
487
541
// This function patches the machineset object using the machineClient
488
542
// Returns an error if marshalling or patching fails.
489
543
func (ctrl * Controller ) patchMachineSet (oldMachineSet , newMachineSet * machinev1beta1.MachineSet ) error {
0 commit comments