@@ -635,8 +635,10 @@ func (ctrl *Controller) updateNode(old, cur interface{}) {
635
635
}
636
636
637
637
var changed bool
638
- oldReadyErr := checkNodeReady (oldNode )
639
- newReadyErr := checkNodeReady (curNode )
638
+ oldLNS := ctrlcommon .NewLayeredNodeState (oldNode )
639
+ curLNS := ctrlcommon .NewLayeredNodeState (curNode )
640
+ oldReadyErr := oldLNS .CheckNodeReady ()
641
+ newReadyErr := curLNS .CheckNodeReady ()
640
642
641
643
oldReady := getErrorString (oldReadyErr )
642
644
newReady := getErrorString (newReadyErr )
@@ -652,7 +654,7 @@ func (ctrl *Controller) updateNode(old, cur interface{}) {
652
654
653
655
// Specifically log when a node has completed an update so the MCC logs are a useful central aggregate of state changes
654
656
if oldNode .Annotations [daemonconsts .CurrentMachineConfigAnnotationKey ] != oldNode .Annotations [daemonconsts .DesiredMachineConfigAnnotationKey ] &&
655
- isNodeDone ( curNode , false ) {
657
+ curLNS . IsNodeDone ( ) {
656
658
ctrl .logPoolNode (pool , curNode , "Completed update to %s" , curNode .Annotations [daemonconsts .DesiredMachineConfigAnnotationKey ])
657
659
changed = true
658
660
} else {
@@ -1092,12 +1094,14 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
1092
1094
ctx := context .TODO ()
1093
1095
for _ , node := range nodes {
1094
1096
// All the nodes that need to be upgraded should have `NodeUpdateInProgressTaint` so that they're less likely
1095
- // to be chosen during the scheduling cycle.
1097
+ // to be chosen during the scheduling cycle. This includes nodes which are:
1098
+ // (i) In a Pool being updated to a new MC or image
1099
+ // (ii) In a Pool that is being opted out of layering
1096
1100
hasInProgressTaint := checkIfNodeHasInProgressTaint (node )
1097
1101
1098
1102
lns := ctrlcommon .NewLayeredNodeState (node )
1099
1103
1100
- if lns .IsDesiredEqualToPool (pool , layered ) {
1104
+ if ( ! layered && lns .IsDesiredMachineConfigEqualToPool (pool ) && ! lns . AreImageAnnotationsPresentOnNode ()) || ( layered && lns . IsDesiredEqualToBuild ( mosc , mosb ) ) {
1101
1105
if hasInProgressTaint {
1102
1106
if err := ctrl .removeUpdateInProgressTaint (ctx , node .Name ); err != nil {
1103
1107
err = fmt .Errorf ("failed removing %s taint for node %s: %w" , constants .NodeUpdateInProgressTaint .Key , node .Name , err )
@@ -1200,7 +1204,7 @@ func (ctrl *Controller) setClusterConfigAnnotation(nodes []*corev1.Node) error {
1200
1204
1201
1205
// updateCandidateNode needs to understand MOSB
1202
1206
// specifically, the LayeredNodeState probably needs to understand mosb
1203
- func (ctrl * Controller ) updateCandidateNode (mosc * mcfgv1.MachineOSConfig , mosb * mcfgv1.MachineOSBuild , nodeName string , pool * mcfgv1.MachineConfigPool ) error {
1207
+ func (ctrl * Controller ) updateCandidateNode (mosc * mcfgv1.MachineOSConfig , mosb * mcfgv1.MachineOSBuild , nodeName string , pool * mcfgv1.MachineConfigPool , layered bool ) error {
1204
1208
return clientretry .RetryOnConflict (constants .NodeUpdateBackoff , func () error {
1205
1209
oldNode , err := ctrl .kubeClient .CoreV1 ().Nodes ().Get (context .TODO (), nodeName , metav1.GetOptions {})
1206
1210
if err != nil {
@@ -1212,30 +1216,9 @@ func (ctrl *Controller) updateCandidateNode(mosc *mcfgv1.MachineOSConfig, mosb *
1212
1216
}
1213
1217
1214
1218
lns := ctrlcommon .NewLayeredNodeState (oldNode )
1215
- layered , err := ctrl .isLayeredPool (mosc , mosb )
1216
- if err != nil {
1217
- return fmt .Errorf ("Failed to determine whether pool %s opts in to OCL due to an error: %s" , pool .Name , err )
1218
- }
1219
- if mosb == nil {
1220
- if lns .IsDesiredEqualToPool (pool , layered ) {
1221
- // If the node's desired annotations match the pool, return directly without updating the node.
1222
- klog .V (4 ).Infof ("Pool %s: node %s: no update is needed" , pool .Name , nodeName )
1223
- return nil
1224
-
1225
- }
1226
- lns .SetDesiredStateFromPool (layered , pool )
1227
-
1219
+ if ! layered {
1220
+ lns .SetDesiredStateFromPool (pool )
1228
1221
} else {
1229
- if lns .IsDesiredEqualToBuild (mosc , mosb ) {
1230
- // If the node's desired annotations match the pool, return directly without updating the node.
1231
- klog .V (4 ).Infof ("Pool %s: node %s: no update is needed" , pool .Name , nodeName )
1232
- return nil
1233
- }
1234
- // ensure this is happening. it might not be.
1235
- // we need to ensure the node controller is triggered at all the same times
1236
- // when using this new system
1237
- // we know the mosc+mosb can trigger one another and cause a build, but if the node controller
1238
- // can't set this anno, and subsequently cannot trigger the daemon to update, we need to rework.
1239
1222
lns .SetDesiredStateFromMachineOSConfig (mosc , mosb )
1240
1223
}
1241
1224
@@ -1246,6 +1229,12 @@ func (ctrl *Controller) updateCandidateNode(mosc *mcfgv1.MachineOSConfig, mosb *
1246
1229
return err
1247
1230
}
1248
1231
1232
+ // Don't make a patch call if no update is needed.
1233
+ if reflect .DeepEqual (newData , oldData ) {
1234
+ return nil
1235
+ }
1236
+
1237
+ klog .V (4 ).Infof ("Pool %s: layered=%v node %s update is needed" , pool .Name , layered , nodeName )
1249
1238
patchBytes , err := strategicpatch .CreateTwoWayMergePatch (oldData , newData , corev1.Node {})
1250
1239
if err != nil {
1251
1240
return fmt .Errorf ("failed to create patch for node %q: %w" , nodeName , err )
@@ -1258,7 +1247,7 @@ func (ctrl *Controller) updateCandidateNode(mosc *mcfgv1.MachineOSConfig, mosb *
1258
1247
// getAllCandidateMachines returns all possible nodes which can be updated to the target config, along with a maximum
1259
1248
// capacity. It is the responsibility of the caller to choose a subset of the nodes given the capacity.
1260
1249
func getAllCandidateMachines (layered bool , config * mcfgv1.MachineOSConfig , build * mcfgv1.MachineOSBuild , pool * mcfgv1.MachineConfigPool , nodesInPool []* corev1.Node , maxUnavailable int ) ([]* corev1.Node , uint ) {
1261
- unavail := getUnavailableMachines (nodesInPool , pool , layered , build )
1250
+ unavail := getUnavailableMachines (nodesInPool , pool )
1262
1251
if len (unavail ) >= maxUnavailable {
1263
1252
klog .V (4 ).Infof ("getAllCandidateMachines: No capacity left for pool %s (unavail=%d >= maxUnavailable=%d)" ,
1264
1253
pool .Name , len (unavail ), maxUnavailable )
@@ -1272,26 +1261,18 @@ func getAllCandidateMachines(layered bool, config *mcfgv1.MachineOSConfig, build
1272
1261
var nodes []* corev1.Node
1273
1262
for _ , node := range nodesInPool {
1274
1263
lns := ctrlcommon .NewLayeredNodeState (node )
1275
- if ! layered {
1276
- if lns .IsDesiredEqualToPool (pool , layered ) {
1277
- if isNodeMCDFailing (node ) {
1278
- failingThisConfig ++
1279
- }
1280
- continue
1281
- }
1282
- } else {
1283
- if lns .IsDesiredEqualToBuild (config , build ) {
1284
- // If the node's desired annotations match the pool, return directly without updating the node.
1285
- klog .V (4 ).Infof ("Pool %s: layered node %s: no update is needed" , pool .Name , node .Name )
1286
- continue
1264
+ if ! lns .CheckNodeCandidacyForUpdate (layered , pool , config , build ) {
1265
+ if lns .IsNodeMCDFailing () {
1266
+ failingThisConfig ++
1287
1267
}
1268
+ continue
1288
1269
}
1289
1270
// Ignore nodes that are currently mid-update or unscheduled
1290
- if ! isNodeReady ( node ) {
1271
+ if ! lns . IsNodeReady ( ) {
1291
1272
klog .V (4 ).Infof ("node %s skipped during candidate selection as it is currently unscheduled" , node .Name )
1292
1273
continue
1293
1274
}
1294
- klog .V ( 4 ). Infof ("Pool %s: selected candidate node %s" , pool .Name , node .Name )
1275
+ klog .Infof ("Pool %s: selected candidate node %s" , pool .Name , node .Name )
1295
1276
nodes = append (nodes , node )
1296
1277
}
1297
1278
// Nodes which are failing to target this config also count against
@@ -1308,15 +1289,6 @@ func getAllCandidateMachines(layered bool, config *mcfgv1.MachineOSConfig, build
1308
1289
return nodes , uint (capacity )
1309
1290
}
1310
1291
1311
- // getCandidateMachines returns the maximum subset of nodes which can be updated to the target config given availability constraints.
1312
- func getCandidateMachines (pool * mcfgv1.MachineConfigPool , config * mcfgv1.MachineOSConfig , build * mcfgv1.MachineOSBuild , nodesInPool []* corev1.Node , maxUnavailable int , layered bool ) []* corev1.Node {
1313
- nodes , capacity := getAllCandidateMachines (layered , config , build , pool , nodesInPool , maxUnavailable )
1314
- if uint (len (nodes )) < capacity {
1315
- return nodes
1316
- }
1317
- return nodes [:capacity ]
1318
- }
1319
-
1320
1292
// getOperatorPodNodeName fetches the name of the current node running the machine-config-operator pod
1321
1293
func (ctrl * Controller ) getOperatorNodeName () (string , error ) {
1322
1294
// Create a selector object with a filter on the machine-config-operator pod
@@ -1392,7 +1364,7 @@ func (ctrl *Controller) setDesiredAnnotations(layered bool, mosc *mcfgv1.Machine
1392
1364
klog .Infof ("Continuing to sync layered MachineConfigPool %s" , pool .Name )
1393
1365
}
1394
1366
for _ , node := range candidates {
1395
- if err := ctrl .updateCandidateNode (mosc , mosb , node .Name , pool ); err != nil {
1367
+ if err := ctrl .updateCandidateNode (mosc , mosb , node .Name , pool , layered ); err != nil {
1396
1368
return fmt .Errorf ("setting desired %s for node %s: %w" , pool .Spec .Configuration .Name , node .Name , err )
1397
1369
}
1398
1370
}
0 commit comments