Skip to content

Commit 4c9d2f8

Browse files
committed
controller: consider rollback nodes unavailable
1 parent b2e4422 commit 4c9d2f8

File tree

2 files changed

+19
-7
lines changed

2 files changed

+19
-7
lines changed

pkg/controller/node/node_controller.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1232,10 +1232,13 @@ func (ctrl *Controller) updateCandidateNode(mosc *mcfgv1.MachineOSConfig, mosb *
12321232
func getAllCandidateMachines(layered bool, config *mcfgv1.MachineOSConfig, build *mcfgv1.MachineOSBuild, pool *mcfgv1.MachineConfigPool, nodesInPool []*corev1.Node, maxUnavailable int) ([]*corev1.Node, uint) {
12331233
unavail := getUnavailableMachines(nodesInPool, pool, layered, build)
12341234
if len(unavail) >= maxUnavailable {
1235-
klog.V(4).Infof("Pool %s: No nodes available for updates", pool.Name)
1235+
klog.V(4).Infof("getAllCandidateMachines: No capacity left for pool %s (unavail=%d >= maxUnavailable=%d)",
1236+
pool.Name, len(unavail), maxUnavailable)
12361237
return nil, 0
12371238
}
12381239
capacity := maxUnavailable - len(unavail)
1240+
klog.V(4).Infof("getAllCandidateMachines: Computed capacity=%d for pool %s", capacity, pool.Name)
1241+
12391242
failingThisConfig := 0
12401243
// We only look at nodes which aren't already targeting our desired config
12411244
var nodes []*corev1.Node

pkg/controller/node/status.go

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -373,13 +373,13 @@ func isNodeDone(node *corev1.Node, layered bool) bool {
373373
return false
374374
}
375375

376-
if layered {
377-
// The MachineConfig annotations are loaded on boot-up by the daemon which
378-
// isn't currently done for the image annotations, so the comparisons here
379-
// are a bit more nuanced.
380-
cimage, cok := node.Annotations[daemonconsts.CurrentImageAnnotationKey]
381-
dimage, dok := node.Annotations[daemonconsts.DesiredImageAnnotationKey]
376+
// The MachineConfig annotations are loaded on boot-up by the daemon which
377+
// isn't currently done for the image annotations, so the comparisons here
378+
// are a bit more nuanced.
379+
cimage, cok := node.Annotations[daemonconsts.CurrentImageAnnotationKey]
380+
dimage, dok := node.Annotations[daemonconsts.DesiredImageAnnotationKey]
382381

382+
if layered {
383383
// If desired image is not set, but the pool is layered, this node can
384384
// be considered ready for an update. This is the very first time node
385385
// is being opted into layering.
@@ -398,6 +398,15 @@ func isNodeDone(node *corev1.Node, layered bool) bool {
398398

399399
}
400400

401+
// If not in layered mode, we also need to consider the case when the node is rolling back
402+
// from layered to non-layered. In those cases, cconfig==dconfig, but the node
403+
// will still need to do an update back to dconfig's OSImageURL. We can detect a
404+
// rolling back node by checking if the cimage still exists but the dimage does not exist.
405+
if cok && !dok {
406+
// The node is not "done" in this case, as the current image annotation still exists.
407+
return false
408+
}
409+
401410
return cconfig == dconfig && isNodeMCDState(node, daemonconsts.MachineConfigDaemonStateDone)
402411
}
403412

0 commit comments

Comments
 (0)