diff --git a/pkg/product/mke/api/cluster_spec.go b/pkg/product/mke/api/cluster_spec.go index 68708387..8bd51059 100644 --- a/pkg/product/mke/api/cluster_spec.go +++ b/pkg/product/mke/api/cluster_spec.go @@ -8,6 +8,7 @@ import ( "strconv" "strings" "sync" + "time" "github.com/Mirantis/mcc/pkg/constant" common "github.com/Mirantis/mcc/pkg/product/common/api" @@ -267,7 +268,10 @@ func pingHost(h *Host, address string, waitgroup *sync.WaitGroup, errCh chan<- e } return nil }, - retry.Attempts(12), // last attempt should wait ~7min + retry.MaxJitter(time.Second*3), + retry.Delay(time.Second*30), + retry.DelayType(retry.FixedDelay), + retry.Attempts(10), // should try for ~5min ) if err != nil { errCh <- fmt.Errorf("MKE health check failed: %w", err) diff --git a/pkg/product/mke/phase/uninstall_mcr.go b/pkg/product/mke/phase/uninstall_mcr.go index 5117076a..b5f755e2 100644 --- a/pkg/product/mke/phase/uninstall_mcr.go +++ b/pkg/product/mke/phase/uninstall_mcr.go @@ -22,6 +22,32 @@ func (p *UninstallMCR) Title() string { // Run installs the engine on each host. func (p *UninstallMCR) Run() error { + workers := p.Config.Spec.Workers() + managers := p.Config.Spec.Managers() + swarmLeader := p.Config.Spec.SwarmLeader() + + // Drain all workers + for _, h := range workers { + if err := mcr.DrainNode(swarmLeader, h); err != nil { + return fmt.Errorf("%s: drain worker node: %w", h, err) + } + } + + // Drain all managers + for _, h := range managers { + if swarmLeader.Address() == h.Address() { + continue + } + if err := mcr.DrainNode(swarmLeader, h); err != nil { + return fmt.Errorf("%s: draining manager node: %w", h, err) + } + } + + // Drain the leader + if err := mcr.DrainNode(swarmLeader, swarmLeader); err != nil { + return fmt.Errorf("%s: drain leader node: %w", swarmLeader, err) + } + if err := phase.RunParallelOnHosts(p.Config.Spec.Hosts, p.Config, p.uninstallMCR); err != nil { return fmt.Errorf("uninstall container runtime: %w", err) } @@ -31,12 +57,6 @@ func (p *UninstallMCR) Run() error { func (p *UninstallMCR) uninstallMCR(h *api.Host, config *api.ClusterConfig) error { log.Infof("%s: uninstalling container runtime", h) - leader := config.Spec.SwarmLeader() - - if err := mcr.DrainNode(leader, h); err != nil { - return fmt.Errorf("%s: drain node: %w", h, err) - } - uVolumeCmd := h.Configurer.DockerCommandf("volume prune -f") log.Infof("%s: unmounted dangling volumes", h)