Skip to content

Commit

Permalink
Stop swarm services by scaling down and up
Browse files Browse the repository at this point in the history
  • Loading branch information
m90 committed Jan 13, 2024
1 parent 64daf7b commit b551c90
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 36 deletions.
8 changes: 6 additions & 2 deletions cmd/backup/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,16 @@ func main() {
}()

s.must(s.withLabeledCommands(lifecyclePhaseArchive, func() error {
restartContainers, err := s.stopContainers()
if s.cli == nil {
return nil
}

restartContainersOrServices, err := s.stopContainersOrServices()
// The mechanism for restarting containers is not using hooks as it
// should happen as soon as possible (i.e. before uploading backups or
// similar).
defer func() {
s.must(restartContainers())
s.must(restartContainersOrServices())
}()
if err != nil {
return err
Expand Down
87 changes: 53 additions & 34 deletions cmd/backup/script.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
"github.com/docker/docker/api/types"
ctr "github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/client"
"github.com/leekchan/timeutil"
"github.com/offen/envconfig"
Expand Down Expand Up @@ -318,14 +317,63 @@ func newScript() (*script, error) {
return s, nil
}

// stopContainers stops all Docker containers that are marked as to being
// stopped during the backup and returns a function that can be called to
// restart everything that has been stopped.
func (s *script) stopContainers() (func() error, error) {
// stopContainersOrServices stops everything that is marked to be stopped
// during the backup and returns a function that can be called to restart
// whatever has been stopped.
//
// When no Docker client is configured it does nothing and returns noop.
// Otherwise it asks the daemon for its info and dispatches on the Swarm
// state of the local node: any state other than "inactive" is handled by
// stopServices, everything else by stopContainers.
func (s *script) stopContainersOrServices() (func() error, error) {
	if s.cli == nil {
		return noop, nil
	}

	dockerInfo, err := s.cli.Info(context.Background())
	if err != nil {
		// Prefix the error with this function's name, matching the convention
		// used by stopServices and stopContainers.
		return noop, fmt.Errorf("stopContainersOrServices: error getting docker info: %w", err)
	}

	if dockerInfo.Swarm.LocalNodeState != "inactive" {
		return s.stopServices()
	}
	return s.stopContainers()
}

// stopServices scales every Swarm service carrying the label
// `docker-volume-backup.stop-during-backup=<BackupStopContainerLabel>` down
// to zero replicas and returns a function that restores the replica setting
// each service had before it was scaled down.
//
// Only replicated services can be scaled. If any matching service has no
// replicated mode configured, an error is returned before any service is
// touched.
//
// If scaling down fails part-way, the returned function is still able to
// restore the services that were already scaled down, so callers should
// invoke it even when the error is non-nil.
//
// NOTE(review): this does not wait for the services' tasks to terminate after
// the update has been submitted — confirm whether callers rely on that.
func (s *script) stopServices() (func() error, error) {
	serviceLabel := fmt.Sprintf(
		"docker-volume-backup.stop-during-backup=%s",
		s.c.BackupStopContainerLabel,
	)
	matchingServices, err := s.cli.ServiceList(context.Background(), types.ServiceListOptions{
		Filters: filters.NewArgs(filters.KeyValuePair{
			Key:   "label",
			Value: serviceLabel,
		}),
	})
	if err != nil {
		return noop, fmt.Errorf("stopServices: error querying services: %w", err)
	}

	// Validate up front so that nothing is scaled down when one of the
	// services cannot be handled at all.
	for _, service := range matchingServices {
		if service.Spec.Mode.Replicated == nil {
			return noop, fmt.Errorf(
				"stopServices: service %s is not running in replicated mode and cannot be scaled down",
				service.Spec.Name,
			)
		}
	}

	// scaledService remembers what is needed to undo a scale down.
	type scaledService struct {
		id       string
		replicas *uint64
	}
	var scaledDown []scaledService

	// scaleUp re-reads the service before updating it: the version obtained
	// from ServiceList is outdated once the scale down update has been applied.
	scaleUp := func(scaled scaledService) error {
		service, _, err := s.cli.ServiceInspectWithRaw(
			context.Background(), scaled.id, types.ServiceInspectOptions{},
		)
		if err != nil {
			return fmt.Errorf("error inspecting service %s: %w", scaled.id, err)
		}
		if service.Spec.Mode.Replicated == nil {
			return fmt.Errorf("service %s is no longer running in replicated mode", scaled.id)
		}
		service.Spec.Mode.Replicated.Replicas = scaled.replicas
		if _, err := s.cli.ServiceUpdate(
			context.Background(), service.ID, service.Version, service.Spec, types.ServiceUpdateOptions{},
		); err != nil {
			return fmt.Errorf("error updating service %s: %w", scaled.id, err)
		}
		return nil
	}

	restore := func() error {
		// Keep going after a failure so that a single broken service does not
		// leave all remaining services scaled down.
		var firstErr error
		failed := 0
		for _, scaled := range scaledDown {
			if err := scaleUp(scaled); err != nil {
				failed++
				if firstErr == nil {
					firstErr = err
				}
			}
		}
		if firstErr != nil {
			return fmt.Errorf(
				"stopServices: error scaling up services: %d service(s) failed, first error: %w",
				failed, firstErr,
			)
		}
		return nil
	}

	for _, service := range matchingServices {
		// Remember the original setting before it is overwritten; the spec's
		// replicated mode is shared by pointer with the listed service.
		previousReplicas := service.Spec.Mode.Replicated.Replicas
		var zero uint64
		service.Spec.Mode.Replicated.Replicas = &zero
		if _, err := s.cli.ServiceUpdate(
			context.Background(), service.ID, service.Version, service.Spec, types.ServiceUpdateOptions{},
		); err != nil {
			return restore, fmt.Errorf("stopServices: error scaling down services: %w", err)
		}
		scaledDown = append(scaledDown, scaledService{id: service.ID, replicas: previousReplicas})
	}

	return restore, nil
}

// stopContainers stops all Docker containers or services that are marked to be
// stopped during the backup and returns a function that can be called to
// restart everything that has been stopped.
func (s *script) stopContainers() (func() error, error) {
allContainers, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{})
if err != nil {
return noop, fmt.Errorf("stopContainers: error querying for containers: %w", err)
Expand Down Expand Up @@ -385,42 +433,13 @@ func (s *script) stopContainers() (func() error, error) {
}

return func() error {
servicesRequiringUpdate := map[string]struct{}{}

var restartErrors []error
for _, container := range stoppedContainers {
if swarmServiceName, ok := container.Labels["com.docker.swarm.service.name"]; ok {
servicesRequiringUpdate[swarmServiceName] = struct{}{}
continue
}
if err := s.cli.ContainerStart(context.Background(), container.ID, types.ContainerStartOptions{}); err != nil {
restartErrors = append(restartErrors, err)
}
}

if len(servicesRequiringUpdate) != 0 {
services, _ := s.cli.ServiceList(context.Background(), types.ServiceListOptions{})
for serviceName := range servicesRequiringUpdate {
var serviceMatch swarm.Service
for _, service := range services {
if service.Spec.Name == serviceName {
serviceMatch = service
break
}
}
if serviceMatch.ID == "" {
return fmt.Errorf("stopContainers: couldn't find service with name %s", serviceName)
}
serviceMatch.Spec.TaskTemplate.ForceUpdate += 1
if _, err := s.cli.ServiceUpdate(
context.Background(), serviceMatch.ID,
serviceMatch.Version, serviceMatch.Spec, types.ServiceUpdateOptions{},
); err != nil {
restartErrors = append(restartErrors, err)
}
}
}

if len(restartErrors) != 0 {
return fmt.Errorf(
"stopContainers: %d error(s) restarting containers and services: %w",
Expand Down

0 comments on commit b551c90

Please sign in to comment.