Skip to content

Commit 619366d

Browse files
Merge pull request #16088 from alexlarsson/wait-on-conmon-without-sleep
Avoid unnecessary timeout of 250msec when waiting on container shutdown
2 parents 406616d + c34b5be commit 619366d

File tree

1 file changed

+29
-1
lines changed

1 file changed

+29
-1
lines changed

libpod/container_api.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/containers/storage/pkg/archive"
1818
spec "github.com/opencontainers/runtime-spec/specs-go"
1919
"github.com/sirupsen/logrus"
20+
"golang.org/x/sys/unix"
2021
)
2122

2223
// Init creates a container in the OCI runtime, moving a container from
@@ -515,6 +516,22 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
515516
var conmonTimer time.Timer
516517
conmonTimerSet := false
517518

519+
conmonPidFd := -1
520+
conmonPidFdTriggered := false
521+
522+
if c.state.ConmonPID != 0 {
523+
// Track lifetime of conmon precisely using pidfd_open + poll.
524+
// There are many cases for this to fail, for instance conmon is dead
525+
// or pidfd_open is not supported (pre linux 5.3), so fall back to the
526+
// traditional loop with poll + sleep
527+
if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil {
528+
conmonPidFd = fd
529+
defer unix.Close(conmonPidFd)
530+
} else if err != unix.ENOSYS && err != unix.ESRCH {
531+
logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err)
532+
}
533+
}
534+
518535
getExitCode := func() (bool, int32, error) {
519536
containerRemoved := false
520537
if !c.batched {
@@ -582,7 +599,18 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
582599
case <-ctx.Done():
583600
return -1, fmt.Errorf("waiting for exit code of container %s canceled", id)
584601
default:
585-
time.Sleep(pollInterval)
602+
if conmonPidFd != -1 && !conmonPidFdTriggered {
603+
// If possible (pidfd works), on the first cycle we block until conmon dies
604+
// or the poll interval elapses; after that we fall back to the old poll delay.
605+
// There is a deadlock in the cleanup code for "play kube" which causes
606+
// conmon to not exit, so unfortunately we have to use the poll interval
607+
// timeout here to avoid hanging.
608+
fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
609+
_, _ = unix.Poll(fds, int(pollInterval.Milliseconds()))
610+
conmonPidFdTriggered = true
611+
} else {
612+
time.Sleep(pollInterval)
613+
}
586614
}
587615
}
588616
}

0 commit comments

Comments
 (0)