
Commit c34b5be

Avoid unnecessary timeout of 250msec when waiting on container shutdown
When you run "podman run foo" we attach to the container, which essentially blocks until the container process exits. When that happens, podman immediately calls Container.WaitForExit(), but at this point the exit value has not yet been written to the db by conmon. This means that we almost always hit the "check for exit state; sleep 250msec" loop in WaitForExit(), delaying the exit of podman run by 250 msec.

More recent kernels (>= 5.3) support the pidfd_open() syscall, which lets you open an fd representing a pid and then poll on it to wait until the process exits. We can use this to make the first sleep exactly as long as is needed for conmon to exit (if we know its pid). If for whatever reason there are still issues, we use the old sleep loop on later iterations.

This makes "time podman run fedora true" about 200 msec faster.

[NO NEW TESTS NEEDED]

Signed-off-by: Alexander Larsson <[email protected]>
1 parent 2062ab9
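For context, here is a minimal standalone sketch of the mechanism the commit relies on: pidfd_open() returns a pollable file descriptor for a pid, and poll() then blocks until that process exits or a timeout expires. This is not part of the commit; it assumes Linux >= 5.3 and the golang.org/x/sys/unix package, and the "sleep 1" child is only a placeholder so there is a pid to wait on.

	// pidfd_wait.go: minimal sketch, assuming Linux >= 5.3 and golang.org/x/sys/unix.
	package main

	import (
		"fmt"
		"os/exec"

		"golang.org/x/sys/unix"
	)

	func main() {
		// Start a short-lived child so there is a pid to wait on
		// (a stand-in for conmon).
		cmd := exec.Command("sleep", "1")
		if err := cmd.Start(); err != nil {
			panic(err)
		}

		// pidfd_open gives us a file descriptor that becomes readable
		// when the process exits.
		fd, err := unix.PidfdOpen(cmd.Process.Pid, 0)
		if err != nil {
			// ENOSYS (kernel < 5.3) or ESRCH (process already gone):
			// a real caller would fall back to a sleep/poll loop instead.
			fmt.Println("pidfd_open not usable:", err)
			_ = cmd.Wait()
			return
		}
		defer unix.Close(fd)

		// Block until the child exits, but never longer than 5 seconds,
		// mirroring how the commit bounds the wait by the poll interval.
		fds := []unix.PollFd{{Fd: int32(fd), Events: unix.POLLIN}}
		n, perr := unix.Poll(fds, 5000)
		fmt.Printf("poll returned n=%d err=%v revents=%#x\n", n, perr, fds[0].Revents)

		_ = cmd.Wait()
	}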

1 file changed

libpod/container_api.go

Lines changed: 29 additions & 1 deletion
@@ -17,6 +17,7 @@ import (
 	"github.com/containers/storage/pkg/archive"
 	spec "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
 )
 
 // Init creates a container in the OCI runtime, moving a container from
@@ -515,6 +516,22 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
 	var conmonTimer time.Timer
 	conmonTimerSet := false
 
+	conmonPidFd := -1
+	conmonPidFdTriggered := false
+
+	if c.state.ConmonPID != 0 {
+		// Track the lifetime of conmon precisely using pidfd_open + poll.
+		// There are many ways this can fail, for instance conmon is dead
+		// or pidfd_open is not supported (pre Linux 5.3), so fall back to
+		// the traditional loop with poll + sleep.
+		if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil {
+			conmonPidFd = fd
+			defer unix.Close(conmonPidFd)
+		} else if err != unix.ENOSYS && err != unix.ESRCH {
+			logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err)
+		}
+	}
+
 	getExitCode := func() (bool, int32, error) {
 		containerRemoved := false
 		if !c.batched {
@@ -582,7 +599,18 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
 		case <-ctx.Done():
 			return -1, fmt.Errorf("waiting for exit code of container %s canceled", id)
 		default:
-			time.Sleep(pollInterval)
+			if conmonPidFd != -1 && !conmonPidFdTriggered {
+				// If possible (pidfd works), block on the first cycle until
+				// conmon dies; after that, fall back to the old poll delay.
+				// There is a deadlock in the cleanup code for "play kube" which
+				// causes conmon to not exit, so unfortunately we have to bound
+				// this wait with the poll interval to avoid hanging.
+				fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
+				_, _ = unix.Poll(fds, int(pollInterval.Milliseconds()))
+				conmonPidFdTriggered = true
+			} else {
+				time.Sleep(pollInterval)
+			}
 		}
 	}
 }
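To make the shape of the change easier to see outside the diff, here is a simplified, self-contained restatement of the wait loop above. It is a sketch, not the actual podman code: checkExited is a hypothetical stand-in for the real getExitCode check. The point is that the first iteration blocks on the conmon pidfd, but never longer than pollInterval (because of the "play kube" deadlock noted in the diff), and every later iteration degrades to the plain sleep.

	package wait

	import (
		"time"

		"golang.org/x/sys/unix"
	)

	// waitLoop sketches the loop from WaitForExit: conmonPidFd is the fd from
	// unix.PidfdOpen (or -1 if unavailable) and checkExited is a placeholder
	// for the real "is the exit code in the db yet?" check.
	func waitLoop(conmonPidFd int, pollInterval time.Duration, checkExited func() bool) {
		conmonPidFdTriggered := false
		for !checkExited() {
			if conmonPidFd != -1 && !conmonPidFdTriggered {
				// First pass: block until conmon exits, bounded by pollInterval.
				fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
				_, _ = unix.Poll(fds, int(pollInterval.Milliseconds()))
				conmonPidFdTriggered = true
			} else {
				// Later passes (or no pidfd): the original fixed sleep.
				time.Sleep(pollInterval)
			}
		}
	}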
