Skip to content

Commit

Permalink
Merge pull request #23985 from Luap99/wait-hang
Browse files Browse the repository at this point in the history
wait: fix handling of multiple conditions with exited
  • Loading branch information
openshift-merge-bot[bot] authored Sep 18, 2024
2 parents 62c1016 + aa10892 commit f580ae0
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 17 deletions.
30 changes: 17 additions & 13 deletions libpod/container_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"io"
"net/http"
"os"
"sync"
"time"

"github.com/containers/common/pkg/resize"
Expand Down Expand Up @@ -596,7 +595,7 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)

// we cannot wait locked as we would hold the lock forever, so we unlock and then lock again
c.lock.Unlock()
err := waitForConmonExit(conmonPID, conmonPidFd, pollInterval)
err := waitForConmonExit(ctx, conmonPID, conmonPidFd, pollInterval)
c.lock.Lock()
if err != nil {
return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err)
Expand All @@ -619,15 +618,24 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
return c.runtime.state.GetContainerExitCode(id)
}

func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) error {
func waitForConmonExit(ctx context.Context, conmonPID, conmonPidFd int, pollInterval time.Duration) error {
if conmonPidFd > -1 {
for {
fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
if _, err := unix.Poll(fds, -1); err != nil {
if n, err := unix.Poll(fds, int(pollInterval.Milliseconds())); err != nil {
if err == unix.EINTR {
continue
}
return err
} else if n == 0 {
// n == 0 means timeout
select {
case <-ctx.Done():
return define.ErrCanceled
default:
// context not done, wait again
continue
}
}
return nil
}
Expand All @@ -640,7 +648,11 @@ func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) e
}
return err
}
time.Sleep(pollInterval)
select {
case <-ctx.Done():
return define.ErrCanceled
case <-time.After(pollInterval):
}
}
return nil
}
Expand Down Expand Up @@ -695,22 +707,15 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
}
}

var wg sync.WaitGroup

if waitForExit {
wg.Add(1)
go func() {
defer wg.Done()

code, err := c.WaitForExit(ctx, waitTimeout)
trySend(code, err)
}()
}

if len(wantedStates) > 0 || len(wantedHealthStates) > 0 {
wg.Add(1)
go func() {
defer wg.Done()
stoppedCount := 0
for {
if len(wantedStates) > 0 {
Expand Down Expand Up @@ -780,7 +785,6 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
case <-ctx.Done():
result = waitResult{-1, define.ErrCanceled}
}
wg.Wait()
return result.code, result.err
}

Expand Down
15 changes: 15 additions & 0 deletions test/e2e/wait_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,19 @@ var _ = Describe("Podman wait", func() {
Expect(session).Should(ExitCleanly())
Expect(session.OutputToStringArray()).To(Equal([]string{"0", "0", "0"}))
})

It("podman wait on multiple conditions", func() {
session := podmanTest.Podman([]string{"run", "-d", ALPINE, "sleep", "100"})
session.Wait(20)
Expect(session).Should(ExitCleanly())
cid := session.OutputToString()

// condition should return once nay of the condition is met not all of them,
// as the container is running this should return immediately
// https://github.com/containers/podman-py/issues/425
session = podmanTest.Podman([]string{"wait", "--condition", "running,exited", cid})
session.Wait(20)
Expect(session).Should(ExitCleanly())
Expect(session.OutputToString()).To(Equal("-1"))
})
})
4 changes: 0 additions & 4 deletions test/system/035-logs.bats
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,6 @@ function _log_test_restarted() {
fi
cname="c-ltr-$(safename)"
run_podman run --log-driver=$driver ${events_backend} --name $cname $IMAGE sh -c 'start=0; if test -s log; then start=`tail -n 1 log`; fi; seq `expr $start + 1` `expr $start + 10` | tee -a log'
# FIXME: #9597
# run/start is flaking for remote so let's wait for the container condition
# to stop wasting energy until the root cause gets fixed.
run_podman container wait --condition=exited --condition=stopped $cname
run_podman ${events_backend} start -a $cname
logfile=$(mktemp -p ${PODMAN_TMPDIR} logfileXXXXXXXX)
$PODMAN $_PODMAN_TEST_OPTS ${events_backend} logs -f $cname > $logfile
Expand Down

1 comment on commit f580ae0

@packit-as-a-service
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

podman-next COPR build failed. @containers/packit-build please check.

Please sign in to comment.