Skip to content

Commit

Permalink
cmd/snap: inhibit snap run during snap removal (canonical#14126)
Browse files Browse the repository at this point in the history
* cmd/snaplock/runinhibit: add remove inhibition hint type

Signed-off-by: Zeyad Gouda <[email protected]>

* cmd/snap: inhibit snap run during snap removal

For context, This is needed in preparation for the new
"snap remove" --terminate flag. New snap app runs must
be inhibited before we start killing the running apps
to avoid having new runs in the window between killing
currently running and unlinking the snap binaries.

Signed-off-by: Zeyad Gouda <[email protected]>

* cmd/snap: refactor error handling (thanks @bboozzoo)

Signed-off-by: Zeyad Gouda <[email protected]>

* cmd/snap: move RAA check inside waitWhileInhibited

Signed-off-by: Zeyad Gouda <[email protected]>

* cmd/snap: move remove inhibition check inside waitWhileInhibited

Signed-off-by: Zeyad Gouda <[email protected]>

* cmd/snap: extend inhibition race condition to check remove hint

Signed-off-by: Zeyad Gouda <[email protected]>

* cmd/snap: refactor and improve error handling (thanks @olivercalder)

Signed-off-by: Zeyad Gouda <[email protected]>

* cmd/snap: add clarification comment (thanks @pedronis)

Signed-off-by: Zeyad Gouda <[email protected]>

---------

Signed-off-by: Zeyad Gouda <[email protected]>
  • Loading branch information
ZeyadYasser authored Jul 25, 2024
1 parent e05d878 commit 3a71035
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 43 deletions.
67 changes: 45 additions & 22 deletions cmd/snap/cmd_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,22 +489,40 @@ func (x *cmdRun) straceOpts() (opts []string, raw bool, err error) {
return opts, raw, nil
}

// isSnapRefreshConflictDetected detects if snap refreshed was started while not
// holding the inhibition hint file lock.
// checkSnapRunInhibitionConflict detects if snap refreshed/removed was started
// while not holding the inhibition hint file lock.
//
// For context, on snap first install, the inhibition hint lock file is not created
// so we cannot hold it. It is created after the first refresh. This allows for a
// window where we don't hold the lock before the tracking cgroup is created where
// a snap refresh could start.
func isSnapRefreshConflictDetected(app *snap.AppInfo, hintFlock *osutil.FileLock) bool {
if !features.RefreshAppAwareness.IsEnabled() || app.IsService() || hintFlock != nil {
// so we cannot hold it. It is created after the first refresh/remove. This allows
// for a window where we don't hold the lock before the tracking cgroup is created
// where a snap refresh/removal could start.
func checkSnapRunInhibitionConflict(app *snap.AppInfo) error {
// Remove hint check takes precedence because we want to exit early
snapName := app.Snap.InstanceName()
hint, _, err := runinhibit.IsLocked(snapName)
if err != nil {
return err
}
if hint == runinhibit.HintInhibitedForRemove {
return fmt.Errorf(i18n.G("cannot run %q, snap is being removed"), snapName)
}

if !features.RefreshAppAwareness.IsEnabled() || app.IsService() {
// Skip check
return false
return nil
}

// We started without a hint lock file, if it exists now this means that:
// - There is an ongoing refresh
// - Or, A refresh was started and finished
// Let's retry to avoid either existing with an error due to missing current
// symlink or worse starting with the wrong revision.
if osutil.FileExists(runinhibit.HintFile(snapName)) {
// errSnapRefreshConflict should trigger a retry
return errSnapRefreshConflict
}

// We started without a hint lock file, if it exists now this means that a
// refresh was started.
return osutil.FileExists(runinhibit.HintFile(app.Snap.InstanceName()))
return nil
}

func (x *cmdRun) snapRunApp(snapApp string, args []string) error {
Expand All @@ -522,7 +540,13 @@ func (x *cmdRun) snapRunApp(snapApp string, args []string) error {
return fmt.Errorf("race condition detected, snap-run can only retry once")
}

info, app, hintFlock, err := maybeWaitWhileInhibited(context.Background(), x.client, snapName, appName)
info, app, hintFlock, err := waitWhileInhibited(context.Background(), x.client, snapName, appName)
if errors.Is(err, errOngoingSnapRefresh) {
return fmt.Errorf(i18n.G("cannot run %q, snap is being refreshed"), snapName)
}
if errors.Is(err, errInhibitedForRemove) {
return fmt.Errorf(i18n.G("cannot run %q, snap is being removed"), snapName)
}
if errors.Is(err, errSnapRefreshConflict) {
// Possible race condition detected, let's retry.

Expand All @@ -537,10 +561,12 @@ func (x *cmdRun) snapRunApp(snapApp string, args []string) error {
return err
}

closeFlockOrRetry := func() error {
// This needs to run inside the transient cgroup created for a snap
// such that any pending refresh of the snap will get blocked after
// we release the lock.
closeFlockOrCheckConflict := func() error {
// Unlocking the hint file needs to run inside the transient cgroup
// created such that:
// - For refresh, snap refresh will get blocked after we release the lock.
// - For removal, created transient cgroup can be detected by the remove change
// and process will be killed after we release the lock.
if hintFlock != nil {
// It is okay to release the lock here (beforeExec) because snapd unless forced
// will not inhibit the snap and do a refresh anymore because it detects app
Expand All @@ -551,16 +577,13 @@ func (x *cmdRun) snapRunApp(snapApp string, args []string) error {
hintFlock.Close()
return nil
}
// hintFlock might be nil if the hint file did not exist
if isSnapRefreshConflictDetected(app, hintFlock) {
return errSnapRefreshConflict
}
return nil

return checkSnapRunInhibitionConflict(app)
}

runner := newAppRunnable(info, app)

err = x.runSnapConfine(info, runner, closeFlockOrRetry, args)
err = x.runSnapConfine(info, runner, closeFlockOrCheckConflict, args)
if errors.Is(err, errSnapRefreshConflict) {
// Possible race condition detected, let's retry.
//
Expand Down
121 changes: 113 additions & 8 deletions cmd/snap/cmd_run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ func checkHintFileLocked(c *check.C, snapName string) {
flock.Close()
}

func (s *RunSuite) TestSnapRunAppRunsChecksInhibitionLock(c *check.C) {
func (s *RunSuite) TestSnapRunAppRunsChecksRefreshInhibitionLock(c *check.C) {
defer mockSnapConfine(dirs.DistroLibExecDir)()

// mock installed snap
Expand Down Expand Up @@ -319,7 +319,30 @@ func (s *RunSuite) TestSnapRunAppRunsChecksInhibitionLock(c *check.C) {
checkHintFileNotLocked(c, "snapname")
}

func (s *RunSuite) TestSnapRunAppRefreshAppAwarenessUnsetSkipsInhibitionLockCheck(c *check.C) {
func (s *RunSuite) testSnapRunAppRunsChecksRemoveInhibitionLock(c *check.C, svc bool) {
inhibitInfo := runinhibit.InhibitInfo{Previous: snap.R(11)}
c.Assert(runinhibit.LockWithHint("snapname", runinhibit.HintInhibitedForRemove, inhibitInfo), check.IsNil)

cmd := "snapname.app"
if svc {
cmd = "snapname.svc"
}

_, err := snaprun.Parser(snaprun.Client()).ParseArgs([]string{"run", "--", cmd, "--arg1"})
c.Assert(err, check.ErrorMatches, `cannot run "snapname", snap is being removed`)
}

func (s *RunSuite) TestSnapRunAppRunsChecksRemoveInhibitionLock(c *check.C) {
const svc = false
s.testSnapRunAppRunsChecksRemoveInhibitionLock(c, svc)
}

func (s *RunSuite) TestSnapRunAppRunsChecksRemoveInhibitionLockService(c *check.C) {
const svc = true
s.testSnapRunAppRunsChecksRemoveInhibitionLock(c, svc)
}

func (s *RunSuite) TestSnapRunAppRefreshAppAwarenessUnsetSkipsInhibitionLockWait(c *check.C) {
defer mockSnapConfine(dirs.DistroLibExecDir)()

// mock installed snap
Expand All @@ -337,13 +360,8 @@ func (s *RunSuite) TestSnapRunAppRefreshAppAwarenessUnsetSkipsInhibitionLockChec
// unset refresh-app-awareness flag
c.Assert(os.RemoveAll(features.RefreshAppAwareness.ControlFile()), check.IsNil)

restore := snaprun.MockWaitWhileInhibited(func(ctx context.Context, snapName string, notInhibited func(ctx context.Context) error, inhibited func(ctx context.Context, hint runinhibit.Hint, inhibitInfo *runinhibit.InhibitInfo) (cont bool, err error), interval time.Duration) (flock *osutil.FileLock, retErr error) {
return nil, fmt.Errorf("runinhibit.WaitWhileInhibited should not have been called")
})
defer restore()

_, err := snaprun.Parser(snaprun.Client()).ParseArgs([]string{"run", "--", "snapname.app", "--arg1"})
c.Assert(err, check.IsNil)
c.Assert(err, check.ErrorMatches, `cannot run "snapname", snap is being refreshed`)
}

func (s *RunSuite) TestSnapRunAppNewRevisionAfterInhibition(c *check.C) {
Expand Down Expand Up @@ -806,6 +824,93 @@ func (s *RunSuite) TestSnapRunAppRetryNoInhibitHintFileThenOngoingRefreshService
s.testSnapRunAppRetryNoInhibitHintFileThenOngoingRefresh(c, svc)
}

func (s *RunSuite) testSnapRunAppRetryNoInhibitHintFileThenOngoingRemove(c *check.C, svc bool) {
_, restore := logger.MockLogger()
defer restore()

defer mockSnapConfine(dirs.DistroLibExecDir)()

// mock installed snap
snaptest.MockSnapCurrent(c, string(mockYaml), &snap.SideInfo{Revision: snap.R("x2")})

c.Assert(os.MkdirAll(dirs.FeaturesDir, 0755), check.IsNil)
c.Assert(os.WriteFile(features.RefreshAppAwareness.ControlFile(), []byte(nil), 0644), check.IsNil)

var waitWhileInhibitedCalled int
restore = snaprun.MockWaitWhileInhibited(func(ctx context.Context, snapName string, notInhibited func(ctx context.Context) error, inhibited func(ctx context.Context, hint runinhibit.Hint, inhibitInfo *runinhibit.InhibitInfo) (cont bool, err error), interval time.Duration) (flock *osutil.FileLock, retErr error) {
waitWhileInhibitedCalled++

c.Check(snapName, check.Equals, "snapname")
err := notInhibited(ctx)
c.Assert(err, check.IsNil)

// mock snap inhibited to trigger race condition detection
// i.e. we started without a hint lock file (snap on first install)
// then a remove started which created the hint lock file.
c.Assert(runinhibit.LockWithHint("snapname", runinhibit.HintInhibitedForRemove, runinhibit.InhibitInfo{Previous: snap.R("x2")}), check.IsNil)

// nil FileLock means no inhibit file exists
return nil, nil
})
defer restore()

var createCgroupCalled int
restore = snaprun.MockCreateTransientScopeForTracking(func(securityTag string, opts *cgroup.TrackingOptions) error {
createCgroupCalled++
return nil
})
defer restore()

var confirmCgroupCalled int
confirmCgroup := func(securityTag string) error {
confirmCgroupCalled++
if createCgroupCalled >= 1 || svc {
// tracking cgroup was already created
return nil
}
// no tracking cgroup exists for current process
return cgroup.ErrCannotTrackProcess
}

if svc {
restore = snaprun.MockConfirmSystemdServiceTracking(confirmCgroup)
} else {
restore = snaprun.MockConfirmSystemdAppTracking(confirmCgroup)
}
defer restore()

cmd := "snapname.app"
if svc {
cmd = "snapname.svc"
}

_, err := snaprun.Parser(snaprun.Client()).ParseArgs([]string{"run", "--debug-log", "--", cmd})
c.Assert(err, check.ErrorMatches, `cannot run "snapname", snap is being removed`)

// no retry, sinlge call
c.Check(waitWhileInhibitedCalled, check.Equals, 1)
c.Check(confirmCgroupCalled, check.Equals, 1)
if svc {
// service cgroup already created
c.Check(createCgroupCalled, check.Equals, 0)
} else {
c.Check(createCgroupCalled, check.Equals, 1)
}

// lock should be released
checkHintFileNotLocked(c, "snapname")
}

func (s *RunSuite) TestSnapRunAppRetryNoInhibitHintFileThenOngoingRemove(c *check.C) {
const svc = false
s.testSnapRunAppRetryNoInhibitHintFileThenOngoingRemove(c, svc)
}

func (s *RunSuite) TestSnapRunAppRetryNoInhibitHintFileThenOngoingRemoveService(c *check.C) {
const svc = true
s.testSnapRunAppRetryNoInhibitHintFileThenOngoingRemove(c, svc)
}

func (s *RunSuite) TestSnapRunAppRetryNoInhibitHintFileThenOngoingRefreshMissingCurrent(c *check.C) {
logbuf, restore := logger.MockLogger()
defer restore()
Expand Down
29 changes: 16 additions & 13 deletions cmd/snap/inhibit.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,13 @@ var runinhibitWaitWhileInhibited = runinhibit.WaitWhileInhibited
// which could alter the current snap revision.
var errSnapRefreshConflict = fmt.Errorf("snap refresh conflict detected")

// maybeWaitWhileInhibited is a wrapper for waitWhileInhibited that skips waiting
// if refresh-app-awareness flag is disabled.
func maybeWaitWhileInhibited(ctx context.Context, cli *client.Client, snapName string, appName string) (info *snap.Info, app *snap.AppInfo, hintFlock *osutil.FileLock, err error) {
// wait only if refresh-app-awareness flag is enabled
if features.RefreshAppAwareness.IsEnabled() {
return waitWhileInhibited(ctx, cli, snapName, appName)
}
// errInhibitedForRemove indicates that snap is inhibited from running because
// it is being removed.
var errInhibitedForRemove = fmt.Errorf("snap is being removed")

info, app, err = getInfoAndApp(snapName, appName, snap.R(0))
if err != nil {
return nil, nil, nil, err
}
return info, app, nil, nil
}
// errOngoingSnapRefresh indicates snap cannot run because it is being refreshed
// and refresh-app-awareness is not enabled.
var errOngoingSnapRefresh = fmt.Errorf("snap is being refreshed")

// waitWhileInhibited blocks until snap is not inhibited for refresh anymore and then
// returns a locked hint file lock along with the latest snap and app information.
Expand All @@ -82,6 +75,12 @@ func waitWhileInhibited(ctx context.Context, cli *client.Client, snapName string
return err
}
inhibited := func(ctx context.Context, hint runinhibit.Hint, inhibitInfo *runinhibit.InhibitInfo) (cont bool, err error) {
if hint == runinhibit.HintInhibitedForRemove {
return false, errInhibitedForRemove
}
if !features.RefreshAppAwareness.IsEnabled() {
return true, errOngoingSnapRefresh
}
if !notified {
flow = newInhibitionFlow(cli, snapName)
info, app, err = getInfoAndApp(snapName, appName, inhibitInfo.Previous)
Expand Down Expand Up @@ -123,6 +122,10 @@ func waitWhileInhibited(ctx context.Context, cli *client.Client, snapName string
}
}

if info == nil || app == nil {
return nil, nil, nil, fmt.Errorf("internal error: info and app cannot be nil")
}

return info, app, hintFlock, nil
}

Expand Down
Loading

0 comments on commit 3a71035

Please sign in to comment.