Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gofer: open volumes from the initial userns #11118

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions runsc/cmd/gofer.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ import (
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/devices/tpuproxy/vfio"
"gvisor.dev/gvisor/pkg/unet"
"gvisor.dev/gvisor/pkg/urpc"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/cmd/util"
"gvisor.dev/gvisor/runsc/config"
"gvisor.dev/gvisor/runsc/container"
"gvisor.dev/gvisor/runsc/flag"
"gvisor.dev/gvisor/runsc/fsgofer"
"gvisor.dev/gvisor/runsc/fsgofer/filter"
Expand Down Expand Up @@ -91,6 +93,7 @@ type Gofer struct {

specFD int
mountsFD int
rpcFD int
profileFDs profile.FDArgs
syncFDs goferSyncFDs
stopProfiling func()
Expand Down Expand Up @@ -123,6 +126,7 @@ func (g *Gofer) SetFlags(f *flag.FlagSet) {
f.IntVar(&g.devIoFD, "dev-io-fd", -1, "optional FD to connect /dev gofer server")
f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec")
f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).")
f.IntVar(&g.rpcFD, "rpc-fd", -1, "RPC file descriptor.")

// Add synchronization FD flags.
g.syncFDs.setFlags(f)
Expand Down Expand Up @@ -153,15 +157,24 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcomm
g.syncFDs.syncNVProxy()
g.syncFDs.syncUsernsForRootless()

rpcClntSock, err := unet.NewSocket(g.rpcFD)
if err != nil {
util.Fatalf("creating rpc socket: %v", err)
}

rpcClnt := urpc.NewClient(rpcClntSock)
defer rpcClnt.Close()

if g.setUpRoot {
if err := g.setupRootFS(spec, conf); err != nil {
if err := g.setupRootFS(spec, conf, rpcClnt); err != nil {
util.Fatalf("Error setting up root FS: %v", err)
}
if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
cleanupUnmounter := g.syncFDs.spawnProcUnmounter()
defer cleanupUnmounter()
}
}
rpcClnt.Close()
if g.applyCaps {
overrides := g.syncFDs.flags()
overrides["apply-caps"] = "false"
Expand Down Expand Up @@ -369,7 +382,7 @@ func (g *Gofer) writeMounts(mounts []specs.Mount) error {
// It is protected by selinux rules.
const procFDBindMount = "/proc/fs"

func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config) error {
func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config, rpcClnt *urpc.Client) error {
// Convert all shared mounts into slaves to be sure that nothing will be
// propagated outside of our namespace.
procPath := "/proc"
Expand Down Expand Up @@ -437,7 +450,7 @@ func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config) error {
}

// Replace the current spec, with the clean spec with symlinks resolved.
if err := g.setupMounts(conf, spec.Mounts, root, procPath); err != nil {
if err := g.setupMounts(conf, spec.Mounts, root, procPath, rpcClnt); err != nil {
util.Fatalf("error setting up FS: %v", err)
}

Expand Down Expand Up @@ -487,7 +500,7 @@ func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config) error {
// setupMounts bind mounts all mounts specified in the spec in their correct
// location inside root. It will resolve relative paths and symlinks. It also
// creates directories as needed.
func (g *Gofer) setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath string) error {
func (g *Gofer) setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath string, rpcClnt *urpc.Client) error {
mountIdx := 1 // First index is for rootfs.
for _, m := range mounts {
if !specutils.IsGoferMount(m) {
Expand All @@ -511,7 +524,19 @@ func (g *Gofer) setupMounts(conf *config.Config, mounts []specs.Mount, root, pro
}

log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags)
if err := specutils.SafeSetupAndMount(m.Source, dst, m.Type, flags, procPath); err != nil {
src := m.Source
var fd *os.File
if err := unix.Access(src, unix.R_OK); err != nil {
var res container.OpenMountResult
if err := rpcClnt.Call("goferRPC.OpenMount", &m, &res); err != nil {
return fmt.Errorf("opening %s: %w", m.Source, err)
}
fd = res.Files[0]
src = fmt.Sprintf("%s/self/fd/%d", procPath, fd.Fd())
}
err = specutils.SafeSetupAndMount(src, dst, m.Type, flags, procPath)
fd.Close()
if err != nil {
return fmt.Errorf("mounting %+v: %v", m, err)
}

Expand Down
3 changes: 3 additions & 0 deletions runsc/container/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ go_library(
name = "container",
srcs = [
"container.go",
"gofer_rpc.go",
"hook.go",
"state_file.go",
"status.go",
Expand All @@ -28,6 +29,8 @@ go_library(
"//pkg/sighandling",
"//pkg/state/statefile",
"//pkg/sync",
"//pkg/unet",
"//pkg/urpc",
"//runsc/boot",
"//runsc/cgroup",
"//runsc/config",
Expand Down
22 changes: 22 additions & 0 deletions runsc/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sighandling"
"gvisor.dev/gvisor/pkg/state/statefile"
"gvisor.dev/gvisor/pkg/unet"
"gvisor.dev/gvisor/pkg/urpc"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/cgroup"
"gvisor.dev/gvisor/runsc/config"
Expand Down Expand Up @@ -1272,6 +1274,25 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu
}
donations.DonateAndClose("mounts-fd", mountsGofer)

rpcServ, rpcClnt, err := unet.SocketPair(false)
if err != nil {
return nil, nil, nil, fmt.Errorf("failed to create an rpc socket pair: %w", err)
}
rpcClntFD, _ := rpcClnt.Release()
donations.DonateAndClose("rpc-fd", os.NewFile(uintptr(rpcClntFD), "gofer-rpc"))
rpcPidCh := make(chan int, 1)
defer close(rpcPidCh)
go func() {
pid := <-rpcPidCh
if pid == 0 {
rpcServ.Close()
return
}
s := urpc.NewServer()
s.Register(&goferRPC{goferPID: pid})
s.StartHandling(rpcServ)
}()

// Count the number of mounts that needs an IO file.
ioFileCount := 0
for _, cfg := range c.GoferMountConfs {
Expand Down Expand Up @@ -1370,6 +1391,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu
log.Infof("Gofer started, PID: %d", cmd.Process.Pid)
c.GoferPid = cmd.Process.Pid
c.goferIsChild = true
rpcPidCh <- cmd.Process.Pid

// Set up and synchronize rootless mode userns mappings.
if rootlessEUID {
Expand Down
103 changes: 103 additions & 0 deletions runsc/container/gofer_rpc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright 2024 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package container

import (
"fmt"
"os"
"runtime"
"sync"

specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/urpc"
)

// openMountRequest is a single unit of work handed from OpenMount to the
// dedicated open-mount goroutine.
type openMountRequest struct {
	// mount is the mount whose Source path is opened.
	mount *specs.Mount
	// result receives the opened file on success.
	result *OpenMountResult
	// done carries the error (if any) back to the requester. It is closed
	// once the request has been handled; a receive that yields nil means
	// the open succeeded.
	done chan error
}

// goferRPC implements the RPC methods that are served to a gofer process.
type goferRPC struct {
	// mu is held for the whole duration of an OpenMount call, so requests
	// are processed one at a time.
	mu sync.Mutex
	// openMountRequests feeds the open-mount goroutine. It is nil until
	// the first OpenMount call creates it.
	openMountRequests chan *openMountRequest
	// goferPID is the PID of the process whose mount namespace
	// (/proc/<pid>/ns/mnt) the open-mount goroutine joins.
	goferPID int
}

// OpenMountResult is the result of the goferRPC.OpenMount call. On success
// the embedded payload's Files holds exactly one file: the opened mount
// source.
type OpenMountResult struct {
	urpc.FilePayload
}

// handleRequest serves one open-mount request: it opens the mount source as
// an O_PATH|O_CLOEXEC file and stores it as the only entry of the result's
// Files. On failure the error is sent on req.done instead. In both cases
// req.done is closed before returning, so the requester observes a nil error
// on success.
func (rpc *goferRPC) handleRequest(req *openMountRequest) {
	defer close(req.done)

	src, openErr := os.OpenFile(req.mount.Source, unix.O_PATH|unix.O_CLOEXEC, 0)
	if openErr == nil {
		req.result.Files = []*os.File{src}
		return
	}
	req.done <- openErr
}

// openMountLoop prepares the calling thread and then serves open-mount
// requests until the request channel is closed.
//
// Preparation consists of unsharing CLONE_FS for the current thread and
// joining the mount namespace of the process identified by rpc.goferPID.
// Any failure during preparation is returned wrapped; nil is returned only
// after the request channel has been closed and drained.
func (rpc *goferRPC) openMountLoop() error {
	err := unix.Unshare(unix.CLONE_FS)
	if err != nil {
		return fmt.Errorf("open mount thread: %w", err)
	}

	mntnsPath := fmt.Sprintf("/proc/%d/ns/mnt", rpc.goferPID)
	mntns, err := os.Open(mntnsPath)
	if err != nil {
		return fmt.Errorf("open mount thread: open container mntns: %w", err)
	}
	defer mntns.Close()

	err = unix.Setns(int(mntns.Fd()), unix.CLONE_NEWNS)
	if err != nil {
		return fmt.Errorf("open mount thread: join container mntns: %w", err)
	}

	requests := rpc.openMountRequests
	for {
		req, ok := <-requests
		if !ok {
			return nil
		}
		rpc.handleRequest(req)
	}
}

// OpenMount is a helper RPC call that a gofer process uses when it can't
// open/create a mount itself. The mount source is opened by a dedicated
// goroutine and, on success, returned to the caller through res.
//
// The first call lazily creates the request channel and starts that
// goroutine. Calls are serialized by rpc.mu.
func (rpc *goferRPC) OpenMount(m *specs.Mount, res *OpenMountResult) error {
	rpc.mu.Lock()
	defer rpc.mu.Unlock()

	if rpc.openMountRequests == nil {
		rpc.openMountRequests = make(chan *openMountRequest)

		// worker pins itself to one OS thread and keeps it forever. It
		// never exits, because child processes can set PDEATHSIG. The
		// thread can't serve other goroutines, because it unshares
		// CLONE_FS.
		worker := func() {
			runtime.LockOSThread()
			if loopErr := rpc.openMountLoop(); loopErr != nil {
				// Setup failed: answer every request with the
				// same error.
				for pending := range rpc.openMountRequests {
					pending.done <- loopErr
				}
			}
			panic("unreachable")
		}
		go worker()
	}

	req := &openMountRequest{
		mount:  m,
		result: res,
		done:   make(chan error),
	}
	rpc.openMountRequests <- req
	return <-req.done
}
Loading