Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: move some c code to go #4309

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ type IDMap struct {
Size int64 `json:"size"`
}

// ToString renders the mapping as its three fields in decimal, separated by
// single spaces and in the order ContainerID, HostID, Size. The result has no
// trailing newline.
func (i IDMap) ToString() string {
	containerID, hostID, size := i.ContainerID, i.HostID, i.Size
	return fmt.Sprintf("%d %d %d", containerID, hostID, size)
}

// Seccomp represents syscall restrictions
// By default, only the native architecture of the kernel is allowed to be used
// for syscalls. Additional architectures can be added by specifying them in
Expand Down
110 changes: 24 additions & 86 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/execabs"
"golang.org/x/sys/unix"

"github.com/opencontainers/runc/libcontainer/cgroups"
Expand Down Expand Up @@ -593,6 +592,10 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
)
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.stage1SockChild)
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_STAGE1PIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
)
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.syncSockChild.File())
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_SYNCPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
Expand Down Expand Up @@ -666,14 +669,16 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm)
}

init := &initProcess{
cmd: cmd,
comm: comm,
manager: c.cgroupManager,
containerProcess: containerProcess{
cmd: cmd,
comm: comm,
manager: c.cgroupManager,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
container: c,
},
intelRdtManager: c.intelRdtManager,
config: c.newInitConfig(p),
container: c,
process: p,
bootstrapData: data,
}
c.initProcess = init
return init, nil
Expand All @@ -689,15 +694,18 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm
return nil, err
}
proc := &setnsProcess{
cmd: cmd,
containerProcess: containerProcess{
cmd: cmd,
comm: comm,
manager: c.cgroupManager,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
container: c,
},
cgroupPaths: state.CgroupPaths,
rootlessCgroups: c.config.RootlessCgroups,
intelRdtPath: state.IntelRdtPath,
comm: comm,
manager: c.cgroupManager,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
initProcessPid: state.InitProcessPid,
}
if len(p.SubCgroupPaths) > 0 {
Expand Down Expand Up @@ -1047,17 +1055,6 @@ func (c *Container) orderNamespacePaths(namespaces map[configs.NamespaceType]str
return paths, nil
}

// encodeIDMapping serializes idMap as text: one line per entry of the form
// "<ContainerID> <HostID> <Size>", each terminated by a newline. It returns
// the accumulated bytes, or the first write error encountered.
func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
	var out bytes.Buffer
	for idx := range idMap {
		entry := &idMap[idx]
		if _, err := fmt.Fprintf(&out, "%d %d %d\n", entry.ContainerID, entry.HostID, entry.Size); err != nil {
			return nil, err
		}
	}
	return out.Bytes(), nil
}

// netlinkError is an error wrapper type for use by custom netlink message
// types. Panics with errors are wrapped in netlinkError so that the recover
// in bootstrapData can distinguish intentional panics.
Expand Down Expand Up @@ -1104,59 +1101,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
})
}

// write namespace paths only when we are not joining an existing user ns
_, joinExistingUser := nsMaps[configs.NEWUSER]
if !joinExistingUser {
// write uid mappings
if len(c.config.UIDMappings) > 0 {
if c.config.RootlessEUID {
// We resolve the paths for new{u,g}idmap from
// the context of runc to avoid doing a path
// lookup in the nsexec context.
if path, err := execabs.LookPath("newuidmap"); err == nil {
r.AddData(&Bytemsg{
Type: UidmapPathAttr,
Value: []byte(path),
})
}
}
b, err := encodeIDMapping(c.config.UIDMappings)
if err != nil {
return nil, err
}
r.AddData(&Bytemsg{
Type: UidmapAttr,
Value: b,
})
}

// write gid mappings
if len(c.config.GIDMappings) > 0 {
b, err := encodeIDMapping(c.config.GIDMappings)
if err != nil {
return nil, err
}
r.AddData(&Bytemsg{
Type: GidmapAttr,
Value: b,
})
if c.config.RootlessEUID {
if path, err := execabs.LookPath("newgidmap"); err == nil {
r.AddData(&Bytemsg{
Type: GidmapPathAttr,
Value: []byte(path),
})
}
}
if requiresRootOrMappingTool(c.config) {
r.AddData(&Boolmsg{
Type: SetgroupAttr,
Value: true,
})
}
}
}

if c.config.OomScoreAdj != nil {
// write oom_score_adj
r.AddData(&Bytemsg{
Expand All @@ -1165,12 +1109,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
})
}

// write rootless
r.AddData(&Boolmsg{
Type: RootlessEUIDAttr,
Value: c.config.RootlessEUID,
})

// write boottime and monotonic time ns offsets.
if c.config.TimeOffsets != nil {
var offsetSpec bytes.Buffer
Expand Down Expand Up @@ -1211,9 +1149,9 @@ func ignoreTerminateErrors(err error) error {
return err
}

// requiresRootOrMappingTool reports whether the GID mappings differ from the
// single-entry mapping {ContainerID: 0, HostID: <effective GID of the calling
// process>, Size: 1}. It returns false only when the mappings are exactly that
// one entry.
// NOTE(review): presumably any other mapping cannot be written by an
// unprivileged process on its own and needs root or newgidmap — confirm.
// NOTE(review): both the *configs.Config and the []configs.IDMap variants of
// the signature and return statement appear below; this looks like old and new
// diff lines rendered together — confirm which variant is live.
func requiresRootOrMappingTool(c *configs.Config) bool {
func requiresRootOrMappingTool(gidMappings []configs.IDMap) bool {
// The only mapping that yields false: container GID 0 <-> our effective GID.
gidMap := []configs.IDMap{
{ContainerID: 0, HostID: int64(os.Getegid()), Size: 1},
}
return !reflect.DeepEqual(c.GIDMappings, gidMap)
return !reflect.DeepEqual(gidMappings, gidMap)
}
150 changes: 150 additions & 0 deletions libcontainer/container_setup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
package libcontainer

import (
"encoding/binary"
"fmt"
"io"
"os"

"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/execabs"
"golang.org/x/sys/unix"
)

// NsExecSyncMsg is a synchronisation message exchanged between the parent
// and the nsexec child during container setup. Each message is a single
// uint32 value.
type NsExecSyncMsg uint32

// Sync messages handled by the parent. Every "Pls" request coming from the
// child is answered with the "Ack" that directly follows it.
// NOTE(review): the values start at 0x40 and presumably have to stay in step
// with the definitions on the nsexec (C) side — confirm before reordering or
// inserting entries.
const (
	// syncUsermapPls is the child's request to set up the user mappings.
	syncUsermapPls NsExecSyncMsg = iota + 0x40
	// syncUsermapAck tells the child the user mappings have been set up.
	syncUsermapAck
	// syncRecvPidPls announces that the child is about to report a pid.
	syncRecvPidPls
	// syncRecvPidAck tells the child the pid has been received.
	syncRecvPidAck
	// syncTimeOffsetsPls is the child's request to configure the time
	// namespace offsets.
	syncTimeOffsetsPls
	// syncTimeOffsetsAck tells the child the time offsets have been written.
	syncTimeOffsetsAck
)

// NsExecSetup carries the state the parent needs while it services the setup
// requests of the nsexec child.
type NsExecSetup struct {
// process is the container process whose nsexec child is being set up; its
// comm, cmd, config and container fields are read by the methods of this type.
process *containerProcess
}

// bufSize is the size in bytes of one sync message on the wire: messages are
// encoded and decoded as a single uint32.
const bufSize int = 4

// parseNsExecSync reads native-endian uint32 sync messages from r and calls
// fn on each of them in arrival order. It returns nil once fn has successfully
// handled syncRecvPidPls.
//
// A read error (including io.EOF or io.ErrUnexpectedEOF when the stream ends
// before a whole message arrived) or an error returned by fn stops the loop
// and is returned unchanged.
func parseNsExecSync(r io.Reader, fn func(NsExecSyncMsg) error) error {
	logrus.Debug("start to communicate with the nsexec")
	var buf [bufSize]byte
	native := nl.NativeEndian()

	for {
		// A message is exactly bufSize bytes; ReadFull reports anything
		// shorter as an error instead of handing back a partial message.
		if _, err := io.ReadFull(r, buf[:]); err != nil {
			return err
		}
		msg := NsExecSyncMsg(native.Uint32(buf[:]))
		if err := fn(msg); err != nil {
			return err
		}
		// The pid report is the last message of the exchange.
		if msg == syncRecvPidPls {
			break
		}
	}
	logrus.Debug("finished communicating with the nsexec")
	return nil
}

// ackSyncMsg writes msg to f as one native-endian uint32 so that the child
// can read it. A failed write is logged at debug level and the error is
// returned to the caller.
func ackSyncMsg(f *os.File, msg NsExecSyncMsg) error {
	payload := make([]byte, bufSize)
	nl.NativeEndian().PutUint32(payload, uint32(msg))

	_, err := unix.Write(int(f.Fd()), payload)
	if err == nil {
		return nil
	}
	logrus.Debugf("failed to write message to nsexec: %v", err)
	return err
}

// helpDoingNsExec services the requests the nsexec child sends over the
// parent end of the stage-1 socket: user mapping setup, the pid report and
// time namespace offsets. Each handled request is acknowledged with its
// matching "Ack" message; any other message is an error. The socket is closed
// before returning, whether or not the exchange succeeded.
func (s *NsExecSetup) helpDoingNsExec() error {
	sock := s.process.comm.stage1SockParent

	// handle performs the work for one request and, on success, sends the
	// acknowledgement back to the child.
	handle := func(req NsExecSyncMsg) error {
		var reply NsExecSyncMsg
		switch req {
		case syncUsermapPls:
			logrus.Debugf("stage-1 requested userns mappings")
			if err := s.setupUsermap(); err != nil {
				return err
			}
			reply = syncUsermapAck
		case syncRecvPidPls:
			logrus.Debugf("stage-1 reports pid")
			// The pid follows the request on the same socket as a
			// native-endian uint32.
			var childPid uint32
			if err := binary.Read(sock, nl.NativeEndian(), &childPid); err != nil {
				return err
			}
			s.process.childPid = int(childPid)
			reply = syncRecvPidAck
		case syncTimeOffsetsPls:
			logrus.Debugf("stage-1 request to configure timens offsets")
			proc := s.process
			if err := system.UpdateTimeNsOffsets(proc.cmd.Process.Pid, proc.container.config.TimeOffsets); err != nil {
				return err
			}
			reply = syncTimeOffsetsAck
		default:
			return fmt.Errorf("unexpected message %d", req)
		}
		return ackSyncMsg(sock, reply)
	}

	err := parseNsExecSync(sock, handle)
	// The close error is deliberately dropped: the outcome of the exchange
	// above is what the caller gets.
	_ = sock.Close()
	return err
}

// setupUsermap applies the container's UID and GID mappings to the nsexec
// child (the process started by s.process.cmd).
//
// The steps are:
//   - for a rootless container whose GID mapping is the single-entry one,
//     setgroups(2) is explicitly denied for the child first (required since
//     Linux 3.19);
//   - unless the container joins an existing user namespace (a NEWUSER
//     namespace with a path), a rootless container with multi-entry mappings
//     relies on newuidmap/newgidmap, so their absolute paths are looked up;
//     a failed lookup simply leaves the path empty;
//   - the UID mappings and then the GID mappings are written.
func (s *NsExecSetup) setupUsermap() error {
	proc := s.process
	pid := proc.cmd.Process.Pid

	initCfg := proc.config
	if initCfg.RootlessEUID && !requiresRootOrMappingTool(initCfg.Config.GIDMappings) {
		// NOTE(review): the result is discarded here — confirm that denying
		// setgroups is meant to be best-effort.
		_ = system.UpdateSetgroups(pid, system.SetgroupsDeny)
	}

	cfg := proc.container.config

	// A user namespace entry that carries a path means we join an existing
	// namespace instead of creating a new one.
	joinsUserNS := false
	for _, ns := range cfg.Namespaces {
		if ns.Type == configs.NEWUSER && ns.Path != "" {
			joinsUserNS = true
			break
		}
	}

	var uidTool, gidTool string
	if !joinsUserNS && cfg.RootlessEUID {
		if len(cfg.UIDMappings) > 0 {
			if found, err := execabs.LookPath("newuidmap"); err == nil {
				uidTool = found
			}
		}
		if len(cfg.GIDMappings) > 0 {
			if found, err := execabs.LookPath("newgidmap"); err == nil {
				gidTool = found
			}
		}
	}

	if err := system.UpdateUidmap(uidTool, pid, cfg.UIDMappings); err != nil {
		return err
	}
	return system.UpdateGidmap(gidTool, pid, cfg.GIDMappings)
}
29 changes: 24 additions & 5 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"strconv"
"strings"
"syscall"
"unsafe"

"github.com/containerd/console"
"github.com/moby/sys/user"
Expand All @@ -35,11 +36,6 @@ const (
initStandard initType = "standard"
)

// pid holds two process IDs that are (de)serialised as JSON under the keys
// "stage2_pid" and "stage1_pid".
type pid struct {
// Pid is carried under the JSON key "stage2_pid".
Pid int `json:"stage2_pid"`
// PidFirstChild is carried under the JSON key "stage1_pid".
PidFirstChild int `json:"stage1_pid"`
}

// network is an internal struct used to setup container networks.
type network struct {
configs.Network
Expand Down Expand Up @@ -151,6 +147,11 @@ func startInitialization() (retErr error) {

logrus.SetOutput(logPipe)
logrus.SetFormatter(new(logrus.JSONFormatter))

/* For debugging. */
procName := append([]byte("runc:[2:INIT]"), 0)
_ = unix.Prctl(unix.PR_SET_NAME, uintptr(unsafe.Pointer(&procName[0])), 0, 0, 0)

logrus.Debug("child process in init()")

// Only init processes have FIFOFD.
Expand Down Expand Up @@ -215,6 +216,24 @@ func startInitialization() (retErr error) {
return err
}

if _, err := unix.Setsid(); err != nil {
return fmt.Errorf("setsid failed: %w", err)
}

if err := unix.Setuid(0); err != nil {
return fmt.Errorf("setuid failed %w", err)
}

if err := unix.Setgid(0); err != nil {
return fmt.Errorf("setgid failed %w", err)
}

if !config.RootlessEUID && requiresRootOrMappingTool(config.Config.GIDMappings) {
if err := unix.Setgroups([]int{0}); err != nil {
return fmt.Errorf("setgroups failed %w", err)
}
}

// If init succeeds, it will not return, hence none of the defers will be called.
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifoFile, logPipe, dmzExe)
}
Expand Down
Loading
Loading