Skip to content

refactor: move some c code to go #4309

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ type IDMap struct {
Size int64 `json:"size"`
}

// ToString renders the mapping as "<ContainerID> <HostID> <Size>": the three
// fields in decimal, separated by single spaces and without a trailing newline.
func (i IDMap) ToString() string {
	const layout = "%d %d %d"
	return fmt.Sprintf(layout, i.ContainerID, i.HostID, i.Size)
}

// Seccomp represents syscall restrictions
// By default, only the native architecture of the kernel is allowed to be used
// for syscalls. Additional architectures can be added by specifying them in
Expand Down
78 changes: 6 additions & 72 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,10 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
)
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.stage1SockChild)
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_STAGE1PIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
)
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.syncSockChild.File())
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_SYNCPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
Expand Down Expand Up @@ -1022,17 +1026,6 @@ func (c *Container) orderNamespacePaths(namespaces map[configs.NamespaceType]str
return paths, nil
}

// encodeIDMapping serialises idMap as text, one entry per line, each line
// having the form "<ContainerID> <HostID> <Size>\n". The entries are written
// in slice order.
func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
	var encoded bytes.Buffer
	for i := range idMap {
		entry := &idMap[i]
		if _, err := fmt.Fprintf(&encoded, "%d %d %d\n", entry.ContainerID, entry.HostID, entry.Size); err != nil {
			return nil, err
		}
	}
	return encoded.Bytes(), nil
}

// netlinkError is an error wrapper type for use by custom netlink message
// types. Panics with errors are wrapped in netlinkError so that the recover
// in bootstrapData can distinguish intentional panics.
Expand Down Expand Up @@ -1079,59 +1072,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
})
}

// write namespace paths only when we are not joining an existing user ns
_, joinExistingUser := nsMaps[configs.NEWUSER]
if !joinExistingUser {
// write uid mappings
if len(c.config.UIDMappings) > 0 {
if c.config.RootlessEUID {
// We resolve the paths for new{u,g}idmap from
// the context of runc to avoid doing a path
// lookup in the nsexec context.
if path, err := exec.LookPath("newuidmap"); err == nil {
r.AddData(&Bytemsg{
Type: UidmapPathAttr,
Value: []byte(path),
})
}
}
b, err := encodeIDMapping(c.config.UIDMappings)
if err != nil {
return nil, err
}
r.AddData(&Bytemsg{
Type: UidmapAttr,
Value: b,
})
}

// write gid mappings
if len(c.config.GIDMappings) > 0 {
b, err := encodeIDMapping(c.config.GIDMappings)
if err != nil {
return nil, err
}
r.AddData(&Bytemsg{
Type: GidmapAttr,
Value: b,
})
if c.config.RootlessEUID {
if path, err := exec.LookPath("newgidmap"); err == nil {
r.AddData(&Bytemsg{
Type: GidmapPathAttr,
Value: []byte(path),
})
}
}
if requiresRootOrMappingTool(c.config) {
r.AddData(&Boolmsg{
Type: SetgroupAttr,
Value: true,
})
}
}
}

if c.config.OomScoreAdj != nil {
// write oom_score_adj
r.AddData(&Bytemsg{
Expand All @@ -1140,12 +1080,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
})
}

// write rootless
r.AddData(&Boolmsg{
Type: RootlessEUIDAttr,
Value: c.config.RootlessEUID,
})

// write boottime and monotonic time ns offsets.
if c.config.TimeOffsets != nil {
var offsetSpec bytes.Buffer
Expand Down Expand Up @@ -1186,9 +1120,9 @@ func ignoreTerminateErrors(err error) error {
return err
}

func requiresRootOrMappingTool(c *configs.Config) bool {
func requiresRootOrMappingTool(gidMappings []configs.IDMap) bool {
gidMap := []configs.IDMap{
{ContainerID: 0, HostID: int64(os.Getegid()), Size: 1},
}
return !reflect.DeepEqual(c.GIDMappings, gidMap)
return !reflect.DeepEqual(gidMappings, gidMap)
}
147 changes: 147 additions & 0 deletions libcontainer/container_setup_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package libcontainer

import (
"encoding/binary"
"fmt"
"io"
"os"
"os/exec"

"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)

// NsExecSyncMsg is used for communication between the parent and child during
// container setup. A message travels as a bufSize-byte value in native byte
// order (see ParseNsExecSync and AckNsExecSync).
type NsExecSyncMsg uint32

// Messages exchanged with nsexec. The values start at 0x40 and increase by one
// in declaration order. Every *Pls request handled by setupNsExec is answered
// with the matching *Ack.
const (
// SyncUsermapPls is sent by nsexec to request the userns mappings.
SyncUsermapPls NsExecSyncMsg = iota + 0x40
// SyncUsermapAck acknowledges SyncUsermapPls once the mappings are set up.
SyncUsermapAck
// SyncRecvPidPls is sent by nsexec to report a pid; a 32-bit pid follows the
// message. It is the last message ParseNsExecSync processes.
SyncRecvPidPls
// SyncRecvPidAck acknowledges SyncRecvPidPls after the pid has been read.
SyncRecvPidAck
// SyncTimeOffsetsPls is sent by nsexec to request configuration of the timens
// offsets.
SyncTimeOffsetsPls
// SyncTimeOffsetsAck acknowledges SyncTimeOffsetsPls once the offsets are
// applied.
SyncTimeOffsetsAck
)

// bufSize is the number of bytes used to encode one NsExecSyncMsg.
const bufSize = 4

// setupNsExec serves the requests nsexec sends over syncSock while it sets up
// the container: it applies the user namespace mappings and the time namespace
// offsets when asked, and records the pid nsexec reports in s.childPid. Each
// handled request is acknowledged on the same socket. An unknown message, or
// any failure while handling or acknowledging one, is returned as an error.
func (s *containerProcess) setupNsExec(syncSock *os.File) error {
	logrus.Debugf("waiting nsexec to report the container's pid")

	// handle does the work for a single request and picks the reply to send;
	// the acknowledgement goes out only if that work succeeded.
	handle := func(msg NsExecSyncMsg) error {
		var ack NsExecSyncMsg

		switch msg {
		case SyncRecvPidPls:
			logrus.Debugf("nsexec has reported pid")
			// The pid follows the message as a native-endian 32-bit value.
			var pid uint32
			if err := binary.Read(syncSock, nl.NativeEndian(), &pid); err != nil {
				return err
			}
			s.childPid = int(pid)
			ack = SyncRecvPidAck
		case SyncUsermapPls:
			logrus.Debugf("nsexec has requested userns mappings")
			if err := s.setupUsermap(); err != nil {
				return err
			}
			ack = SyncUsermapAck
		case SyncTimeOffsetsPls:
			logrus.Debugf("nsexec has requested to configure timens offsets")
			if err := system.UpdateTimeNsOffsets(s.cmd.Process.Pid, s.container.config.TimeOffsets); err != nil {
				return err
			}
			ack = SyncTimeOffsetsAck
		default:
			return fmt.Errorf("unexpected message %d", msg)
		}

		return AckNsExecSync(syncSock, ack)
	}

	return ParseNsExecSync(syncSock, handle)
}

// setupUsermap writes the container's UID and GID mappings for the process
// started by s.cmd. It returns the first error reported while updating the
// UID map or the GID map.
func (s *containerProcess) setupUsermap() error {
	// A rootless container whose GID mapping needs neither root nor a mapping
	// tool (single-entry mapping) must have setgroups(2) explicitly disabled
	// (this is required since Linux 3.19). Rootless multi-entry mappings go
	// through newuidmap/newgidmap instead. The result of the update is
	// discarded.
	if s.config.Config.RootlessEUID && !requiresRootOrMappingTool(s.config.Config.GIDMappings) {
		_ = system.UpdateSetgroups(s.cmd.Process.Pid, system.SetgroupsDeny)
	}

	cfg := s.container.config

	// A user namespace entry that carries a path means an existing user
	// namespace is being joined.
	joinExistingUser := false
	for _, ns := range cfg.Namespaces {
		if ns.Type == configs.NEWUSER && ns.Path != "" {
			joinExistingUser = true
			break
		}
	}

	// The helper binaries are only looked up for rootless containers that
	// create their own user namespace. A failed lookup leaves the path empty.
	var uidMapPath, gidMapPath string
	if !joinExistingUser && cfg.RootlessEUID {
		if len(cfg.UIDMappings) > 0 {
			if tool, err := exec.LookPath("newuidmap"); err == nil {
				uidMapPath = tool
			}
		}
		if len(cfg.GIDMappings) > 0 {
			if tool, err := exec.LookPath("newgidmap"); err == nil {
				gidMapPath = tool
			}
		}
	}

	// Set up the mappings: UID first, then GID.
	if err := system.UpdateUidmap(uidMapPath, s.cmd.Process.Pid, cfg.UIDMappings); err != nil {
		return err
	}
	return system.UpdateGidmap(gidMapPath, s.cmd.Process.Pid, cfg.GIDMappings)
}

// ParseNsExecSync reads messages from r one at a time and passes each to fn.
// A message is bufSize bytes, decoded as a native-endian 32-bit value. The
// function returns nil once SyncRecvPidPls has been received and handled, and
// returns early with the error if reading fails or fn reports one.
func ParseNsExecSync(r io.Reader, fn func(NsExecSyncMsg) error) error {
	var raw [bufSize]byte
	order := nl.NativeEndian()

	for {
		// Block until one complete message has arrived.
		if _, err := io.ReadFull(r, raw[:]); err != nil {
			return err
		}

		received := NsExecSyncMsg(order.Uint32(raw[:]))
		if err := fn(received); err != nil {
			return err
		}

		// The pid report is the final message of the exchange.
		if received == SyncRecvPidPls {
			return nil
		}
	}
}

// AckNsExecSync is used to send a message to the child.
func AckNsExecSync(f *os.File, msg NsExecSyncMsg) error {
var buf [bufSize]byte
native := nl.NativeEndian()
native.PutUint32(buf[:], uint32(msg))
if _, err := unix.Write(int(f.Fd()), buf[:]); err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we need to handle when it returns EINTR now that this is go. This applies to all functions that can return EINTR and are using the unix or syscall package.

logrus.Debugf("failed to write message to nsexec: %v", err)
return err
}
return nil
}
23 changes: 18 additions & 5 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,6 @@ const (
initStandard initType = "standard"
)

// pid holds two process IDs and maps them to JSON: Pid is stored under the
// key "stage2_pid" and PidFirstChild under the key "stage1_pid".
type pid struct {
Pid int `json:"stage2_pid"`
PidFirstChild int `json:"stage1_pid"`
}

// network is an internal struct used to setup container networks.
type network struct {
configs.Network
Expand Down Expand Up @@ -227,6 +222,24 @@ func startInitialization() (retErr error) {
return err
}

if _, err := unix.Setsid(); err != nil {
return os.NewSyscallError("setsid", err)
}

if err := unix.Setuid(0); err != nil {
return os.NewSyscallError("setuid", err)
}

if err := unix.Setgid(0); err != nil {
return os.NewSyscallError("setgid", err)
}

if !config.Config.RootlessEUID && requiresRootOrMappingTool(config.Config.GIDMappings) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is requiresRootOrMappingTool() equivalent to config.is_setgroup?

if err := unix.Setgroups([]int{0}); err != nil {
return os.NewSyscallError("setgroups", err)
}
}

// If init succeeds, it will not return, hence none of the defers will be called.
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifoFile, logPipe)
}
Expand Down
16 changes: 5 additions & 11 deletions libcontainer/message_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,11 @@ import (
// List of known message types we want to send to the bootstrap program.
// The numbers are randomly chosen so that they do not conflict with known netlink types.
const (
InitMsg uint16 = 62000
CloneFlagsAttr uint16 = 27281
NsPathsAttr uint16 = 27282
UidmapAttr uint16 = 27283
GidmapAttr uint16 = 27284
SetgroupAttr uint16 = 27285
OomScoreAdjAttr uint16 = 27286
RootlessEUIDAttr uint16 = 27287
UidmapPathAttr uint16 = 27288
GidmapPathAttr uint16 = 27289
TimeOffsetsAttr uint16 = 27290
InitMsg uint16 = 62000
CloneFlagsAttr uint16 = 27281
NsPathsAttr uint16 = 27282
OomScoreAdjAttr uint16 = 27286
TimeOffsetsAttr uint16 = 27290
)

type Int32msg struct {
Expand Down
2 changes: 1 addition & 1 deletion libcontainer/nsenter/log.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ void write_log(int level, const char *format, ...)
if (stage == NULL)
goto out;
} else {
ret = asprintf(&stage, "nsexec-%d", current_stage);
ret = asprintf(&stage, "nsexec-%d", current_stage + 1);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this?

if (ret < 0) {
stage = NULL;
goto out;
Expand Down
Loading