diff --git a/features.go b/features.go
index b636466bfe4..c5dff4a2d17 100644
--- a/features.go
+++ b/features.go
@@ -63,6 +63,9 @@ var featuresCommand = cli.Command{
 					Enabled: &t,
 				},
 			},
+			NetDevices: &features.NetDevices{
+				Enabled: &t,
+			},
 		},
 		PotentiallyUnsafeConfigAnnotations: []string{
 			"bundle",
diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go
index 22fe0f9b4c1..b27a2dc781e 100644
--- a/libcontainer/configs/config.go
+++ b/libcontainer/configs/config.go
@@ -115,6 +115,9 @@ type Config struct {
 	// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
 	Devices []*devices.Device `json:"devices"`
 
+	// NetDevices are key-value pairs, keyed by network device name, moved to the container's network namespace.
+	NetDevices map[string]*LinuxNetDevice `json:"netDevices"`
+
 	MountLabel string `json:"mount_label"`
 
 	// Hostname optionally sets the container's hostname if provided
diff --git a/libcontainer/configs/netdevices.go b/libcontainer/configs/netdevices.go
new file mode 100644
index 00000000000..da1336a5f4e
--- /dev/null
+++ b/libcontainer/configs/netdevices.go
@@ -0,0 +1,13 @@
+package configs
+
+// LinuxNetDevice represents a single network device to be added to the container's network namespace.
+type LinuxNetDevice struct {
+	// Name of the device in the container namespace.
+	Name string `json:"name,omitempty"`
+	// Addresses are the IP addresses, with their prefix lengths, in the container namespace in CIDR format.
+	Addresses []string `json:"addresses,omitempty"`
+	// HardwareAddress represents a physical hardware address.
+	HardwareAddress string `json:"hardwareAddress,omitempty"`
+	// MTU is the Maximum Transmission Unit of the network device in the container namespace.
+	MTU uint32 `json:"mtu,omitempty"`
+}
diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go
index 37ece0aebbd..023e9a51bdd 100644
--- a/libcontainer/configs/validate/validator.go
+++ b/libcontainer/configs/validate/validator.go
@@ -3,6 +3,8 @@ package validate
 import (
 	"errors"
 	"fmt"
+	"net"
+	"net/netip"
 	"os"
 	"path/filepath"
 	"strings"
@@ -24,6 +26,7 @@ func Validate(config *configs.Config) error {
 		cgroupsCheck,
 		rootfs,
 		network,
+		netdevices,
 		uts,
 		security,
 		namespaces,
@@ -70,6 +73,62 @@ func rootfs(config *configs.Config) error {
 	return nil
 }
 
+// devValidName reports whether name is acceptable as a Linux network
+// interface name. It mirrors the checks of the kernel's dev_valid_name():
+// https://elixir.bootlin.com/linux/v6.12/source/net/core/dev.c#L1066
+func devValidName(name string) bool {
+	// IFNAMSIZ counts the terminating NUL byte, so a usable name is at
+	// most IFNAMSIZ-1 bytes long.
+	if len(name) == 0 || len(name) >= unix.IFNAMSIZ {
+		return false
+	}
+	if name == "." || name == ".." {
+		return false
+	}
+	// The kernel rejects '/', ':' and whitespace, not only the space character.
+	return !strings.ContainsAny(name, "/: \t\n\v\f\r")
+}
+
+// netdevices validates config.NetDevices. Devices can only be moved when the
+// container joins a network namespace that has a path and the container is
+// not rootless; every name, address and hardware address must be well formed.
+func netdevices(config *configs.Config) error {
+	if len(config.NetDevices) == 0 {
+		return nil
+	}
+	if !config.Namespaces.Contains(configs.NEWNET) || config.Namespaces.PathOf(configs.NEWNET) == "" {
+		return errors.New("unable to move network devices without a private NET namespace")
+	}
+	if config.RootlessEUID || config.RootlessCgroups {
+		return errors.New("network devices are not supported for rootless containers")
+	}
+
+	for name, netdev := range config.NetDevices {
+		if !devValidName(name) {
+			return fmt.Errorf("invalid network device name %q", name)
+		}
+		// The map holds pointers: a nil entry (e.g. a JSON null) must be
+		// rejected here, before it is dereferenced.
+		if netdev == nil {
+			return fmt.Errorf("missing configuration for network device %q", name)
+		}
+		if netdev.Name != "" && !devValidName(netdev.Name) {
+			return fmt.Errorf("invalid network device name %q", netdev.Name)
+		}
+		for _, address := range netdev.Addresses {
+			if _, err := netip.ParsePrefix(address); err != nil {
+				return fmt.Errorf("invalid network IP address %q: %w", address, err)
+			}
+		}
+		if netdev.HardwareAddress != "" {
+			if _, err := net.ParseMAC(netdev.HardwareAddress); err != nil {
+				return fmt.Errorf("invalid hardware address %q: %w", netdev.HardwareAddress, err)
+			}
+		}
+	}
+	return nil
+}
+
 func network(config *configs.Config) error {
 	if !config.Namespaces.Contains(configs.NEWNET) {
 		if len(config.Networks) > 0 || len(config.Routes) > 0 {
diff --git a/libcontainer/configs/validate/validator_test.go b/libcontainer/configs/validate/validator_test.go
index b0b740a122d..575838604b2 100644
--- a/libcontainer/configs/validate/validator_test.go
+++ b/libcontainer/configs/validate/validator_test.go
@@ -871,3 +871,208 @@ func TestValidateIOPriority(t *testing.T) {
 		}
 	}
 }
+
+func TestValidateNetDevices(t *testing.T) {
+	testCases := []struct {
+		name   string
+		isErr  bool
+		config *configs.Config
+	}{
+		{
+			name: "network device",
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {},
+				},
+			},
+		},
+		{
+			name: "network device rename",
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {
+						Name:            "c0",
+						Addresses:       []string{"192.168.2.34/24", "2001:db8::2/64"},
+						HardwareAddress: "82:06:8c:49:7a:4a",
+						MTU:             1500,
+					},
+				},
+			},
+		},
+		{
+			name:  "network device host network",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {},
+				},
+			},
+		},
+		{
+			name:  "network device rootless",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				RootlessEUID: true,
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {},
+				},
+			},
+		},
+		{
+			name:  "network device rootless cgroups",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				RootlessCgroups: true,
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {},
+				},
+			},
+		},
+		{
+			name:  "network device bad name",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {
+						Name: "eth0/",
+					},
+				},
+			},
+		},
+		{
+			name:  "network device name too long",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"0123456789abcdef": {},
+				},
+			},
+		},
+		{
+			name:  "network device nil configuration",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": nil,
+				},
+			},
+		},
+		{
+			name:  "network device wrong ip",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {
+						Name:      "eth0",
+						Addresses: []string{"wrongip"},
+					},
+				},
+			},
+		},
+		{
+			name:  "network device wrong mac",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {
+						Name:            "eth0",
+						Addresses:       []string{"192.168.1.1/24"},
+						HardwareAddress: "wrongmac!",
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			config := tc.config
+			config.Rootfs = "/var"
+
+			err := Validate(config)
+			if tc.isErr && err == nil {
+				t.Error("expected error, got nil")
+			}
+
+			if !tc.isErr && err != nil {
+				t.Error(err)
+			}
+		})
+	}
+}
diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go
index b13f8bf9bb3..2351f730fcd 100644
--- a/libcontainer/factory_linux.go
+++ b/libcontainer/factory_linux.go
@@ -90,6 +90,18 @@
func Create(root, id string, config *configs.Config) (*Container, error) {
 	if err := os.Mkdir(stateDir, 0o711); err != nil {
 		return nil, err
 	}
+
+	// move the specified devices to the container network namespace
+	nsPath := getNetns(config)
+	if nsPath != "" {
+		for name, netDevice := range config.NetDevices {
+			err := netnsAttach(name, nsPath, *netDevice)
+			if err != nil {
+				return nil, err
+			}
+		}
+	}
+
 	c := &Container{
 		id:       id,
 		stateDir: stateDir,
diff --git a/libcontainer/network_linux.go b/libcontainer/network_linux.go
index 8915548b3bc..90a84c43188 100644
--- a/libcontainer/network_linux.go
+++ b/libcontainer/network_linux.go
@@ -3,13 +3,16 @@ package libcontainer
 import (
 	"bytes"
 	"fmt"
+	"net"
 	"os"
 	"path/filepath"
 	"strconv"
 
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/types"
+	"github.com/sirupsen/logrus"
 	"github.com/vishvananda/netlink"
+	"github.com/vishvananda/netns"
 )
 
 var strategies = map[string]networkStrategy{
@@ -98,3 +101,149 @@ func (l *loopback) attach(n *configs.Network) (err error) {
 func (l *loopback) detach(n *configs.Network) (err error) {
 	return nil
 }
+
+// getNetns returns the path to the network namespace
+// or an empty string if it is not available.
+func getNetns(config *configs.Config) string {
+	if !config.Namespaces.Contains(configs.NEWNET) {
+		return ""
+	}
+	return config.Namespaces.PathOf(configs.NEWNET)
+}
+
+// netnsAttach takes the network device referenced by name in the current network namespace
+// and moves it to the network namespace passed as a parameter. It also configures the
+// network device inside the new network namespace with the passed parameters; attributes
+// left unset in device keep the values the link currently has.
+func netnsAttach(name string, nsPath string, device configs.LinuxNetDevice) error {
+	logrus.Debugf("attaching network device %s with attrs %#v to network namespace %s", name, device, nsPath)
+	link, err := netlink.LinkByName(name)
+	if err != nil {
+		return fmt.Errorf("link not found for interface %s on runtime namespace: %w", name, err)
+	}
+	attrs := netlink.NewLinkAttrs()
+	attrs.Index = link.Attrs().Index
+
+	attrs.Name = name
+	if device.Name != "" {
+		attrs.Name = device.Name
+	}
+
+	attrs.MTU = link.Attrs().MTU
+	if device.MTU > 0 {
+		attrs.MTU = int(device.MTU)
+	}
+
+	attrs.HardwareAddr = link.Attrs().HardwareAddr
+	if device.HardwareAddress != "" {
+		attrs.HardwareAddr, err = net.ParseMAC(device.HardwareAddress)
+		if err != nil {
+			return fmt.Errorf("invalid hardware address %q for network device %s: %w", device.HardwareAddress, name, err)
+		}
+	}
+
+	ns, err := netns.GetFromPath(nsPath)
+	if err != nil {
+		return fmt.Errorf("could not get network namespace from path %s for network device %s : %w", nsPath, name, err)
+	}
+	// the namespace file descriptor is not needed once this function returns
+	defer ns.Close()
+
+	attrs.Namespace = netlink.NsFd(ns)
+
+	// set the interface down before we change the address inside the network namespace
+	if err := netlink.LinkSetDown(link); err != nil {
+		return fmt.Errorf("could not set network device %s down: %w", name, err)
+	}
+
+	dev := &netlink.Device{
+		LinkAttrs: attrs,
+	}
+
+	if err := netlink.LinkModify(dev); err != nil {
+		return fmt.Errorf("could not modify network device %s : %w", name, err)
+	}
+
+	// to avoid golang problem with goroutines we create the socket in the
+	// namespace and use it directly
+	nhNs, err := netlink.NewHandleAt(ns)
+	if err != nil {
+		return fmt.Errorf("could not get handle for network namespace %s: %w", nsPath, err)
+	}
+	// release the netlink sockets owned by the handle
+	defer nhNs.Close()
+
+	nsLink, err := nhNs.LinkByName(dev.Name)
+	if err != nil {
+		return fmt.Errorf("link not found for interface %s on namespace %s: %w", dev.Name, nsPath, err)
+	}
+
+	if err := nhNs.LinkSetUp(nsLink); err != nil {
+		return fmt.Errorf("failed to set up interface %s on namespace %s: %w", nsLink.Attrs().Name, nsPath, err)
+	}
+
+	for _, address := range device.Addresses {
+		addr, err := netlink.ParseAddr(address)
+		if err != nil {
+			return fmt.Errorf("invalid address %q for network device %s: %w", address, name, err)
+		}
+		if err := nhNs.AddrAdd(nsLink, addr); err != nil {
+			return fmt.Errorf("could not add address %s to interface %s on namespace %s: %w", address, dev.Name, nsPath, err)
+		}
+	}
+	return nil
+}
+
+// netnsDettach takes the network device referenced by name in the passed network namespace
+// and moves it back to the current (runtime) network namespace, restoring the original name.
+// It also sets down the network device to avoid conflicts with the existing network configuration.
+func netnsDettach(name string, nsPath string, device configs.LinuxNetDevice) error {
+	logrus.Debugf("dettaching network device %s with attrs %#v to network namespace %s", name, device, nsPath)
+	ns, err := netns.GetFromPath(nsPath)
+	if err != nil {
+		return fmt.Errorf("could not get network namespace from path %s for network device %s : %w", nsPath, name, err)
+	}
+	defer ns.Close()
+	// to avoid golang problem with goroutines we create the socket in the
+	// namespace and use it directly
+	nhNs, err := netlink.NewHandleAt(ns)
+	if err != nil {
+		return fmt.Errorf("could not get network namespace handle: %w", err)
+	}
+	defer nhNs.Close()
+
+	// inside the namespace the device is called device.Name, or name when it was not renamed
+	devName := device.Name
+	if devName == "" {
+		devName = name
+	}
+
+	nsLink, err := nhNs.LinkByName(devName)
+	if err != nil {
+		return fmt.Errorf("link not found for interface %s on namespace %s: %w", devName, nsPath, err)
+	}
+
+	// set the device down to avoid network conflicts
+	// when it is restored to the original namespace
+	if err := nhNs.LinkSetDown(nsLink); err != nil {
+		return fmt.Errorf("could not set interface %s down on namespace %s: %w", devName, nsPath, err)
+	}
+
+	// restore the original name if it was renamed
+	if devName != name {
+		if err := nhNs.LinkSetName(nsLink, name); err != nil {
+			return fmt.Errorf("could not rename interface %s back to %s: %w", devName, name, err)
+		}
+	}
+
+	rootNs, err := netns.Get()
+	if err != nil {
+		return fmt.Errorf("could not get the current network namespace: %w", err)
+	}
+	defer rootNs.Close()
+
+	if err := nhNs.LinkSetNsFd(nsLink, int(netlink.NsFd(rootNs))); err != nil {
+		return fmt.Errorf("failed to restore original network namespace: %w", err)
+	}
+	return nil
+}
diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go
index e7c6faae347..ab87b3062e7 100644
--- a/libcontainer/specconv/spec_linux.go
+++ b/libcontainer/specconv/spec_linux.go
@@ -472,6 +472,17 @@ func
CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
 			}
 		}
 
+		for name, netdev := range spec.Linux.NetDevices {
+			if config.NetDevices == nil {
+				config.NetDevices = make(map[string]*configs.LinuxNetDevice)
+			}
+			config.NetDevices[name] = &configs.LinuxNetDevice{
+				Name:            netdev.Name,
+				Addresses:       netdev.Addresses,
+				HardwareAddress: netdev.HardwareAddress,
+				MTU:             netdev.MTU,
+			}
+		}
 	}
 
 	// Set the host UID that should own the container's cgroup.
diff --git a/libcontainer/specconv/spec_linux_test.go b/libcontainer/specconv/spec_linux_test.go
index 8c7fb774f97..9ca627f330b 100644
--- a/libcontainer/specconv/spec_linux_test.go
+++ b/libcontainer/specconv/spec_linux_test.go
@@ -2,6 +2,7 @@ package specconv
 
 import (
 	"os"
+	"reflect"
 	"strings"
 	"testing"
 
@@ -956,3 +957,110 @@ func TestCreateDevices(t *testing.T) {
 		t.Errorf("device /dev/ram0 not found in config devices; got %v", conf.Devices)
 	}
 }
+
+func TestCreateNetDevices(t *testing.T) {
+	testCases := []struct {
+		name       string
+		netDevices map[string]specs.LinuxNetDevice
+	}{
+		{
+			name: "no network devices",
+		},
+		{
+			name: "one network devices",
+			netDevices: map[string]specs.LinuxNetDevice{
+				"eth1": {},
+			},
+		},
+		{
+			name: "multiple network devices",
+			netDevices: map[string]specs.LinuxNetDevice{
+				"eth1": {},
+				"eth2": {},
+			},
+		},
+		{
+			name: "multiple network devices and rename",
+			netDevices: map[string]specs.LinuxNetDevice{
+				"eth1": {},
+				"eth2": {
+					Name: "ctr_eth2",
+				},
+			},
+		},
+		{
+			name: "multiple network devices and addresses",
+			netDevices: map[string]specs.LinuxNetDevice{
+				"eth1": {
+					Addresses: []string{"192.168.1.2/24", "fd00:1:2::9/64"},
+				},
+				"eth2": {
+					Name: "ctr_eth2",
+				},
+			},
+		},
+		{
+			name: "multiple network devices and hardware address",
+			netDevices: map[string]specs.LinuxNetDevice{
+				"eth1": {
+					Addresses:       []string{"192.168.1.2/24", "fd00:1:2::9/64"},
+					HardwareAddress: "e2:85:68:80:43:7a ",
+				},
+				"eth2": {
+					Name: "ctr_eth2",
+				},
+			},
+		},
+		{
+			name: "multiple network devices and mtu",
+			netDevices: map[string]specs.LinuxNetDevice{
+				"eth1": {
+					Addresses:       []string{"192.168.1.2/24", "fd00:1:2::9/64"},
+					HardwareAddress: "e2:85:68:80:43:7a ",
+				},
+				"eth2": {
+					Name: "ctr_eth2",
+					MTU:  1725,
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			spec := Example()
+			spec.Linux.NetDevices = tc.netDevices
+			opts := &CreateOpts{
+				CgroupName:       "ContainerID",
+				UseSystemdCgroup: false,
+				Spec:             spec,
+			}
+			config, err := CreateLibcontainerConfig(opts)
+			if err != nil {
+				t.Fatalf("Couldn't create libcontainer config: %v", err)
+			}
+			if len(config.NetDevices) != len(tc.netDevices) {
+				t.Fatalf("expected %d network devices and got %d", len(tc.netDevices), len(config.NetDevices))
+			}
+			// compare every device requested in the spec with its converted counterpart
+			for name, netdev := range tc.netDevices {
+				ctrNetDev, ok := config.NetDevices[name]
+				if !ok {
+					t.Fatalf("network device %s not found in the configuration", name)
+				}
+				if ctrNetDev.Name != netdev.Name {
+					t.Fatalf("expected %s got %s", netdev.Name, ctrNetDev.Name)
+				}
+				if !reflect.DeepEqual(ctrNetDev.Addresses, netdev.Addresses) {
+					t.Fatalf("expected %v got %v", netdev.Addresses, ctrNetDev.Addresses)
+				}
+				if ctrNetDev.HardwareAddress != netdev.HardwareAddress {
+					t.Fatalf("expected %s got %s", netdev.HardwareAddress, ctrNetDev.HardwareAddress)
+				}
+				if ctrNetDev.MTU != netdev.MTU {
+					t.Fatalf("expected %d got %d", netdev.MTU, ctrNetDev.MTU)
+				}
+			}
+		})
+	}
+}
diff --git a/libcontainer/state_linux.go b/libcontainer/state_linux.go
index ad96f0801ea..49e79d6fe05 100644
--- a/libcontainer/state_linux.go
+++ b/libcontainer/state_linux.go
@@ -7,6 +7,7 @@ import (
 
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )
 
@@ -47,6 +48,21 @@ func destroy(c *Container) error {
 		// Likely to fail when c.config.RootlessCgroups is true
 		_ = signalAllProcesses(c.cgroupManager,
unix.SIGKILL)
 	}
+
+	// restore network devices
+	nsPath := getNetns(c.config)
+	if nsPath != "" {
+		for name, netDevice := range c.config.NetDevices {
+			err := netnsDettach(name, nsPath, *netDevice)
+			if err != nil {
+				// don't fail on the interface detachment to avoid problems with the container shutdown.
+				// In the worst case the OS will handle the cleanup, hardware interfaces will be back on the
+				// root namespace and virtual devices will be destroyed.
+				logrus.WithError(err).Warnf("failed to restore network device %s from network namespace %s", name, nsPath)
+			}
+		}
+	}
+
 	if err := c.cgroupManager.Destroy(); err != nil {
 		return fmt.Errorf("unable to remove container's cgroup: %w", err)
 	}
diff --git a/tests/integration/netdev.bats b/tests/integration/netdev.bats
new file mode 100644
index 00000000000..db717536a85
--- /dev/null
+++ b/tests/integration/netdev.bats
@@ -0,0 +1,120 @@
+#!/usr/bin/env bats
+
+load helpers
+
+function setup() {
+	setup_busybox
+}
+
+function teardown() {
+	# Best-effort cleanup: a test that fails half way must not leak its
+	# network namespace or dummy interfaces into the following tests.
+	if [ -n "${ns_name:-}" ]; then
+		ip netns del "$ns_name" 2>/dev/null || true
+	fi
+	local dev
+	for dev in "${dummy_devs[@]}"; do
+		ip link del dev "$dev" 2>/dev/null || true
+	done
+	teardown_bundle
+}
+
+# Creates a dummy interface named $1, brings it up and assigns it the address $2.
+# The interface is recorded in dummy_devs so that teardown can remove it.
+function create_dummy() {
+	ip link add "$1" type dummy
+	dummy_devs+=("$1")
+	ip link set up dev "$1"
+	ip addr add "$2" dev "$1"
+}
+
+# Creates a uniquely named network namespace, stores its name in ns_name and
+# its path in ns_path, and makes the container join it.
+function create_netns() {
+	# create a temporary name for the test network namespace
+	tmp=$(mktemp)
+	rm -f "$tmp"
+	ns_name=$(basename "$tmp")
+	# create network namespace
+	ip netns add "$ns_name"
+	# NOTE(review): presumably adding the namespace a second time fails with a
+	# message that quotes its path, which sed extracts -- confirm.
+	ns_path=$(ip netns add "$ns_name" 2>&1 | sed -e 's/.*"\(.*\)".*/\1/')
+
+	# tell runc which network namespace to use
+	update_config '(.. | select(.type? == "network")) .path |= "'"$ns_path"'"'
+}
+
+@test "move network device to container network namespace" {
+	requires root
+	create_dummy dummy0 169.254.169.13/32
+
+	update_config ' .linux.netDevices |= {"dummy0": {} }
+		| .process.args |= ["ip", "address", "show", "dev", "dummy0"]'
+	create_netns
+
+	runc run test_busybox
+	[ "$status" -eq 0 ]
+
+	ip netns exec "$ns_name" ip link
+}
+
+@test "move network device to container network namespace and rename" {
+	requires root
+	create_dummy dummy1 169.254.169.14/32
+
+	update_config ' .linux.netDevices |= { "dummy1": { "name" : "ctr_dummy1" } }
+		| .process.args |= ["ip", "address", "show", "dev", "ctr_dummy1"]'
+	create_netns
+
+	runc run test_busybox
+	[ "$status" -eq 0 ]
+}
+
+@test "move network device to container network namespace and change ipv4 address" {
+	requires root
+	create_dummy dummy1 169.254.169.14/32
+
+	update_config ' .linux.netDevices |= { "dummy1": { "name" : "ctr_dummy1" , "addresses" : [ "10.0.0.2/24" ]} }
+		| .process.args |= ["ip", "address", "show", "dev", "ctr_dummy1" ]'
+	create_netns
+
+	runc run test_busybox
+	[ "$status" -eq 0 ]
+	[[ "$output" == *"10.0.0.2/24"* ]]
+}
+
+@test "move network device to container network namespace and change ipv6 address" {
+	requires root
+	create_dummy dummy1 169.254.169.14/32
+
+	update_config ' .linux.netDevices |= { "dummy1": { "name" : "ctr_dummy1" , "addresses" : [ "10.0.0.2/24" , "2001:db8::2/64" ]} }
+		| .process.args |= ["ip", "address", "show", "dev", "ctr_dummy1" ]'
+	create_netns
+
+	runc run test_busybox
+	[ "$status" -eq 0 ]
+	[[ "$output" == *"2001:db8::2/64"* ]]
+}
+
+@test "network device on root namespace fails" {
+	requires root
+	create_dummy dummy2 169.254.169.13/32
+
+	update_config ' .linux.netDevices |= {"dummy2": {} }'
+	runc run test_busybox
+	[ "$status" -ne 0 ]
+	[[ "$output" == *"unable to move network devices without a private NET namespace"* ]]
+}
+
+@test "network device bad address fails" {
+	requires root
+	create_dummy dummy2 169.254.169.13/32
+
+	update_config '(.. | select(.type? == "network")) .path |= "'fake_net_ns'"'
+	update_config ' .linux.netDevices |= { "dummy2": { "name" : "ctr_dummy2" , "addresses" : [ "wrong_ip" ]} }'
+
+	runc run test_busybox
+	[ "$status" -ne 0 ]
+	[[ "$output" == *"invalid network IP address"* ]]
+}