Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Rootless Docker #12359

Merged
merged 1 commit into from
Aug 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions cmd/minikube/cmd/start_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -492,9 +492,37 @@ func generateNewConfigFromFlags(cmd *cobra.Command, k8sVersion string, drvName s
}
}

if driver.IsKIC(drvName) {
si, err := oci.CachedDaemonInfo(drvName)
if err != nil {
exit.Message(reason.Usage, "Ensure your {{.driver_name}} is running and is healthy.", out.V{"driver_name": driver.FullName(drvName)})
}
if si.Rootless {
if cc.KubernetesConfig.ContainerRuntime != "containerd" {
exit.Message(reason.Usage, "Container runtime must be set to \"containerd\" for rootless")
// TODO: support cri-o (https://kubernetes.io/docs/tasks/administer-cluster/kubelet-in-userns/#configuring-cri)
}
// KubeletInUserNamespace feature gate is essential for rootless driver.
// See https://kubernetes.io/docs/tasks/administer-cluster/kubelet-in-userns/
cc.KubernetesConfig.FeatureGates = addFeatureGate(cc.KubernetesConfig.FeatureGates, "KubeletInUserNamespace=true")
}
}

return cc
}

// addFeatureGate returns the comma-separated featureGates list with the
// gate s (for example "KubeletInUserNamespace=true") appended, unless an
// identical entry is already present, in which case featureGates is
// returned unchanged.
//
// Matching is done on the whole "Name=value" entry: a gate with the same
// name but a different value is not treated as a duplicate.
func addFeatureGate(featureGates, s string) string {
	// strings.Split("", ",") yields [""], so joining would produce a
	// leading comma (",s"); handle the empty list explicitly.
	if featureGates == "" {
		return s
	}
	for _, v := range strings.Split(featureGates, ",") {
		if v == s {
			return featureGates
		}
	}
	return featureGates + "," + s
}

func checkNumaCount(k8sVersion string) {
if viper.GetInt(kvmNUMACount) < 1 || viper.GetInt(kvmNUMACount) > 8 {
exit.Message(reason.Usage, "--kvm-numa-count range is 1-8")
Expand Down
19 changes: 18 additions & 1 deletion deploy/kicbase/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ RUN cd ./cmd/auto-pause/ && go build
FROM ubuntu:focal-20210401

ARG BUILDKIT_VERSION="v0.9.0"
ARG FUSE_OVERLAYFS_VERSION="v1.7.1"
ARG CONTAINERD_FUSE_OVERLAYFS_VERSION="1.0.3"

# copy in static files (configs, scripts)
COPY deploy/kicbase/10-network-security.conf /etc/sysctl.d/10-network-security.conf
Expand Down Expand Up @@ -113,14 +115,29 @@ RUN clean-install \
openssh-server \
dnsutils \
# libglib2.0-0 is required for conmon, which is required for podman
libglib2.0-0
libglib2.0-0 \
# fuse3 is required for fuse-overlayfs
fuse3

# install docker
RUN sh -c "echo 'deb https://download.docker.com/linux/ubuntu focal stable' > /etc/apt/sources.list.d/docker.list" && \
curl -L https://download.docker.com/linux/ubuntu/gpg -o docker.key && \
apt-key add - < docker.key && \
clean-install docker-ce docker-ce-cli containerd.io

# install fuse-overlayfs (used by rootless; apt-get version is old)
RUN curl -sSL --retry 5 --output /usr/local/bin/fuse-overlayfs https://github.com/containers/fuse-overlayfs/releases/download/${FUSE_OVERLAYFS_VERSION}/fuse-overlayfs-$(uname -m) \
&& chmod +x /usr/local/bin/fuse-overlayfs

# install containerd-fuse-overlayfs (used by rootless)
RUN export ARCH=$(dpkg --print-architecture | sed 's/ppc64el/ppc64le/' | sed 's/armhf/arm-v7/') \
&& echo "Installing containerd-fuse-overlayfs..." \
&& export CONTAINERD_FUSE_OVERLAYFS_BASE_URL="https://github.com/containerd/fuse-overlayfs-snapshotter/releases/download/v${CONTAINERD_FUSE_OVERLAYFS_VERSION}" \
&& curl -sSL --retry 5 --output /tmp/containerd-fuse-overlayfs.tgz "${CONTAINERD_FUSE_OVERLAYFS_BASE_URL}/containerd-fuse-overlayfs-${CONTAINERD_FUSE_OVERLAYFS_VERSION}-linux-${ARCH}.tar.gz" \
&& tar -C /usr/local/bin -xzvf /tmp/containerd-fuse-overlayfs.tgz \
&& rm -rf /tmp/containerd-fuse-overlayfs.tgz
COPY deploy/kicbase/containerd-fuse-overlayfs.service /etc/systemd/system/containerd-fuse-overlayfs.service

# install buildkit
RUN export ARCH=$(dpkg --print-architecture | sed 's/ppc64el/ppc64le/' | sed 's/armhf/arm-v7/') \
&& echo "Installing buildkit ..." \
Expand Down
13 changes: 13 additions & 0 deletions deploy/kicbase/containerd-fuse-overlayfs.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# From https://github.com/kubernetes-sigs/kind/blob/0d3780371091b2dc9ff6eea1b6054f14ff5d970a/images/base/files/etc/systemd/system/containerd-fuse-overlayfs.service
# systemd unit that runs the containerd fuse-overlayfs snapshotter (containerd-fuse-overlayfs-grpc).
[Unit]
Description=containerd fuse-overlayfs snapshotter
# Bind this unit's lifecycle to containerd.service (see PartOf= in systemd.unit(5)).
PartOf=containerd.service

[Service]
# First argument: the socket the snapshotter serves on.
# Second argument: presumably the snapshotter's data/root directory - TODO confirm against the snapshotter docs.
ExecStart=/usr/local/bin/containerd-fuse-overlayfs-grpc /run/containerd-fuse-overlayfs.sock /var/lib/containerd-fuse-overlayfs
# NOTE(review): Type=notify assumes the binary sends a readiness notification to systemd - confirm.
Type=notify
# Restart the service whenever it exits, waiting 1 second between attempts.
Restart=always
RestartSec=1

[Install]
WantedBy=multi-user.target
52 changes: 46 additions & 6 deletions deploy/kicbase/entrypoint
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,39 @@ set -o nounset
set -o pipefail
set -x

# If /proc/self/uid_map contains the full identity mapping ("0 0 4294967295"),
# we are in the initial user namespace, i.e. the host.
# Otherwise we are in a non-initial user namespace.
# https://github.com/opencontainers/runc/blob/v1.0.0-rc92/libcontainer/system/linux.go#L109-L118
#
# userns is "" on the host and "1" inside a non-initial user namespace.
userns=""
# grep -v inverts the match: it succeeds when some line of uid_map is NOT the full identity mapping.
if grep -Eqv "0[[:space:]]+0[[:space:]]+4294967295" /proc/self/uid_map; then
userns="1"
echo 'INFO: running in a user namespace (experimental)'
fi

# validate_userns checks the prerequisites for running inside a user namespace.
#
# It is a no-op when ${userns} is empty. Otherwise it:
#   - warns on stderr (without failing) if the hard RLIMIT_NOFILE is below 64000;
#   - exits 1 if /sys/fs/cgroup/cgroup.controllers does not exist (cgroup v2 not enabled);
#   - exits 1 if any of the cpu, memory or pids controllers is missing from that file.
validate_userns() {
  if [[ -z "${userns}" ]]; then
    return
  fi

  local nofile_hard
  nofile_hard="$(ulimit -Hn)"
  local nofile_hard_expected="64000"
  # ulimit can print "unlimited", which is not a number: -lt would evaluate it as a
  # variable name and abort the script under `set -o nounset`. Unlimited is high enough.
  if [[ "${nofile_hard}" != "unlimited" && "${nofile_hard}" -lt "${nofile_hard_expected}" ]]; then
    echo "WARN: UserNS: expected RLIMIT_NOFILE to be at least ${nofile_hard_expected}, got ${nofile_hard}" >&2
  fi

  if [[ ! -f "/sys/fs/cgroup/cgroup.controllers" ]]; then
    echo "ERROR: UserNS: cgroup v2 needs to be enabled, see https://rootlesscontaine.rs/getting-started/common/cgroup2/" >&2
    exit 1
  fi

  # Each required controller must appear as a whole word in cgroup.controllers.
  local f
  for f in cpu memory pids; do
    if ! grep -qw "${f}" /sys/fs/cgroup/cgroup.controllers; then
      echo "ERROR: UserNS: $f controller needs to be delegated, see https://rootlesscontaine.rs/getting-started/common/cgroup2/" >&2
      exit 1
    fi
  done
}

configure_containerd() {
# we need to switch to the 'native' snapshotter on zfs
if [[ "$(stat -f -c %T /kind)" == 'zfs' ]]; then
Expand Down Expand Up @@ -73,12 +106,16 @@ fix_mount() {
sync
fi

echo 'INFO: remounting /sys read-only'
# systemd-in-a-container should have read only /sys
# https://systemd.io/CONTAINER_INTERFACE/
# however, we need other things from `docker run --privileged` ...
# and this flag also happens to make /sys rw, amongst other things
mount -o remount,ro /sys
if [[ -z "${userns}" ]]; then
echo 'INFO: remounting /sys read-only'
# systemd-in-a-container should have read only /sys
# https://systemd.io/CONTAINER_INTERFACE/
# however, we need other things from `docker run --privileged` ...
# and this flag also happens to make /sys rw, amongst other things
#
# This step is skipped when running inside UserNS, because it fails with EACCES.
mount -o remount,ro /sys
fi

echo 'INFO: making mounts shared' >&2
# for mount propagation
Expand Down Expand Up @@ -334,6 +371,9 @@ enable_network_magic(){
fi
}

# validate state
validate_userns

# run pre-init fixups
# NOTE: it's important that we do configure* first in this order to avoid races
configure_containerd
Expand Down
2 changes: 1 addition & 1 deletion hack/preload-images/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func generateTarball(kubernetesVersion, containerRuntime, tarballFilename string
if err != nil {
return errors.Wrap(err, "failed create new runtime")
}
if err := cr.Enable(true, false); err != nil {
if err := cr.Enable(true, false, false); err != nil {
return errors.Wrap(err, "enable container runtime")
}

Expand Down
27 changes: 27 additions & 0 deletions pkg/drivers/kic/oci/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,33 @@ import (
// RoutableHostIPFromInside returns the ip/dns of the host that container lives on
// is routable from inside the container
func RoutableHostIPFromInside(ociBin string, clusterName string, containerName string) (net.IP, error) {
si, err := CachedDaemonInfo(ociBin)
if err != nil {
return nil, err
}
if si.Rootless {
if IsExternalDaemonHost(ociBin) {
return nil, fmt.Errorf("function RoutableHostIPFromInside is not implemented for external rootless daemons")
// TODO: parse DaemonHost()
}
addrs, err := net.InterfaceAddrs()
if err != nil {
return nil, err
}
for _, addr := range addrs {
var ip net.IP
switch v := addr.(type) {
case *net.IPAddr:
ip = v.IP
case *net.IPNet:
ip = v.IP
}
if ip != nil && !ip.IsLoopback() {
return ip, nil
}
}
return nil, fmt.Errorf("could not detect host IP, tried %v", addrs)
}
if ociBin == Docker {
if runtime.GOOS == "linux" {
info, err := containerNetworkInspect(ociBin, clusterName)
Expand Down
3 changes: 3 additions & 0 deletions pkg/drivers/kic/oci/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ func CreateContainerNode(p CreateParams) error {
// including some ones docker would otherwise do by default.
// for now this is what we want. in the future we may revisit this.
"--privileged",
// enable /dev/fuse explicitly for fuse-overlayfs
// (Rootless Docker does not automatically mount /dev/fuse with --privileged)
"--device", "/dev/fuse",
"--security-opt", "seccomp=unconfined", // ignore seccomp
"--tmpfs", "/tmp", // various things depend on working /tmp
"--tmpfs", "/run", // systemd wants a writable /run
Expand Down
29 changes: 25 additions & 4 deletions pkg/minikube/cruntime/containerd.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ oom_score = 0
[cgroup]
path = ""
[proxy_plugins]
# fuse-overlayfs is used for rootless
[proxy_plugins."fuse-overlayfs"]
type = "snapshot"
address = "/run/containerd-fuse-overlayfs.sock"
[plugins]
[plugins.cgroups]
no_prometheus = false
Expand All @@ -80,6 +86,7 @@ oom_score = 0
stats_collect_period = 10
enable_tls_streaming = false
max_container_log_line_size = 16384
restrict_oom_score_adj = {{ .RestrictOOMScoreAdj }}
[plugins."io.containerd.grpc.v1.cri"]
[plugins."io.containerd.grpc.v1.cri".containerd]
Expand All @@ -90,7 +97,7 @@ oom_score = 0
SystemdCgroup = {{ .SystemdCgroup }}
[plugins.cri.containerd]
snapshotter = "overlayfs"
snapshotter = "{{ .Snapshotter }}"
[plugins.cri.containerd.default_runtime]
runtime_type = "io.containerd.runc.v2"
[plugins.cri.containerd.untrusted_workload_runtime]
Expand Down Expand Up @@ -193,23 +200,31 @@ func (r *Containerd) Available() error {
}

// generateContainerdConfig sets up /etc/containerd/config.toml
func generateContainerdConfig(cr CommandRunner, imageRepository string, kv semver.Version, forceSystemd bool, insecureRegistry []string) error {
func generateContainerdConfig(cr CommandRunner, imageRepository string, kv semver.Version, forceSystemd bool, insecureRegistry []string, inUserNamespace bool) error {
cPath := containerdConfigFile
t, err := template.New("containerd.config.toml").Parse(containerdConfigTemplate)
if err != nil {
return err
}
pauseImage := images.Pause(kv, imageRepository)
snapshotter := "overlayfs"
if inUserNamespace {
snapshotter = "fuse-overlayfs"
}
opts := struct {
PodInfraContainerImage string
SystemdCgroup bool
InsecureRegistry []string
CNIConfDir string
RestrictOOMScoreAdj bool
Snapshotter string
}{
PodInfraContainerImage: pauseImage,
SystemdCgroup: forceSystemd,
InsecureRegistry: insecureRegistry,
CNIConfDir: cni.ConfDir,
RestrictOOMScoreAdj: inUserNamespace,
Snapshotter: snapshotter,
}
var b bytes.Buffer
if err := t.Execute(&b, opts); err != nil {
Expand All @@ -223,7 +238,7 @@ func generateContainerdConfig(cr CommandRunner, imageRepository string, kv semve
}

// Enable idempotently enables containerd on a host
func (r *Containerd) Enable(disOthers, forceSystemd bool) error {
func (r *Containerd) Enable(disOthers, forceSystemd, inUserNamespace bool) error {
if disOthers {
if err := disableOthers(r, r.Runner); err != nil {
klog.Warningf("disableOthers: %v", err)
Expand All @@ -232,13 +247,19 @@ func (r *Containerd) Enable(disOthers, forceSystemd bool) error {
if err := populateCRIConfig(r.Runner, r.SocketPath()); err != nil {
return err
}
if err := generateContainerdConfig(r.Runner, r.ImageRepository, r.KubernetesVersion, forceSystemd, r.InsecureRegistry); err != nil {
if err := generateContainerdConfig(r.Runner, r.ImageRepository, r.KubernetesVersion, forceSystemd, r.InsecureRegistry, inUserNamespace); err != nil {
return err
}
if err := enableIPForwarding(r.Runner); err != nil {
return err
}

if inUserNamespace {
if err := r.Init.EnableNow("containerd-fuse-overlayfs"); err != nil {
return err
}
}

// Otherwise, containerd will fail API requests with 'Unimplemented'
return r.Init.Restart("containerd")
}
Expand Down
5 changes: 4 additions & 1 deletion pkg/minikube/cruntime/crio.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,10 @@ func enableIPForwarding(cr CommandRunner) error {
}

// Enable idempotently enables CRIO on a host
func (r *CRIO) Enable(disOthers, _ bool) error {
func (r *CRIO) Enable(disOthers, _, inUserNamespace bool) error {
if inUserNamespace {
return errors.New("inUserNamespace must not be true for cri-o (yet)")
}
if disOthers {
if err := disableOthers(r, r.Runner); err != nil {
klog.Warningf("disableOthers: %v", err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/minikube/cruntime/cruntime.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ type Manager interface {
// Version retrieves the current version of this runtime
Version() (string, error)
// Enable idempotently enables this runtime on a host
Enable(bool, bool) error
Enable(bool, bool, bool) error
// Disable idempotently disables this runtime on a host
Disable() error
// Active returns whether or not a runtime is active on a host
Expand Down
2 changes: 1 addition & 1 deletion pkg/minikube/cruntime/cruntime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ func TestEnable(t *testing.T) {
if err != nil {
t.Fatalf("New(%s): %v", tc.runtime, err)
}
err = cr.Enable(true, false)
err = cr.Enable(true, false, false)
if err != nil {
t.Errorf("%s disable unexpected error: %v", tc.runtime, err)
}
Expand Down
5 changes: 4 additions & 1 deletion pkg/minikube/cruntime/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ func (r *Docker) Active() bool {
}

// Enable idempotently enables Docker on a host
func (r *Docker) Enable(disOthers, forceSystemd bool) error {
func (r *Docker) Enable(disOthers, forceSystemd, inUserNamespace bool) error {
if inUserNamespace {
return errors.New("inUserNamespace must not be true for docker")
}
containerdWasActive := r.Init.Active("containerd")

if disOthers {
Expand Down
10 changes: 9 additions & 1 deletion pkg/minikube/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,15 @@ func NeedsPortForward(name string) bool {
return true
}
// Docker for Desktop
return runtime.GOOS == "darwin" || runtime.GOOS == "windows" || detect.IsMicrosoftWSL()
if runtime.GOOS == "darwin" || runtime.GOOS == "windows" || detect.IsMicrosoftWSL() {
return true
}

si, err := oci.CachedDaemonInfo(name)
if err != nil {
panic(err)
}
return si.Rootless
}

// HasResourceLimits returns true if driver can set resource limits such as memory size or CPU count.
Expand Down
3 changes: 2 additions & 1 deletion pkg/minikube/node/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,8 @@ func configureRuntimes(runner cruntime.CommandRunner, cc config.ClusterConfig, k
}
}

err = cr.Enable(disableOthers, forceSystemd())
inUserNamespace := strings.Contains(cc.KubernetesConfig.FeatureGates, "KubeletInUserNamespace=true")
err = cr.Enable(disableOthers, forceSystemd(), inUserNamespace)
if err != nil {
exit.Error(reason.RuntimeEnable, "Failed to enable container runtime", err)
}
Expand Down
9 changes: 1 addition & 8 deletions pkg/minikube/registry/drvs/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,14 +146,7 @@ func status() (retState registry.State) {
return suggestFix("info", -1, serr, fmt.Errorf("docker info error: %s", serr))
}

if si.Rootless {
return registry.State{
Reason: "PROVIDER_DOCKER_ROOTLESS",
Error: errors.New("rootless Docker not supported yet"),
Installed: true,
Healthy: false,
Doc: "https://github.com/kubernetes/minikube/issues/10836"}
}
// TODO: validate cgroup v2 delegation when si.Rootless is true

return checkNeedsImprovement()
}
Expand Down
Loading