Skip to content

Commit

Permalink
rootless: remove prerequisite of "net.netfilter.nf_conntrack_max"
Browse files Browse the repository at this point in the history
We can avoid having to set net.netfilter.nf_conntrack_max (and the error that
occurs when it cannot be set) simply by setting `kubeProxyConfiguration.conntrack.maxPerCore` to 0.

Signed-off-by: Akihiro Suda <[email protected]>
  • Loading branch information
AkihiroSuda committed Mar 3, 2021
1 parent 99077d5 commit fd99e3c
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 78 deletions.
1 change: 0 additions & 1 deletion images/base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ RUN echo "Ensuring scripts are executable ..." \
libseccomp2 pigz \
bash ca-certificates curl rsync \
nfs-common \
jq \
&& find /lib/systemd/system/sysinit.target.wants/ -name "systemd-tmpfiles-setup.service" -delete \
&& rm -f /lib/systemd/system/multi-user.target.wants/* \
&& rm -f /etc/systemd/system/*.wants/* \
Expand Down
2 changes: 0 additions & 2 deletions images/base/files/etc/containerd/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ version = 2
default_runtime_name = "runc"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
BinaryName = "runc"

# Setup a runtime with the magic name ("test-handler") used for Kubernetes
# runtime class tests ...
Expand Down
16 changes: 0 additions & 16 deletions images/base/files/usr/local/bin/entrypoint
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,6 @@ validate_userns() {
exit 1
fi

if ! [ -f "/proc/sys/net/netfilter/nf_conntrack_max" ]; then
echo "ERROR: UserNS: /proc/sys/net/netfilter/nf_conntrack_max does not exist (needs kernel 5.7 or later)" >&2
fi
local nf_conntrack_max
nf_conntrack_max="$(cat /proc/sys/net/netfilter/nf_conntrack_max)"
local nf_conntrack_max_expected="$((32768 * $(nproc)))"
if [[ "${nf_conntrack_max}" != "${nf_conntrack_max_expected}" ]]; then
# This ERROR can be demoted to WARNING when k/k PR gets merged: https://github.com/kubernetes/kubernetes/pull/92863
echo "ERROR: UserNS: expected net.netfilter.nf_conntrack_max to be ${nf_conntrack_max_expected}, got ${nf_conntrack_max}" >&2
exit 1
fi

local dmesg_restrict
dmesg_restrict="$(cat /proc/sys/kernel/dmesg_restrict)"
if [[ "${dmesg_restrict}" != "0" ]]; then
Expand Down Expand Up @@ -129,10 +117,6 @@ configure_containerd() {
fake_sysctl "kernel.panic_on_oops"
fake_sysctl "kernel.keys.root_maxkeys"
fake_sysctl "kernel.keys.root_maxbytes"

# Wrap runc to mount fake "/sys/module/nf_conntrack/parameters/hashsize" for kube-proxy.
# Workaround until https://github.com/kubernetes/kubernetes/pull/92863 gets merged in the upstream.
sed -i 's/BinaryName = "runc"/BinaryName = "userns-ociwrapper"/' /etc/containerd/config.toml
fi
}

Expand Down
51 changes: 0 additions & 51 deletions images/base/files/usr/local/bin/userns-ociwrapper

This file was deleted.

6 changes: 6 additions & 0 deletions pkg/cluster/internal/create/actions/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ func (a *Action) Execute(ctx *actions.ActionContext) error {
ctx.Status.Start("Writing configuration 📜")
defer ctx.Status.End(false)

providerInfo, err := ctx.Provider.Info()
if err != nil {
return err
}

allNodes, err := ctx.Nodes()
if err != nil {
return err
Expand Down Expand Up @@ -76,6 +81,7 @@ func (a *Action) Execute(ctx *actions.ActionContext) error {
IPv6: ctx.Config.Networking.IPFamily == "ipv6",
FeatureGates: ctx.Config.FeatureGates,
RuntimeConfig: ctx.Config.RuntimeConfig,
RootlessProvider: providerInfo.Rootless,
}

kubeadmConfigPlusPatches := func(node nodes.Node, data kubeadm.ConfigData) func() error {
Expand Down
15 changes: 15 additions & 0 deletions pkg/cluster/internal/kubeadm/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ type ConfigData struct {
// These auto-generated fields are available to Config templates,
// but not meant to be set by hand
DerivedConfigData

// RootlessProvider is true if the provider is running in rootless mode, in which
// case kube-proxy needs to be configured not to fail on sysctl errors.
RootlessProvider bool
}

// DerivedConfigData fields are automatically derived by
Expand Down Expand Up @@ -382,6 +386,14 @@ mode: "{{ .KubeProxyMode }}"
{{end}}{{end}}
iptables:
minSyncPeriod: 1s
{{if .RootlessProvider}}conntrack:
# Skip setting sysctl value "net.netfilter.nf_conntrack_max"
maxPerCore: 0
# Skip setting "net.netfilter.nf_conntrack_tcp_timeout_established"
tcpEstablishedTimeout: 0s
# Skip setting "net.netfilter.nf_conntrack_tcp_timeout_close"
tcpCloseWaitTimeout: 0s
{{end}}
`

// Config returns a kubeadm config generated from config data, in particular
Expand All @@ -400,6 +412,9 @@ func Config(data ConfigData) (config string, err error) {
// assume the latest API version, then fallback if the k8s version is too low
templateSource := ConfigTemplateBetaV2
if ver.LessThan(version.MustParseSemantic("v1.15.0")) {
if data.RootlessProvider {
return "", errors.Errorf("version %q is not compatible with rootless provider", ver)
}
templateSource = ConfigTemplateBetaV1
}

Expand Down
32 changes: 32 additions & 0 deletions pkg/cluster/internal/providers/docker/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ limitations under the License.
package docker

import (
"encoding/csv"
"encoding/json"
"fmt"
"net"
"os"
Expand Down Expand Up @@ -281,3 +283,33 @@ func (p *provider) CollectLogs(dir string, nodes []nodes.Node) error {
errs = append(errs, errors.AggregateConcurrent(fns))
return errors.NewAggregate(errs)
}

// Info returns the provider info.
//
// It invokes `docker info` asking only for the JSON-encoded list of security
// options, and marks the provider as rootless when any of those options
// contains the field "name=rootless".
func (p *provider) Info() (*providers.ProviderInfo, error) {
	raw, err := exec.Output(exec.Command("docker", "info", "--format", "{{json .SecurityOptions}}"))
	if err != nil {
		return nil, errors.Wrap(err, "failed to get docker info")
	}

	var opts []string
	if err := json.Unmarshal(raw, &opts); err != nil {
		return nil, err
	}

	result := &providers.ProviderInfo{}
	for _, opt := range opts {
		// Each option is a comma-separated record, e.g.
		// "name=seccomp,profile=default" or simply "name=rootless".
		records, err := csv.NewReader(strings.NewReader(opt)).ReadAll()
		if err != nil {
			return nil, err
		}
		for i := range records {
			for _, field := range records[i] {
				if field == "name=rootless" {
					result.Rootless = true
				}
			}
		}
	}
	return result, nil
}
8 changes: 8 additions & 0 deletions pkg/cluster/internal/providers/podman/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,3 +350,11 @@ func (p *provider) CollectLogs(dir string, nodes []nodes.Node) error {
errs = append(errs, errors.AggregateConcurrent(fns))
return errors.NewAggregate(errs)
}

// Info returns the provider info.
//
// The provider is reported as rootless whenever the effective user ID of the
// current process is not 0.
func (p *provider) Info() (*providers.ProviderInfo, error) {
	rootless := os.Geteuid() != 0
	return &providers.ProviderInfo{Rootless: rootless}, nil
}
7 changes: 7 additions & 0 deletions pkg/cluster/internal/providers/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,11 @@ type Provider interface {
GetAPIServerInternalEndpoint(cluster string) (string, error)
// CollectLogs will populate dir with cluster logs and other debug files
CollectLogs(dir string, nodes []nodes.Node) error
// Info returns the provider info
Info() (*ProviderInfo, error)
}

// ProviderInfo is the info of the provider, as returned by Provider.Info.
type ProviderInfo struct {
// Rootless reports whether the provider is running in rootless mode.
Rootless bool
}
8 changes: 0 additions & 8 deletions site/content/docs/user/rootless.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,6 @@ Starting with kind 0.11.0 and Docker 20.10, Rootless Docker can be used as the n
Rootless Podman is not supported at the moment.

## Host requirements
### Kernel
The kernel needs to be 5.7 or later currently.
In future, we may be able to support a broader range of the kernel version.

### cgroup v2
The host needs to be running with cgroup v2.

cgroup v2 is enabled by default on Fedora.
Expand All @@ -32,12 +27,9 @@ Delegate=yes

- Create `/etc/sysctl.d/99-rootless.conf` with the following content, and then run `sudo sysctl --system`:
```
net.netfilter.nf_conntrack_max=<32768 * the number of CPUs>
kernel.dmesg_restrict=0
```

e.g, When the number of CPUs (`nproc`) is 4, `net.netfilter.nf_conntrack_max=131072`.

## Restrictions

The restrictions of Rootless Docker apply to kind clusters as well.
Expand Down

0 comments on commit fd99e3c

Please sign in to comment.