diff --git a/benchexec/container.py b/benchexec/container.py index ba6dcfd95..603ca576e 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -14,12 +14,15 @@ import fcntl import logging import os +import re import resource # noqa: F401 @UnusedImport necessary to eagerly import this module import shlex +import shutil import signal import socket import struct import sys +import subprocess from benchexec import libc from benchexec import seccomp @@ -475,6 +478,28 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): overlay_count = 0 + # Check if we need to use fuse-overlayfs for all overlay mounts. + use_fuse = check_use_fuse_overlayfs(mount_base, dir_modes) + + # Create overlay mounts for all mount points. + fuse_overlay_mount_path = ( + setup_fuse_overlay(temp_base, work_base) if use_fuse else None + ) + + # For some reason there is a deadlock if our temp dir is not hidden, + # and we did not manage to solve this by forcing the mode to hidden here. + # The whole temp directory of the system needs to be hidden to have this working. + # cf. https://github.com/sosy-lab/benchexec/pull/1062#discussion_r1732494331 + if fuse_overlay_mount_path and ( + (temp_dir_mode := determine_directory_mode(dir_modes, temp_base)) + not in [None, DIR_HIDDEN] + ): + raise OSError( + "BenchExec needs to use fuse-overlayfs but the directory mode of " + f'the temp directory is "{temp_dir_mode}", which would lead to a deadlock. ' + 'Please use the default "hidden" directory mode for the temp directory.' 
+ ) + for _unused_source, full_mountpoint, fstype, options in list(get_mount_points()): if not util.path_is_below(full_mountpoint, mount_base): continue @@ -529,33 +554,87 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): temp_path = temp_base + mountpoint if mode == DIR_OVERLAY: - overlay_count += 1 - work_path = work_base + b"/" + str(overlay_count).encode() - os.makedirs(temp_path, exist_ok=True) - os.makedirs(work_path, exist_ok=True) - try: - # Previous mount in this place not needed if replaced with overlay dir. - libc.umount(mount_path) - except OSError as e: - logging.debug(e) - try: - make_overlay_mount(mount_path, mountpoint, temp_path, work_path) - except OSError as e: - mp = mountpoint.decode() - raise OSError( - e.errno, - f"Creating overlay mount for '{mp}' failed: {os.strerror(e.errno)}. " - f"Please use other directory modes, " - f"for example '--read-only-dir {shlex.quote(mp)}'.", - ) + if os.path.ismount(mount_path): + try: + # Previous mount in this place not needed if replaced with overlay dir. + libc.umount(mount_path) + except OSError as e: + logging.debug(e) + + if use_fuse and fuse_overlay_mount_path: + fuse_mount_path = fuse_overlay_mount_path + mountpoint + make_bind_mount(fuse_mount_path, mount_path) + else: + overlay_count += 1 + os.makedirs(temp_path, exist_ok=True) + work_path = work_base + b"/" + str(overlay_count).encode() + os.makedirs(work_path, exist_ok=True) + try: + make_overlay_mount(mount_path, mountpoint, temp_path, work_path) + except OSError as e: + # Resort to fuse-overlayfs if kernel overlayfs is not available. + # This part of the code (using fuse-overlayfs as a fallback) is intentionally + # kept as a workaround for triple-nested execution with kernel overlayfs. 
+ mp = mountpoint.decode() + if fuse_overlay_mount_path: + logging.debug( + "Fallback to fuse-overlayfs for overlay mount at '%s'.", + mp, + ) + fuse_mount_path = fuse_overlay_mount_path + mountpoint + make_bind_mount(fuse_mount_path, mount_path) + else: + if use_fuse: + # We tried to use overlayfs before, but it failed. + # No need to try again, just log the error accordingly. + if os.getenv("container") == "podman" or os.path.exists( + "/run/.containerenv" + ): + # benchexec running in a container without /dev/fuse + raise OSError( + e.errno, + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. " + f"Looks like BenchExec is running in a container, " + f"please either launch the container with '--device /dev/fuse' " + f"or use a different directory mode, " + f"such as '--read-only-dir {shlex.quote(mp)}'.", + ) from e + else: + raise OSError( + e.errno, + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. " + f"Please either install version 1.10 or higher of fuse-overlayfs, " + f"or use a different directory mode such as '--read-only-dir {shlex.quote(mp)}'.", + ) from e + else: + # We should try fuse-overlayfs here, this could handle triple-nested benchexec. + # (cf. https://github.com/sosy-lab/benchexec/issues/1067 + fuse_overlay_mount_path = setup_fuse_overlay( + temp_base, work_base + ) + if fuse_overlay_mount_path: + logging.debug( + "Fallback to fuse-overlayfs for overlay mount at '%s'.", + mp, + ) + fuse_mount_path = fuse_overlay_mount_path + mountpoint + make_bind_mount(fuse_mount_path, mount_path) + else: + raise OSError( + e.errno, + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. " + f"Please either install version 1.10 or higher of fuse-overlayfs, " + f"or use a different directory mode such as '--read-only-dir {shlex.quote(mp)}'.", + ) from e elif mode == DIR_HIDDEN: os.makedirs(temp_path, exist_ok=True) - try: - # Previous mount in this place not needed if replaced with hidden dir. 
- libc.umount(mount_path) - except OSError as e: - logging.debug(e) + if os.path.ismount(mount_path): + try: + # Previous mount in this place not needed if replaced with hidden dir. + libc.umount(mount_path) + except OSError as e: + logging.debug(e) make_bind_mount(temp_path, mount_path) elif mode == DIR_READ_ONLY: @@ -638,7 +717,7 @@ def determine_directory_mode(dir_modes, path, fstype=None): result_mode == DIR_OVERLAY and fstype and ( - fstype.startswith(b"fuse.") + (fstype.startswith(b"fuse.") and fstype != b"fuse.fuse-overlayfs") or fstype == b"autofs" or fstype == b"vfat" or fstype == b"ntfs" @@ -716,6 +795,25 @@ def remount_with_additional_flags(mountpoint, fstype, existing_options, mountfla libc.mount(None, mountpoint, None, mountflags, None) +def escape_overlayfs_parameters(s): + """ + Safely encode a string for being used as a path for both kernel overlayfs + and fuse-overlayfs. + In addition to escaping ",", which separates mount options, + we need to escape ":", which overlayfs uses to separate multiple lower dirs + (cf. https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt). + Also, the path shall be normalized to avoid issues with "//" at the beginning + (cf. https://github.com/sosy-lab/benchexec/pull/1062). + """ + assert s[0] == ord(b"/"), "Path must be absolute" + normalized_path = b"/" + s.lstrip(b"/") + return ( + normalized_path.replace(b"\\", rb"\\") + .replace(b":", rb"\:") + .replace(b",", rb"\,") + ) + + def make_overlay_mount(mount, lower, upper, work): logging.debug( "Creating overlay mount: target=%s, lower=%s, upper=%s, work=%s", @@ -725,28 +823,191 @@ def make_overlay_mount(mount, lower, upper, work): work, ) - def escape(s): - """ - Safely encode a string for being used as a path for overlayfs. - In addition to escaping ",", which separates mount options, - we need to escape ":", which overlayfs uses to separate multiple lower dirs - (cf. https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt).
- """ - return s.replace(b"\\", rb"\\").replace(b":", rb"\:").replace(b",", rb"\,") - libc.mount( b"none", mount, b"overlay", 0, b"lowerdir=" - + escape(lower) + + escape_overlayfs_parameters(lower) + + b",upperdir=" + + escape_overlayfs_parameters(upper) + + b",workdir=" + + escape_overlayfs_parameters(work), + ) + + +def check_use_fuse_overlayfs(mount_base, dir_modes): + """ + Check whether an overlay mountpoint requires the use of fuse-overlayfs + by determining if there are any sub-mounts below it. + """ + mount_points = [ + (full_mountpoint, fstype) + for _unused_source, full_mountpoint, fstype, _options in get_mount_points() + if util.path_is_below(full_mountpoint, mount_base) + ] + + for full_mountpoint, fstype in mount_points: + mountpoint = full_mountpoint[len(mount_base) :] or b"/" + mode = determine_directory_mode(dir_modes, mountpoint, fstype) + + if not mode or not os.path.exists(mountpoint): + continue + + if mode == DIR_OVERLAY: + # Check if there are any sub-mounts within the current overlay mount point + for sub_mountpoint, _unused_fstype in mount_points: + if ( + util.path_is_below(sub_mountpoint, mountpoint) + and sub_mountpoint != mountpoint + ): + logging.debug( + "Using fuse-overlayfs because of mount on '%s'", + mountpoint.decode(), + ) + return True + + return False + + +@contextlib.contextmanager +def permitted_cap_as_ambient(): + """ + Transfer all permitted capabilities to the inheritable set + and raise them in the ambient set if effective. + Finally, drop all ambient capabilities by removing them from the ambient set, + and undo the changes made to the inheritable set. + + Used by fuse-based overlay mounts needing temporary capability elevation.
+ """ + header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0) + data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)() + + libc.capget(header, data) + original_inheritable = [data[0].inheritable, data[1].inheritable] + cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0") + + try: + data[0].inheritable = data[0].permitted + data[1].inheritable = data[1].permitted + libc.capset(header, data) + + effective = (data[1].effective << 32) | data[0].effective + for cap in range(cap_last_cap + 1): + if effective & (1 << cap): + libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_RAISE, cap, 0, 0) + + yield + finally: + libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) + + data[0].inheritable, data[1].inheritable = original_inheritable + libc.capset(header, data) + + +def get_fuse_overlayfs_executable(): + """ + Retrieve the path to the fuse-overlayfs executable + if it is available and meets the version requirement. + + @return: The path to fuse-overlayfs executable if found and valid, None otherwise. + """ + fuse = shutil.which("fuse-overlayfs") + if fuse is None: + return None + + try: + result = subprocess.run( + args=(fuse, "--version"), + check=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + output = result.stdout + except subprocess.CalledProcessError as e: + logging.warning("%s not available: %s", fuse, e) + return None + + if match := re.search( + r"^fuse-overlayfs:.*?(\d+\.\d+(\.\d+)?)", output, re.MULTILINE + ): + version = [int(part) for part in match[1].split(".")] + if version >= [1, 10]: + logging.debug("%s version: %s", fuse, match[1]) + return fuse + else: + logging.warning( + "Ignoring %s because its version %s is broken. 
" + "Please install version 1.10 or newer.", + fuse, + match[1], + ) + return None + else: + logging.warning( + "Could not find version information of %s in output, but still attempt to use it.", + fuse, + ) + return fuse + + +def setup_fuse_overlay(temp_base, work_base): + """ + Check if fuse-overlayfs is available on the system and, + if so, creates a temporary overlay filesystem by stacking the root directory + with a specified temporary directory. + + @return: The path to the mounted overlay filesystem if successful, None otherwise. + """ + fuse = get_fuse_overlayfs_executable() + if fuse is None: + return None + temp_fuse = work_base + b"/fuse_mount" + work_fuse = work_base + b"/fuse_work" + os.makedirs(temp_fuse, exist_ok=True) + os.makedirs(work_fuse, exist_ok=True) + + logging.debug( + "Creating overlay mount with %s: target=%s, lower=%s, upper=%s, work=%s", + fuse, + temp_fuse, + b"/", + temp_base, + work_fuse, + ) + + cmd = ( + fuse, + b"-o", + b"lowerdir=/" + b",upperdir=" - + escape(upper) + + escape_overlayfs_parameters(temp_base) + b",workdir=" - + escape(work), + + escape_overlayfs_parameters(work_fuse), + escape_overlayfs_parameters(temp_fuse), ) + try: + with permitted_cap_as_ambient(): + # Temporarily elevate permitted capabilities to the inheritable set + # and raise them in the ambient set. + result = subprocess.run( + args=cmd, + check=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) + if result.stdout: + logging.debug("fuse-overlayfs: %s", result.stdout.decode()) + return temp_fuse + except subprocess.CalledProcessError as e: + logging.debug("Failed to create overlay mount with %s: %s", fuse, e) + return None + def mount_proc(container_system_config): """Mount the /proc filesystem. 
diff --git a/benchexec/containerized_tool.py b/benchexec/containerized_tool.py index 47856b7d5..3ecf46874 100644 --- a/benchexec/containerized_tool.py +++ b/benchexec/containerized_tool.py @@ -206,7 +206,6 @@ def _init_container( # Container config container.setup_user_mapping(os.getpid(), uid, gid) - _setup_container_filesystem(temp_dir, dir_modes, container_system_config) if container_system_config: socket.sethostname(container.CONTAINER_HOSTNAME) if not network_access: @@ -225,6 +224,10 @@ def _init_container( os.waitpid(pid, 0) os._exit(0) + # We set up the container's filesystem in the child process. + # Delaying this until after the fork avoids the "Transport endpoint not connected" issue. + _setup_container_filesystem(temp_dir, dir_modes, container_system_config) + # Finalize container setup in child container.mount_proc(container_system_config) # only possible in child container.drop_capabilities() diff --git a/benchexec/libc.py b/benchexec/libc.py index 2a808ec7e..f000c2d6f 100644 --- a/benchexec/libc.py +++ b/benchexec/libc.py @@ -184,14 +184,27 @@ class CapData(_ctypes.Structure): _ctypes.POINTER(CapData * 2), ] +capget = _libc.capget +"""Get the capabilities of the current thread.""" +capget.errcheck = _check_errno +capget.argtypes = [ + _ctypes.POINTER(CapHeader), + _ctypes.POINTER(CapData * 2), +] + LINUX_CAPABILITY_VERSION_3 = 0x20080522 # /usr/include/linux/capability.h +LINUX_CAPABILITY_U32S_3 = 2 # /usr/include/linux/capability.h CAP_SYS_ADMIN = 21 # /usr/include/linux/capability.h +PR_CAP_AMBIENT = 47 # /usr/include/linux/prctl.h +PR_CAP_AMBIENT_RAISE = 2 # /usr/include/linux/prctl.h +PR_CAP_AMBIENT_CLEAR_ALL = 4 # /usr/include/linux/prctl.h prctl = _libc.prctl """Modify options of processes: http://man7.org/linux/man-pages/man2/prctl.2.html""" prctl.errcheck = _check_errno prctl.argtypes = [c_int, c_ulong, c_ulong, c_ulong, c_ulong] + # /usr/include/linux/prctl.h PR_SET_DUMPABLE = 4 PR_GET_SECCOMP = 21 diff --git a/benchexec/test_runexecutor.py
b/benchexec/test_runexecutor.py index ca84c1b95..6024b7ced 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -1199,6 +1199,130 @@ def test_uptime_without_lxcfs(self): uptime, 10, f"Uptime {uptime}s unexpectedly low in container" ) + def test_fuse_overlay(self): + if not container.get_fuse_overlayfs_executable(): + self.skipTest("fuse-overlayfs not available") + with tempfile.TemporaryDirectory(prefix="BenchExec_test_") as temp_dir: + test_file_path = os.path.join(temp_dir, "test_file") + with open(test_file_path, "wb") as test_file: + test_file.write(b"TEST_TOKEN") + + self.setUp( + dir_modes={ + "/": containerexecutor.DIR_READ_ONLY, + "/home": containerexecutor.DIR_HIDDEN, + "/tmp": containerexecutor.DIR_HIDDEN, + temp_dir: containerexecutor.DIR_OVERLAY, + }, + ) + result, output = self.execute_run( + "/bin/sh", + "-c", + f"if [ $({self.cat} {test_file_path}) != TEST_TOKEN ]; then exit 1; fi; \ + {self.echo} TOKEN_CHANGED >{test_file_path}", + ) + self.check_result_keys(result, "returnvalue") + self.check_exitcode(result, 0, "exit code of inner runexec is not zero") + self.assertTrue( + os.path.exists(test_file_path), + f"File '{test_file_path}' removed, output was:\n" + "\n".join(output), + ) + with open(test_file_path, "rb") as test_file: + test_token = test_file.read() + self.assertEqual( + test_token.strip(), + b"TEST_TOKEN", + f"File '{test_file_path}' content is incorrect. Expected 'TEST_TOKEN', but got:\n{test_token}", + ) + + def test_triple_nested_runexec(self): + if not container.get_fuse_overlayfs_executable(): + self.skipTest("missing fuse-overlayfs") + + # Check if COV_CORE_SOURCE environment variable is set and remove it. + # This is necessary because the coverage tool will not work in the nested runexec. 
+ coverage_env_var = os.environ.pop("COV_CORE_SOURCE", None) + + with tempfile.TemporaryDirectory(prefix="BenchExec_test_") as temp_dir: + overlay_dir = os.path.join(temp_dir, "overlay") + os.makedirs(overlay_dir) + test_file = os.path.join(overlay_dir, "TEST_FILE") + output_dir = os.path.join(temp_dir, "output") + os.makedirs(output_dir) + mid_output_file = os.path.join(output_dir, "mid_output.log") + inner_output_file = os.path.join(output_dir, "inner_output.log") + with open(test_file, "w") as f: + f.write("TEST_TOKEN") + f.seek(0) + + outer_cmd = [ + "python3", + runexec, + "--full-access-dir", + "/", + "--overlay-dir", + overlay_dir, + "--full-access-dir", + output_dir, + "--hidden-dir", + "/tmp", + "--output", + mid_output_file, + "--", + ] + mid_cmd = [ + "python3", + runexec, + "--full-access-dir", + "/", + "--overlay-dir", + overlay_dir, + "--full-access-dir", + output_dir, + "--hidden-dir", + "/tmp", + "--output", + inner_output_file, + "--", + ] + inner_cmd = [ + "/bin/sh", + "-c", + f"if [ $({self.cat} {test_file}) != TEST_TOKEN ]; then exit 1; fi; {self.echo} TOKEN_CHANGED >{test_file}", + ] + combined_cmd = outer_cmd + mid_cmd + inner_cmd + + self.setUp( + dir_modes={ + "/": containerexecutor.DIR_FULL_ACCESS, + "/tmp": containerexecutor.DIR_HIDDEN, + overlay_dir: containerexecutor.DIR_OVERLAY, + output_dir: containerexecutor.DIR_FULL_ACCESS, + }, + ) + outer_result, outer_output = self.execute_run(*combined_cmd) + self.check_result_keys(outer_result, "returnvalue") + self.check_exitcode( + outer_result, 0, "exit code of outer runexec is not zero" + ) + with open(mid_output_file, "r") as f: + self.assertIn("returnvalue=0", f.read().strip().splitlines()) + self.assertTrue( + os.path.exists(test_file), + f"File '{test_file}' removed, output was:\n" + "\n".join(outer_output), + ) + with open(test_file, "r") as f: + test_token = f.read() + self.assertEqual( + test_token.strip(), + "TEST_TOKEN", + f"File '{test_file}' content is incorrect. 
Expected 'TEST_TOKEN', but got:\n{test_token}", + ) + + # Restore COV_CORE_SOURCE environment variable + if coverage_env_var is not None: + os.environ["COV_CORE_SOURCE"] = coverage_env_var + class _StopRunThread(threading.Thread): def __init__(self, delay, runexecutor): diff --git a/debian/control b/debian/control index 73ddc6d2e..287cc2ea8 100644 --- a/debian/control +++ b/debian/control @@ -20,7 +20,7 @@ Package: benchexec Architecture: all Pre-Depends: ${misc:Pre-Depends} Depends: ${python3:Depends}, python3-pkg-resources, ${misc:Depends}, ucf -Recommends: cpu-energy-meter, libseccomp2, lxcfs, python3-coloredlogs, python3-pystemd +Recommends: cpu-energy-meter, fuse-overlayfs (>= 1.10), libseccomp2, lxcfs, python3-coloredlogs, python3-pystemd Description: Framework for Reliable Benchmarking and Resource Measurement BenchExec allows benchmarking non-interactive tools on Linux systems. It measures CPU time, wall time, and memory usage of a tool, diff --git a/doc/INSTALL.md b/doc/INSTALL.md index 401b13551..8b6e96f59 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -20,6 +20,7 @@ SPDX-License-Identifier: Apache-2.0 The following packages are optional but recommended dependencies: - [cpu-energy-meter] will let BenchExec measure energy consumption on Intel CPUs. +- [fuse-overlayfs] (version 1.10 or newer) allows using the overlay directory mode for containers in cases where the kernel-based overlayfs does not work. - [libseccomp2] provides better container isolation. - [LXCFS] provides better container isolation. - [coloredlogs] provides nicer log output. @@ -115,7 +116,7 @@ Of course you can also install BenchExec in a virtualenv if you are familiar wit On systems without systemd you can omit the `[systemd]` part. Please make sure to configure cgroups as [described below](#setting-up-cgroups) -and install [cpu-energy-meter], [libseccomp2], [LXCFS], and [pqos_wrapper] if desired.
+and install [cpu-energy-meter], [fuse-overlayfs], [libseccomp2], [LXCFS], and [pqos_wrapper] if desired. ### Containerized Environments @@ -137,7 +138,7 @@ otherwise pip will try to download and build this module, which needs a compiler and several development header packages. Please make sure to configure cgroups as [described below](#setting-up-cgroups) -and install [cpu-energy-meter], [libseccomp2], [LXCFS], and [pqos_wrapper] if desired. +and install [cpu-energy-meter], [fuse-overlayfs], [libseccomp2], [LXCFS], and [pqos_wrapper] if desired. ## Kernel Requirements @@ -155,7 +156,7 @@ on **Linux 5.11 or newer**, so we suggest at least this kernel version. And if your system is using cgroups v2 (cf. below), the full feature set requires **Linux 5.19 or newer**. -On kernels than 5.11, you need to avoid using the overlay filesystem (cf. below), +On kernels older than 5.11, you need to avoid using the kernel-based overlay filesystem (cf. below), all other features are supported. However, we strongly recommend to use at least **Linux 4.14 or newer** because it reduces the overhead of BenchExec's memory measurements and limits. @@ -188,8 +189,13 @@ that are not usable on all distributions by default: - **Unprivileged Overlay Filesystem**: This is only available since Linux 5.11 (kernel option `CONFIG_OVERLAY_FS`), but also present in all Ubuntu kernels, even older ones. - Users of older kernels on other distributions can still use container mode, but have to choose a different mode - of mounting the file systems in the container, e.g., with `--read-only-dir /` (see below). + Users of older kernels on other distributions can still use container mode, + but have to install [fuse-overlayfs] or choose a different mode + of mounting the file systems in the container, e.g., with `--read-only-dir /` + (cf. [container configuration](container.md#directory-access-modes)). 
+ Note that the kernel-based overlayfs does not support some specific configurations + (such as the default mode of overlay for `/`), + so [fuse-overlayfs] is often useful or required anyway. If container mode does not work, please check the [common problems](container.md#common-problems). @@ -382,6 +388,7 @@ Please refer to the [development instructions](DEVELOPMENT.md). [coloredlogs]: https://pypi.org/project/coloredlogs/ [cpu-energy-meter]: https://github.com/sosy-lab/cpu-energy-meter +[fuse-overlayfs]: https://github.com/containers/fuse-overlayfs [libseccomp2]: https://github.com/seccomp/libseccomp [LXCFS]: https://github.com/lxc/lxcfs [pqos]: https://github.com/intel/intel-cmt-cat/tree/master/pqos diff --git a/doc/benchexec-in-container.md b/doc/benchexec-in-container.md index 05449a7df..d6df4faa3 100644 --- a/doc/benchexec-in-container.md +++ b/doc/benchexec-in-container.md @@ -47,7 +47,8 @@ or ``` docker run --privileged --cap-drop=all -t my-container benchexec ``` - +If you want BenchExec to use `fuse-overlayfs` in the container, +also specify `--device /dev/fuse`. ## BenchExec in Interactive Containers diff --git a/doc/container.md b/doc/container.md index 46d89ab59..0d9c99cb4 100644 --- a/doc/container.md +++ b/doc/container.md @@ -69,7 +69,9 @@ For each directory in the container one of the following four access modes can b Writes to this directory will not be visible on the host. - **read-only**: This directory is visible in the container, but read-only. - **overlay**: This directory is visible in the container and - an [overlay filesystem](https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt) + an overlay filesystem (either from the + [kernel](https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt) + or [fuse-overlayfs]) is layered on top of it that redirects all write accesses. 
This means that write accesses are possible in the container, but the effect of any write is not visible on the host, only inside the container, and not written to disk. @@ -205,13 +207,13 @@ You can still use BenchExec if you completely disable the container mode with `- #### `Failed to configure container: [Errno 19] Creating overlay mount for '...' failed: No such device` Your kernel does not support the overlay filesystem, please check the [system requirements](INSTALL.md#kernel-requirements). -You can use a different access mode for directories, e.g., with `--read-only-dir /`. +You can use [fuse-overlayfs] or a different access mode for directories, e.g., with `--read-only-dir /`. If some directories need to be writable, specify other directory modes for these directories as described above. #### `Failed to configure container: [Errno 1] Creating overlay mount for '...' failed: Operation not permitted` Your kernel does not allow mounting the overlay filesystem inside a container. -For this you need either Ubuntu or kernel version 5.11 or newer. -Alternatively, if you cannot use either, +For this you need either Ubuntu, [fuse-overlayfs], or kernel version 5.11 or newer. +Alternatively, if you cannot use any of these, you can use a different access mode for directories, e.g., with `--read-only-dir /`. If some directories need to be writable, specify other directory modes for these directories as described above. @@ -226,6 +228,9 @@ Another limitation of the kernel is that one can only nest overlays twice, so if you want to run a container inside a container inside a container, at least one of these needs to use a non-overlay mode for this path. +We recommend the installation of [fuse-overlayfs] in version 1.10 or newer, +which supports all of these use cases. 
+ #### `Cannot change into working directory inside container: [Errno 2] No such file or directory` Either you have specified an invalid directory as working directory with `--dir`, or your current directory on the host is hidden inside the container @@ -253,3 +258,5 @@ If it still occurs, please attach to all child process of BenchExec with `sudo gdb -p <PID>`, get a stack trace with `bt`, and [report an issue](https://github.com/sosy-lab/benchexec/issues/new) with as much information as possible. BenchExec will usually be able to continue if the hanging child process is killed. + +[fuse-overlayfs]: https://github.com/containers/fuse-overlayfs