From be28a5faba99347bb5d05541b1874854b485f6a9 Mon Sep 17 00:00:00 2001 From: younghojan Date: Tue, 2 Jul 2024 10:20:01 +0800 Subject: [PATCH 01/32] feat: Implement FUSE-based overlay mount for containerexec and runexec By preserving all capabilities granted in the parent user namespace for the child process, we successfully utilize fuse-overlayfs (fusermount) to perform overlay mounts. This enhancement is effective when using containerexec and runexec, as benchexec creates containers using unshare rather than cloning a new process. Support for benchexec is currently under development. --- benchexec/container.py | 85 ++++++++++++++++++++++++++++----- benchexec/containerexecutor.py | 7 +++ benchexec/containerized_tool.py | 1 + benchexec/libc.py | 10 ++++ 4 files changed, 92 insertions(+), 11 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index ca49ed4dd..edceec9d6 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -19,6 +19,7 @@ import socket import struct import sys +import subprocess from benchexec import libc from benchexec import seccomp @@ -33,6 +34,7 @@ "get_mount_points", "remount_with_additional_flags", "make_overlay_mount", + "make_fuse_overlay_mount", "mount_proc", "make_bind_mount", "get_my_pid_from_procfs", @@ -507,13 +509,16 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): work_path = work_base + b"/" + str(overlay_count).encode() os.makedirs(temp_path, exist_ok=True) os.makedirs(work_path, exist_ok=True) + if os.path.ismount(mount_path): + try: + # Previous mount in this place not needed if replaced with overlay dir. + # libc.umount(mount_path) + libc.umount2(mount_path, libc.MNT_DETACH) # lazy umount + except OSError as e: + logging.debug(e) try: - # Previous mount in this place not needed if replaced with overlay dir. 
- libc.umount(mount_path) - except OSError as e: - logging.debug(e) - try: - make_overlay_mount(mount_path, mountpoint, temp_path, work_path) + # make_overlay_mount(mount_path, mountpoint, temp_path, work_path) + make_fuse_overlay_mount(mount_path, mountpoint, temp_path, work_path) except OSError as e: mp = mountpoint.decode() raise OSError( @@ -525,11 +530,12 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): elif mode == DIR_HIDDEN: os.makedirs(temp_path, exist_ok=True) - try: - # Previous mount in this place not needed if replaced with hidden dir. - libc.umount(mount_path) - except OSError as e: - logging.debug(e) + if os.path.ismount(mount_path): + try: + # Previous mount in this place not needed if replaced with hidden dir. + libc.umount(mount_path) + except OSError as e: + logging.debug(e) make_bind_mount(temp_path, mount_path) elif mode == DIR_READ_ONLY: @@ -722,6 +728,37 @@ def escape(s): ) +def make_fuse_overlay_mount(mount, lower, upper, work): + logging.debug( + "Creating overlay mount: target=%s, lower=%s, upper=%s, work=%s", + mount, + lower, + upper, + work, + ) + + def escape(s): + return s.replace(b"\\", rb"\\").replace(b":", rb"\:").replace(b",", rb"\,") + + cmd = ( + b"/usr/bin/fuse-overlayfs", + b"-o", + b"lowerdir=" + + escape(lower) + + b",upperdir=" + + escape(upper) + + b",workdir=" + + escape(work), + escape(mount), + ) + + try: + subprocess.run(args=cmd, check=True) + except subprocess.CalledProcessError as e: + logging.error("Error executing command: %s", e) + sys.exit(1) + + def mount_proc(container_system_config): """Mount the /proc filesystem. 
@param container_system_config: Whether to mount container-specific files in /proc @@ -812,6 +849,32 @@ def drop_capabilities(keep=[]): ) +def cap_permitted_to_ambient(): + """ + Python version of util-linux/lib/caputils.c: cap_permitted_to_ambient() + """ + + header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0) + data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)() + + libc.capget(header, data) + + data[0].inheritable = data[0].permitted + data[1].inheritable = data[1].permitted + + libc.capset(header, data) + + effective = (data[1].effective << 32) | data[0].effective + for cap in range(64): + if cap > int(util.try_read_file("/proc/sys/kernel/cap_last_cap")): + continue + + if effective & (1 << cap): + libc.prctl( + 47, 2, cap, 0, 0 + ) # 47 = PR_CAP_AMBIENT, 2 = PR_CAP_AMBIENT_RAISE + + _FORBIDDEN_SYSCALLS = [ # Kernel keyrings are not namespaced before Linux 5.2. b"add_key", diff --git a/benchexec/containerexecutor.py b/benchexec/containerexecutor.py index ff2a31641..f3d3cb9de 100644 --- a/benchexec/containerexecutor.py +++ b/benchexec/containerexecutor.py @@ -684,6 +684,13 @@ def child(): container.get_my_pid_from_procfs(), ) + # Ensure that capabilities granted in the user namespace are + # preserved in the child process. + # + # TODO: We still don't know which capabilities are necessary, + # and the rest could be dropped. + container.cap_permitted_to_ambient() + # Put all received signals on hold until we handle them later. 
container.block_all_signals() diff --git a/benchexec/containerized_tool.py b/benchexec/containerized_tool.py index bcd0259b7..78f845e08 100644 --- a/benchexec/containerized_tool.py +++ b/benchexec/containerized_tool.py @@ -198,6 +198,7 @@ def _init_container( # Container config container.setup_user_mapping(os.getpid(), uid, gid) + container.cap_permitted_to_ambient() _setup_container_filesystem(temp_dir, dir_modes, container_system_config) if container_system_config: socket.sethostname(container.CONTAINER_HOSTNAME) diff --git a/benchexec/libc.py b/benchexec/libc.py index 2a808ec7e..1bf8b36d9 100644 --- a/benchexec/libc.py +++ b/benchexec/libc.py @@ -184,7 +184,16 @@ class CapData(_ctypes.Structure): _ctypes.POINTER(CapData * 2), ] +capget = _libc.capget +"""Get the capabilities of the current thread.""" +capget.errcheck = _check_errno +capget.argtypes = [ + _ctypes.POINTER(CapHeader), + _ctypes.POINTER(CapData * 2), +] + LINUX_CAPABILITY_VERSION_3 = 0x20080522 # /usr/include/linux/capability.h +LINUX_CAPABILITY_U32S_3 = 2 # /usr/include/linux/capability.h CAP_SYS_ADMIN = 21 # /usr/include/linux/capability.h prctl = _libc.prctl @@ -192,6 +201,7 @@ class CapData(_ctypes.Structure): prctl.errcheck = _check_errno prctl.argtypes = [c_int, c_ulong, c_ulong, c_ulong, c_ulong] + # /usr/include/linux/prctl.h PR_SET_DUMPABLE = 4 PR_GET_SECCOMP = 21 From 0facbcf326247657e114fe231f3364975adcfa45 Mon Sep 17 00:00:00 2001 From: younghojan Date: Tue, 2 Jul 2024 22:22:56 +0800 Subject: [PATCH 02/32] feat: Support FUSE-based overlay mount for benchexec By setting up the container's filesystem in the child process, bind mount-related errors in benchexec caused by fuse-overlayfs can be avoided. This change will not affect the normal operation of kernel overlayfs. 
--- benchexec/container.py | 6 +++--- benchexec/containerized_tool.py | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index edceec9d6..24075d4fd 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -512,12 +512,12 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): if os.path.ismount(mount_path): try: # Previous mount in this place not needed if replaced with overlay dir. - # libc.umount(mount_path) - libc.umount2(mount_path, libc.MNT_DETACH) # lazy umount + libc.umount(mount_path) except OSError as e: logging.debug(e) try: - # make_overlay_mount(mount_path, mountpoint, temp_path, work_path) + # In this prototype, we don't attempt to use kernel overlayfs at all. + # make_overlay_mount(mount_path, mountpoint, temp_path, work_path) # noqa: E800 make_fuse_overlay_mount(mount_path, mountpoint, temp_path, work_path) except OSError as e: mp = mountpoint.decode() diff --git a/benchexec/containerized_tool.py b/benchexec/containerized_tool.py index 78f845e08..5ea626a13 100644 --- a/benchexec/containerized_tool.py +++ b/benchexec/containerized_tool.py @@ -198,8 +198,6 @@ def _init_container( # Container config container.setup_user_mapping(os.getpid(), uid, gid) - container.cap_permitted_to_ambient() - _setup_container_filesystem(temp_dir, dir_modes, container_system_config) if container_system_config: socket.sethostname(container.CONTAINER_HOSTNAME) if not network_access: @@ -218,6 +216,11 @@ def _init_container( os.waitpid(pid, 0) os._exit(0) + # We setup the container's filesystem in the child process. + # Delaying this until after the fork can avoid "Transport endpoint not connected" issue. 
+ container.cap_permitted_to_ambient() + _setup_container_filesystem(temp_dir, dir_modes, container_system_config) + # Finalize container setup in child container.mount_proc(container_system_config) # only possible in child container.drop_capabilities() From 85f02ca45832bdaade69101b06a28707ffde79ef Mon Sep 17 00:00:00 2001 From: younghojan Date: Sun, 7 Jul 2024 20:28:41 +0800 Subject: [PATCH 03/32] Only if the kernel overlay fails, try using fuse-overlayfs --- benchexec/container.py | 36 ++++++++++++++++++++++----------- benchexec/containerized_tool.py | 1 - 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index 24075d4fd..74bbc2274 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -516,17 +516,29 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): except OSError as e: logging.debug(e) try: - # In this prototype, we don't attempt to use kernel overlayfs at all. - # make_overlay_mount(mount_path, mountpoint, temp_path, work_path) # noqa: E800 - make_fuse_overlay_mount(mount_path, mountpoint, temp_path, work_path) + make_overlay_mount(mount_path, mountpoint, temp_path, work_path) except OSError as e: - mp = mountpoint.decode() - raise OSError( - e.errno, - f"Creating overlay mount for '{mp}' failed: {os.strerror(e.errno)}. " - f"Please use other directory modes, " - f"for example '--read-only-dir {util.escape_string_shell(mp)}'.", - ) + # Resort to fuse-overlayfs if kernel overlayfs is not available. + fuse = util.find_executable2("fuse-overlayfs") + if fuse: + logging.debug( + "Cannot use kernel overlay for %s: %s. " + "Trying to use fuse-overlayfs instead.", + mountpoint.decode(), + e, + ) + cap_permitted_to_ambient() + make_fuse_overlay_mount( + fuse, mount_path, mountpoint, temp_path, work_path + ) + else: + mp = mountpoint.decode() + raise OSError( + e.errno, + f"Creating overlay mount for '{mp}' failed: {os.strerror(e.errno)}. 
" + f"Please use other directory modes, " + f"for example '--read-only-dir {util.escape_string_shell(mp)}'.", + ) elif mode == DIR_HIDDEN: os.makedirs(temp_path, exist_ok=True) @@ -728,7 +740,7 @@ def escape(s): ) -def make_fuse_overlay_mount(mount, lower, upper, work): +def make_fuse_overlay_mount(exe, mount, lower, upper, work): logging.debug( "Creating overlay mount: target=%s, lower=%s, upper=%s, work=%s", mount, @@ -741,7 +753,7 @@ def escape(s): return s.replace(b"\\", rb"\\").replace(b":", rb"\:").replace(b",", rb"\,") cmd = ( - b"/usr/bin/fuse-overlayfs", + exe, b"-o", b"lowerdir=" + escape(lower) diff --git a/benchexec/containerized_tool.py b/benchexec/containerized_tool.py index 5ea626a13..278dd7e0b 100644 --- a/benchexec/containerized_tool.py +++ b/benchexec/containerized_tool.py @@ -218,7 +218,6 @@ def _init_container( # We setup the container's filesystem in the child process. # Delaying this until after the fork can avoid "Transport endpoint not connected" issue. - container.cap_permitted_to_ambient() _setup_container_filesystem(temp_dir, dir_modes, container_system_config) # Finalize container setup in child From 69e929edc72f611a95ba12aeb22913c46a3c8e49 Mon Sep 17 00:00:00 2001 From: younghojan Date: Wed, 10 Jul 2024 17:13:07 +0800 Subject: [PATCH 04/32] refactor: added user-visible messages and some refactoring --- benchexec/container.py | 23 +++++++++++++---------- benchexec/libc.py | 2 ++ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index 74bbc2274..1d6a711ad 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -519,12 +519,13 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): make_overlay_mount(mount_path, mountpoint, temp_path, work_path) except OSError as e: # Resort to fuse-overlayfs if kernel overlayfs is not available. 
+ mp = mountpoint.decode() fuse = util.find_executable2("fuse-overlayfs") if fuse: logging.debug( "Cannot use kernel overlay for %s: %s. " "Trying to use fuse-overlayfs instead.", - mountpoint.decode(), + mp, e, ) cap_permitted_to_ambient() @@ -532,7 +533,6 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): fuse, mount_path, mountpoint, temp_path, work_path ) else: - mp = mountpoint.decode() raise OSError( e.errno, f"Creating overlay mount for '{mp}' failed: {os.strerror(e.errno)}. " @@ -742,7 +742,7 @@ def escape(s): def make_fuse_overlay_mount(exe, mount, lower, upper, work): logging.debug( - "Creating overlay mount: target=%s, lower=%s, upper=%s, work=%s", + "Creating overlay mount with fuse-overlayfs: target=%s, lower=%s, upper=%s, work=%s", mount, lower, upper, @@ -765,9 +765,13 @@ def escape(s): ) try: - subprocess.run(args=cmd, check=True) + result = subprocess.run( + args=cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) + if result.stdout: + logging.debug("fuse-overlayfs: %s", result.stdout.decode()) except subprocess.CalledProcessError as e: - logging.error("Error executing command: %s", e) + logging.error("Error executing command: %s\n, %s", e, e.stderr.decode()) sys.exit(1) @@ -877,14 +881,13 @@ def cap_permitted_to_ambient(): libc.capset(header, data) effective = (data[1].effective << 32) | data[0].effective - for cap in range(64): - if cap > int(util.try_read_file("/proc/sys/kernel/cap_last_cap")): + cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap")) + for cap in range(cap_last_cap + 1): + if cap > cap_last_cap: continue if effective & (1 << cap): - libc.prctl( - 47, 2, cap, 0, 0 - ) # 47 = PR_CAP_AMBIENT, 2 = PR_CAP_AMBIENT_RAISE + libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_RAISE, cap, 0, 0) _FORBIDDEN_SYSCALLS = [ diff --git a/benchexec/libc.py b/benchexec/libc.py index 1bf8b36d9..c4c88203f 100644 --- a/benchexec/libc.py +++ b/benchexec/libc.py @@ -195,6 +195,8 @@ class 
CapData(_ctypes.Structure): LINUX_CAPABILITY_VERSION_3 = 0x20080522 # /usr/include/linux/capability.h LINUX_CAPABILITY_U32S_3 = 2 # /usr/include/linux/capability.h CAP_SYS_ADMIN = 21 # /usr/include/linux/capability.h +PR_CAP_AMBIENT = 47 # /usr/include/linux/prctl.h +PR_CAP_AMBIENT_RAISE = 2 # /usr/include/linux/prctl.h prctl = _libc.prctl """Modify options of processes: http://man7.org/linux/man-pages/man2/prctl.2.html""" From 98187166e6a857a573519bebf530f142207287d0 Mon Sep 17 00:00:00 2001 From: younghojan Date: Sun, 4 Aug 2024 16:59:43 +0800 Subject: [PATCH 05/32] feat: Clear ambient capabilities in drop_capabilities() and add constants in libc.py --- benchexec/container.py | 1 + benchexec/libc.py | 1 + 2 files changed, 2 insertions(+) diff --git a/benchexec/container.py b/benchexec/container.py index 1d6a711ad..7f3ea293e 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -863,6 +863,7 @@ def drop_capabilities(keep=[]): ctypes.byref(libc.CapHeader(version=libc.LINUX_CAPABILITY_VERSION_3, pid=0)), ctypes.byref(capdata), ) + libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) def cap_permitted_to_ambient(): diff --git a/benchexec/libc.py b/benchexec/libc.py index c4c88203f..f000c2d6f 100644 --- a/benchexec/libc.py +++ b/benchexec/libc.py @@ -197,6 +197,7 @@ class CapData(_ctypes.Structure): CAP_SYS_ADMIN = 21 # /usr/include/linux/capability.h PR_CAP_AMBIENT = 47 # /usr/include/linux/prctl.h PR_CAP_AMBIENT_RAISE = 2 # /usr/include/linux/prctl.h +PR_CAP_AMBIENT_CLEAR_ALL = 4 # /usr/include/linux/prctl.h prctl = _libc.prctl """Modify options of processes: http://man7.org/linux/man-pages/man2/prctl.2.html""" From a699b2f0d0c3b24457d234d6741cd4c84923a5c0 Mon Sep 17 00:00:00 2001 From: younghojan Date: Sun, 11 Aug 2024 23:36:11 +0800 Subject: [PATCH 06/32] chore: Fix bug in cap_permitted_to_ambient function The cap_permitted_to_ambient function was not handling the case where the "/proc/sys/kernel/cap_last_cap" file could not be 
read. This commit fixes the bug by adding a fallback value of 0 when reading the file fails. --- benchexec/container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/container.py b/benchexec/container.py index bdf4dc9ba..9b43c54ce 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -906,7 +906,7 @@ def cap_permitted_to_ambient(): libc.capset(header, data) effective = (data[1].effective << 32) | data[0].effective - cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap")) + cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0") for cap in range(cap_last_cap + 1): if cap > cap_last_cap: continue From 8006c20ab7fac60b4161b32be10ac67cec8216cc Mon Sep 17 00:00:00 2001 From: younghojan Date: Sun, 11 Aug 2024 23:50:40 +0800 Subject: [PATCH 07/32] fix: Use single fusermount for all fuse-based overlays, and avoid mixed usage of kernel and FUSE overlayfs. --- benchexec/container.py | 89 ++++++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 24 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index 9b43c54ce..5490f577d 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -475,6 +475,41 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): overlay_count = 0 + # Check if we need to use fuse-overlayfs for all overlay mounts. 
+ use_fuse = False + for _unused_source, full_mountpoint, fstype, options in list(get_mount_points()): + if not util.path_is_below(full_mountpoint, mount_base): + continue + mountpoint = full_mountpoint[len(mount_base) :] or b"/" + mode = determine_directory_mode(dir_modes, mountpoint, fstype) + if not mode or not os.path.exists(mountpoint): + continue + + if mode == DIR_OVERLAY: + for _unused_source, sub_mountpoint, _unused_fstype, _unused_opts in list( + get_mount_points() + ): + if ( + util.path_is_below(sub_mountpoint, mountpoint) + and sub_mountpoint != mountpoint + ): + use_fuse = True + break + + # Mount "/" once with fuse-overlayfs once, use it for all overlay mounts. + if use_fuse: + fuse = util.find_executable2("fuse-overlayfs") + if not fuse: + logging.warning("fuse-overlayfs is not available.") + use_fuse = False + else: + temp_fuse = temp_base + b"/fuse" + work_fuse = work_base + b"/0" + os.makedirs(temp_fuse, exist_ok=True) + os.makedirs(work_fuse, exist_ok=True) + cap_permitted_to_ambient() + make_fuse_overlay_mount(fuse, temp_fuse, b"/", temp_base, work_fuse) + for _unused_source, full_mountpoint, fstype, options in list(get_mount_points()): if not util.path_is_below(full_mountpoint, mount_base): continue @@ -539,30 +574,36 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): libc.umount(mount_path) except OSError as e: logging.debug(e) - try: - make_overlay_mount(mount_path, mountpoint, temp_path, work_path) - except OSError as e: - # Resort to fuse-overlayfs if kernel overlayfs is not available. - mp = mountpoint.decode() - fuse = util.find_executable2("fuse-overlayfs") - if fuse: - logging.debug( - "Cannot use kernel overlay for %s: %s. " - "Trying to use fuse-overlayfs instead.", - mp, - e, - ) - cap_permitted_to_ambient() - make_fuse_overlay_mount( - fuse, mount_path, mountpoint, temp_path, work_path - ) - else: - raise OSError( - e.errno, - f"Creating overlay mount for '{mp}' failed: {os.strerror(e.errno)}. 
" - f"Please use other directory modes, " - f"for example '--read-only-dir {shlex.quote(mp)}'.", - ) + if use_fuse: + fuse_mount_path = temp_fuse + mountpoint + make_bind_mount(fuse_mount_path, mount_path) + else: + try: + make_overlay_mount(mount_path, mountpoint, temp_path, work_path) + except OSError as e: + # Resort to fuse-overlayfs if kernel overlayfs is not available. + # This part of the code (using fuse-overlayfs as a fallback) is intentionally + # kept as a workaround for triple-nested execution with kernel overlayfs. + mp = mountpoint.decode() + fuse = util.find_executable2("fuse-overlayfs") + if fuse: + logging.debug( + "Cannot use kernel overlay for %s: %s. " + "Trying to use fuse-overlayfs instead.", + mp, + e, + ) + cap_permitted_to_ambient() + make_fuse_overlay_mount( + fuse, mount_path, mountpoint, temp_path, work_path + ) + else: + raise OSError( + e.errno, + f"Creating overlay mount for '{mp}' failed: {os.strerror(e.errno)}. " + f"Please use other directory modes, " + f"for example '--read-only-dir {shlex.quote(mp)}'.", + ) elif mode == DIR_HIDDEN: os.makedirs(temp_path, exist_ok=True) From 195e4d06c3db6b042ec80239acada55927a27d7f Mon Sep 17 00:00:00 2001 From: younghojan Date: Tue, 13 Aug 2024 11:12:03 +0800 Subject: [PATCH 08/32] chore: Add functions and extracted some code into functions, add comments, and made some logical refactoring. 
--- benchexec/container.py | 157 ++++++++++++++++++--------------- benchexec/containerexecutor.py | 7 -- benchexec/libc.py | 1 + 3 files changed, 89 insertions(+), 76 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index 5490f577d..6d9c03ce2 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -16,6 +16,7 @@ import os import resource # noqa: F401 @UnusedImport necessary to eagerly import this module import shlex +import shutil import signal import socket import struct @@ -476,39 +477,14 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): overlay_count = 0 # Check if we need to use fuse-overlayfs for all overlay mounts. - use_fuse = False - for _unused_source, full_mountpoint, fstype, options in list(get_mount_points()): - if not util.path_is_below(full_mountpoint, mount_base): - continue - mountpoint = full_mountpoint[len(mount_base) :] or b"/" - mode = determine_directory_mode(dir_modes, mountpoint, fstype) - if not mode or not os.path.exists(mountpoint): - continue - - if mode == DIR_OVERLAY: - for _unused_source, sub_mountpoint, _unused_fstype, _unused_opts in list( - get_mount_points() - ): - if ( - util.path_is_below(sub_mountpoint, mountpoint) - and sub_mountpoint != mountpoint - ): - use_fuse = True - break - + use_fuse = check_use_fuse_overlayfs(mount_base, dir_modes) # Mount "/" once with fuse-overlayfs once, use it for all overlay mounts. 
if use_fuse: - fuse = util.find_executable2("fuse-overlayfs") - if not fuse: - logging.warning("fuse-overlayfs is not available.") - use_fuse = False + fuse = shutil.which("fuse-overlayfs") + if fuse: + fuse_overlay_mount_root(fuse, temp_base, work_base) else: - temp_fuse = temp_base + b"/fuse" - work_fuse = work_base + b"/0" - os.makedirs(temp_fuse, exist_ok=True) - os.makedirs(work_fuse, exist_ok=True) - cap_permitted_to_ambient() - make_fuse_overlay_mount(fuse, temp_fuse, b"/", temp_base, work_fuse) + use_fuse = False for _unused_source, full_mountpoint, fstype, options in list(get_mount_points()): if not util.path_is_below(full_mountpoint, mount_base): @@ -575,7 +551,7 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): except OSError as e: logging.debug(e) if use_fuse: - fuse_mount_path = temp_fuse + mountpoint + fuse_mount_path = temp_base + b"/fuse" + mountpoint make_bind_mount(fuse_mount_path, mount_path) else: try: @@ -584,26 +560,23 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): # Resort to fuse-overlayfs if kernel overlayfs is not available. # This part of the code (using fuse-overlayfs as a fallback) is intentionally # kept as a workaround for triple-nested execution with kernel overlayfs. - mp = mountpoint.decode() - fuse = util.find_executable2("fuse-overlayfs") - if fuse: - logging.debug( - "Cannot use kernel overlay for %s: %s. " - "Trying to use fuse-overlayfs instead.", - mp, - e, - ) - cap_permitted_to_ambient() - make_fuse_overlay_mount( - fuse, mount_path, mountpoint, temp_path, work_path - ) - else: - raise OSError( - e.errno, - f"Creating overlay mount for '{mp}' failed: {os.strerror(e.errno)}. 
" - f"Please use other directory modes, " - f"for example '--read-only-dir {shlex.quote(mp)}'.", - ) + if not use_fuse: + fuse = shutil.which("fuse-overlayfs") + if fuse: + fuse_overlay_mount_root(fuse, temp_base, work_base) + use_fuse = True + else: + mp = mountpoint.decode() + logging.warning("fuse-overlayfs is not available.") + raise OSError( + e.errno, + f"Creating overlay mount for '{mp}' failed: {os.strerror(e.errno)}. " + f"Please use other directory modes, " + f"for example '--read-only-dir {shlex.quote(mp)}'.", + ) + + fuse_mount_path = temp_base + b"/fuse" + mountpoint + make_bind_mount(fuse_mount_path, mount_path) elif mode == DIR_HIDDEN: os.makedirs(temp_path, exist_ok=True) @@ -773,6 +746,16 @@ def remount_with_additional_flags(mountpoint, fstype, existing_options, mountfla libc.mount(None, mountpoint, None, mountflags, None) +def escape(s): + """ + Safely encode a string for being used as a path for overlayfs. + In addition to escaping ",", which separates mount options, + we need to escape ":", which overlayfs uses to separate multiple lower dirs + (cf. https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt). + """ + return s.replace(b"\\", rb"\\").replace(b":", rb"\:").replace(b",", rb"\,") + + def make_overlay_mount(mount, lower, upper, work): logging.debug( "Creating overlay mount: target=%s, lower=%s, upper=%s, work=%s", @@ -782,15 +765,6 @@ def make_overlay_mount(mount, lower, upper, work): work, ) - def escape(s): - """ - Safely encode a string for being used as a path for overlayfs. - In addition to escaping ",", which separates mount options, - we need to escape ":", which overlayfs uses to separate multiple lower dirs - (cf. https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt). 
- """ - return s.replace(b"\\", rb"\\").replace(b":", rb"\:").replace(b",", rb"\,") - libc.mount( b"none", mount, @@ -805,6 +779,43 @@ def escape(s): ) +def check_use_fuse_overlayfs(mount_base, dir_modes): + mount_points = [ + (full_mountpoint, fstype) + for _unused_source, full_mountpoint, fstype, _options in get_mount_points() + if util.path_is_below(full_mountpoint, mount_base) + ] + + for full_mountpoint, fstype in mount_points: + mountpoint = full_mountpoint[len(mount_base) :] or b"/" + mode = determine_directory_mode(dir_modes, mountpoint, fstype) + + if not mode or not os.path.exists(mountpoint): + continue + + if mode == DIR_OVERLAY: + # Check if there are any sub-mounts within the current overlay mount point + for sub_mountpoint, _unused_fstype in mount_points: + if ( + util.path_is_below(sub_mountpoint, mountpoint) + and sub_mountpoint != mountpoint + ): + return True + + return False + + +def fuse_overlay_mount_root(fuse, temp_base, work_base): + """ + Mount "/" once with fuse-overlayfs once, use it for all overlay mounts. 
+ """ + temp_fuse = temp_base + b"/fuse" + work_fuse = work_base + b"/0" + os.makedirs(temp_fuse, exist_ok=True) + os.makedirs(work_fuse, exist_ok=True) + make_fuse_overlay_mount(fuse, temp_fuse, b"/", temp_base, work_fuse) + + def make_fuse_overlay_mount(exe, mount, lower, upper, work): logging.debug( "Creating overlay mount with fuse-overlayfs: target=%s, lower=%s, upper=%s, work=%s", @@ -814,9 +825,6 @@ def make_fuse_overlay_mount(exe, mount, lower, upper, work): work, ) - def escape(s): - return s.replace(b"\\", rb"\\").replace(b":", rb"\:").replace(b",", rb"\,") - cmd = ( exe, b"-o", @@ -830,14 +838,16 @@ def escape(s): ) try: + cap_permitted_to_ambient() result = subprocess.run( args=cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT ) if result.stdout: logging.debug("fuse-overlayfs: %s", result.stdout.decode()) except subprocess.CalledProcessError as e: - logging.error("Error executing command: %s\n, %s", e, e.stderr.decode()) - sys.exit(1) + sys.exit(f"Error executing command: {e}\n{e.stdout.decode()}") + finally: + drop_ambient_cap() def mount_proc(container_system_config): @@ -934,8 +944,10 @@ def drop_capabilities(keep=[]): def cap_permitted_to_ambient(): """ Python version of util-linux/lib/caputils.c: cap_permitted_to_ambient() - """ + Transfer all permitted capabilities to the inheritable set + and raise them in the ambient set if effective. + """ header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0) data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)() @@ -949,13 +961,20 @@ def cap_permitted_to_ambient(): effective = (data[1].effective << 32) | data[0].effective cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0") for cap in range(cap_last_cap + 1): - if cap > cap_last_cap: - continue - if effective & (1 << cap): libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_RAISE, cap, 0, 0) +def drop_ambient_cap(): + """ + Drop all ambient capabilities by removing them from the ambient set. 
+ Corresponds to the opposite of cap_permitted_to_ambient. + """ + cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0") + for cap in range(cap_last_cap + 1): + libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_LOWER, cap, 0, 0) + + _FORBIDDEN_SYSCALLS = [ # Kernel keyrings are not namespaced before Linux 5.2. b"add_key", diff --git a/benchexec/containerexecutor.py b/benchexec/containerexecutor.py index c4d77ba4f..1954729a7 100644 --- a/benchexec/containerexecutor.py +++ b/benchexec/containerexecutor.py @@ -684,13 +684,6 @@ def child(): container.get_my_pid_from_procfs(), ) - # Ensure that capabilities granted in the user namespace are - # preserved in the child process. - # - # TODO: We still don't know which capabilities are necessary, - # and the rest could be dropped. - container.cap_permitted_to_ambient() - # Put all received signals on hold until we handle them later. container.block_all_signals() diff --git a/benchexec/libc.py b/benchexec/libc.py index f000c2d6f..32ea762e7 100644 --- a/benchexec/libc.py +++ b/benchexec/libc.py @@ -197,6 +197,7 @@ class CapData(_ctypes.Structure): CAP_SYS_ADMIN = 21 # /usr/include/linux/capability.h PR_CAP_AMBIENT = 47 # /usr/include/linux/prctl.h PR_CAP_AMBIENT_RAISE = 2 # /usr/include/linux/prctl.h +PR_CAP_AMBIENT_LOWER = 3 # /usr/include/linux/prctl.h PR_CAP_AMBIENT_CLEAR_ALL = 4 # /usr/include/linux/prctl.h prctl = _libc.prctl From 00f9cb8fd2b3215e5f7f92c31a20a625786a391e Mon Sep 17 00:00:00 2001 From: younghojan Date: Tue, 13 Aug 2024 17:03:49 +0800 Subject: [PATCH 09/32] chore: Refactor some functions related to fuse-based overlay mounts and add comments. 
--- benchexec/container.py | 123 +++++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 54 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index 6d9c03ce2..cce105db5 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -478,13 +478,12 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): # Check if we need to use fuse-overlayfs for all overlay mounts. use_fuse = check_use_fuse_overlayfs(mount_base, dir_modes) - # Mount "/" once with fuse-overlayfs once, use it for all overlay mounts. + + # Create overlay mounts for all mount points. if use_fuse: - fuse = shutil.which("fuse-overlayfs") - if fuse: - fuse_overlay_mount_root(fuse, temp_base, work_base) - else: - use_fuse = False + fuse_overlay_mount_path = setup_fuse_overlay(temp_base, work_base) + else: + use_fuse = False for _unused_source, full_mountpoint, fstype, options in list(get_mount_points()): if not util.path_is_below(full_mountpoint, mount_base): @@ -551,7 +550,7 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): except OSError as e: logging.debug(e) if use_fuse: - fuse_mount_path = temp_base + b"/fuse" + mountpoint + fuse_mount_path = fuse_overlay_mount_path + mountpoint make_bind_mount(fuse_mount_path, mount_path) else: try: @@ -560,22 +559,21 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): # Resort to fuse-overlayfs if kernel overlayfs is not available. # This part of the code (using fuse-overlayfs as a fallback) is intentionally # kept as a workaround for triple-nested execution with kernel overlayfs. - if not use_fuse: - fuse = shutil.which("fuse-overlayfs") - if fuse: - fuse_overlay_mount_root(fuse, temp_base, work_base) - use_fuse = True - else: - mp = mountpoint.decode() - logging.warning("fuse-overlayfs is not available.") - raise OSError( - e.errno, - f"Creating overlay mount for '{mp}' failed: {os.strerror(e.errno)}. 
" - f"Please use other directory modes, " - f"for example '--read-only-dir {shlex.quote(mp)}'.", - ) - - fuse_mount_path = temp_base + b"/fuse" + mountpoint + if fuse_overlay_mount_path is None: + fuse_overlay_mount_path = setup_fuse_overlay( + temp_base, work_base + ) + if fuse_overlay_mount_path is None: + mp = mountpoint.decode() + logging.warning("fuse-overlayfs is not available.") + raise OSError( + e.errno, + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. " + f"Consider using alternative directory modes, such as '--read-only-dir {shlex.quote(mp)}'.", + ) from e + + logging.debug("Fallback to fuse-overlayfs for overlay mount.") + fuse_mount_path = fuse_overlay_mount_path + mountpoint make_bind_mount(fuse_mount_path, mount_path) elif mode == DIR_HIDDEN: @@ -746,7 +744,7 @@ def remount_with_additional_flags(mountpoint, fstype, existing_options, mountfla libc.mount(None, mountpoint, None, mountflags, None) -def escape(s): +def escape_overlayfs_parameters(s): """ Safely encode a string for being used as a path for overlayfs. In addition to escaping ",", which separates mount options, @@ -771,15 +769,19 @@ def make_overlay_mount(mount, lower, upper, work): b"overlay", 0, b"lowerdir=" - + escape(lower) + + escape_overlayfs_parameters(lower) + b",upperdir=" - + escape(upper) + + escape_overlayfs_parameters(upper) + b",workdir=" - + escape(work), + + escape_overlayfs_parameters(work), ) def check_use_fuse_overlayfs(mount_base, dir_modes): + """ + Check whether an overlay mountpoint requires the use of fuse-overlayfs + by determining if there are any sub-mounts below it. + """ mount_points = [ (full_mountpoint, fstype) for _unused_source, full_mountpoint, fstype, _options in get_mount_points() @@ -805,49 +807,53 @@ def check_use_fuse_overlayfs(mount_base, dir_modes): return False -def fuse_overlay_mount_root(fuse, temp_base, work_base): - """ - Mount "/" once with fuse-overlayfs once, use it for all overlay mounts. 
- """ +@contextlib.contextmanager +def permitted_cap_as_ambient(): + try: + original_inheritable = cap_permitted_to_ambient() + yield + finally: + drop_ambient_cap(original_inheritable) + + +def setup_fuse_overlay(temp_base, work_base): + fuse = shutil.which("fuse-overlayfs") + if fuse is None: + return None temp_fuse = temp_base + b"/fuse" work_fuse = work_base + b"/0" os.makedirs(temp_fuse, exist_ok=True) os.makedirs(work_fuse, exist_ok=True) - make_fuse_overlay_mount(fuse, temp_fuse, b"/", temp_base, work_fuse) - -def make_fuse_overlay_mount(exe, mount, lower, upper, work): logging.debug( "Creating overlay mount with fuse-overlayfs: target=%s, lower=%s, upper=%s, work=%s", - mount, - lower, - upper, - work, + temp_fuse, + b"/", + temp_base, + work_fuse, ) cmd = ( - exe, + fuse, b"-o", - b"lowerdir=" - + escape(lower) + b"lowerdir=/" + b",upperdir=" - + escape(upper) + + escape_overlayfs_parameters(temp_base) + b",workdir=" - + escape(work), - escape(mount), + + escape_overlayfs_parameters(work_fuse), + escape_overlayfs_parameters(temp_fuse), ) try: - cap_permitted_to_ambient() - result = subprocess.run( - args=cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT - ) - if result.stdout: - logging.debug("fuse-overlayfs: %s", result.stdout.decode()) + with permitted_cap_as_ambient(): + result = subprocess.run( + args=cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) + if result.stdout: + logging.debug("fuse-overlayfs: %s", result.stdout.decode()) + return temp_fuse except subprocess.CalledProcessError as e: sys.exit(f"Error executing command: {e}\n{e.stdout.decode()}") - finally: - drop_ambient_cap() def mount_proc(container_system_config): @@ -938,7 +944,6 @@ def drop_capabilities(keep=[]): ctypes.byref(libc.CapHeader(version=libc.LINUX_CAPABILITY_VERSION_3, pid=0)), ctypes.byref(capdata), ) - libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) def cap_permitted_to_ambient(): @@ -953,6 +958,7 @@ def 
cap_permitted_to_ambient(): libc.capget(header, data) + original_inheritable = [data[0].inheritable, data[1].inheritable] data[0].inheritable = data[0].permitted data[1].inheritable = data[1].permitted @@ -964,8 +970,10 @@ def cap_permitted_to_ambient(): if effective & (1 << cap): libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_RAISE, cap, 0, 0) + return original_inheritable -def drop_ambient_cap(): + +def drop_ambient_cap(original_inheritable): """ Drop all ambient capabilities by removing them from the ambient set. Corresponds to the opposite of cap_permitted_to_ambient. @@ -974,6 +982,13 @@ def drop_ambient_cap(): for cap in range(cap_last_cap + 1): libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_LOWER, cap, 0, 0) + header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0) + data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)() + + libc.capget(header, data) + data[0].inheritable, data[1].inheritable = original_inheritable + libc.capset(header, data) + _FORBIDDEN_SYSCALLS = [ # Kernel keyrings are not namespaced before Linux 5.2. 
From 328aad4d8607ff6dfb426c1353b4f8d86ceabc7a Mon Sep 17 00:00:00 2001 From: younghojan Date: Tue, 13 Aug 2024 22:20:11 +0800 Subject: [PATCH 10/32] chore: Refactor functions related to fuse-based overlay mounts and improve comments --- benchexec/container.py | 86 ++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 49 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index cce105db5..301e3d52b 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -36,7 +36,6 @@ "get_mount_points", "remount_with_additional_flags", "make_overlay_mount", - "make_fuse_overlay_mount", "mount_proc", "make_bind_mount", "get_my_pid_from_procfs", @@ -483,7 +482,7 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): if use_fuse: fuse_overlay_mount_path = setup_fuse_overlay(temp_base, work_base) else: - use_fuse = False + fuse_overlay_mount_path = None for _unused_source, full_mountpoint, fstype, options in list(get_mount_points()): if not util.path_is_below(full_mountpoint, mount_base): @@ -549,7 +548,8 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): libc.umount(mount_path) except OSError as e: logging.debug(e) - if use_fuse: + + if use_fuse and fuse_overlay_mount_path: fuse_mount_path = fuse_overlay_mount_path + mountpoint make_bind_mount(fuse_mount_path, mount_path) else: @@ -810,10 +810,42 @@ def check_use_fuse_overlayfs(mount_base, dir_modes): @contextlib.contextmanager def permitted_cap_as_ambient(): try: - original_inheritable = cap_permitted_to_ambient() + """ + Transfer all permitted capabilities to the inheritable set + and raise them in the ambient set if effective. 
+ """ + header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0) + data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)() + + libc.capget(header, data) + + original_inheritable = [data[0].inheritable, data[1].inheritable] + data[0].inheritable = data[0].permitted + data[1].inheritable = data[1].permitted + + libc.capset(header, data) + + effective = (data[1].effective << 32) | data[0].effective + cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0") + for cap in range(cap_last_cap + 1): + if effective & (1 << cap): + libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_RAISE, cap, 0, 0) yield finally: - drop_ambient_cap(original_inheritable) + """ + Drop all ambient capabilities by removing them from the ambient set, + and undo changes made to inheritable set. + """ + cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0") + for cap in range(cap_last_cap + 1): + libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_LOWER, cap, 0, 0) + + header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0) + data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)() + + libc.capget(header, data) + data[0].inheritable, data[1].inheritable = original_inheritable + libc.capset(header, data) def setup_fuse_overlay(temp_base, work_base): @@ -946,50 +978,6 @@ def drop_capabilities(keep=[]): ) -def cap_permitted_to_ambient(): - """ - Python version of util-linux/lib/caputils.c: cap_permitted_to_ambient() - - Transfer all permitted capabilities to the inheritable set - and raise them in the ambient set if effective. 
- """ - header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0) - data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)() - - libc.capget(header, data) - - original_inheritable = [data[0].inheritable, data[1].inheritable] - data[0].inheritable = data[0].permitted - data[1].inheritable = data[1].permitted - - libc.capset(header, data) - - effective = (data[1].effective << 32) | data[0].effective - cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0") - for cap in range(cap_last_cap + 1): - if effective & (1 << cap): - libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_RAISE, cap, 0, 0) - - return original_inheritable - - -def drop_ambient_cap(original_inheritable): - """ - Drop all ambient capabilities by removing them from the ambient set. - Corresponds to the opposite of cap_permitted_to_ambient. - """ - cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0") - for cap in range(cap_last_cap + 1): - libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_LOWER, cap, 0, 0) - - header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0) - data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)() - - libc.capget(header, data) - data[0].inheritable, data[1].inheritable = original_inheritable - libc.capset(header, data) - - _FORBIDDEN_SYSCALLS = [ # Kernel keyrings are not namespaced before Linux 5.2. 
b"add_key", From de867490fea0c7ae552f9d38cc51bec540b598fd Mon Sep 17 00:00:00 2001 From: younghojan Date: Wed, 14 Aug 2024 16:49:19 +0800 Subject: [PATCH 11/32] chore: Refactor functions related to fuse-based overlay mounts and improve comments --- benchexec/container.py | 57 +++++++++++++++++++++++------------------- benchexec/libc.py | 1 - 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index 301e3d52b..69a434140 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -559,12 +559,12 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): # Resort to fuse-overlayfs if kernel overlayfs is not available. # This part of the code (using fuse-overlayfs as a fallback) is intentionally # kept as a workaround for triple-nested execution with kernel overlayfs. - if fuse_overlay_mount_path is None: + mp = mountpoint.decode() + if not fuse_overlay_mount_path: fuse_overlay_mount_path = setup_fuse_overlay( temp_base, work_base ) - if fuse_overlay_mount_path is None: - mp = mountpoint.decode() + if not fuse_overlay_mount_path: logging.warning("fuse-overlayfs is not available.") raise OSError( e.errno, @@ -572,7 +572,9 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): f"Consider using alternative directory modes, such as '--read-only-dir {shlex.quote(mp)}'.", ) from e - logging.debug("Fallback to fuse-overlayfs for overlay mount.") + logging.debug( + f"Fallback to fuse-overlayfs for overlay mount at '{mp}'." + ) fuse_mount_path = fuse_overlay_mount_path + mountpoint make_bind_mount(fuse_mount_path, mount_path) @@ -809,46 +811,47 @@ def check_use_fuse_overlayfs(mount_base, dir_modes): @contextlib.contextmanager def permitted_cap_as_ambient(): - try: - """ - Transfer all permitted capabilities to the inheritable set - and raise them in the ambient set if effective. 
- """ - header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0) - data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)() + """ + Transfer all permitted capabilities to the inheritable set + and raise them in the ambient set if effective. + Finally drop all ambient capabilities by removing them from the ambient set, + and undo changes made to inheritable set. - libc.capget(header, data) + Used by fuse-based overlay mounts needing temporary capability elevation. + """ + header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0) + data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)() + + libc.capget(header, data) + original_inheritable = [data[0].inheritable, data[1].inheritable] + cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0") - original_inheritable = [data[0].inheritable, data[1].inheritable] + try: data[0].inheritable = data[0].permitted data[1].inheritable = data[1].permitted - libc.capset(header, data) effective = (data[1].effective << 32) | data[0].effective - cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0") for cap in range(cap_last_cap + 1): if effective & (1 << cap): libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_RAISE, cap, 0, 0) + yield finally: - """ - Drop all ambient capabilities by removing them from the ambient set, - and undo changes made to inheritable set.
- """ - cap_last_cap = int(util.try_read_file("/proc/sys/kernel/cap_last_cap") or "0") - for cap in range(cap_last_cap + 1): - libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_LOWER, cap, 0, 0) - - header = libc.CapHeader(libc.LINUX_CAPABILITY_VERSION_3, 0) - data = (libc.CapData * libc.LINUX_CAPABILITY_U32S_3)() + libc.prctl(libc.PR_CAP_AMBIENT, libc.PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) - libc.capget(header, data) data[0].inheritable, data[1].inheritable = original_inheritable libc.capset(header, data) def setup_fuse_overlay(temp_base, work_base): + """ + Check if fuse-overlayfs is available on the system and, + if so, creates a temporary overlay filesystem by stacking the root directory + with a specified temporary directory. + + @return: The path to the mounted overlay filesystem if successful, None otherwise. + """ fuse = shutil.which("fuse-overlayfs") if fuse is None: return None @@ -878,6 +881,8 @@ def setup_fuse_overlay(temp_base, work_base): try: with permitted_cap_as_ambient(): + # Temporarily elevate permitted capabilities to the inheritable set + # and raise them in the ambient set. 
result = subprocess.run( args=cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT ) diff --git a/benchexec/libc.py b/benchexec/libc.py index 32ea762e7..f000c2d6f 100644 --- a/benchexec/libc.py +++ b/benchexec/libc.py @@ -197,7 +197,6 @@ class CapData(_ctypes.Structure): CAP_SYS_ADMIN = 21 # /usr/include/linux/capability.h PR_CAP_AMBIENT = 47 # /usr/include/linux/prctl.h PR_CAP_AMBIENT_RAISE = 2 # /usr/include/linux/prctl.h -PR_CAP_AMBIENT_LOWER = 3 # /usr/include/linux/prctl.h PR_CAP_AMBIENT_CLEAR_ALL = 4 # /usr/include/linux/prctl.h prctl = _libc.prctl From a308c461236d645f21f8c00e8120610061c168c3 Mon Sep 17 00:00:00 2001 From: younghojan Date: Thu, 15 Aug 2024 11:02:35 +0800 Subject: [PATCH 12/32] chore: Replace f-string in logging.debug with %s formatting --- benchexec/container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/container.py b/benchexec/container.py index 69a434140..846e241b3 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -573,7 +573,7 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): ) from e logging.debug( - f"Fallback to fuse-overlayfs for overlay mount at '{mp}'." 
+ "Fallback to fuse-overlayfs for overlay mount at '%s'.", mp ) fuse_mount_path = fuse_overlay_mount_path + mountpoint make_bind_mount(fuse_mount_path, mount_path) From b941329dd8c4585d3c87b37c3312823f3716dac7 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 16 Aug 2024 15:02:43 +0200 Subject: [PATCH 13/32] Add fuse-overlayfs to our recommended dependencies --- debian/control | 2 +- doc/INSTALL.md | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/debian/control b/debian/control index 73ddc6d2e..287cc2ea8 100644 --- a/debian/control +++ b/debian/control @@ -20,7 +20,7 @@ Package: benchexec Architecture: all Pre-Depends: ${misc:Pre-Depends} Depends: ${python3:Depends}, python3-pkg-resources, ${misc:Depends}, ucf -Recommends: cpu-energy-meter, libseccomp2, lxcfs, python3-coloredlogs, python3-pystemd +Recommends: cpu-energy-meter, fuse-overlayfs (>= 1.10), libseccomp2, lxcfs, python3-coloredlogs, python3-pystemd Description: Framework for Reliable Benchmarking and Resource Measurement BenchExec allows benchmarking non-interactive tools on Linux systems. It measures CPU time, wall time, and memory usage of a tool, diff --git a/doc/INSTALL.md b/doc/INSTALL.md index 522f1592a..a68f88d07 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -20,6 +20,7 @@ SPDX-License-Identifier: Apache-2.0 The following packages are optional but recommended dependencies: - [cpu-energy-meter] will let BenchExec measure energy consumption on Intel CPUs. +- [fuse-overlayfs] (version 1.10 or newer) allows using the overlay directory mode for containers in cases where the kernel-based overlayfs does not work. - [libseccomp2] provides better container isolation. - [LXCFS] provides better container isolation. - [coloredlogs] provides nicer log output. @@ -115,7 +116,7 @@ Of course you can also install BenchExec in a virtualenv if you are familiar wit On systems without systemd you can omit the `[systemd]` part.
Please make sure to configure cgroups as [described below](#setting-up-cgroups) -and install [cpu-energy-meter], [libseccomp2], [LXCFS], and [pqos_wrapper] if desired. +and install [cpu-energy-meter], [fuse-overlayfs], [libseccomp2], [LXCFS], and [pqos_wrapper] if desired. ### Containerized Environments @@ -137,7 +138,7 @@ otherwise pip will try to download and build this module, which needs a compiler and several development header packages. Please make sure to configure cgroups as [described below](#setting-up-cgroups) -and install [cpu-energy-meter], [libseccomp2], [LXCFS], and [pqos_wrapper] if desired. +and install [cpu-energy-meter], [fuse-overlayfs], [libseccomp2], [LXCFS], and [pqos_wrapper] if desired. ## Kernel Requirements @@ -377,6 +378,7 @@ Please refer to the [development instructions](DEVELOPMENT.md). [coloredlogs]: https://pypi.org/project/coloredlogs/ [cpu-energy-meter]: https://github.com/sosy-lab/cpu-energy-meter +[fuse-overlayfs]: https://github.com/containers/fuse-overlayfs [libseccomp2]: https://github.com/seccomp/libseccomp [LXCFS]: https://github.com/lxc/lxcfs [pqos]: https://github.com/intel/intel-cmt-cat/tree/master/pqos From 2529120d81cc392aebc8b28917fdbec5f4d99581 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 16 Aug 2024 15:03:36 +0200 Subject: [PATCH 14/32] Update documentation on kernel overlayfs vs. fuse-overlayfs --- doc/INSTALL.md | 11 ++++++++--- doc/benchexec-in-container.md | 3 ++- doc/container.md | 15 +++++++++++---- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/doc/INSTALL.md b/doc/INSTALL.md index a68f88d07..a53a4ac7e 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -154,7 +154,7 @@ For other distributions, please read the following detailed requirements. Except on Ubuntu, the full feature set of BenchExec is only usable on **Linux 5.11 or newer**, so we suggest at least this kernel version. -On older kernels, you need to avoid using the overlay filesystem (cf. 
below), +On older kernels, you need to avoid using the kernel-based overlay filesystem (cf. below), all other features are supported. However, we strongly recommend to use at least **Linux 4.14 or newer** because it reduces the overhead of BenchExec's memory measurements and limits. @@ -187,8 +187,13 @@ that are not usable on all distributions by default: - **Unprivileged Overlay Filesystem**: This is only available since Linux 5.11 (kernel option `CONFIG_OVERLAY_FS`), but also present in all Ubuntu kernels, even older ones. - Users of older kernels on other distributions can still use container mode, but have to choose a different mode - of mounting the file systems in the container, e.g., with `--read-only-dir /` (see below). + Users of older kernels on other distributions can still use container mode, + but have to install [fuse-overlayfs] or choose a different mode + of mounting the file systems in the container, e.g., with `--read-only-dir /` + (cf. [container configuration](container.md#directory-access-modes)). + Note that the kernel-based overlayfs does not support some specific configurations + (such as the default mode of overlay for `/`), + so [fuse-overlayfs] is often useful or required anyway. If container mode does not work, please check the [common problems](container.md#common-problems). diff --git a/doc/benchexec-in-container.md b/doc/benchexec-in-container.md index 59401a48d..08a400f08 100644 --- a/doc/benchexec-in-container.md +++ b/doc/benchexec-in-container.md @@ -47,7 +47,8 @@ or ``` docker run --privileged --cap-drop=all -t my-container benchexec ``` - +If you want BenchExec to use `fuse-overlayfs` in the container, +also specify `--device /dev/fuse`. 
## BenchExec in Interactive Containers diff --git a/doc/container.md b/doc/container.md index 46d89ab59..0d9c99cb4 100644 --- a/doc/container.md +++ b/doc/container.md @@ -69,7 +69,9 @@ For each directory in the container one of the following four access modes can b Writes to this directory will not be visible on the host. - **read-only**: This directory is visible in the container, but read-only. - **overlay**: This directory is visible in the container and - an [overlay filesystem](https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt) + an overlay filesystem (either from the + [kernel](https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt) + or [fuse-overlayfs]) is layered on top of it that redirects all write accesses. This means that write accesses are possible in the container, but the effect of any write is not visible on the host, only inside the container, and not written to disk. @@ -205,13 +207,13 @@ You can still use BenchExec if you completely disable the container mode with `- #### `Failed to configure container: [Errno 19] Creating overlay mount for '...' failed: No such device` Your kernel does not support the overlay filesystem, please check the [system requirements](INSTALL.md#kernel-requirements). -You can use a different access mode for directories, e.g., with `--read-only-dir /`. +You can use [fuse-overlayfs] or a different access mode for directories, e.g., with `--read-only-dir /`. If some directories need to be writable, specify other directory modes for these directories as described above. #### `Failed to configure container: [Errno 1] Creating overlay mount for '...' failed: Operation not permitted` Your kernel does not allow mounting the overlay filesystem inside a container. -For this you need either Ubuntu or kernel version 5.11 or newer. -Alternatively, if you cannot use either, +For this you need either Ubuntu, [fuse-overlayfs], or kernel version 5.11 or newer. 
+Alternatively, if you cannot use any of these, you can use a different access mode for directories, e.g., with `--read-only-dir /`. If some directories need to be writable, specify other directory modes for these directories as described above. @@ -226,6 +228,9 @@ Another limitation of the kernel is that one can only nest overlays twice, so if you want to run a container inside a container inside a container, at least one of these needs to use a non-overlay mode for this path. +We recommend the installation of [fuse-overlayfs] in version 1.10 or newer, +which supports all of these use cases. + #### `Cannot change into working directory inside container: [Errno 2] No such file or directory` Either you have specified an invalid directory as working directory with `--dir`, or your current directory on the host is hidden inside the container @@ -253,3 +258,5 @@ If it still occurs, please attach to all child process of BenchExec with `sudo gdb -p `, get a stack trace with `bt`, and [report an issue](https://github.com/sosy-lab/benchexec/issues/new) with as much information as possible. BenchExec will usually be able to continue if the hanging child process is killed. + +[fuse-overlayfs]: https://github.com/containers/fuse-overlayfs From dde34eacb7c021a4a849b702b995aa100de50edc Mon Sep 17 00:00:00 2001 From: younghojan Date: Sat, 17 Aug 2024 20:44:17 +0800 Subject: [PATCH 15/32] test: Add tests for checking fuse-overlayfs functionality and triple-nested containers. 
--- benchexec/test_runexecutor.py | 101 ++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 75cf1886a..990ddee84 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -1176,6 +1176,107 @@ def test_uptime_without_lxcfs(self): uptime, 10, f"Uptime {uptime}s unexpectedly low in container" ) + def test_fuse_overlay(self): + if not shutil.which("fuse-overlayfs"): + self.skipTest("fuse-overlayfs not available") + + with tempfile.NamedTemporaryFile() as test_file: + test_file.write(b"TEST_TOKEN") + test_file.seek(0) + self.setUp( + dir_modes={ + "/": containerexecutor.DIR_OVERLAY, + }, + ) + result, output = self.execute_run( + "/bin/sh", "-c", f"{self.echo} TOKEN_CHANGED >{test_file.name}" + ) + self.check_result_keys(result, "returnvalue") + self.check_exitcode(result, 0, "exit code of inner runexec is not zero") + self.assertTrue( + os.path.exists(test_file.name), + f"File '{test_file.name}' removed, output was:\n" + "\n".join(output), + ) + test_token = test_file.read() + self.assertEqual( + test_token.strip(), + b"TEST_TOKEN", + f"File '{test_file.name}' content is incorrect. 
Expected 'TEST_TOKEN', but got:\n{test_token}", + ) + + def test_triple_nested_runexec(self): + if not shutil.which("fuse-overlayfs"): + self.skipTest("missing fuse-overlayfs") + + with tempfile.TemporaryDirectory(prefix="BenchExec_test_") as temp_dir: + overlay_dir = os.path.join(temp_dir, "overlay") + os.makedirs(overlay_dir) + test_file = os.path.join(overlay_dir, "TEST_FILE") + output_dir = os.path.join(temp_dir, "output") + os.makedirs(output_dir) + mid_output_file = os.path.join(output_dir, "mid_output.log") + inner_output_file = os.path.join(output_dir, "inner_output.log") + with open(test_file, "w") as f: + f.write("TEST_TOKEN") + f.seek(0) + + outer_cmd = [ + "python3", + runexec, + "--container", + "--read-only-dir", + "/", + "--overlay-dir", + overlay_dir, + "--full-access-dir", + "/tmp", + "--output", + mid_output_file, + "--", + ] + mid_cmd = [ + "python3", + runexec, + "--container", + "--read-only-dir", + "/", + "--overlay-dir", + overlay_dir, + "--output", + inner_output_file, + "--", + ] + inner_cmd = ["/bin/sh", "-c", f"{self.echo} TOKEN_CHANGED >{test_file}"] + combined_cmd = outer_cmd + mid_cmd + inner_cmd + + self.setUp( + dir_modes={ + "/": containerexecutor.DIR_READ_ONLY, + overlay_dir: containerexecutor.DIR_OVERLAY, + "/tmp": containerexecutor.DIR_FULL_ACCESS, + }, + ) + outer_result, outer_output = self.execute_run(*combined_cmd) + self.check_result_keys(outer_result, "returnvalue") + self.check_exitcode( + outer_result, 0, "exit code of inner runexec is not zero" + ) + self.assertTrue( + os.path.exists(test_file), + f"File '{test_file}' removed, output was:\n" + "\n".join(outer_output), + ) + self.assertTrue( + os.path.exists(test_file), + f"File '{test_file}' removed, output was:\n" + "\n".join(outer_output), + ) + with open(test_file, "r") as f: + test_token = f.read() + self.assertEqual( + test_token.strip(), + "TEST_TOKEN", + f"File '{test_file}' content is incorrect. 
Expected 'TEST_TOKEN', but got:\n{test_token}", + ) + class _StopRunThread(threading.Thread): def __init__(self, delay, runexecutor): From b1a02d6c802b896ba0467843ed7e77ed4e14e2d6 Mon Sep 17 00:00:00 2001 From: younghojan Date: Thu, 22 Aug 2024 14:36:39 +0800 Subject: [PATCH 16/32] fix: Specify stdin=subprocess.DEVNULL when launching the fuse-overlayfs subprocess to avoid pytest failures, along with some code changes. --- benchexec/container.py | 17 +++++++++++------ benchexec/test_runexecutor.py | 2 ++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index 846e241b3..ab096fda7 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -539,9 +539,6 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): if mode == DIR_OVERLAY: overlay_count += 1 - work_path = work_base + b"/" + str(overlay_count).encode() - os.makedirs(temp_path, exist_ok=True) - os.makedirs(work_path, exist_ok=True) if os.path.ismount(mount_path): try: # Previous mount in this place not needed if replaced with overlay dir. 
@@ -553,6 +550,9 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): fuse_mount_path = fuse_overlay_mount_path + mountpoint make_bind_mount(fuse_mount_path, mount_path) else: + os.makedirs(temp_path, exist_ok=True) + work_path = work_base + b"/" + str(overlay_count).encode() + os.makedirs(work_path, exist_ok=True) try: make_overlay_mount(mount_path, mountpoint, temp_path, work_path) except OSError as e: @@ -856,12 +856,13 @@ def setup_fuse_overlay(temp_base, work_base): if fuse is None: return None temp_fuse = temp_base + b"/fuse" - work_fuse = work_base + b"/0" + work_fuse = work_base + b"/fuse" os.makedirs(temp_fuse, exist_ok=True) os.makedirs(work_fuse, exist_ok=True) logging.debug( - "Creating overlay mount with fuse-overlayfs: target=%s, lower=%s, upper=%s, work=%s", + "Creating overlay mount with %s: target=%s, lower=%s, upper=%s, work=%s", + fuse, temp_fuse, b"/", temp_base, @@ -884,7 +885,11 @@ def setup_fuse_overlay(temp_base, work_base): # Temporarily elevate permitted capabilities to the inheritable set # and raise them in the ambient set. 
result = subprocess.run( - args=cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + args=cmd, + check=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, ) if result.stdout: logging.debug("fuse-overlayfs: %s", result.stdout.decode()) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 990ddee84..671a122df 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -1186,6 +1186,8 @@ def test_fuse_overlay(self): self.setUp( dir_modes={ "/": containerexecutor.DIR_OVERLAY, + "/home": containerexecutor.DIR_HIDDEN, + "/tmp": containerexecutor.DIR_HIDDEN, }, ) result, output = self.execute_run( From 1f6d696101b27fe93ae7dc4b9f7da9fe447e7f77 Mon Sep 17 00:00:00 2001 From: younghojan Date: Mon, 26 Aug 2024 16:48:17 +0800 Subject: [PATCH 17/32] feat: Check if fuse-overlayfs meets the minimum version requirement, verify the existence of /dev/fuse when running inside a container, and optimize error handling. --- benchexec/container.py | 98 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 81 insertions(+), 17 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index ab096fda7..ffe7cea3a 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -14,6 +14,7 @@ import fcntl import logging import os +import re import resource # noqa: F401 @UnusedImport necessary to eagerly import this module import shlex import shutil @@ -479,7 +480,8 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): use_fuse = check_use_fuse_overlayfs(mount_base, dir_modes) # Create overlay mounts for all mount points. 
- if use_fuse: + fuse_version = get_fuse_overlayfs_version() + if use_fuse and fuse_version and fuse_version >= [1, 10]: fuse_overlay_mount_path = setup_fuse_overlay(temp_base, work_base) else: fuse_overlay_mount_path = None @@ -538,7 +540,6 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): temp_path = temp_base + mountpoint if mode == DIR_OVERLAY: - overlay_count += 1 if os.path.ismount(mount_path): try: # Previous mount in this place not needed if replaced with overlay dir. @@ -550,6 +551,7 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): fuse_mount_path = fuse_overlay_mount_path + mountpoint make_bind_mount(fuse_mount_path, mount_path) else: + overlay_count += 1 os.makedirs(temp_path, exist_ok=True) work_path = work_base + b"/" + str(overlay_count).encode() os.makedirs(work_path, exist_ok=True) @@ -560,23 +562,59 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): # This part of the code (using fuse-overlayfs as a fallback) is intentionally # kept as a workaround for triple-nested execution with kernel overlayfs. mp = mountpoint.decode() - if not fuse_overlay_mount_path: - fuse_overlay_mount_path = setup_fuse_overlay( - temp_base, work_base + if fuse_overlay_mount_path: + logging.debug( + "Fallback to fuse-overlayfs for overlay mount at '%s'.", + mp, ) - if not fuse_overlay_mount_path: - logging.warning("fuse-overlayfs is not available.") + fuse_mount_path = fuse_overlay_mount_path + mountpoint + make_bind_mount(fuse_mount_path, mount_path) + elif not fuse_version: # fuse-overlayfs doesn't exist raise OSError( e.errno, - f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. 
" - f"Consider using alternative directory modes, such as '--read-only-dir {shlex.quote(mp)}'.", + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}, " + f"Please either install fuse-overlayfs or use a different directory mode, " + f"such as '--read-only-dir {shlex.quote(mp)}'.", ) from e - - logging.debug( - "Fallback to fuse-overlayfs for overlay mount at '%s'.", mp - ) - fuse_mount_path = fuse_overlay_mount_path + mountpoint - make_bind_mount(fuse_mount_path, mount_path) + elif fuse_version < [1, 10]: # fuse-overlayfs is too old + raise OSError( + e.errno, + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}, " + f"and fuse-overlayfs is too old. " + f"Please either upgrade fuse-overlayfs to version 1.10 or higher " + f"or use a different directory mode, " + f"such as '--read-only-dir {shlex.quote(mp)}'.", + ) from e + else: + fuse_overlay_mount_path = setup_fuse_overlay( + temp_base, work_base + ) + if fuse_overlay_mount_path: + logging.debug( + "Fallback to fuse-overlayfs for overlay mount at '%s'.", + mp, + ) + fuse_mount_path = fuse_overlay_mount_path + mountpoint + make_bind_mount(fuse_mount_path, mount_path) + # benchexec running in a container without /dev/fuse + elif os.getenv("container") == "podman" or os.path.exists( + "/run/.containerenv" + ): + raise OSError( + e.errno, + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. 
" + f"Looks like you are running in a container, " + f"please either launch the container with --device /dev/fuse " + f"or use a different directory mode, " + f"such as '--read-only-dir {shlex.quote(mp)}'.", + ) from e + else: + raise OSError( + e.errno, + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}, " + f"Please either install fuse-overlayfs or use a different directory mode, " + f"such as '--read-only-dir {shlex.quote(mp)}'.", + ) from e elif mode == DIR_HIDDEN: os.makedirs(temp_path, exist_ok=True) @@ -844,6 +882,32 @@ def permitted_cap_as_ambient(): libc.capset(header, data) +def get_fuse_overlayfs_version(): + fuse = shutil.which("fuse-overlayfs") + if fuse is None: + return None + try: + result = subprocess.run( + args=(fuse, "--version"), + check=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) + output = result.stdout.decode() + + if match := re.search( + r"^fuse-overlayfs:.*?(\d+\.\d+(\.\d+)?)", output, re.MULTILINE + ): + logging.debug("fuse-overlayfs version: %s", match[1]) + return [int(part) for part in match[1].split(".")] + else: + logging.warning("Could not find version information of %s in output.", fuse) + return None + except subprocess.CalledProcessError: + return None + + def setup_fuse_overlay(temp_base, work_base): """ Check if fuse-overlayfs is available on the system and, @@ -894,8 +958,8 @@ def setup_fuse_overlay(temp_base, work_base): if result.stdout: logging.debug("fuse-overlayfs: %s", result.stdout.decode()) return temp_fuse - except subprocess.CalledProcessError as e: - sys.exit(f"Error executing command: {e}\n{e.stdout.decode()}") + except subprocess.CalledProcessError: + return None def mount_proc(container_system_config): From dc482b27cc7dabf603fec4b36e18d3f9cb77f5c6 Mon Sep 17 00:00:00 2001 From: younghojan Date: Wed, 28 Aug 2024 14:24:48 +0800 Subject: [PATCH 18/32] fix: fix issue of checking for fuse-overlayfs functionality outside of the path being overlaid and 
add a check before modifying TEST_TOKEN. --- benchexec/test_runexecutor.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index a2b4d7001..82b765fd1 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -1203,31 +1203,43 @@ def test_fuse_overlay(self): if not shutil.which("fuse-overlayfs"): self.skipTest("fuse-overlayfs not available") - with tempfile.NamedTemporaryFile() as test_file: - test_file.write(b"TEST_TOKEN") - test_file.seek(0) + test_dir = "/tmp/fuse_test/" + os.makedirs(test_dir, exist_ok=True) + test_file_path = os.path.join(test_dir, "test_file") + + try: + with open(test_file_path, "wb") as test_file: + test_file.write(b"TEST_TOKEN") + self.setUp( dir_modes={ - "/": containerexecutor.DIR_OVERLAY, + "/": containerexecutor.DIR_READ_ONLY, "/home": containerexecutor.DIR_HIDDEN, "/tmp": containerexecutor.DIR_HIDDEN, + test_dir: containerexecutor.DIR_OVERLAY, }, ) result, output = self.execute_run( - "/bin/sh", "-c", f"{self.echo} TOKEN_CHANGED >{test_file.name}" + "/bin/sh", + "-c", + f"if [ $({self.cat} {test_file_path}) != TEST_TOKEN ]; then exit 1; fi; \ + {self.echo} TOKEN_CHANGED >{test_file_path}", ) self.check_result_keys(result, "returnvalue") self.check_exitcode(result, 0, "exit code of inner runexec is not zero") self.assertTrue( - os.path.exists(test_file.name), - f"File '{test_file.name}' removed, output was:\n" + "\n".join(output), + os.path.exists(test_file_path), + f"File '{test_file_path}' removed, output was:\n" + "\n".join(output), ) - test_token = test_file.read() + with open(test_file_path, "rb") as test_file: + test_token = test_file.read() self.assertEqual( test_token.strip(), b"TEST_TOKEN", - f"File '{test_file.name}' content is incorrect. Expected 'TEST_TOKEN', but got:\n{test_token}", + f"File '{test_file_path}' content is incorrect. 
Expected 'TEST_TOKEN', but got:\n{test_token}", ) + finally: + shutil.rmtree(test_dir) def test_triple_nested_runexec(self): if not shutil.which("fuse-overlayfs"): From e0aec8ce9054ed43338ccdeab38199b572efec1b Mon Sep 17 00:00:00 2001 From: younghojan Date: Thu, 29 Aug 2024 15:08:17 +0800 Subject: [PATCH 19/32] chore: Refactor and improve test_triple_nested_runexec --- benchexec/test_runexecutor.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 82b765fd1..b3f5346ed 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -1200,7 +1200,7 @@ def test_uptime_without_lxcfs(self): ) def test_fuse_overlay(self): - if not shutil.which("fuse-overlayfs"): + if not container.get_fuse_overlayfs_executable(): self.skipTest("fuse-overlayfs not available") test_dir = "/tmp/fuse_test/" @@ -1242,7 +1242,7 @@ def test_fuse_overlay(self): shutil.rmtree(test_dir) def test_triple_nested_runexec(self): - if not shutil.which("fuse-overlayfs"): + if not container.get_fuse_overlayfs_executable(): self.skipTest("missing fuse-overlayfs") with tempfile.TemporaryDirectory(prefix="BenchExec_test_") as temp_dir: @@ -1260,13 +1260,12 @@ def test_triple_nested_runexec(self): outer_cmd = [ "python3", runexec, - "--container", "--read-only-dir", "/", "--overlay-dir", overlay_dir, "--full-access-dir", - "/tmp", + output_dir, "--output", mid_output_file, "--", @@ -1274,16 +1273,21 @@ def test_triple_nested_runexec(self): mid_cmd = [ "python3", runexec, - "--container", "--read-only-dir", "/", "--overlay-dir", overlay_dir, + "--full-access-dir", + output_dir, "--output", inner_output_file, "--", ] - inner_cmd = ["/bin/sh", "-c", f"{self.echo} TOKEN_CHANGED >{test_file}"] + inner_cmd = [ + "/bin/sh", + "-c", + f"if [ $({self.cat} {test_file}) != TEST_TOKEN ]; then exit 1; fi; {self.echo} TOKEN_CHANGED >{test_file}", + ] combined_cmd = outer_cmd + mid_cmd + 
inner_cmd self.setUp( @@ -1298,19 +1302,17 @@ def test_triple_nested_runexec(self): self.check_exitcode( outer_result, 0, "exit code of inner runexec is not zero" ) + with open(mid_output_file, "r") as mid_output_file: + self.assertIn("returnvalue=0", mid_output_file.read()) self.assertTrue( os.path.exists(test_file), f"File '{test_file}' removed, output was:\n" + "\n".join(outer_output), ) - self.assertTrue( - os.path.exists(test_file), - f"File '{test_file}' removed, output was:\n" + "\n".join(outer_output), - ) - with open(test_file, "r") as f: - test_token = f.read() + with open(test_file, "rb") as test_file: + test_token = test_file.read() self.assertEqual( test_token.strip(), - "TEST_TOKEN", + b"TEST_TOKEN", f"File '{test_file}' content is incorrect. Expected 'TEST_TOKEN', but got:\n{test_token}", ) From e0833b3eda6965225f4cc3b3f7577bb49185aa10 Mon Sep 17 00:00:00 2001 From: younghojan Date: Thu, 29 Aug 2024 19:47:43 +0800 Subject: [PATCH 20/32] chore: Refactor fuse-overlayfs setup and error handling --- benchexec/container.py | 89 +++++++++++++++++++---------------- benchexec/test_runexecutor.py | 9 ++-- 2 files changed, 54 insertions(+), 44 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index db82328e0..afab3e0fc 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -482,11 +482,9 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): use_fuse = check_use_fuse_overlayfs(mount_base, dir_modes) # Create overlay mounts for all mount points. 
- fuse_version = get_fuse_overlayfs_version() - if use_fuse and fuse_version and fuse_version >= [1, 10]: - fuse_overlay_mount_path = setup_fuse_overlay(temp_base, work_base) - else: - fuse_overlay_mount_path = None + fuse_overlay_mount_path = ( + setup_fuse_overlay(temp_base, work_base) if use_fuse else None + ) for _unused_source, full_mountpoint, fstype, options in list(get_mount_points()): if not util.path_is_below(full_mountpoint, mount_base): @@ -571,23 +569,16 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): ) fuse_mount_path = fuse_overlay_mount_path + mountpoint make_bind_mount(fuse_mount_path, mount_path) - elif not fuse_version: # fuse-overlayfs doesn't exist - raise OSError( - e.errno, - f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}, " - f"Please either install fuse-overlayfs or use a different directory mode, " - f"such as '--read-only-dir {shlex.quote(mp)}'.", - ) from e - elif fuse_version < [1, 10]: # fuse-overlayfs is too old - raise OSError( - e.errno, - f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}, " - f"and fuse-overlayfs is too old. " - f"Please either upgrade fuse-overlayfs to version 1.10 or higher " - f"or use a different directory mode, " - f"such as '--read-only-dir {shlex.quote(mp)}'.", - ) from e else: + if use_fuse: + # We tried to use overlayfs before, but it failed. + # No need to try again, just log the error. 
+ raise OSError( + e.errno, + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}, " + f"Please either install fuse-overlayfs in at least version 1.10, " + f"or use a different directory mode such as '--read-only-dir {shlex.quote(mp)}'.", + ) from e fuse_overlay_mount_path = setup_fuse_overlay( temp_base, work_base ) @@ -598,10 +589,10 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): ) fuse_mount_path = fuse_overlay_mount_path + mountpoint make_bind_mount(fuse_mount_path, mount_path) - # benchexec running in a container without /dev/fuse elif os.getenv("container") == "podman" or os.path.exists( "/run/.containerenv" ): + # benchexec running in a container without /dev/fuse raise OSError( e.errno, f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. " @@ -610,13 +601,6 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): f"or use a different directory mode, " f"such as '--read-only-dir {shlex.quote(mp)}'.", ) from e - else: - raise OSError( - e.errno, - f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}, " - f"Please either install fuse-overlayfs or use a different directory mode, " - f"such as '--read-only-dir {shlex.quote(mp)}'.", - ) from e elif mode == DIR_HIDDEN: os.makedirs(temp_path, exist_ok=True) @@ -884,10 +868,17 @@ def permitted_cap_as_ambient(): libc.capset(header, data) -def get_fuse_overlayfs_version(): +def get_fuse_overlayfs_executable(): + """ + Retrieve the path to the fuse-overlayfs executable + if it is available and meets the version requirement. + + @return: The path to fuse-overlayfs executable if found and valid, None otherwise. 
+ """ fuse = shutil.which("fuse-overlayfs") if fuse is None: return None + try: result = subprocess.run( args=(fuse, "--version"), @@ -895,19 +886,34 @@ def get_fuse_overlayfs_version(): stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + text=True, ) - output = result.stdout.decode() + output = result.stdout + except subprocess.CalledProcessError as e: + logging.warning("%s not available: %s", fuse, e) + return None - if match := re.search( - r"^fuse-overlayfs:.*?(\d+\.\d+(\.\d+)?)", output, re.MULTILINE - ): - logging.debug("fuse-overlayfs version: %s", match[1]) - return [int(part) for part in match[1].split(".")] + if match := re.search( + r"^fuse-overlayfs:.*?(\d+\.\d+(\.\d+)?)", output, re.MULTILINE + ): + version = [int(part) for part in match[1].split(".")] + if version >= [1, 10]: + logging.debug("%s version: %s", fuse, match[1]) + return fuse else: - logging.warning("Could not find version information of %s in output.", fuse) + logging.warning( + "Ignoring %s because its version %s is broken. " + "Please install version 1.10 or newer.", + fuse, + match[1], + ) return None - except subprocess.CalledProcessError: - return None + else: + logging.warning( + "Could not find version information of %s in output, but still attempt to use it.", + fuse, + ) + return fuse def setup_fuse_overlay(temp_base, work_base): @@ -918,7 +924,7 @@ def setup_fuse_overlay(temp_base, work_base): @return: The path to the mounted overlay filesystem if successful, None otherwise. 
""" - fuse = shutil.which("fuse-overlayfs") + fuse = get_fuse_overlayfs_executable() if fuse is None: return None temp_fuse = temp_base + b"/fuse" @@ -960,7 +966,8 @@ def setup_fuse_overlay(temp_base, work_base): if result.stdout: logging.debug("fuse-overlayfs: %s", result.stdout.decode()) return temp_fuse - except subprocess.CalledProcessError: + except subprocess.CalledProcessError as e: + logging.critical("Failed to create overlay mount with %s: %s", fuse, e) return None diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index b3f5346ed..634e867d5 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -1293,6 +1293,7 @@ def test_triple_nested_runexec(self): self.setUp( dir_modes={ "/": containerexecutor.DIR_READ_ONLY, + "/home": containerexecutor.DIR_OVERLAY, overlay_dir: containerexecutor.DIR_OVERLAY, "/tmp": containerexecutor.DIR_FULL_ACCESS, }, @@ -1300,10 +1301,12 @@ def test_triple_nested_runexec(self): outer_result, outer_output = self.execute_run(*combined_cmd) self.check_result_keys(outer_result, "returnvalue") self.check_exitcode( - outer_result, 0, "exit code of inner runexec is not zero" + outer_result, 0, "exit code of outer runexec is not zero" ) - with open(mid_output_file, "r") as mid_output_file: - self.assertIn("returnvalue=0", mid_output_file.read()) + with open(mid_output_file, "rb") as mid_output_file: + self.assertIn( + b"returnvalue=0", mid_output_file.read().strip().splitlines() + ) self.assertTrue( os.path.exists(test_file), f"File '{test_file}' removed, output was:\n" + "\n".join(outer_output), From b63db003136c3e889d1c325df7f6e9ffc14c5631 Mon Sep 17 00:00:00 2001 From: younghojan Date: Mon, 2 Sep 2024 19:48:56 +0800 Subject: [PATCH 21/32] Refactor and improve fuse-overlay related tests --- benchexec/test_runexecutor.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 
634e867d5..cd1664046 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -1202,12 +1202,8 @@ def test_uptime_without_lxcfs(self): def test_fuse_overlay(self): if not container.get_fuse_overlayfs_executable(): self.skipTest("fuse-overlayfs not available") - - test_dir = "/tmp/fuse_test/" - os.makedirs(test_dir, exist_ok=True) - test_file_path = os.path.join(test_dir, "test_file") - - try: + with tempfile.TemporaryDirectory(prefix="BenchExec_test_") as temp_dir: + test_file_path = os.path.join(temp_dir, "test_file") with open(test_file_path, "wb") as test_file: test_file.write(b"TEST_TOKEN") @@ -1216,7 +1212,7 @@ def test_fuse_overlay(self): "/": containerexecutor.DIR_READ_ONLY, "/home": containerexecutor.DIR_HIDDEN, "/tmp": containerexecutor.DIR_HIDDEN, - test_dir: containerexecutor.DIR_OVERLAY, + temp_dir: containerexecutor.DIR_OVERLAY, }, ) result, output = self.execute_run( @@ -1238,8 +1234,6 @@ def test_fuse_overlay(self): b"TEST_TOKEN", f"File '{test_file_path}' content is incorrect. 
Expected 'TEST_TOKEN', but got:\n{test_token}", ) - finally: - shutil.rmtree(test_dir) def test_triple_nested_runexec(self): if not container.get_fuse_overlayfs_executable(): @@ -1260,12 +1254,14 @@ def test_triple_nested_runexec(self): outer_cmd = [ "python3", runexec, - "--read-only-dir", + "--full-access-dir", "/", "--overlay-dir", overlay_dir, "--full-access-dir", output_dir, + "--hidden-dir", + "/tmp", "--output", mid_output_file, "--", @@ -1273,12 +1269,14 @@ def test_triple_nested_runexec(self): mid_cmd = [ "python3", runexec, - "--read-only-dir", + "--full-access-dir", "/", "--overlay-dir", overlay_dir, "--full-access-dir", output_dir, + "--hidden-dir", + "/tmp", "--output", inner_output_file, "--", @@ -1292,10 +1290,10 @@ def test_triple_nested_runexec(self): self.setUp( dir_modes={ - "/": containerexecutor.DIR_READ_ONLY, - "/home": containerexecutor.DIR_OVERLAY, + "/": containerexecutor.DIR_FULL_ACCESS, + "/tmp": containerexecutor.DIR_HIDDEN, overlay_dir: containerexecutor.DIR_OVERLAY, - "/tmp": containerexecutor.DIR_FULL_ACCESS, + output_dir: containerexecutor.DIR_FULL_ACCESS, }, ) outer_result, outer_output = self.execute_run(*combined_cmd) From 38a0508450111b7822350910dd9ac8ab30d9849f Mon Sep 17 00:00:00 2001 From: younghojan Date: Wed, 4 Sep 2024 14:37:39 +0800 Subject: [PATCH 22/32] Omit test_triple_nested_runexec when coverage testing --- benchexec/test_runexecutor.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index cd1664046..4ede603c6 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -6,6 +6,7 @@ # SPDX-License-Identifier: Apache-2.0 import contextlib +from difflib import restore import logging import os import re @@ -1239,6 +1240,13 @@ def test_triple_nested_runexec(self): if not container.get_fuse_overlayfs_executable(): self.skipTest("missing fuse-overlayfs") + # Check if COV_CORE_SOURCE environment variable is set and remove it. 
+ # This is necessary because the coverage tool will not work in the nested runexec. + restore_env = False + if "COV_CORE_SOURCE" in os.environ: + restore_env = True + del os.environ["COV_CORE_SOURCE"] + with tempfile.TemporaryDirectory(prefix="BenchExec_test_") as temp_dir: overlay_dir = os.path.join(temp_dir, "overlay") os.makedirs(overlay_dir) @@ -1317,6 +1325,10 @@ def test_triple_nested_runexec(self): f"File '{test_file}' content is incorrect. Expected 'TEST_TOKEN', but got:\n{test_token}", ) + # Restore COV_CORE_SOURCE environment variable + if restore_env: + os.environ["COV_CORE_SOURCE"] = "1" + class _StopRunThread(threading.Thread): def __init__(self, delay, runexecutor): From 5d2a349d3e27e29a083cd027cab6aa165dcc57e4 Mon Sep 17 00:00:00 2001 From: younghojan Date: Wed, 4 Sep 2024 17:42:33 +0800 Subject: [PATCH 23/32] Refactor COV_CORE_SOURCE environment variable handling --- benchexec/test_runexecutor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 4ede603c6..aa4bf6d4d 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -6,7 +6,6 @@ # SPDX-License-Identifier: Apache-2.0 import contextlib -from difflib import restore import logging import os import re From a8a351696e5c58df2541614547d5c1be02ee3a11 Mon Sep 17 00:00:00 2001 From: younghojan Date: Thu, 5 Sep 2024 13:49:13 +0800 Subject: [PATCH 24/32] Safely encode string for fuse-overlayfs paths --- benchexec/container.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index afab3e0fc..2a4372ee7 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -772,12 +772,21 @@ def remount_with_additional_flags(mountpoint, fstype, existing_options, mountfla def escape_overlayfs_parameters(s): """ - Safely encode a string for being used as a path for overlayfs. 
+ Safely encode a string for being used as a path for both kernel overlayfs + and fuse-overlayfs. In addition to escaping ",", which separates mount options, we need to escape ":", which overlayfs uses to separate multiple lower dirs (cf. https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt). + Also, the path shall be nomalized to avoid issues with "//" in the beginning + (cf. https://github.com/sosy-lab/benchexec/pull/1062). """ - return s.replace(b"\\", rb"\\").replace(b":", rb"\:").replace(b",", rb"\,") + assert s[0] == ord(b"/"), "Path must be absolute" + normalized_path = b"/" + s.lstrip(b"/") + return ( + normalized_path.replace(b"\\", rb"\\") + .replace(b":", rb"\:") + .replace(b",", rb"\,") + ) def make_overlay_mount(mount, lower, upper, work): From 34f57f16bf61ad31513dd9c5c7da0fce2b23d576 Mon Sep 17 00:00:00 2001 From: younghojan Date: Thu, 5 Sep 2024 14:54:43 +0800 Subject: [PATCH 25/32] Refactor determine_directory_mode function for fuse-overlayfs compatibility --- benchexec/container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/container.py b/benchexec/container.py index 2a4372ee7..d4deb4b83 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -692,7 +692,7 @@ def determine_directory_mode(dir_modes, path, fstype=None): result_mode == DIR_OVERLAY and fstype and ( - fstype.startswith(b"fuse.") + (fstype.startswith(b"fuse.") and fstype != b"fuse.fuse-overlayfs") or fstype == b"autofs" or fstype == b"vfat" or fstype == b"ntfs" From 2fd26ffcc7fa77f1f249d90e04024170f6604609 Mon Sep 17 00:00:00 2001 From: younghojan Date: Thu, 5 Sep 2024 14:55:20 +0800 Subject: [PATCH 26/32] Refactor file handling in test_runexecutor.py for better readability --- benchexec/test_runexecutor.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index aa4bf6d4d..53017b70c 100644 --- a/benchexec/test_runexecutor.py +++ 
b/benchexec/test_runexecutor.py @@ -1308,19 +1308,17 @@ def test_triple_nested_runexec(self): self.check_exitcode( outer_result, 0, "exit code of outer runexec is not zero" ) - with open(mid_output_file, "rb") as mid_output_file: - self.assertIn( - b"returnvalue=0", mid_output_file.read().strip().splitlines() - ) + with open(mid_output_file, "r") as f: + self.assertIn("returnvalue=0", f.read().strip().splitlines()) self.assertTrue( os.path.exists(test_file), f"File '{test_file}' removed, output was:\n" + "\n".join(outer_output), ) - with open(test_file, "rb") as test_file: - test_token = test_file.read() + with open(test_file, "r") as f: + test_token = f.read() self.assertEqual( test_token.strip(), - b"TEST_TOKEN", + "TEST_TOKEN", f"File '{test_file}' content is incorrect. Expected 'TEST_TOKEN', but got:\n{test_token}", ) From 88db4199613470b4411fa796662b8a8a60562ae5 Mon Sep 17 00:00:00 2001 From: younghojan Date: Thu, 5 Sep 2024 19:08:01 +0800 Subject: [PATCH 27/32] Refactor overlay mount error handling for better compatibility --- benchexec/container.py | 69 ++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index d4deb4b83..e15652291 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -572,35 +572,46 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): else: if use_fuse: # We tried to use overlayfs before, but it failed. - # No need to try again, just log the error. 
- raise OSError( - e.errno, - f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}, " - f"Please either install fuse-overlayfs in at least version 1.10, " - f"or use a different directory mode such as '--read-only-dir {shlex.quote(mp)}'.", - ) from e - fuse_overlay_mount_path = setup_fuse_overlay( - temp_base, work_base - ) - if fuse_overlay_mount_path: - logging.debug( - "Fallback to fuse-overlayfs for overlay mount at '%s'.", - mp, + # No need to try again, just log the error accordingly. + if os.getenv("container") == "podman" or os.path.exists( + "/run/.containerenv" + ): + # benchexec running in a container without /dev/fuse + raise OSError( + e.errno, + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. " + f"Looks like benchexec is running in a container, " + f"please either launch the container with '--device /dev/fuse' " + f"or use a different directory mode, " + f"such as '--read-only-dir {shlex.quote(mp)}'.", + ) from e + else: + raise OSError( + e.errno, + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. " + f"Please either install version 1.10 or higher of fuse-overlayfs, " + f"or use a different directory mode such as '--read-only-dir {shlex.quote(mp)}'.", + ) from e + else: + # We should try fuse-overlayfs here, this could handle triple-nested benchexec. + # (cf. https://github.com/sosy-lab/benchexec/issues/1067 + fuse_overlay_mount_path = setup_fuse_overlay( + temp_base, work_base ) - fuse_mount_path = fuse_overlay_mount_path + mountpoint - make_bind_mount(fuse_mount_path, mount_path) - elif os.getenv("container") == "podman" or os.path.exists( - "/run/.containerenv" - ): - # benchexec running in a container without /dev/fuse - raise OSError( - e.errno, - f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. 
" - f"Looks like you are running in a container, " - f"please either launch the container with --device /dev/fuse " - f"or use a different directory mode, " - f"such as '--read-only-dir {shlex.quote(mp)}'.", - ) from e + if fuse_overlay_mount_path: + logging.debug( + "Fallback to fuse-overlayfs for overlay mount at '%s'.", + mp, + ) + fuse_mount_path = fuse_overlay_mount_path + mountpoint + make_bind_mount(fuse_mount_path, mount_path) + else: + raise OSError( + e.errno, + f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. " + f"Please either install version 1.10 or higher of fuse-overlayfs, " + f"or use a different directory mode such as '--read-only-dir {shlex.quote(mp)}'.", + ) from e elif mode == DIR_HIDDEN: os.makedirs(temp_path, exist_ok=True) @@ -976,7 +987,7 @@ def setup_fuse_overlay(temp_base, work_base): logging.debug("fuse-overlayfs: %s", result.stdout.decode()) return temp_fuse except subprocess.CalledProcessError as e: - logging.critical("Failed to create overlay mount with %s: %s", fuse, e) + logging.debug("Failed to create overlay mount with %s: %s", fuse, e) return None From 2f9d52e7a6775bebddc442894d0b9164b143e219 Mon Sep 17 00:00:00 2001 From: younghojan Date: Sun, 15 Sep 2024 10:55:51 +0800 Subject: [PATCH 28/32] Fix typo --- benchexec/container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/container.py b/benchexec/container.py index e15652291..8fdf64f3e 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -580,7 +580,7 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): raise OSError( e.errno, f"Failed to create overlay mount for '{mp}': {os.strerror(e.errno)}. 
" - f"Looks like benchexec is running in a container, " + f"Looks like BenchExec is running in a container, " f"please either launch the container with '--device /dev/fuse' " f"or use a different directory mode, " f"such as '--read-only-dir {shlex.quote(mp)}'.", From 1c49af2caeee92f103b1aebcdd43fee10353aa81 Mon Sep 17 00:00:00 2001 From: younghojan Date: Sun, 15 Sep 2024 10:56:08 +0800 Subject: [PATCH 29/32] Refactor handling of COV_CORE_SOURCE environment variable in TestRunExecutorWithContainer --- benchexec/test_runexecutor.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 53017b70c..6024b7ced 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -1241,10 +1241,7 @@ def test_triple_nested_runexec(self): # Check if COV_CORE_SOURCE environment variable is set and remove it. # This is necessary because the coverage tool will not work in the nested runexec. - restore_env = False - if "COV_CORE_SOURCE" in os.environ: - restore_env = True - del os.environ["COV_CORE_SOURCE"] + coverage_env_var = os.environ.pop("COV_CORE_SOURCE", None) with tempfile.TemporaryDirectory(prefix="BenchExec_test_") as temp_dir: overlay_dir = os.path.join(temp_dir, "overlay") @@ -1323,8 +1320,8 @@ def test_triple_nested_runexec(self): ) # Restore COV_CORE_SOURCE environment variable - if restore_env: - os.environ["COV_CORE_SOURCE"] = "1" + if coverage_env_var is not None: + os.environ["COV_CORE_SOURCE"] = coverage_env_var class _StopRunThread(threading.Thread): From ea92000133b7fe0b6492aedbf6a06f0d3346c61b Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 19 Sep 2024 09:56:39 +0200 Subject: [PATCH 30/32] Change internal paths used for fuse-overlayfs mounts The temp_base directory (.../temp) is the one that BenchExec uses to store output files of the tool, and after a run we iterate through it and copy files from there to the output directory. 
Thus we should not use it for internal stuff. But the work_base directory is fine for that. So let's move the fuse mountpoint to work_base as well. --- benchexec/container.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchexec/container.py b/benchexec/container.py index 8fdf64f3e..9ef40e9ea 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -947,8 +947,8 @@ def setup_fuse_overlay(temp_base, work_base): fuse = get_fuse_overlayfs_executable() if fuse is None: return None - temp_fuse = temp_base + b"/fuse" - work_fuse = work_base + b"/fuse" + temp_fuse = work_base + b"/fuse_mount" + work_fuse = work_base + b"/fuse_work" os.makedirs(temp_fuse, exist_ok=True) os.makedirs(work_fuse, exist_ok=True) From ec11b7f143922bf7380d71ec486e5ab1eef23868 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 19 Sep 2024 10:46:30 +0200 Subject: [PATCH 31/32] Add logging about why fuse-overlayfs is used --- benchexec/container.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchexec/container.py b/benchexec/container.py index 9ef40e9ea..340148d54 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -848,6 +848,10 @@ def check_use_fuse_overlayfs(mount_base, dir_modes): util.path_is_below(sub_mountpoint, mountpoint) and sub_mountpoint != mountpoint ): + logging.debug( + "Using fuse-overlayfs because of mount on '%s'", + mountpoint.decode(), + ) return True return False From 33249f172beb6a7f7b73b16b894d14d9dfa5ad2a Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 19 Sep 2024 11:00:02 +0200 Subject: [PATCH 32/32] Detect and error out if temp is not hidden and we use fuse-overlayfs Somehow this causes deadlocks that we did not manage to solve even by making our own temp directory hidden. So let's at least avoid the deadlock and provide a proper error message. 
More background is in the discussions: https://github.com/sosy-lab/benchexec/pull/1062#discussion_r1732494331 https://github.com/sosy-lab/benchexec/pull/1062#issuecomment-2354889831 --- benchexec/container.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/benchexec/container.py b/benchexec/container.py index 340148d54..603ca576e 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -486,6 +486,20 @@ def duplicate_mount_hierarchy(mount_base, temp_base, work_base, dir_modes): setup_fuse_overlay(temp_base, work_base) if use_fuse else None ) + # For some reason there is a deadlock if our temp dir is not hidden, + # and we did not manage to solve this by forcing the mode to hidden here. + # The whole temp directory of the system needs to be hidden to have this working. + # cf. https://github.com/sosy-lab/benchexec/pull/1062#discussion_r1732494331 + if fuse_overlay_mount_path and ( + (temp_dir_mode := determine_directory_mode(dir_modes, temp_base)) + not in [None, DIR_HIDDEN] + ): + raise OSError( + "BenchExec needs to use fuse-overlayfs but the directory mode of " + f'the temp directory is "{temp_dir_mode}", which would lead to a deadlock. ' + 'Please use the default "hidden" directory mode for the temp directory.' + ) + for _unused_source, full_mountpoint, fstype, options in list(get_mount_points()): if not util.path_is_below(full_mountpoint, mount_base): continue