Skip to content

Commit

Permalink
Merge pull request containers#23487 from Luap99/ebpf
Browse files Browse the repository at this point in the history
add epbf program to trace podman cleanup errors in CI
  • Loading branch information
openshift-merge-bot[bot] authored Sep 24, 2024
2 parents 66139fc + 09ba5db commit af4b061
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,9 @@ bindings_task:
server_log_artifacts:
path: ./podman-server.log
type: text/plain
cleanup_tracer_artifacts:
path: ./podman-cleanup-tracer.log
type: text/plain
df_script: '$SCRIPT_BASE/logcollector.sh df'
audit_log_script: '$SCRIPT_BASE/logcollector.sh audit'
journal_script: '$SCRIPT_BASE/logcollector.sh journal'
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,4 @@ contrib/win-installer/shasums
contrib/win-installer/*.wixobj
contrib/win-installer/*.wixpdb
contrib/win-installer/*.log
podman-cleanup-tracer.log*
14 changes: 14 additions & 0 deletions contrib/cirrus/runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,15 @@ msg "************************************************************"
((${SETUP_ENVIRONMENT:-0})) || \
die "Expecting setup_environment.sh to have completed successfully"

if [[ "$UID" -eq 0 ]] && ((CONTAINER==0)); then
# start ebpf cleanup tracer (#23487)
msg "start ebpf cleanup tracer"
# replace zero bytes to make the log more readable
bpftrace $GOSRC/hack/podman_cleanup_tracer.bt |& \
tr '\0' ' ' >$GOSRC/podman-cleanup-tracer.log &
TRACER_PID=$!
fi

# shellcheck disable=SC2154
if [[ "$PRIV_NAME" == "rootless" ]] && [[ "$UID" -eq 0 ]]; then
# Remove /var/lib/cni, it is not required for rootless cni.
Expand Down Expand Up @@ -499,4 +508,9 @@ fi

showrun $handler

if [[ -n "$TRACER_PID" ]]; then
# ignore any error here
kill "$TRACER_PID" || true
fi

showrun echo "finished"
151 changes: 151 additions & 0 deletions hack/podman_cleanup_tracer.bt
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#!/usr/bin/env bpftrace

// Traces podman and conmon commands for there exit status and all process that
// send signals to them. Also prints stderr for all podman container cleanup processes.
// The goal is to be able to trace all podman cleanup errors, while there is an
// option to log the errors to the syslog that only works of the process if the
// process got there and not if it was killed before. This program here will trace
// the exit status as well so we know even if it was killed by a signal.
//
// This is script uses https://github.com/bpftrace/bpftrace to compile this into
// kernel ebpf.
//
// Usage: sudo ./podman_cleanup_tracer.bt
// see below for output format

BEGIN {
print("Output format is one of:");
print("exec TIME PID PPID COMM ARGV(truncated)");
print("cmd TIME PID PPID ARGV(full)");
print("exit TIME PID PPID COMM EXIT_CODE EXIT_SIGNAL");
print("kill TIME PID PPID COMM SIGNAL TPID RET");
print("stderr TIME PID PPID COMM OUTPUT");
}

// Trace all exec calls to find all podman + conmon processes, it will also catch
// netavark and aardvark-dns as they have podman in the path as well but this is
// good so we can see any errors there as well.
tracepoint:syscalls:sys_enter_exec*
/ strcontains(str(args.argv[0]),"podman") || strcontains(str(args.argv[0]), "conmon") /
{

// create entry in pid map so we can check the pid later
@pids[pid] = 1;

// Find the podman cleanup process spawned by conmon.
// I tried matching argv but there seems to be no way to iterate over it.
// In practise parent name conmon and argv0 podman should contain all the
// cleanup processes we care about.
if (comm == "conmon" && strcontains(str(args.argv[0]), "podman")) {
@cleanupPids[pid] = 1;
}

printf("%-6s %s %-8d %-8d %-12s ",
"exec",
strftime("%H:%M:%S.%f", nsecs),
pid,
curtask->real_parent->tgid,
comm
);

// Unfortunately this doesn't print the full argv0 as there is some limit,
// as such we use the /proc/<>/cmdline reading hack below on exec exit.
join(args->argv);
}


// Print the full cmdline
tracepoint:syscalls:sys_exit_exec*
/ @pids[pid] /
{
printf("%-6s %s %-8d %-8d ",
"cmd",
strftime("%H:%M:%S.%f", nsecs),
pid,
curtask->real_parent->tgid
);
// This can fail to open the file it is done in user space and
// thus racy if the process exits quickly.
cat("/proc/%d/cmdline", pid);
print("");
}


// Trace all exits for the pids we matched above,
// pid == tid is used to only match the main exit
// and not all thread exits which we do not care about.
tracepoint:sched:sched_process_exit
/ @pids[pid] && pid == tid /
{
printf("%-6s %s %-8d %-8d %-12s %d %d\n",
"exit",
strftime("%H:%M:%S.%f", nsecs),
pid,
curtask->real_parent->tgid,
comm,
curtask->exit_code >> 8, // actual exit code
curtask->exit_code & 0xFF // signal number if killed
);

// process is done remove pid from map
delete(@pids[pid]);
delete(@cleanupPids[pid]);
}

// Trace all kill calls that target our pids.
// This will not catch signals send via pidfd_send_signal because
// I don't see a way to translate the pidfd to our target pids and
// I don't want to log all send signals on the system.
tracepoint:syscalls:sys_enter_kill
/ @pids[args.pid] /
{
@tpid[tid] = args.pid;
@tsig[tid] = args.sig;
}

tracepoint:syscalls:sys_exit_kill
/ @tpid[tid] /
{
printf("%-6s %s %-8d %-8d %-12s %d %-8d %d\n",
"kill",
strftime("%H:%M:%S.%f", nsecs),
pid,
curtask->real_parent->tgid,
comm,
@tsig[tid],
@tpid[tid],
args.ret
);
delete(@tpid[tid]);
delete(@tsig[tid]);
}


// Print anything written on stderr for the podman container cleanup process.
tracepoint:syscalls:sys_enter_write
/ @cleanupPids[pid] && args.fd == 2 /
{
printf("%-6s %s %-8d %-8d %-12s",
"stderr",
strftime("%H:%M:%S.%f", nsecs),
pid,
curtask->real_parent->tgid,
comm
);

// String size limit is is 64 by default, this includes the 0 byte and when we
// hit the string limit it also adds "..." when using 63 so we use 63 as len here.
// While upstream fixed these low string limits (https://github.com/bpftrace/bpftrace/issues/305)
// it is not yet in older distro version we use so we cannot use that yet.
// Thus manually print several times.
$len = 62;
$offset = 62;
printf("%s", str(args.buf, $len));

unroll(10) {
if ((int64)args.count > $offset ) {
printf("%s", str(args.buf + $offset, $len));
}
$offset += $len
}
}

0 comments on commit af4b061

Please sign in to comment.