diff --git a/man/man8/runqlat.8 b/man/man8/runqlat.8 index 9cc6176c0f7f..2986ff5bbb69 100644 --- a/man/man8/runqlat.8 +++ b/man/man8/runqlat.8 @@ -2,7 +2,7 @@ .SH NAME runqlat \- Run queue (scheduler) latency as a histogram. .SH SYNOPSIS -.B runqlat [\-h] [\-T] [\-m] [\-P] [\-p PID] [interval] [count] +.B runqlat [\-h] [\-T] [\-m] [\-P] [\-\-pidnss] [\-L] [\-p PID] [interval] [count] .SH DESCRIPTION This measures the time a task spends waiting on a run queue (or equivalent scheduler data structure) for a turn on-CPU, and shows this time as a @@ -44,6 +44,13 @@ Output histogram in milliseconds. \-P Print a histogram for each PID. .TP +\-\-pidnss +Print a histogram for each PID namespace (short for PID namespaces). For +container analysis. +.TP +\-L +Print a histogram for each thread ID. +.TP \-p PID Only show this PID (filtered in kernel for efficiency). .TP diff --git a/tools/runqlat.py b/tools/runqlat.py index bae0138d0fa4..1eb5d9ac9b44 100755 --- a/tools/runqlat.py +++ b/tools/runqlat.py @@ -37,7 +37,7 @@ ./runqlat -p 185 # trace PID 185 only """ parser = argparse.ArgumentParser( - description="Summarize run queue (schedular) latency as a histogram", + description="Summarize run queue (scheduler) latency as a histogram", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=examples) parser.add_argument("-T", "--timestamp", action="store_true", @@ -46,6 +46,10 @@ help="millisecond histogram") parser.add_argument("-P", "--pids", action="store_true", help="print a histogram per process ID") +# PID options are --pid and --pids, so namespaces should be --pidns (not done +# yet) and --pidnss: +parser.add_argument("--pidnss", action="store_true", + help="print a histogram per PID namespace") parser.add_argument("-L", "--tids", action="store_true", help="print a histogram per thread ID") parser.add_argument("-p", "--pid", @@ -62,11 +66,19 @@ bpf_text = """ #include #include +#include +#include typedef struct pid_key { u64 id; // work around u64 slot; } pid_key_t; + +typedef struct pidns_key { + u64 id; // work around + u64 slot; +} pidns_key_t; + BPF_HASH(start, u32); STORAGE @@ -145,6 +157,13 @@ bpf_text = bpf_text.replace('STORE', 'pid_key_t key = {.id = ' + pid + ', .slot = bpf_log2l(delta)}; ' + 'dist.increment(key);') +elif args.pidnss: + section = "pidns" + bpf_text = bpf_text.replace('STORAGE', + 'BPF_HISTOGRAM(dist, pidns_key_t);') + bpf_text = bpf_text.replace('STORE', 'pidns_key_t key = ' + + '{.id = prev->nsproxy->pid_ns_for_children->ns.inum, ' + + '.slot = bpf_log2l(delta)}; dist.increment(key);') else: section = "" bpf_text = bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(dist);') diff --git a/tools/runqlat_example.txt b/tools/runqlat_example.txt index ee6335609be7..857e5165df37 100644 --- a/tools/runqlat_example.txt +++ b/tools/runqlat_example.txt @@ -486,12 +486,75 @@ tid = 7 [...] +And a --pidnss option (short for PID namespaces) will print for each PID +namespace, for analyzing container performance: + +# ./runqlat --pidnss -m +Tracing run queue latency... Hit Ctrl-C to end. +^C + +pidns = 4026532870 + msecs : count distribution + 0 -> 1 : 40 |****************************************| + 2 -> 3 : 1 |* | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 -> 63 : 2 |** | + 64 -> 127 : 5 |***** | + +pidns = 4026532809 + msecs : count distribution + 0 -> 1 : 67 |****************************************| + +pidns = 4026532748 + msecs : count distribution + 0 -> 1 : 63 |****************************************| + +pidns = 4026532687 + msecs : count distribution + 0 -> 1 : 7 |****************************************| + +pidns = 4026532626 + msecs : count distribution + 0 -> 1 : 45 |****************************************| + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 0 | | + 16 -> 31 : 0 | | + 32 -> 63 : 0 | | + 64 -> 127 : 3 |** | + +pidns = 4026531836 + msecs : count distribution + 0 -> 1 : 314 |****************************************| + 2 -> 3 : 1 | | + 4 -> 7 : 11 |* | + 8 -> 15 : 28 |*** | + 16 -> 31 : 137 |***************** | + 32 -> 63 : 86 |********** | + 64 -> 127 : 1 | | + +pidns = 4026532382 + msecs : count distribution + 0 -> 1 : 285 |****************************************| + 2 -> 3 : 5 | | + 4 -> 7 : 16 |** | + 8 -> 15 : 9 |* | + 16 -> 31 : 69 |********* | + 32 -> 63 : 25 |*** | + +Many of these distributions have two modes: the second, in this case, is +caused by capping CPU usage via CPU shares. + + USAGE message: # ./runqlat -h -usage: runqlat [-h] [-T] [-m] [-P] [-L] [-p PID] [interval] [count] +usage: runqlat.py [-h] [-T] [-m] [-P] [--pidnss] [-L] [-p PID] + [interval] [count] -Summarize run queue (schedular) latency as a histogram +Summarize run queue (scheduler) latency as a histogram positional arguments: interval output interval, in seconds @@ -502,6 +565,7 @@ optional arguments: -T, --timestamp include timestamp on output -m, --milliseconds millisecond histogram -P, --pids print a histogram per process ID + --pidnss print a histogram per PID namespace -L, --tids print a histogram per thread ID -p PID, --pid PID trace this PID only