From 7a249fa5c2ee5ed4584deb99a6e86e5149644eef Mon Sep 17 00:00:00 2001 From: Brian Martin Date: Tue, 28 Jan 2025 12:20:26 -0800 Subject: [PATCH 1/2] full breakdown of syscalls by cgroup Complete breakdown of syscalls by cgroup into each of the same categories we use for system-wide metrics. --- src/samplers/syscall/linux/counts/mod.bpf.c | 78 +++++++++++++++++++++ src/samplers/syscall/linux/counts/mod.rs | 31 +++++--- src/samplers/syscall/linux/counts/stats.rs | 66 ++++++++++++++--- 3 files changed, 157 insertions(+), 18 deletions(-) diff --git a/src/samplers/syscall/linux/counts/mod.bpf.c b/src/samplers/syscall/linux/counts/mod.bpf.c index b3d95756..cb6d9862 100644 --- a/src/samplers/syscall/linux/counts/mod.bpf.c +++ b/src/samplers/syscall/linux/counts/mod.bpf.c @@ -93,6 +93,60 @@ struct { __uint(max_entries, MAX_CGROUPS); } cgroup_syscall_write SEC(".maps"); +// per-cgroup syscalls - write +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_MMAPABLE); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CGROUPS); +} cgroup_syscall_poll SEC(".maps"); + +// per-cgroup syscalls - write +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_MMAPABLE); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CGROUPS); +} cgroup_syscall_lock SEC(".maps"); + +// per-cgroup syscalls - write +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_MMAPABLE); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CGROUPS); +} cgroup_syscall_time SEC(".maps"); + +// per-cgroup syscalls - write +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_MMAPABLE); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CGROUPS); +} cgroup_syscall_sleep SEC(".maps"); + +// per-cgroup syscalls - write +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_MMAPABLE); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CGROUPS); +} 
cgroup_syscall_socket SEC(".maps"); + +// per-cgroup syscalls - write +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_MMAPABLE); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CGROUPS); +} cgroup_syscall_yield SEC(".maps"); + SEC("tracepoint/raw_syscalls/sys_enter") int sys_enter(struct trace_event_raw_sys_enter *args) { @@ -138,6 +192,12 @@ int sys_enter(struct trace_event_raw_sys_enter *args) bpf_map_update_elem(&cgroup_syscall_other, &cgroup_id, &zero, BPF_ANY); bpf_map_update_elem(&cgroup_syscall_read, &cgroup_id, &zero, BPF_ANY); bpf_map_update_elem(&cgroup_syscall_write, &cgroup_id, &zero, BPF_ANY); + bpf_map_update_elem(&cgroup_syscall_poll, &cgroup_id, &zero, BPF_ANY); + bpf_map_update_elem(&cgroup_syscall_lock, &cgroup_id, &zero, BPF_ANY); + bpf_map_update_elem(&cgroup_syscall_time, &cgroup_id, &zero, BPF_ANY); + bpf_map_update_elem(&cgroup_syscall_sleep, &cgroup_id, &zero, BPF_ANY); + bpf_map_update_elem(&cgroup_syscall_socket, &cgroup_id, &zero, BPF_ANY); + bpf_map_update_elem(&cgroup_syscall_yield, &cgroup_id, &zero, BPF_ANY); // initialize the cgroup info struct cgroup_info cginfo = { @@ -167,6 +227,24 @@ int sys_enter(struct trace_event_raw_sys_enter *args) case 2: array_incr(&cgroup_syscall_write, cgroup_id); break; + case 3: + array_incr(&cgroup_syscall_poll, cgroup_id); + break; + case 4: + array_incr(&cgroup_syscall_lock, cgroup_id); + break; + case 5: + array_incr(&cgroup_syscall_time, cgroup_id); + break; + case 6: + array_incr(&cgroup_syscall_sleep, cgroup_id); + break; + case 7: + array_incr(&cgroup_syscall_socket, cgroup_id); + break; + case 8: + array_incr(&cgroup_syscall_yield, cgroup_id); + break; default: array_incr(&cgroup_syscall_other, cgroup_id); break; diff --git a/src/samplers/syscall/linux/counts/mod.rs b/src/samplers/syscall/linux/counts/mod.rs index 989f8cf8..380e13e0 100644 --- a/src/samplers/syscall/linux/counts/mod.rs +++ b/src/samplers/syscall/linux/counts/mod.rs @@ -2,15 +2,8 @@ 
/// * `raw_syscalls/sys_enter` /// /// And produces these stats: -/// * `syscall/total` -/// * `syscall/read` -/// * `syscall/write` -/// * `syscall/poll` -/// * `syscall/lock` -/// * `syscall/time` -/// * `syscall/sleep` -/// * `syscall/socket` -/// * `syscall/yield` +/// * `syscall` +/// * `cgroup_syscall` const NAME: &str = "syscall_counts"; @@ -63,7 +56,13 @@ fn handle_event(data: &[u8]) -> i32 { if !name.is_empty() { CGROUP_SYSCALL_OTHER.insert_metadata(id as usize, "name".to_string(), name.clone()); CGROUP_SYSCALL_READ.insert_metadata(id as usize, "name".to_string(), name.clone()); - CGROUP_SYSCALL_WRITE.insert_metadata(id as usize, "name".to_string(), name); + CGROUP_SYSCALL_WRITE.insert_metadata(id as usize, "name".to_string(), name.clone()); + CGROUP_SYSCALL_POLL.insert_metadata(id as usize, "name".to_string(), name.clone()); + CGROUP_SYSCALL_LOCK.insert_metadata(id as usize, "name".to_string(), name.clone()); + CGROUP_SYSCALL_TIME.insert_metadata(id as usize, "name".to_string(), name.clone()); + CGROUP_SYSCALL_SLEEP.insert_metadata(id as usize, "name".to_string(), name.clone()); + CGROUP_SYSCALL_SOCKET.insert_metadata(id as usize, "name".to_string(), name.clone()); + CGROUP_SYSCALL_YIELD.insert_metadata(id as usize, "name".to_string(), name); } } @@ -94,6 +93,12 @@ fn init(config: Arc) -> SamplerResult { .packed_counters("cgroup_syscall_other", &CGROUP_SYSCALL_OTHER) .packed_counters("cgroup_syscall_read", &CGROUP_SYSCALL_READ) .packed_counters("cgroup_syscall_write", &CGROUP_SYSCALL_WRITE) + .packed_counters("cgroup_syscall_poll", &CGROUP_SYSCALL_POLL) + .packed_counters("cgroup_syscall_lock", &CGROUP_SYSCALL_LOCK) + .packed_counters("cgroup_syscall_time", &CGROUP_SYSCALL_TIME) + .packed_counters("cgroup_syscall_sleep", &CGROUP_SYSCALL_SLEEP) + .packed_counters("cgroup_syscall_socket", &CGROUP_SYSCALL_SOCKET) + .packed_counters("cgroup_syscall_yield", &CGROUP_SYSCALL_YIELD) .ringbuf_handler("cgroup_info", handle_event) .build()?; @@ -107,6 +112,12 @@ 
impl SkelExt for ModSkel<'_> { "cgroup_syscall_other" => &self.maps.cgroup_syscall_other, "cgroup_syscall_read" => &self.maps.cgroup_syscall_read, "cgroup_syscall_write" => &self.maps.cgroup_syscall_write, + "cgroup_syscall_poll" => &self.maps.cgroup_syscall_poll, + "cgroup_syscall_lock" => &self.maps.cgroup_syscall_lock, + "cgroup_syscall_time" => &self.maps.cgroup_syscall_time, + "cgroup_syscall_sleep" => &self.maps.cgroup_syscall_sleep, + "cgroup_syscall_socket" => &self.maps.cgroup_syscall_socket, + "cgroup_syscall_yield" => &self.maps.cgroup_syscall_yield, "counters" => &self.maps.counters, "syscall_lut" => &self.maps.syscall_lut, _ => unimplemented!(), diff --git a/src/samplers/syscall/linux/counts/stats.rs b/src/samplers/syscall/linux/counts/stats.rs index 7380556c..f53ac07c 100644 --- a/src/samplers/syscall/linux/counts/stats.rs +++ b/src/samplers/syscall/linux/counts/stats.rs @@ -2,6 +2,17 @@ use metriken::*; use crate::common::*; +/* + * system-wide + */ + +#[metric( + name = "syscall", + description = "The total number of syscalls", + metadata = { unit = "syscalls", op = "other" } +)] +pub static SYSCALL_OTHER: LazyCounter = LazyCounter::new(Counter::default); + #[metric( name = "syscall", description = "The number of read related syscalls (read, recvfrom, ...)", @@ -58,30 +69,69 @@ pub static SYSCALL_SOCKET: LazyCounter = LazyCounter::new(Counter::default); )] pub static SYSCALL_YIELD: LazyCounter = LazyCounter::new(Counter::default); +/* + * per-cgroup + */ + #[metric( - name = "syscall", - description = "The total number of syscalls", + name = "cgroup_syscall", + description = "The total number of syscalls on a per-cgroup basis", metadata = { unit = "syscalls", op = "other" } )] -pub static SYSCALL_OTHER: LazyCounter = LazyCounter::new(Counter::default); +pub static CGROUP_SYSCALL_OTHER: CounterGroup = CounterGroup::new(MAX_CGROUPS); #[metric( name = "cgroup_syscall", - description = "The number of read related syscalls (read, recvfrom, ...)", + 
description = "The number of read related syscalls on a per-cgroup basis (read, recvfrom, ...)", metadata = { unit = "syscalls", op = "read" } )] pub static CGROUP_SYSCALL_READ: CounterGroup = CounterGroup::new(MAX_CGROUPS); #[metric( name = "cgroup_syscall", - description = "The number of write related syscalls (write, sendto, ...)", + description = "The number of write related syscalls on a per-cgroup basis (write, sendto, ...)", metadata = { unit = "syscalls", op = "write" } )] pub static CGROUP_SYSCALL_WRITE: CounterGroup = CounterGroup::new(MAX_CGROUPS); #[metric( name = "cgroup_syscall", - description = "The total number of syscalls on a per-cgroup basis", - metadata = { unit = "syscalls", op = "other" } + description = "The number of poll related syscalls on a per-cgroup basis (poll, select, epoll, ...)", + metadata = { unit = "syscalls", op = "poll" } )] -pub static CGROUP_SYSCALL_OTHER: CounterGroup = CounterGroup::new(MAX_CGROUPS); +pub static CGROUP_SYSCALL_POLL: CounterGroup = CounterGroup::new(MAX_CGROUPS); + +#[metric( + name = "cgroup_syscall", + description = "The number of lock related syscalls on a per-cgroup basis (futex, ...)", + metadata = { unit = "syscalls", op = "lock" } +)] +pub static CGROUP_SYSCALL_LOCK: CounterGroup = CounterGroup::new(MAX_CGROUPS); + +#[metric( + name = "cgroup_syscall", + description = "The number of time related syscalls on a per-cgroup basis (clock_gettime, clock_settime, clock_getres, ...)", + metadata = { unit = "syscalls", op = "time" } +)] +pub static CGROUP_SYSCALL_TIME: CounterGroup = CounterGroup::new(MAX_CGROUPS); + +#[metric( + name = "cgroup_syscall", + description = "The number of sleep related syscalls on a per-cgroup basis (nanosleep, clock_nanosleep, ...)", + metadata = { unit = "syscalls", op = "sleep" } +)] +pub static CGROUP_SYSCALL_SLEEP: CounterGroup = CounterGroup::new(MAX_CGROUPS); + +#[metric( + name = "cgroup_syscall", + description = "The number of socket related syscalls on a per-cgroup basis 
(accept, connect, bind, setsockopt, ...)", + metadata = { unit = "syscalls", op = "socket" } +)] +pub static CGROUP_SYSCALL_SOCKET: CounterGroup = CounterGroup::new(MAX_CGROUPS); + +#[metric( + name = "cgroup_syscall", + description = "The number of yield related syscalls on a per-cgroup basis (sched_yield, ...)", + metadata = { unit = "syscalls", op = "yield" } +)] +pub static CGROUP_SYSCALL_YIELD: CounterGroup = CounterGroup::new(MAX_CGROUPS); From db672f3fd90e5e61db367087f8b4e8bdae749c82 Mon Sep 17 00:00:00 2001 From: Brian Martin Date: Tue, 28 Jan 2025 13:21:49 -0800 Subject: [PATCH 2/2] fix comments --- src/samplers/syscall/linux/counts/mod.bpf.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/samplers/syscall/linux/counts/mod.bpf.c b/src/samplers/syscall/linux/counts/mod.bpf.c index cb6d9862..43631db8 100644 --- a/src/samplers/syscall/linux/counts/mod.bpf.c +++ b/src/samplers/syscall/linux/counts/mod.bpf.c @@ -66,7 +66,10 @@ struct { __uint(max_entries, MAX_SYSCALL_ID); } syscall_lut SEC(".maps"); -// per-cgroup syscalls - other +/* + * per-cgroup counters + */ + struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_MMAPABLE); @@ -75,7 +78,6 @@ struct { __uint(max_entries, MAX_CGROUPS); } cgroup_syscall_other SEC(".maps"); -// per-cgroup syscalls - read struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_MMAPABLE); @@ -84,7 +86,6 @@ struct { __uint(max_entries, MAX_CGROUPS); } cgroup_syscall_read SEC(".maps"); -// per-cgroup syscalls - write struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_MMAPABLE); @@ -93,7 +94,6 @@ struct { __uint(max_entries, MAX_CGROUPS); } cgroup_syscall_write SEC(".maps"); -// per-cgroup syscalls - write struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_MMAPABLE); @@ -102,7 +102,6 @@ struct { __uint(max_entries, MAX_CGROUPS); } cgroup_syscall_poll SEC(".maps"); -// per-cgroup syscalls - write struct { __uint(type, BPF_MAP_TYPE_ARRAY); 
__uint(map_flags, BPF_F_MMAPABLE); @@ -111,7 +110,6 @@ struct { __uint(max_entries, MAX_CGROUPS); } cgroup_syscall_lock SEC(".maps"); -// per-cgroup syscalls - write struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_MMAPABLE); @@ -120,7 +118,6 @@ struct { __uint(max_entries, MAX_CGROUPS); } cgroup_syscall_time SEC(".maps"); -// per-cgroup syscalls - write struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_MMAPABLE); @@ -129,7 +126,6 @@ struct { __uint(max_entries, MAX_CGROUPS); } cgroup_syscall_sleep SEC(".maps"); -// per-cgroup syscalls - write struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_MMAPABLE); @@ -138,7 +134,6 @@ struct { __uint(max_entries, MAX_CGROUPS); } cgroup_syscall_socket SEC(".maps"); -// per-cgroup syscalls - write struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(map_flags, BPF_F_MMAPABLE);