diff --git a/MagicEyes/src/cpu_watcher/Makefile b/MagicEyes/src/cpu_watcher/Makefile
new file mode 100644
index 000000000..6d2efd924
--- /dev/null
+++ b/MagicEyes/src/cpu_watcher/Makefile
@@ -0,0 +1,171 @@
+# Copyright 2023 The LMP Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# author: zhangziheng0525@163.com
+#
+# Builds the code in this folder: BPF objects, their skeletons, and the user-space binaries.
+
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+OUTPUT := .output
+CLANG ?= clang
+LIBBPF_SRC := $(abspath ../../libbpf/src)
+BPFTOOL_SRC := $(abspath ../../bpftool/src)
+LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
+BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
+BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool
+LIBBLAZESYM_SRC := $(abspath ../../blazesym/)
+LIBBLAZESYM_INC := $(abspath $(LIBBLAZESYM_SRC)/include)
+LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym.a)
+ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \
+                         | sed 's/arm.*/arm/' \
+                         | sed 's/aarch64/arm64/' \
+                         | sed 's/ppc64le/powerpc/' \
+                         | sed 's/mips.*/mips/' \
+                         | sed 's/riscv64/riscv/' \
+                         | sed 's/loongarch64/loongarch/')
+VMLINUX := ../vmlinux/$(ARCH)/vmlinux.h
+# Use our own libbpf API headers and Linux UAPI headers distributed with
+# libbpf to avoid dependency on system-wide headers, which could be missing or
+# outdated
+INCLUDES := -I$(OUTPUT) -I../../../../libbpf/include/uapi -I$(dir $(VMLINUX)) -I$(LIBBLAZESYM_INC) -I./include
+CFLAGS := -g -Wall
+ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)
+
+APPS =cs_delay sar sc_delay preempt schedule_delay mq_delay mutrace
+TARGETS=cpu_watcher
+CONTROLLER := controller
+
+SRC_DIR = ./include
+
+
+# Get Clang's default includes on this system. We'll explicitly add these dirs
+# to the includes list when compiling with `-target bpf` because otherwise some
+# architecture-specific dirs will be "missing" on some architectures/distros -
+# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
+# sys/cdefs.h etc. might be missing.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+ifeq ($(V),1)
+  Q =
+  msg =
+else
+  Q = @
+  msg = @printf ' %-8s %s%s\n' \
+        "$(1)" \
+        "$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \
+        "$(if $(3), $(3))";
+  MAKEFLAGS += --no-print-directory
+endif
+
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+$(call allow-override,CC,$(CROSS_COMPILE)cc)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+
+.PHONY: all SUCCESS_MESSAGE
+all: $(CONTROLLER) $(TARGETS)
+
+.PHONY: clean
+clean:
+	$(call msg,CLEAN)
+	$(Q)rm -rf $(OUTPUT) $(TARGETS) $(CONTROLLER)
+
+$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
+	$(call msg,MKDIR,$@)
+	$(Q)mkdir -p $@
+
+# Build the static libbpf archive from the libbpf sources at $(LIBBPF_SRC)
+$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
+	$(call msg,LIB,$@)
+	$(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \
+		OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \
+		INCLUDEDIR= LIBDIR= UAPIDIR= \
+		install
+
+# Build the bootstrap bpftool (ARCH/CROSS_COMPILE are cleared for the sub-make)
+$(BPFTOOL): | $(BPFTOOL_OUTPUT)
+	$(call msg,BPFTOOL,$@)
+	$(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap
+
+# Build blazesym with cargo; CARGO may be overridden, otherwise plain `cargo` is used
+$(LIBBLAZESYM_SRC)/target/release/libblazesym.a::
+	$(Q)cd $(LIBBLAZESYM_SRC) && $(or $(CARGO),cargo) build --release
+
+$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
+	$(call msg,LIB, $@)
+	$(Q)cp $(LIBBLAZESYM_SRC)/target/release/libblazesym.a $@
+
+# Build BPF code: compile to a temporary object, then let bpftool produce the final .bpf.o
+$(OUTPUT)/%.bpf.o: bpf/%.bpf.c $(LIBBPF_OBJ) $(wildcard %.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
+	$(call msg,BPF,$@)
+	$(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \
+		$(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \
+		-c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
+	$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
+
+# Generate BPF skeletons (the app names are phony, so skeletons are regenerated on every run)
+.PHONY: $(APPS)
+$(APPS): %: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL)
+	$(call msg,GEN-SKEL,$@)
+	$(Q)$(BPFTOOL) gen skeleton $< > $(OUTPUT)/$@.skel.h
+
+# Build user-space code
+$(OUTPUT)/%.o: $(SRC_DIR)/%.c | $(OUTPUT)
+	$(call msg,CC,$@)
+	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
+
+$(OUTPUT)/$(CONTROLLER).o: $(CONTROLLER).c | $(OUTPUT)
+	$(call msg,CC,$@)
+	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
+
+$(OUTPUT)/$(TARGETS).o: $(TARGETS).c $(APPS) | $(OUTPUT)
+	$(call msg,CC,$@)
+	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
+
+# Build application binaries (COMMON_OBJ is not set in this file and expands to nothing)
+$(CONTROLLER): %: $(OUTPUT)/%.o $(COMMON_OBJ) $(LIBBPF_OBJ) | $(OUTPUT)
+	$(call msg,BINARY,$@)
+	$(Q)$(CC) $^ $(ALL_LDFLAGS) -lstdc++ -lelf -lz -o $@
+
+$(TARGETS): %: $(OUTPUT)/%.o $(COMMON_OBJ) $(LIBBPF_OBJ) | $(OUTPUT)
+	$(call msg,BINARY,$@)
+	$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lstdc++ -lelf -lz -o $@
+
+SUCCESS_MESSAGE:
+	@echo "\e[38;2;255;0;0m __ __ \e[0m"
+	@echo "\e[38;2;255;128;0m _________ __ __ _ ______ _/ /______/ /_ ___ _____\e[0m"
+	@echo "\e[38;2;255;255;0m / ___/ __ \/ / / / | | /| / / __ / __/ ___/ __ \/ _ \/ ___/\e[0m"
+	@echo "\e[38;2;128;255;0m/ /__/ /_/ / /_/ / | |/ |/ / /_/ / /_/ /__/ / / / __/ / \e[0m"
+	@echo "\e[38;2;0;255;0m\___/ .___/\__,_/ |__/|__/\__,_/\__/\___/_/ /_/\___/_/ \e[0m"
+	@echo "\e[38;2;0;255;128m /_/ \e[0m"
+	@echo "\e[38;2;0;255;255mSuccessful to compile cpu_watcher tools: \e[0m"
+	@echo
"\e[38;2;0;255;255mPlease start your use ~ \e[0m" + + +all: $(TARGETS) SUCCESS_MESSAGE + + +# delete failed targets +.DELETE_ON_ERROR: + +# keep intermediate (.skel.h, .bpf.o, etc) targets +.SECONDARY: \ No newline at end of file diff --git a/MagicEyes/src/cpu_watcher/README.md b/MagicEyes/src/cpu_watcher/README.md new file mode 100644 index 000000000..ce938ded0 --- /dev/null +++ b/MagicEyes/src/cpu_watcher/README.md @@ -0,0 +1,255 @@ +# cpu_watcher:动态CPU指标实时监测 + +## 一、项目简介 + +​ `CPU_Watcher`是一项基于`eBPF(Extended Berkeley Packet Filter)`技术的项目,旨在实现对`CPU`各项指标的实时动态监测和分析,可以清晰且直观的了解CPU资源利用率以及事件的发生的速率。 + +​ 本工具使用ebpf工具进行实现,`eBPF`是`Linux`内核中的一种强大的工具,它允许在内核空间执行小型程序,用于在运行时过滤、转发和监控系统事件。 + +​ `CPU_Watcher`利用`eBPF`的这一特性,通过在内核空间执行精简的程序来捕获`CPU`相关的事件和指标,从而实现对`CPU`性能的实时监测和分析。 + +## 二、使用方法 + +### 1.使用环境 + +- OS:Ubuntu 22.04 +- kernel:Linux 6.2 + +### 2.编译运行 + +```shell +make 编译 +sudo ./cpu_watcher -[options] 运行 +make clean 清除生成文件 +``` + +## 三、功能介绍 + +​ `cpu_watcher`是一个用于监视系统 CPU 使用情况的工具,它可以帮助用户了解系统在不同负载下的性能表现,并提供详细的统计数据。该工具分为以下几个部分,通过不同的参数控制相关的`ebpf`捕获程序是否加载到内核中: + +| 参数 | 描述 | +| :----------------: | :----------------------------------------: | +| -s :SAR | 实时采集SAR的各项指标 | +| -i:interval | 修改SAR功能的输出间隔 | +| -P:percent | 按照百分比输出SAR功能的各项指标 | +| -p:preempt_time | 实时采集当前系统的每次抢占调度详细信息 | +| -d:schedule_delay | 实时采集当前系统的调度时延 | +| -S:syscall_delay | 实时采集当前系统调用时间 | +| -m:mq_delay | 实时采集当前消息队列通信时延 | +| -c:cs_delay | 实时对内核函数schedule()的执行时长进行测试 | + +### 1.SAR 统计功能: + +```shell +./cpu_watcher -s +``` + +#### 输出效果: + +``` + time proc/s cswch/s runqlen irqTime/us softirq/us idle/ms kthread/us sysc/ms utime/ms sys/ms +16:18:03 29 1216 1 1277 19394 1087 2908 662 747 665 +16:18:04 43 2036 2 1262 24823 1432 3981 72 171 76 +16:18:05 0 1371 2 4927 16949 1152 2489 538 636 541 +16:18:06 11 2569 4 10900 9085 518 2967 941 1121 944 +16:18:07 3 5166 4 9929 15864 469 10778 482 1020 493 +16:18:08 30 2426 2 2436 17877 1435 5086 90 262 96 +16:18:09 43 1257 1 351 20457 1713 3040 8 40 11 +16:18:10 0 813 1 20071 
30563 1727 117472 41 0 159 +16:18:11 0 751 1 748 14532 1855 3935 16 50 20 +16:18:12 0 1118 1 1115 20750 1733 1956 1 50 3 +16:18:13 29 1083 1 286 18081 1698 3861 50 10 54 +16:18:14 43 1032 1 577 19513 1704 3919 26 10 30 +``` + +​ 使用参数i可以调整输出间隔,默认为1s,参数p可以按照cpu核数和自定义的输出间隔对数据进行归一化,并以百分比的形式输出,且大于60%的数据会标红输出: + +```shell +./cpu_watcher -s -i 2 -P +``` +#### 输出效果: + +![image13](image/image13.png) + + +对上述参数的解释: + +- `proc/s`: 每秒创建的进程数,此数值是通过fork数来统计的。 +- `cswch/s`: 每秒上下文切换数。 +- `runqlen`:此时CPU的运行队列的长度。 +- `irqTime/us`:CPU响应`irq`中断所占用的时间,是所有CPU时间的叠加。 +- `softirq/us`: CPU执行`softirq`所占用的时间,是所有CPU时间的叠加。 +- `idle/ms`: CPU处于空闲状态的时间,是所有CPU时间的叠加。 +- `kthread/us`: CPU执行内核线程所占用的时间,是所有CPU的叠加。不包括IDLE-0进程,因为此进程只执行空闲指令使CPU闲置。 +- `sysc/ms`: CPU执行用户程序系统调用(`syscall`)所占用的时间,是所有CPU的叠加。 +- ` utime/ms`:CPU执行普通用户进程时,花在用户态的时间,是所有CPU的叠加。 + +原理介绍: + +[libbpf_sar工具原理分析](docs/libbpf_sar.md) + +### **2.统计抢占调度时间:** + +​ 统计系统中发生抢占调度的情况,包括抢占进程的`pid`与进程名,以及被强占进程的`pid`,和本次抢占时间,单位纳秒。 + +#### 输出效果: + +``` +COMM prev_pid next_pid duration_ns +node 14221 2589 3014 +kworker/u256:1 15144 13516 1277 +node 14221 2589 3115 +kworker/u256:1 15144 13516 1125 +kworker/u256:1 15144 13516 974 +node 14221 2589 2560 +kworker/u256:1 15144 13516 1132 +node 14221 2589 2717 +kworker/u256:1 15144 13516 1206 +kworker/u256:1 15144 13516 1131 +node 14221 2589 3355 +``` + +原理介绍: + +[抢占调度原理分析](docs/preempt_time.md) + +### 3.**统计调度延迟:** + +​ 分析系统中进程调度的延迟情况,提供相关统计数据,输出包括当前系统的最大调度延迟、最小调度延迟、平均调度延迟,以及对应进程的名字。 + +#### 输出效果: + +``` + TIME avg_delay/μs max_delay/μs max_proc_name min_delay/μs min_proc_name +22:06:02 642.770000 60711.755000 node 5.227000 cpu_watcher +22:06:03 510.041000 60711.755000 node 5.227000 cpu_watcher +22:06:04 491.107000 60711.755000 node 5.227000 cpu_watcher +22:06:05 468.128000 60711.755000 node 5.227000 cpu_watcher +22:06:06 454.244000 60711.755000 node 5.227000 cpu_watcher +22:06:07 472.455000 61931.163000 node 5.227000 cpu_watcher +22:06:08 441.756000 61931.163000 node 3.360000 cpu_watcher +22:06:09 442.631000 
61931.163000 node 3.360000 cpu_watcher +22:06:10 407.389000 61931.163000 node 2.549000 cpu_watcher +22:06:11 426.593000 62247.982000 node 2.549000 cpu_watcher +``` +原理介绍: + +[调度延迟原理分析](docs/schedule_delay.md) + +### 4.**统计系统调用响应时间:** + +​ 记录系统调用的响应时间,帮助用户评估系统对外部请求的处理效率, 其输出包括发起本次系统调用的进程的进程名、pid、系统调用号以及响应时间。 + +#### 输出效果: + +``` +Time Pid comm syscall_id delay/us +21:28:07 276073 cpu_watcher 1 21 +21:28:07 2579 node 0 7 +21:28:07 276073 cpu_watcher 4 8 +21:28:07 2579 node 232 6 +21:28:07 2579 node 0 6 +21:28:07 276073 cpu_watcher 1 22 +21:28:07 276073 cpu_watcher 4 8 +``` + +### 5.**统计消息队列延迟:** + +​ 统计进程间通过消息队列通信时,消息块从发送到接收的延迟情况,以便用户了解系统中进程间通信的效率和延迟,其输出内容包括发消息动作(mq_send)的延迟、接收消息动作(mq_receive)的延迟、消息块从发送到接收过程的延迟。 + +#### 输出效果: + +```c + Time Mqdes SND_PID RCV_PID SND_Delay/ms RCV_Delay/ms Delay/ms +21:40:36 3 281101 281167 0.02 0.02 2161.58542 +21:40:46 3 281432 281493 0.02 0.03 1373.68176 +21:40:52 3 281680 281741 0.03 0.05 1494.31408 +21:40:58 3 281909 281945 0.03 0.02 1434.06373 +21:41:01 3 282019 282088 0.03 0.02 1401.26321 +``` + +原理介绍: + +[消息队列延迟原理分析](docs/mq_delay.md) + +### 6.对内核函数schedule()的执行时长进行测试 + +​ 统计每次调度的执行时间,可以输出本次调度的时间,单位为微秒,并用直方图展示汇总结果: + +#### 输出效果: + +``` +t1:4817139183 t2:4817139248 delay:65 +t1:4817139255 t2:4817139319 delay:64 +t1:4817139454 t2:4817139505 delay:51 +t1:4817139512 t2:4817139557 delay:45 +t1:4817139675 t2:4817139735 delay:60 +t1:4817139742 t2:4817139800 delay:58 +t1:4817139936 t2:4817139998 delay:62 +t1:4817140005 t2:4817140065 delay:60 +t1:4817140488 t2:4817140552 delay:64 +t1:4817140559 t2:4817140621 delay:62 +t1:4817140816 t2:4817140878 delay:62 +t1:4817141241 t2:4817141303 delay:62 +``` + +```c +Time : 21:46:45 +cs_delay Count Distribution +0 => 1 585 | +2 => 3 856 | +4 => 7 2271 |** +8 => 15 5792 |***** +16 => 31 8641 |******** +32 => 63 9762 |********* +64 => 127 2041 |** +128 => 255 2158 |** +256 => 511 2075 |** +512 => 1023 751 | +1024 => 2047 301 | +2048 => 4095 112 | +4096 => 8191 36 | +8192 => 16383 0 | +16384 => 32767 
0 | +32768 => 65535 0 | +65536 => 131071 0 | +131072 => 262143 0 | +262144 => 524287 0 | +524288 => 1048575 0 | +per_len = 1000 +``` + + + +## 四、实现方式 + +### 1.使用kprobe捕获内核函数的参数 + +​ 使用kprobe、kretprobe捕获挂载的内核函数的参数,从参数中提取有效的数据。比如从finish_task_switch.isra.0内核函数的参数中拿取关于prev进程的相关信息。 + +### 2.使用内核提供的tracepoint捕获特定时间 + +​ 使用tracepoint捕获特定状态的开始和结束,计算持续时间。比如softirq运行时间就是通过内核提供的tracepoint计算的。 + +### 3.获取内核全局变量 + +​ 获取内核全局变量,直接从内核全局变量读取信息。如proc/s就是通过直接读取total_forks内核全局变量来计算每秒产生进程数的。 + +## 五、cpu_watcher可视化 + +[cpu_watcher可视化指南](docs/cpu_watcher_vis_guide.md) + +## 六、未来展望 + +目前`cpu_watcher`工具的总体框架已经完成,工具所能满足的功能已覆盖CPU所涉及的大部分性能指标。下一阶段,本工具将从以下几个方向进行开发和优化: + +* 完善工具可视化; +* 功能模块化; +* 更细粒度的提取CPU相关指标; +* 完善工具,使其适配更多场景; + + + +如果你也对cpu_watcher或ebpf感兴趣,欢迎加入我们一起开发cpu_watcher工具,希望我们可以共同成长。 + +**cpu_watcher负责人:** albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com \ No newline at end of file diff --git a/MagicEyes/src/cpu_watcher/bpf/cs_delay.bpf.c b/MagicEyes/src/cpu_watcher/bpf/cs_delay.bpf.c new file mode 100644 index 000000000..caaf8c408 --- /dev/null +++ b/MagicEyes/src/cpu_watcher/bpf/cs_delay.bpf.c @@ -0,0 +1,78 @@ +// Copyright 2023 The LMP Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "cpu_watcher.h"
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
+const int ctrl_key = 0;
+// start: one-slot array holding the schedule() entry timestamp in microseconds; cs_ctrl_map: one-slot control record.
+BPF_ARRAY(start,int,u64,1);
+BPF_ARRAY(cs_ctrl_map,int,struct cs_ctrl,1);
+struct {
+    __uint(type, BPF_MAP_TYPE_RINGBUF);
+    __uint(max_entries, 256 * 1024);
+} rb SEC(".maps");
+
+static inline struct cs_ctrl *get_cs_ctrl(void) { /* returns NULL when the control slot is missing or cs_func is unset */
+    struct cs_ctrl *cs_ctrl;
+    cs_ctrl = bpf_map_lookup_elem(&cs_ctrl_map, &ctrl_key);
+    if (!cs_ctrl || !cs_ctrl->cs_func) {
+        return NULL;
+    }
+    return cs_ctrl;
+}
+
+SEC("kprobe/schedule") /* entry probe: record the current time (us) in slot 0 of `start` */
+int BPF_KPROBE(schedule)
+{
+    struct cs_ctrl *cs_ctrl = get_cs_ctrl(); /* NOTE(review): result is never checked, so the probe runs regardless of cs_func - confirm intent */
+    u64 t1;
+    t1 = bpf_ktime_get_ns()/1000;
+    int key =0; /* NOTE(review): one slot is shared by every CPU/task, so entry/exit pairs can interleave - confirm */
+    bpf_map_update_elem(&start,&key,&t1,BPF_ANY);
+    return 0;
+}
+
+SEC("kretprobe/schedule") /* return probe: emit {t1, t2, delay} in microseconds through the ring buffer */
+int BPF_KRETPROBE(schedule_exit)
+{
+    struct cs_ctrl *cs_ctrl = get_cs_ctrl(); /* unused, see note on the entry probe */
+    u64 t2 = bpf_ktime_get_ns()/1000;
+    u64 t1,delay;
+    int key = 0;
+    u64 *val = bpf_map_lookup_elem(&start,&key);
+    if (val != 0)
+    {
+        t1 = *val;
+        delay = t2 - t1;
+        bpf_map_delete_elem(&start, &key); /* NOTE(review): `start` is declared via BPF_ARRAY; if that is a plain array map this delete is rejected by the kernel - confirm */
+    }else{
+        return 0;
+    }
+    struct event *e;
+    e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
+    if (!e) return 0; /* ring buffer full: drop this sample */
+    e->t1=t1;
+    e->t2=t2;
+    e->delay=delay;
+    bpf_ringbuf_submit(e, 0);
+    return 0;
+}
+
diff --git a/MagicEyes/src/cpu_watcher/bpf/mq_delay.bpf.c b/MagicEyes/src/cpu_watcher/bpf/mq_delay.bpf.c
new file mode 100644
index 000000000..3aa9dd92e
--- /dev/null
+++ b/MagicEyes/src/cpu_watcher/bpf/mq_delay.bpf.c
@@ -0,0 +1,248 @@
+// Copyright 2023 The LMP Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h> // BPF helper function declarations
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include "cpu_watcher.h"
+
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
+const int ctrl_key = 0;
+BPF_HASH(send_msg1,pid_t,struct send_events,1024);// caller id -> in-flight send record (arguments of do_mq_timedsend)
+BPF_HASH(send_msg2,u64,struct send_events,1024);// kernel message pointer -> send record (carries the timestamps)
+BPF_HASH(rcv_msg1,pid_t,struct rcv_events,1024);// caller id -> in-flight receive record (arguments of do_mq_timedreceive)
+BPF_ARRAY(mq_ctrl_map,int,struct mq_ctrl,1);
+struct {
+    __uint(type, BPF_MAP_TYPE_RINGBUF);
+    __uint(max_entries, 256 * 1024);
+} rb SEC(".maps");
+
+static inline struct mq_ctrl *get_mq_ctrl(void) { /* returns NULL when the control slot is missing or mq_func is unset */
+    struct mq_ctrl *mq_ctrl;
+    mq_ctrl = bpf_map_lookup_elem(&mq_ctrl_map, &ctrl_key);
+    if (!mq_ctrl || !mq_ctrl->mq_func) {
+        return NULL;
+    }
+    return mq_ctrl;
+}
+
+
+// int print_send_info(struct send_events * mq_send_info,int flag){
+//     bpf_printk("---------------------test----------------------------test--------------------------test--------------------------------------------test---------------------test---------------------test\n");
+//     bpf_printk("send_msg_prio: %-8lu\n",mq_send_info->msg_prio);
+//     bpf_printk("mqdes: %-08lu send_pid: %-08lu send_enter_time: %-16lu\n",mq_send_info->mqdes,mq_send_info->send_pid,mq_send_info->send_enter_time);
+//     if(flag > 0){
+//         bpf_printk("u_msg_ptr: 0x%08lx src: 0x%08lx\n",mq_send_info->u_msg_ptr,mq_send_info->src);
+//         if(flag==2) bpf_printk("Key_msg_ptr: 0x%08lx \n",mq_send_info->Key_msg_ptr);
+//     }
+//     bpf_printk("---------------------test----------------------------test--------------------------test--------------------------------------------test---------------------test---------------------test\n");
+//     return 0;
+// }
+
+// int print_rcv_info(struct rcv_events * mq_rcv_info,int flag){
+//     bpf_printk("---------------------test----------------------------test--------------------------test--------------------------------------------test---------------------test---------------------test\n");
+//     bpf_printk("rcv_msg_prio: %-8lu\n",mq_rcv_info->msg_prio);
+//     bpf_printk("mqdes: %-08lu rcv_pid: %-08lu rcv_enter_time: %-16lu\n",mq_rcv_info->mqdes,mq_rcv_info->rcv_pid,mq_rcv_info->rcv_enter_time);
+//     if(flag > 0){
+//         bpf_printk("u_msg_ptr: 0x%08lx dest: 0x%08lx\n",mq_rcv_info->u_msg_ptr,mq_rcv_info->dest);
+//         if(flag==2) bpf_printk("Key_msg_ptr: 0x%08lx \n",mq_rcv_info->Key_msg_ptr);
+//     }
+//     bpf_printk("---------------------test----------------------------test--------------------------test--------------------------------------------test---------------------test---------------------test\n");
+//     return 0;
+// }
+
+
+/* Entry of do_mq_timedsend: stash send time, caller id, mqdes, u_msg_ptr, msg_len and msg_prio in send_msg1. */
+SEC("kprobe/do_mq_timedsend")
+int BPF_KPROBE(mq_timedsend,mqd_t mqdes, const char *u_msg_ptr,
+        size_t msg_len, unsigned int msg_prio,
+        struct timespec64 *ts)
+{
+    struct mq_ctrl *mq_ctrl = get_mq_ctrl(); /* NOTE(review): result is never checked, so the probe runs regardless of mq_func - confirm intent */
+    u64 send_enter_time = bpf_ktime_get_ns();// time the send call starts
+    int pid = bpf_get_current_pid_tgid();// sender id (low 32 bits of pid_tgid)
+
+    /* stage the call's arguments in a send_events record */
+    struct send_events mq_send_info ={};
+    mq_send_info.send_pid= pid;
+    mq_send_info.send_enter_time = send_enter_time;
+    mq_send_info.mqdes= mqdes;
+    mq_send_info.msg_len = msg_len;
+    mq_send_info.msg_prio = msg_prio;
+    mq_send_info.u_msg_ptr = u_msg_ptr;
+
+    bpf_map_update_elem(&send_msg1, &pid, &mq_send_info, BPF_ANY);// caller id -> staged record
+    return 0;
+}
+
+/* Entry of load_msg: only records the `src` argument into the caller's staged send record. */
+SEC("kprobe/load_msg")
+int BPF_KPROBE(load_msg_enter,const void *src, size_t len){
+    struct mq_ctrl *mq_ctrl = get_mq_ctrl();
+    int pid = bpf_get_current_pid_tgid();// sender id
+    /* remember the src argument of load_msg */
+    struct send_events *mq_send_info = bpf_map_lookup_elem(&send_msg1, &pid);
+    if(!mq_send_info){
+        return 0;
+    }else{
+        mq_send_info->src = src;
+    }
+    return 0;
+}
+
+/* Return of load_msg: use the returned message pointer as key and publish message -> send record in send_msg2. */
+SEC("kretprobe/load_msg")
+int BPF_KRETPROBE(load_msg_exit,void *ret){
+    struct mq_ctrl *mq_ctrl = get_mq_ctrl();
+    int pid = bpf_get_current_pid_tgid();// sender id
+    /* fetch the staged record; the returned message pointer becomes its key */
+    struct send_events *mq_send_info = bpf_map_lookup_elem(&send_msg1, &pid);
+    if(!mq_send_info){
+        return 0;
+    }
+
+    /*make key*/
+    u64 Key_msg_ptr;
+    if(mq_send_info->u_msg_ptr == mq_send_info->src && pid == mq_send_info->send_pid){
+        /* this load_msg call was made on behalf of do_mq_timedsend */
+        Key_msg_ptr =(u64)ret;
+        mq_send_info->Key_msg_ptr = Key_msg_ptr;
+    }
+    else {
+        return 0;
+    }
+    /* key is known: publish the record under it */
+    bpf_map_update_elem(&send_msg2, &Key_msg_ptr, mq_send_info, BPF_ANY);// message pointer -> send record
+    return 0;
+}
+
+SEC("kretprobe/do_mq_timedsend") /* return of do_mq_timedsend: stamp send_exit_time and drop the staged record */
+int BPF_KRETPROBE(do_mq_timedsend_exit,void *ret)
+{
+    struct mq_ctrl *mq_ctrl = get_mq_ctrl();
+    bpf_printk("do_mq_timedsend_exit----------------------------------------------------------------\n"); /* NOTE(review): looks like a leftover debug trace - confirm before removing */
+    u64 send_exit_time = bpf_ktime_get_ns();// time the send call returns
+    int pid = bpf_get_current_pid_tgid();// sender id
+    u64 Key;
+
+    struct send_events *mq_send_info1 = bpf_map_lookup_elem(&send_msg1, &pid);
+    if(!mq_send_info1){
+        return 0;
+    }
+    Key = mq_send_info1->Key_msg_ptr;
+    struct send_events *mq_send_info2 = bpf_map_lookup_elem(&send_msg2, &Key);
+    if(mq_send_info2){
+        mq_send_info2->send_exit_time = send_exit_time;
+    }
+    /* always drop the staged entry, even when no message was published, so send_msg1 cannot fill with stale records */
+    bpf_map_delete_elem(&send_msg1,&pid);
+    return 0;
+}
+/*----------------------------------------------------------------------------- sender side (above) -----------------------------------------------------------------------------*/
+/* divider */
+/*----------------------------------------------------------------------------- receiver side (below) -----------------------------------------------------------------------------*/
+/* Entry of do_mq_timedreceive: stash receive time, caller id, mqdes and u_msg_ptr in rcv_msg1. */
+SEC("kprobe/do_mq_timedreceive")
+int BPF_KPROBE(mq_timedreceive_entry,mqd_t mqdes, const char __user *u_msg_ptr,
+        size_t msg_len, unsigned int msg_prio,
+        struct timespec64 *ts)
+{
+    struct mq_ctrl *mq_ctrl = get_mq_ctrl(); /* NOTE(review): result is never checked, so the probe runs regardless of mq_func - confirm intent */
+    u64 rcv_enter_time = bpf_ktime_get_ns();
+    int pid = bpf_get_current_pid_tgid();
+
+    /* fill in the staged receive record */
+    struct rcv_events mq_rcv_info ={};
+    mq_rcv_info.rcv_pid= pid;
+    mq_rcv_info.rcv_enter_time = rcv_enter_time;
+    mq_rcv_info.mqdes= mqdes;
+    mq_rcv_info.u_msg_ptr = u_msg_ptr;
+    bpf_map_update_elem(&rcv_msg1, &pid, &mq_rcv_info, BPF_ANY);// caller id -> staged record
+
+    return 0;
+}
+
+SEC("kprobe/store_msg") /* entry of store_msg: link the message being copied out to the caller's receive record */
+int BPF_KPROBE(store_msg,void __user *dest, struct msg_msg *msg, size_t len)
+{
+    struct mq_ctrl *mq_ctrl = get_mq_ctrl();
+    int pid = bpf_get_current_pid_tgid();
+
+    /*make key*/
+    u64 Key_msg_ptr = (u64)msg;
+    struct send_events *mq_send_info = bpf_map_lookup_elem(&send_msg2, &Key_msg_ptr);
+    if(!mq_send_info){
+        return 0;
+    }
+
+    struct rcv_events *mq_rcv_info = bpf_map_lookup_elem(&rcv_msg1, &pid);
+    if(!mq_rcv_info){
+        return 0;
+    }
+    /* record found: only accept it if dest and caller match the staged receive call */
+    if(dest == mq_rcv_info->u_msg_ptr && pid == mq_rcv_info->rcv_pid){
+        mq_rcv_info->Key_msg_ptr = Key_msg_ptr;
+        mq_rcv_info->dest = dest;
+        mq_rcv_info->msg_prio = BPF_CORE_READ(msg,m_type);
+        mq_rcv_info->msg_len = BPF_CORE_READ(msg,m_ts);
+    }else{
+        return 0;
+    }
+    return 0;
+}
+
+SEC("kretprobe/do_mq_timedreceive") /* return of do_mq_timedreceive: emit the combined send/receive timings */
+int BPF_KRETPROBE(do_mq_timedreceive_exit,void *ret){
+    struct mq_ctrl *mq_ctrl = get_mq_ctrl();
+    u64 rcv_exit_time = bpf_ktime_get_ns();
+    int pid = bpf_get_current_pid_tgid();
+    u64 Key;
+
+    /* look up the receive record, then the matching send record */
+    struct rcv_events *mq_rcv_info = bpf_map_lookup_elem(&rcv_msg1, &pid);
+    if(!mq_rcv_info){
+        return 0;
+    }
+    Key = mq_rcv_info->Key_msg_ptr;
+    struct send_events *mq_send_info = bpf_map_lookup_elem(&send_msg2,&Key);
+    if(!mq_send_info){
+        bpf_map_delete_elem(&rcv_msg1,&pid); /* no traced send for this message: still drop the staged record */
+        return 0;
+    }
+
+    /* hand the sample to user space through the ring buffer */
+    struct mq_events *e;
+    e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
+    if (!e) goto cleanup; /* ring buffer full: drop the sample but still release both map entries */
+    e->send_pid = mq_send_info->send_pid;
+    e->rcv_pid = pid;
+    e->mqdes = mq_send_info->mqdes;
+    e->msg_len = mq_send_info->msg_len;
+    e->msg_prio = mq_send_info->msg_prio;
+
+    e->send_enter_time = mq_send_info->send_enter_time;
+    e->send_exit_time = mq_send_info->send_exit_time;
+    e->rcv_enter_time = mq_rcv_info->rcv_enter_time;
+    e->rcv_exit_time = rcv_exit_time;
+    bpf_ringbuf_submit(e, 0);
+cleanup:
+    bpf_map_delete_elem(&send_msg2, &Key);// the message has been consumed; its send record is obsolete
+    bpf_map_delete_elem(&rcv_msg1,&pid);// drop the staged receive record
+    return 0;
+}
diff --git a/MagicEyes/src/cpu_watcher/bpf/mutrace.bpf.c b/MagicEyes/src/cpu_watcher/bpf/mutrace.bpf.c
new file mode 100644
index 000000000..79212119b
--- /dev/null
+++ b/MagicEyes/src/cpu_watcher/bpf/mutrace.bpf.c
@@ -0,0 +1,272 @@
+// Copyright 2023 The LMP Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "cpu_watcher.h"
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
+
+const int ctrl_key = 0;
+BPF_HASH(kmutex_info_map, u64, struct mutex_info, 1024); /* kernel mutex address -> statistics */
+BPF_HASH(umutex_info_map, u64, struct mutex_info, 1024); /* user-space mutex address -> statistics */
+BPF_HASH(trylock_map, u64, struct trylock_info, 1024); /* pid_tgid -> pending pthread trylock call */
+BPF_ARRAY(mu_ctrl_map, int, struct mu_ctrl, 1);
+struct {
+    __uint(type, BPF_MAP_TYPE_RINGBUF);
+    __uint(max_entries, 256 * 1024);
+} rb SEC(".maps");
+
+static inline struct mu_ctrl *get_mu_ctrl(void) { /* returns NULL when the control slot is missing or mu_func is unset */
+    struct mu_ctrl *mu_ctrl;
+    mu_ctrl = bpf_map_lookup_elem(&mu_ctrl_map, &ctrl_key);
+    if (!mu_ctrl || !mu_ctrl->mu_func) {
+        return NULL;
+    }
+    return mu_ctrl;
+}
+
+/*----------------------------------------------*/
+/*              kernel-space mutexes            */
+/*----------------------------------------------*/
+
+SEC("kprobe/mutex_lock") /* entry of mutex_lock: stamp acquire_time, creating the record on first sight */
+int BPF_KPROBE(trace_mutex_lock, struct mutex *lock) {
+    u64 lock_addr = (u64)lock; // lock address, used as map key
+    u64 ts = bpf_ktime_get_ns();
+    struct mutex_info *info = bpf_map_lookup_elem(&kmutex_info_map, &lock_addr);
+    if (info) {
+        info->acquire_time = ts; // NOTE(review): taken at call entry, i.e. before the lock is actually held - confirm
+    } else {
+        struct mutex_info new_info = {
+            .locked_total = 0,
+            .locked_max = 0,
+            .contended_total = 0,
+            .count = 0,
+            .last_owner = 0,
+            .acquire_time = ts,
+            .ptr = lock_addr
+        };
+        __builtin_memset(new_info.last_name, 0, sizeof(new_info.last_name));
+        bpf_map_update_elem(&kmutex_info_map, &lock_addr, &new_info, BPF_ANY);
+    }
+    return 0;
+}
+
+SEC("kprobe/mutex_trylock") /* entry of mutex_trylock */
+int BPF_KPROBE(trace_mutex_trylock, struct mutex *lock) {
+    int ret = PT_REGS_RC(ctx); /* NOTE(review): this is an entry kprobe, so the return register does not yet hold mutex_trylock's result; a kretprobe is probably needed - confirm */
+    if (ret != 0) { // intended meaning: the lock was acquired
+        u64 lock_addr = (u64)lock; // lock address, used as map key
+        u64 ts = bpf_ktime_get_ns();
+        struct mutex_info *info = bpf_map_lookup_elem(&kmutex_info_map, &lock_addr);
+        if (info) {
+            info->acquire_time = ts;
+        } else {
+            struct mutex_info new_info = {
+                .locked_total = 0,
+                .locked_max = 0,
+                .contended_total = 0,
+                .count = 0,
+                .last_owner = 0,
+                .acquire_time = ts,
+                .ptr = lock_addr
+            };
+            __builtin_memset(new_info.last_name, 0, sizeof(new_info.last_name));
+            bpf_map_update_elem(&kmutex_info_map, &lock_addr, &new_info, BPF_ANY);
+        }
+    }
+    return 0;
+}
+
+SEC("kprobe/__mutex_lock_slowpath") /* contended path: emit a contention event and update the lock's statistics */
+int BPF_KPROBE(trace_mutex_lock_slowpath, struct mutex *lock) {
+    struct mu_ctrl *mu_ctrl = get_mu_ctrl(); /* NOTE(review): result is never checked - confirm intent */
+    struct mutex_contention_event *e;
+    struct task_struct *owner_task, *contender_task;
+    pid_t pid = bpf_get_current_pid_tgid();
+    long owner;
+    u64 lock_addr = (u64)lock;
+    u64 ts = bpf_ktime_get_ns();
+    e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
+    if (!e) {
+        return 0;
+    }
+    e->contender_pid = pid;
+    e->ptr = lock_addr;
+    bpf_get_current_comm(&e->contender_name, sizeof(e->contender_name));
+    bpf_probe_read_kernel(&owner, sizeof(owner), &lock->owner);
+    owner_task = (struct task_struct *)(owner & ~0x7L); /* the low three bits of mutex.owner are flag bits (MUTEX_FLAGS), not part of the task pointer */
+    contender_task = (struct task_struct *)bpf_get_current_task();
+    bpf_probe_read_kernel(&e->contender_prio, sizeof(e->contender_prio), &contender_task->prio);
+    if (owner_task) {
+        bpf_probe_read_kernel(&e->owner_pid, sizeof(e->owner_pid), &owner_task->pid);
+        bpf_probe_read_kernel_str(&e->owner_name, sizeof(e->owner_name), owner_task->comm);
+        bpf_probe_read_kernel(&e->owner_prio, sizeof(e->owner_prio), &owner_task->prio);
+    } else {
+        e->owner_pid = 0;
+        e->owner_prio = 0; /* reserved ring-buffer memory is not zeroed; do not leak a stale value */
+        __builtin_memset(e->owner_name, 0, sizeof(e->owner_name));
+    }
+    struct mutex_info *info = bpf_map_lookup_elem(&kmutex_info_map, &lock_addr);
+    if (info) {
+        u64 contention_start = ts;
+        info->contended_total += (contention_start - info->acquire_time); // accumulate contention time
+        info->count++; // one more contention
+    } else {
+        struct mutex_info new_info = {
+            .locked_total = 0,
+            .locked_max = 0,
+            .contended_total = 0,
+            .count = 1, // first contention seen for this lock
+            .last_owner = 0,
+            .acquire_time = ts, // initial timestamp
+            .ptr = lock_addr
+        };
+        __builtin_memset(new_info.last_name, 0, sizeof(new_info.last_name));
+        bpf_map_update_elem(&kmutex_info_map, &lock_addr, &new_info, BPF_ANY);
+    }
+    bpf_ringbuf_submit(e, 0);
+    return 0;
+}
+
+SEC("kprobe/mutex_unlock") /* entry of mutex_unlock: account the hold time and remember the last owner */
+int BPF_KPROBE(trace_mutex_unlock, struct mutex *lock) {
+    u64 lock_addr = (u64)lock;
+    u64 ts = bpf_ktime_get_ns();
+    pid_t pid = bpf_get_current_pid_tgid();
+    struct mutex_info *info = bpf_map_lookup_elem(&kmutex_info_map, &lock_addr);
+    if (info) {
+        u64 held_time = ts - info->acquire_time; // time since the recorded acquire_time
+        info->locked_total += held_time; // total hold time
+        if (held_time > info->locked_max) {
+            info->locked_max = held_time; // longest hold time
+        }
+        info->last_owner = pid; // id of the task that last held the lock
+        bpf_get_current_comm(&info->last_name, sizeof(info->last_name)); // name of the task that last held the lock
+    }
+    return 0;
+}
+
+
+
+/*----------------------------------------------*/
+/*              user-space mutexes              */
+/*----------------------------------------------*/
+
+
+
+SEC("uprobe/pthread_mutex_lock") /* entry of pthread_mutex_lock: stamp acquire_time and count contention */
+int BPF_KPROBE(pthread_mutex_lock, void *__mutex) {
+    u64 pid_tgid = bpf_get_current_pid_tgid();
+    pid_t pid = pid_tgid >> 32;
+    u64 now = bpf_ktime_get_ns();
+
+    struct mutex_info *info = bpf_map_lookup_elem(&umutex_info_map, &__mutex);
+    if (info) {
+        if (info->acquire_time > 0) {
+            // acquire_time is still set, so the lock is treated as contended
+            info->contended_total += (now - info->acquire_time);
+            info->count += 1;
+        }
+        info->acquire_time = now;
+        info->last_owner = pid;
+        bpf_get_current_comm(&info->last_name, sizeof(info->last_name));
+    } else {
+        // first time this mutex is seen: create its record
+        struct mutex_info new_info = {
+            .locked_total = 0,
+            .locked_max = 0,
+            .contended_total = 0,
+            .count = 0,
+            .last_owner = pid,
+            .acquire_time = now,
+            .ptr = (u64)__mutex,
+        };
+        bpf_get_current_comm(&new_info.last_name, sizeof(new_info.last_name));
+        bpf_map_update_elem(&umutex_info_map, &__mutex, &new_info, BPF_ANY);
+    }
+    return 0;
+}
+
+SEC("uprobe/__pthread_mutex_trylock") /* entry of trylock: remember which mutex this thread is trying */
+int BPF_KPROBE(__pthread_mutex_trylock, void *__mutex) {
+    u64 pid_tgid = bpf_get_current_pid_tgid();
+    u64 now = bpf_ktime_get_ns();
+    struct trylock_info info = {
+        .__mutex = __mutex,
+        .start_time = now,
+    };
+    bpf_map_update_elem(&trylock_map, &pid_tgid, &info, BPF_ANY);
+    return 0;
+}
+
+SEC("uretprobe/__pthread_mutex_trylock") /* return of trylock: on success (ret == 0) account it like a lock */
+int BPF_KRETPROBE(ret_pthread_mutex_trylock, int ret) {
+    u64 pid_tgid = bpf_get_current_pid_tgid();
+    struct trylock_info *try_info = bpf_map_lookup_elem(&trylock_map, &pid_tgid);
+    if (!try_info) {
+        return 0;
+    }
+    void *__mutex = try_info->__mutex;
+    u64 now = bpf_ktime_get_ns();
+    if (ret == 0) {
+        struct mutex_info *info = bpf_map_lookup_elem(&umutex_info_map, &__mutex);
+        if (info) {
+            if (info->acquire_time > 0) {
+                // acquire_time is still set, so the lock is treated as contended
+                info->contended_total += (now - info->acquire_time);
+                info->count += 1;
+            }
+            info->acquire_time = now;
+            info->last_owner = pid_tgid >> 32;
+            bpf_get_current_comm(&info->last_name, sizeof(info->last_name));
+        } else {
+            // first time this mutex is seen: create its record
+            struct mutex_info new_info = {
+                .locked_total = 0,
+                .locked_max = 0,
+                .contended_total = 0,
+                .count = 0,
+                .last_owner = pid_tgid >> 32,
+                .acquire_time = now,
+                .ptr = (u64)__mutex,
+            };
+            bpf_get_current_comm(&new_info.last_name, sizeof(new_info.last_name));
+            bpf_map_update_elem(&umutex_info_map, &__mutex, &new_info, BPF_ANY);
+        }
+    }
+    bpf_map_delete_elem(&trylock_map, &pid_tgid);
+    return 0;
+}
+
+SEC("uprobe/pthread_mutex_unlock") /* entry of unlock: account the hold time and clear acquire_time */
+int BPF_KPROBE(pthread_mutex_unlock, void *__mutex){
+    u64 now = bpf_ktime_get_ns();
+    struct mutex_info *info = bpf_map_lookup_elem(&umutex_info_map, &__mutex);
+    if (info) {
+        u64 locked_time = now - info->acquire_time;
+        info->locked_total += locked_time;
+        if (locked_time > info->locked_max) {
+            info->locked_max = locked_time;
+        }
+        info->acquire_time = 0;
+    }
+    return 0;
+}
+
diff --git a/MagicEyes/src/cpu_watcher/bpf/preempt.bpf.c b/MagicEyes/src/cpu_watcher/bpf/preempt.bpf.c
new file mode 100644
index 000000000..bf650a6e0
--- /dev/null
+++ b/MagicEyes/src/cpu_watcher/bpf/preempt.bpf.c
@@ -0,0 +1,85 @@
+// Copyright 2023 The LMP Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "cpu_watcher.h"
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
+
+#define TIF_NEED_RESCHED 3
+const int ctrl_key = 0;
+// pid of the preempted task -> timestamp (ns) of the sched_switch that preempted it
+BPF_HASH(preemptTime, pid_t, u64, 4096);
+BPF_ARRAY(preempt_ctrl_map,int,struct preempt_ctrl,1);
+struct {
+    __uint(type, BPF_MAP_TYPE_RINGBUF);
+    __uint(max_entries, 256 * 1024);
+} rb SEC(".maps");
+
+static inline struct preempt_ctrl *get_preempt_ctrl(void) { /* returns NULL when the control slot is missing or preempt_func is unset */
+    struct preempt_ctrl *preempt_ctrl;
+    preempt_ctrl = bpf_map_lookup_elem(&preempt_ctrl_map, &ctrl_key);
+    if (!preempt_ctrl || !preempt_ctrl->preempt_func) {
+        return NULL;
+    }
+    return preempt_ctrl;
+}
+
+SEC("tp_btf/sched_switch") /* when the switch is a preemption, remember when `prev` was switched out */
+int BPF_PROG(sched_switch, bool preempt, struct task_struct *prev, struct task_struct *next) {
+    struct preempt_ctrl *preempt_ctrl = get_preempt_ctrl(); /* NOTE(review): result is never checked - confirm intent */
+    u64 start_time = bpf_ktime_get_ns();
+    pid_t prev_pid = BPF_CORE_READ(prev, pid);
+
+    if (preempt) {
+        bpf_map_update_elem(&preemptTime, &prev_pid, &start_time, BPF_ANY);
+    }
+
+    // The code below is disabled because the `preempt` argument already tells us whether to record a timestamp.
+    // if (prev->thread_info.flags & TIF_NEED_RESCHED) {
+    //     bpf_map_update_elem(&preemptTime, &prev_pid, &start_time, BPF_ANY);
+    // }
+
+    return 0;
+}
+
+// SEC("kprobe/finish_task_switch")
+SEC("kprobe/finish_task_switch.isra.0")
+int BPF_KPROBE(finish_task_switch, struct task_struct *prev) {
+    struct preempt_ctrl *preempt_ctrl = get_preempt_ctrl();
+    u64 end_time = bpf_ktime_get_ns();
+    pid_t pid = BPF_CORE_READ(prev, pid);
+    u64 *val;
+    val = bpf_map_lookup_elem(&preemptTime, &pid);
+    if (val) {
+        u64 delta = end_time - *val;
+        bpf_map_delete_elem(&preemptTime, &pid); /* consume the entry first so a full ring buffer cannot leave it behind */
+        struct preempt_event *e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
+        if (!e) {
+            return 0;
+        }
+        e->prev_pid = pid;
+        e->next_pid = bpf_get_current_pid_tgid() >> 32; /* NOTE(review): upper half of pid_tgid, while prev_pid comes from task->pid - confirm they are meant to be comparable */
+        e->duration = delta;
+        bpf_get_current_comm(&e->comm, sizeof(e->comm));
+        bpf_ringbuf_submit(e, 0);
+    }
+
+    return 0;
+}
diff --git a/MagicEyes/src/cpu_watcher/bpf/sar.bpf.c b/MagicEyes/src/cpu_watcher/bpf/sar.bpf.c
new file mode 100644
index 000000000..00420b7d4
--- /dev/null
+++ b/MagicEyes/src/cpu_watcher/bpf/sar.bpf.c
@@ -0,0 +1,274 @@
+// Copyright 2023 The LMP Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com + +#include +#include +#include +#include +#include "cpu_watcher.h" + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + +const volatile long long unsigned int forks_addr = 0; +const int ctrl_key = 0; +#define PF_IDLE 0x00000002 /* I am an IDLE thread */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ + +// 计数表格,第0项为所统计fork数,第1项为进程切换数 +BPF_ARRAY(countMap,int,u64,3); +// 记录开始的时间 +BPF_ARRAY(procStartTime,pid_t,u64,1024); +//存储运行队列长度 +BPF_ARRAY(runqlen,u32,int,1); +//记录软中断开始时间 +BPF_HASH(softirqCpuEnterTime,u32,u64,1024); +//记录软中断结束时间 +BPF_HASH(softirqLastTime,u32,u64,1); +// 记录开始的时间 +BPF_HASH(irq_cpu_enter_start,u32,u64,1024); +//记录上次中断时间 +BPF_ARRAY(irq_Last_time,u32,u64,1); +// 储存cpu进入空闲的起始时间 +BPF_ARRAY(idleStart,u32,u64,128); +// 储存cpu进入空闲的持续时间 +BPF_ARRAY(idleLastTime,u32,u64,1); +// 储存cpu运行内核线程的时间 +BPF_ARRAY(kt_LastTime,u32,u64,1); +// 储存cpu运行用户线程的时间 +BPF_ARRAY(ut_LastTime,u32,u64,1); +BPF_ARRAY(tick_user,u32,u64,1); +BPF_ARRAY(symAddr,u32,u64,1); +BPF_ARRAY(sar_ctrl_map,int,struct sar_ctrl,1); + +static inline struct sar_ctrl *get_sar_ctrl(void) { + struct sar_ctrl *sar_ctrl; + sar_ctrl = bpf_map_lookup_elem(&sar_ctrl_map, &ctrl_key); + if (!sar_ctrl || !sar_ctrl->sar_func) { + return NULL; + } + return sar_ctrl; +} + +// 统计fork数 +SEC("kprobe/finish_task_switch.isra.0") +// SEC("kprobe/finish_task_switch") +int kprobe__finish_task_switch(struct pt_regs *ctx) +{ + struct sar_ctrl *sar_ctrl = get_sar_ctrl(); + u32 key = 0; + u64 val, *valp = NULL; + unsigned long total_forks; + + if(forks_addr !=0){ + valp = (u64 *)forks_addr; + bpf_probe_read_kernel(&total_forks, sizeof(unsigned long), valp); + key = 1; + val = total_forks; + bpf_map_update_elem(&countMap,&key,&val,BPF_ANY); + } + return 0; +} + +//获取进程切换数; +SEC("tracepoint/sched/sched_switch") +int trace_sched_switch2(struct cswch_args *info) { + struct sar_ctrl *sar_ctrl = get_sar_ctrl(); + pid_t prev = info->prev_pid, 
next = info->next_pid; + if (prev != next) { + u32 key = 0; + u64 *valp, delta, cur; + struct task_struct *ts; + pid_t pid = next; + u64 time = bpf_ktime_get_ns(); + bpf_map_update_elem(&procStartTime,&pid,&time,BPF_ANY); + valp = bpf_map_lookup_elem(&countMap,&key); + if (!valp) { + u64 initval = 1; + bpf_map_update_elem(&countMap,&key,&initval,BPF_ANY); + } + else *valp += 1; + } + return 0; +} + +// SEC("kprobe/finish_task_switch") +SEC("kprobe/finish_task_switch.isra.0") +int BPF_KPROBE(finish_task_switch,struct task_struct *prev){ + struct sar_ctrl *sar_ctrl = get_sar_ctrl(); + pid_t pid=BPF_CORE_READ(prev,pid); + u64 *val, time = bpf_ktime_get_ns(); + u64 delta; + // 记录内核进程(非IDLE)运行时间 + if ((BPF_CORE_READ(prev,flags) & PF_KTHREAD) && pid!= 0) { + val = bpf_map_lookup_elem(&procStartTime, &pid); + if (val) { + u32 key = 0; + delta = time - *val; + val = bpf_map_lookup_elem(&kt_LastTime, &key); + if (val) *val += delta; + else bpf_map_update_elem(&kt_LastTime, &key, &delta, BPF_ANY); + }// 记录用户进程的运行时间 + }else if (!(BPF_CORE_READ(prev,flags) & PF_KTHREAD) && !(BPF_CORE_READ(prev,flags) &PF_IDLE)) { + val = bpf_map_lookup_elem(&procStartTime, &pid); + if (val) { + u32 key = 0; + delta = (time - *val); + val = bpf_map_lookup_elem(&ut_LastTime, &key); + if (val) *val += delta; + else bpf_map_update_elem(&ut_LastTime, &key, &delta, BPF_ANY); + } + } + return 0; + +} + +//统计运行队列长度 +SEC("kprobe/update_rq_clock") +int BPF_KPROBE(update_rq_clock,struct rq *rq){ + struct sar_ctrl *sar_ctrl = get_sar_ctrl(); + u32 key = 0; + u64 val = BPF_CORE_READ(rq,nr_running); + bpf_map_update_elem(&runqlen,&key,&val,BPF_ANY); + return 0; +} + +//软中断 +SEC("tracepoint/irq/softirq_entry") +int trace_softirq_entry(struct __softirq_info *info) { + struct sar_ctrl *sar_ctrl = get_sar_ctrl(); + u32 key = info->vec; + u64 val = bpf_ktime_get_ns(); + bpf_map_update_elem(&softirqCpuEnterTime, &key, &val, BPF_ANY); + return 0; +} + +SEC("tracepoint/irq/softirq_exit") +int 
trace_softirq_exit(struct __softirq_info *info) { + struct sar_ctrl *sar_ctrl = get_sar_ctrl(); + u32 key = info->vec; + u64 now = bpf_ktime_get_ns(), *valp = 0; + valp =bpf_map_lookup_elem(&softirqCpuEnterTime, &key); + if (valp) { + // 找到表项 + u64 last_time = now - *valp; + u32 key0 = 0; + valp = bpf_map_lookup_elem(&softirqLastTime, &key0); + if (!valp) bpf_map_update_elem(&softirqLastTime, &key0, &last_time, BPF_ANY); + else *valp += last_time; + } + return 0; +} + +/*irqtime:CPU响应irq中断所占用的时间。 +注意这是所有CPU时间的叠加,平均到每个CPU应该除以CPU个数。*/ +SEC("tracepoint/irq/irq_handler_entry") +int trace_irq_handler_entry(struct __irq_info *info) { + struct sar_ctrl *sar_ctrl = get_sar_ctrl(); + u32 key = info->irq; + u64 ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&irq_cpu_enter_start, &key, &ts, BPF_ANY); + return 0; +} + +SEC("tracepoint/irq/irq_handler_exit") +int trace_irq_handler_exit(struct __irq_info *info) { + struct sar_ctrl *sar_ctrl = get_sar_ctrl(); + u32 key = info->irq; + u64 now = bpf_ktime_get_ns(), *ts = 0; + ts = bpf_map_lookup_elem(&irq_cpu_enter_start, &key); + if (ts) { + u64 last_time = now - *ts; + u32 key0 = 0; + ts = bpf_map_lookup_elem(&irq_Last_time, &key0); + if (!ts) + bpf_map_update_elem(&irq_Last_time, &key0, &last_time, BPF_ANY); + else + *ts += last_time; + } + return 0; +} + + +//tracepoint:power_cpu_idle 表征了CPU进入IDLE的状态,比较准确 +SEC("tracepoint/power/cpu_idle") +int trace_cpu_idle(struct idleStruct *pIDLE) { + struct sar_ctrl *sar_ctrl = get_sar_ctrl(); + u64 delta, time = bpf_ktime_get_ns(); + u32 key = pIDLE->cpu_id; + if (pIDLE->state == -1) { + u64 *valp = bpf_map_lookup_elem(&idleStart,&key); + if (valp && *valp != 0) { + delta = time - *valp; + key = 0; + valp = bpf_map_lookup_elem(&idleLastTime,&key); + if (valp) *valp += delta; + else bpf_map_update_elem(&idleLastTime,&key,&delta,BPF_ANY);//初次记录持续空闲时间; + } + } else { + u64 val = time; + bpf_map_update_elem(&idleStart,&key,&time,BPF_ANY); + } + return 0; +} + +static __always_inline int 
user_mode(struct pt_regs *regs) +{ + #ifdef CONFIG_X86_32 + return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; + #else + return !!(regs->cs & 3); + #endif +} +// 两个CPU各自会产生一个调用,这正好方便我们使用 +SEC("perf_event") +int tick_update(struct pt_regs *ctx) { + struct sar_ctrl *sar_ctrl = get_sar_ctrl(); + + // bpf_trace_printk("cs_rpl = %x\n", ctx->cs & 3); + u32 key = 0; + u64 val, *valp; + + // 记录用户态时间,直接从头文件arch/x86/include/asm/ptrace.h中引用 + if (user_mode(ctx)) { + u64 initval = 1; + valp = bpf_map_lookup_elem(&tick_user, &key); + if (valp) *valp += 1; + else bpf_map_update_elem(&tick_user, &key, &initval, BPF_ANY); + } + + unsigned long total_forks; + + // if(forks_addr !=0){ + // valp = (u64 *)forks_addr; + // bpf_probe_read_kernel(&total_forks, sizeof(unsigned long), valp); + // key = 1; + // val = total_forks; + // bpf_map_update_elem(&countMap,&key,&val,BPF_ANY); + // } + + valp = bpf_map_lookup_elem(&symAddr, &key); + if (valp) { + void *addr = (void *)(*valp); + if (addr > 0) { + bpf_probe_read_kernel(&total_forks, sizeof(unsigned long), addr); + key = 1; + val = total_forks; + bpf_map_update_elem(&countMap, &key, &val, BPF_ANY); + } + } + + return 0; +} diff --git a/MagicEyes/src/cpu_watcher/bpf/sc_delay.bpf.c b/MagicEyes/src/cpu_watcher/bpf/sc_delay.bpf.c new file mode 100644 index 000000000..ee224283a --- /dev/null +++ b/MagicEyes/src/cpu_watcher/bpf/sc_delay.bpf.c @@ -0,0 +1,89 @@ +// Copyright 2023 The LMP Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com + +#include "vmlinux.h" +#include +#include +#include "cpu_watcher.h" + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + + +const int ctrl_key = 0; +BPF_PERCPU_HASH(SyscallEnterTime,pid_t,u64,512); +BPF_PERCPU_HASH(Events,pid_t,u64,10); +BPF_ARRAY(sc_ctrl_map,int,struct sc_ctrl,1); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 256 * 1024); +} rb SEC(".maps");//环形缓冲区; + +static inline struct sc_ctrl *get_sc_ctrl(void) { + struct sc_ctrl *sc_ctrl; + sc_ctrl = bpf_map_lookup_elem(&sc_ctrl_map, &ctrl_key); + if (!sc_ctrl || !sc_ctrl->sc_func) { + return NULL; + } + return sc_ctrl; +} + + +SEC("tracepoint/raw_syscalls/sys_enter") +int tracepoint__syscalls__sys_enter(struct trace_event_raw_sys_enter *args){ + struct sc_ctrl *sc_ctrl = get_sc_ctrl(); + u64 start_time = bpf_ktime_get_ns()/1000; + pid_t pid = bpf_get_current_pid_tgid(); + u64 syscall_id = (u64)args->id; + + //bpf_printk("ID:%ld\n",syscall_id); + bpf_map_update_elem(&Events,&pid,&syscall_id,BPF_ANY); + bpf_map_update_elem(&SyscallEnterTime,&pid,&start_time,BPF_ANY); + return 0; +} + +SEC("tracepoint/raw_syscalls/sys_exit") +int tracepoint__syscalls__sys_exit(struct trace_event_raw_sys_exit *args){ + struct sc_ctrl *sc_ctrl = get_sc_ctrl(); + u64 exit_time = bpf_ktime_get_ns()/1000; + pid_t pid = bpf_get_current_pid_tgid() ; + u64 syscall_id; + u64 start_time, delay; + u64 *val = bpf_map_lookup_elem(&SyscallEnterTime, &pid); + if(val !=0){ + start_time = *val; + delay = exit_time - start_time; + bpf_map_delete_elem(&SyscallEnterTime, &pid); + }else{ + return 0; + } + u64 *val2 = bpf_map_lookup_elem(&Events, &pid); + if(val2 !=0){ + syscall_id = *val2; + bpf_map_delete_elem(&SyscallEnterTime, &pid); + }else{ + return 0; + } + struct syscall_events *e; + e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0); 
+ if (!e) return 0; + e->pid = pid; + e->delay = delay; + bpf_get_current_comm(&e->comm, sizeof(e->comm)); + e->syscall_id = syscall_id; + bpf_ringbuf_submit(e, 0); + return 0; +} diff --git a/MagicEyes/src/cpu_watcher/bpf/schedule_delay.bpf.c b/MagicEyes/src/cpu_watcher/bpf/schedule_delay.bpf.c new file mode 100644 index 000000000..db1b2a363 --- /dev/null +++ b/MagicEyes/src/cpu_watcher/bpf/schedule_delay.bpf.c @@ -0,0 +1,219 @@ +// Copyright 2023 The LMP Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com + +#include +#include +#include +#include +#include "cpu_watcher.h" + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; +#define TASK_RUNNING 0x0000 + +const int ctrl_key = 0; +BPF_HASH(has_scheduled,struct proc_id, bool, 10240);//记录该进程是否调度过 +BPF_HASH(enter_schedule,struct proc_id, struct schedule_event, 10240);//记录该进程上运行队列的时间 +BPF_ARRAY(sys_schedule,int,struct sum_schedule,1);//记录整个系统的调度延迟 +BPF_ARRAY(threshold_schedule,int,struct proc_schedule,10240);//记录每个进程的调度延迟 +BPF_HASH(proc_histories,struct proc_id, struct proc_history, 10240);//记录每个进程运行前的两个进程 +BPF_ARRAY(schedule_ctrl_map,int,struct schedule_ctrl,1); + +static inline struct schedule_ctrl *get_schedule_ctrl(void) { + struct schedule_ctrl *sched_ctrl; + sched_ctrl = bpf_map_lookup_elem(&schedule_ctrl_map, &ctrl_key); + if (!sched_ctrl || !sched_ctrl->schedule_func) { + return NULL; + } + return sched_ctrl; +}//查找控制结构体 + +SEC("tp_btf/sched_wakeup") +int BPF_PROG(sched_wakeup, struct task_struct *p) { + struct schedule_ctrl *sched_ctrl = get_schedule_ctrl(); + pid_t pid = p->pid; + int cpu = bpf_get_smp_processor_id(); + struct schedule_event *schedule_event; + struct proc_id id= {}; + u64 current_time = bpf_ktime_get_ns(); + id.pid = pid; + if (pid == 0) { + id.cpu_id = cpu; + } + schedule_event = bpf_map_lookup_elem(&enter_schedule, &id); + if (!schedule_event) { + struct schedule_event schedule_event1; + bool issched = false; + schedule_event1.pid = pid; + schedule_event1.count = 1; + schedule_event1.enter_time = current_time; + bpf_map_update_elem(&has_scheduled, &id, &issched, BPF_ANY); + bpf_map_update_elem(&enter_schedule, &id, &schedule_event1, BPF_ANY); + } else { + schedule_event->enter_time = current_time; + } + return 0; +} + +SEC("tp_btf/sched_wakeup_new") +int BPF_PROG(sched_wakeup_new, struct task_struct *p) { + struct schedule_ctrl *sched_ctrl = get_schedule_ctrl(); + sched_ctrl = 
bpf_map_lookup_elem(&schedule_ctrl_map,&ctrl_key); + if(!sched_ctrl || !sched_ctrl->schedule_func) + return 0; + pid_t pid = p->pid; + int cpu = bpf_get_smp_processor_id(); + struct proc_id id= {}; + u64 current_time = bpf_ktime_get_ns(); + id.pid = pid; + if (pid == 0) { + id.cpu_id = cpu; + } + struct schedule_event schedule_event; + bool issched = false; + schedule_event.pid = pid; + schedule_event.count = 1; + schedule_event.enter_time = current_time; + bpf_map_update_elem(&has_scheduled, &id, &issched, BPF_ANY); + bpf_map_update_elem(&enter_schedule, &id, &schedule_event, BPF_ANY); + return 0; +} + +SEC("tp_btf/sched_switch") +int BPF_PROG(sched_switch, bool preempt, struct task_struct *prev, struct task_struct *next) { + struct schedule_ctrl *sched_ctrl = get_schedule_ctrl(); + struct proc_history *history; + struct proc_history new_history; + u64 current_time = bpf_ktime_get_ns(); + pid_t prev_pid = prev->pid; + unsigned int prev_state = prev->__state; + int prev_cpu = bpf_get_smp_processor_id(); + pid_t next_pid = next->pid; + int next_cpu = bpf_get_smp_processor_id(); + bool *issched; + struct schedule_event *schedule_event; + struct sum_schedule *sum_schedule; + int key = 0; + struct proc_id next_id = {}; + u64 delay; + if (prev_state == TASK_RUNNING) { + struct proc_id prev_pd = {}; + prev_pd.pid = prev_pid; + if (prev_pid == 0) { + prev_pd.cpu_id = prev_cpu; + } + schedule_event = bpf_map_lookup_elem(&enter_schedule, &prev_pd); + if (!schedule_event) { + struct schedule_event schedule_event2; + bool issched = false; + schedule_event2.pid = prev_pid; + schedule_event2.count = 1; + schedule_event2.enter_time = current_time; + bpf_map_update_elem(&has_scheduled, &prev_pd, &issched, BPF_ANY); + bpf_map_update_elem(&enter_schedule, &prev_pd, &schedule_event2, BPF_ANY); + } else { + schedule_event->enter_time = current_time; + } + } + + next_id.pid = next_pid; + if (next_pid == 0) { + next_id.cpu_id = next_cpu; + } + schedule_event = 
bpf_map_lookup_elem(&enter_schedule, &next_id); + if (!schedule_event) return 0; + issched = bpf_map_lookup_elem(&has_scheduled, &next_id); + if (!issched) return 0; + if (*issched) { + schedule_event->count++; + } else { + *issched = true; + } + delay = current_time - schedule_event->enter_time; + struct proc_schedule proc_schedule; + proc_schedule.delay = delay; + proc_schedule.id= next_id; + bpf_probe_read_kernel_str(&proc_schedule.proc_name, sizeof(proc_schedule.proc_name), next->comm); + bpf_map_update_elem(&threshold_schedule, &key, &proc_schedule, BPF_ANY); + sum_schedule = bpf_map_lookup_elem(&sys_schedule, &key); + if (!sum_schedule) { + struct sum_schedule sum_schedule = {}; + sum_schedule.sum_count++; + sum_schedule.sum_delay += delay; + if (delay > sum_schedule.max_delay) { + sum_schedule.max_delay = delay; + if (next->pid != 0) { + bpf_probe_read_kernel_str(&sum_schedule.proc_name_max, sizeof(sum_schedule.proc_name_max), next->comm); + } + } else if (sum_schedule.min_delay == 0 || delay < sum_schedule.min_delay) { + sum_schedule.min_delay = delay; + if (next->pid != 0) { + bpf_probe_read_kernel_str(&sum_schedule.proc_name_min, sizeof(sum_schedule.proc_name_min), next->comm); + } + } + bpf_map_update_elem(&sys_schedule, &key, &sum_schedule, BPF_ANY); + } else { + sum_schedule->sum_count++; + sum_schedule->sum_delay += delay; + if (delay > sum_schedule->max_delay) { + sum_schedule->max_delay = delay; + bpf_probe_read_kernel_str(&sum_schedule->proc_name_max, sizeof(sum_schedule->proc_name_max), next->comm); + } else if (sum_schedule->min_delay == 0 || delay < sum_schedule->min_delay) { + sum_schedule->min_delay = delay; + if (next->pid != 0) { + bpf_probe_read_kernel_str(&sum_schedule->proc_name_min, sizeof(sum_schedule->proc_name_min), next->comm); + } + } + } + history = bpf_map_lookup_elem(&proc_histories, &next_id); + if (history) { + // 如果找到了,更新历史记录 + new_history.last[0] = history->last[1]; + new_history.last[1].pid = prev->pid; + 
bpf_probe_read_kernel_str(&new_history.last[1].comm, sizeof(new_history.last[1].comm), prev->comm); + bpf_map_update_elem(&proc_histories, &next_id, &new_history, BPF_ANY); + } else { + // 如果没有找到,初始化新的历史记录 + new_history.last[0].pid = 0; // 初始化为0,表示没有历史信息 + new_history.last[0].comm[0] = '\0'; + new_history.last[1].pid = prev->pid; + bpf_probe_read_kernel_str(&new_history.last[1].comm, sizeof(new_history.last[1].comm), prev->comm); + bpf_map_update_elem(&proc_histories, &next_id, &new_history, BPF_ANY); + } + return 0; +} + +SEC("tracepoint/sched/sched_process_exit") +int sched_process_exit(void *ctx) { + struct schedule_ctrl *sched_ctrl = get_schedule_ctrl(); + struct task_struct *p = (struct task_struct *)bpf_get_current_task(); + pid_t pid = BPF_CORE_READ(p, pid); + int cpu = bpf_get_smp_processor_id(); + struct proc_id id= {}; + struct schedule_event *schedule_event; + bool *issched; + int key = 0; + id.pid = pid; + if (pid == 0) id.cpu_id = cpu; + schedule_event = bpf_map_lookup_elem(&enter_schedule, &id); + if (schedule_event) { + bpf_map_delete_elem(&enter_schedule, &id); + } + issched = bpf_map_lookup_elem(&has_scheduled, &id); + if (issched) { + bpf_map_delete_elem(&has_scheduled, &id); + } + return 0; +} \ No newline at end of file diff --git a/MagicEyes/src/cpu_watcher/controller.c b/MagicEyes/src/cpu_watcher/controller.c new file mode 100644 index 000000000..2c319dfe0 --- /dev/null +++ b/MagicEyes/src/cpu_watcher/controller.c @@ -0,0 +1,281 @@ +// Copyright 2023 The LMP Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com +// +// used to control the execution of proc_image tool +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cpu_watcher_helper.h" + +static struct env { + // 1代表activate;2代表unactivate;3代表finish + int usemode; + bool SAR; + bool percent; + bool CS_DELAY; + bool SYSCALL_DELAY; + bool MIN_US_SET; + int MIN_US; + bool PREEMPT; + bool SCHEDULE_DELAY; + bool MQ_DELAY; + int freq; + bool mutrace; + bool mutex_detail; + bool umutex; +} env = { + .usemode = 0, + .SAR = false, + .percent = false, + .CS_DELAY = false, + .SYSCALL_DELAY = false, + .MIN_US_SET = false, + .MIN_US = 10000, + .PREEMPT = false, + .SCHEDULE_DELAY = false, + .MQ_DELAY = false, + .freq = 99, + .mutrace = false, + .mutex_detail = false, + .umutex = false, +}; + +const char argp_program_doc[] ="Trace process to get cpu watcher.\n"; + +static const struct argp_option opts[] = { + { "activate", 'a', NULL, 0, "Set startup policy of proc_image tool" }, + { "unactivate", 'u', NULL, 0, "Initialize to the original unactivated state" }, + { "finish", 'f', NULL, 0, "Finish to run eBPF tool" }, + {"libbpf_sar", 's', 0, 0, "Print sar_info (the data of cpu)" }, + {"percent", 'P', 0, 0, "Format data as percentages" }, + {"cs_delay", 'c', 0, 0, "Print cs_delay (the data of cpu)" }, + {"syscall_delay", 'S', 0, 0, "Print syscall_delay (the data of syscall)" }, + {"preempt_time", 'p', 0, 0, "Print preempt_time (the data of preempt_schedule)" }, + {"schedule_delay", 'd', 0, 0, "Print schedule_delay (the data of cpu)" }, + {"schedule_delay_min_us_set", 'e', "THRESHOLD", 0, "Print scheduling delays that exceed the threshold (the data of cpu)" }, + {"mq_delay", 'm', 0, 0, "Print mq_delay(the data of proc)" }, + {"mutrace", 'x', 0, 0, "Print kernel mutex contend" }, + {"mutex_detail", 'i', 
0, 0, "Print kernel mutex details" }, + {"umutex", 'b', 0, 0, "Print user mutex details" }, + { NULL, 'h', NULL, OPTION_HIDDEN, "show the full help" }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case 'a': + env.usemode = 1; + break; + case 'u': + env.usemode = 2; + break; + case 'f': + env.usemode = 3; + break; + case 's': + env.SAR = true; + break; + case 'P': + env.percent = true; + case 'c': + env.CS_DELAY = true; + break; + case 'S': + env.SYSCALL_DELAY = true; + break; + case 'p': + env.PREEMPT = true; + break; + case 'd': + env.SCHEDULE_DELAY = true; + break; + case 'e': + env.MIN_US_SET = true; + if (arg) { + env.MIN_US = strtol(arg, NULL, 10); + if (env.MIN_US <= 0) { + fprintf(stderr, "Invalid value for min_us: %d\n", env.MIN_US); + argp_usage(state); + } + } else { + env.MIN_US = 10000; + } + break; + case 'm': + env.MQ_DELAY = true; + break; + case 'x': + env.mutrace = true; + break; + case 'i': + env.mutex_detail = true; + break; + case 'b': + env.umutex = true; + break; + case 'h': + argp_state_help(state, stderr, ARGP_HELP_STD_HELP); + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +int deactivate_mode(){ + int err; + + if(env.SAR){ + struct sar_ctrl sar_ctrl = {false,false,0}; + err = update_sar_ctrl_map(sar_ctrl); + if(err < 0) return err; + } + if(env.CS_DELAY){ + struct cs_ctrl cs_ctrl = {false,0}; + err = update_cs_ctrl_map(cs_ctrl); + if(err < 0) return err; + } + if(env.SYSCALL_DELAY){ + struct sc_ctrl sc_ctrl = {false,0}; + err = update_sc_ctrl_map(sc_ctrl); + if(err < 0) return err; + } + if(env.PREEMPT){ + struct preempt_ctrl preempt_ctrl = {false,0}; + err = update_preempt_ctrl_map(preempt_ctrl); + if(err < 0) return err; + } + if(env.SCHEDULE_DELAY){ + struct schedule_ctrl schedule_ctrl = {false,false,10000,0}; + err = update_schedule_ctrl_map(schedule_ctrl); + if(err < 0) return err; + } + if(env.MQ_DELAY){ + struct mq_ctrl mq_ctrl = {false,0}; + err = 
update_mq_ctrl_map(mq_ctrl); + if(err < 0) return err; + } + if(env.mutrace){ + struct mu_ctrl mu_ctrl = {false,false,0}; + err = update_mu_ctrl_map(mu_ctrl); + if(err < 0) return err; + } + return 0; +} + +static void sig_handler(int signo) +{ + deactivate_mode(); +} + +int main(int argc, char **argv) +{ + int err; + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + + signal(SIGALRM,sig_handler); + signal(SIGINT,sig_handler); + signal(SIGTERM,sig_handler); + + if(env.usemode == 1){ // activate mode + if(env.SAR){ + struct sar_ctrl sar_ctrl = {true,env.percent,SAR_WACTHER+env.percent}; + err = update_sar_ctrl_map(sar_ctrl); + if(err < 0) return err; + } + + if(env.CS_DELAY){ + struct cs_ctrl cs_ctrl = {true,CS_WACTHER}; + err = update_cs_ctrl_map(cs_ctrl); + if(err < 0) return err; + } + + if(env.SYSCALL_DELAY){ + struct sc_ctrl sc_ctrl = {true,SC_WACTHER}; + err = update_sc_ctrl_map(sc_ctrl); + if(err < 0) return err; + } + + if(env.PREEMPT){ + struct preempt_ctrl preempt_ctrl = {true,PREEMPT_WACTHER}; + err = update_preempt_ctrl_map(preempt_ctrl); + if(err < 0) return err; + } + + if(env.SCHEDULE_DELAY){ + /* + *1.未设置env.MIN_US_SET时, prev_watcher = SCHEDULE_WACTHER + 0;输出方式为schedule输出 + *2.已设置env.MIN_US_SET时, prev_watcher = SCHEDULE_WACTHER + 1;输出方式为-e输出 + */ + struct schedule_ctrl schedule_ctrl = {true,env.MIN_US_SET,env.MIN_US,SCHEDULE_WACTHER+env.MIN_US_SET}; + err = update_schedule_ctrl_map(schedule_ctrl); + if(err < 0) return err; + } + + if(env.MQ_DELAY){ + struct mq_ctrl mq_ctrl = {true,MQ_WACTHER}; + err = update_mq_ctrl_map(mq_ctrl); + if(err < 0) return err; + } + + if(env.mutrace){ + if (env.umutex){ + struct mu_ctrl mu_ctrl = {true,env.mutex_detail,env.umutex,MUTEX_WATCHER+2}; + err = update_mu_ctrl_map(mu_ctrl); + if(err < 0) return err; + } + else{ + struct mu_ctrl mu_ctrl = 
{true,env.mutex_detail,env.umutex,MUTEX_WATCHER+env.mutex_detail}; + err = update_mu_ctrl_map(mu_ctrl); + if(err < 0) return err; + } + } + }else if(env.usemode == 2){ // deactivate mode + err = deactivate_mode(); + if(err<0){ + fprintf(stderr, "Failed to deactivate\n"); + return err; + } + }else if(env.usemode == 3){ // finish mode + const char *command = "pkill cpu_watcher"; + int status = system(command); + if (status == -1) { + perror("system"); + } + }else{ + // 输出help信息 + printf("Please enter the usage mode(activate/deactivate/finish) before selecting the function\n"); + argp_help(&argp, stderr, ARGP_HELP_LONG, argv[0]); + } + + return 0; +} diff --git a/MagicEyes/src/cpu_watcher/cpu_watcher.c b/MagicEyes/src/cpu_watcher/cpu_watcher.c new file mode 100644 index 000000000..e50e23ab2 --- /dev/null +++ b/MagicEyes/src/cpu_watcher/cpu_watcher.c @@ -0,0 +1,1236 @@ +// Copyright 2023 The LMP Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com


// NOTE(review): the header names of the next fifteen directives are missing
// in this copy of the file.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "cpu_watcher_helper.h"
#include "sar.skel.h"
#include "cs_delay.skel.h"
#include "sc_delay.skel.h"
#include "preempt.skel.h"
#include "schedule_delay.skel.h"
#include "mq_delay.skel.h"
#include "mutrace.skel.h"

typedef long long unsigned int u64;
typedef unsigned int u32;



/* Local definitions of list_head and msg_msg. */
struct list_head {
    struct list_head *next;
    struct list_head *prev;
};
struct msg_msg {
    struct list_head m_list;
    long int m_type;
    size_t m_ts;
    struct msg_msgseg *next;
    void *security;
};

/* Run-time options; the fields assigned by parse_arg() are set there. */
static struct env {
    int time;
    int period;
    bool enable_proc;
    bool SAR;
    bool CS_DELAY;
    bool SYSCALL_DELAY;
    bool PREEMPT;
    bool SCHEDULE_DELAY;
    bool MQ_DELAY;
    int freq;
    bool EWMA;
    int cycle;
    int MUTRACE;
} env = {
    .time = 0,
    .period = 1,
    .enable_proc = false,
    .SAR = false,
    .CS_DELAY = false,
    .SYSCALL_DELAY = false,
    .PREEMPT = false,
    .SCHEDULE_DELAY = false,
    .MQ_DELAY = false,
    .freq = 99,
    .EWMA = false,
    .cycle = 0,
    .MUTRACE = false,
};



/* One skeleton handle per BPF object. */
struct cs_delay_bpf *cs_skel;
struct sar_bpf *sar_skel;
struct sc_delay_bpf *sc_skel;
struct preempt_bpf *preempt_skel;
struct schedule_delay_bpf *sd_skel;
struct mq_delay_bpf *mq_skel;
struct mutrace_bpf *mu_skel;

/* Map file descriptors and user-space copies of control records. */
static int csmap_fd;
static int sarmap_fd;
struct sar_ctrl sar_ctrl= {};
static int scmap_fd;
static int preemptmap_fd;
static int schedulemap_fd;
struct schedule_ctrl sd_ctrl = {};
static int mqmap_fd;
static int mumap_fd;
struct mu_ctrl mu_ctrl = {};

//static int prev_watcher = 0; // previously used tool; used to print the header of a different feature when switching features

/* Previous readings kept between reports. */
u64 softirq = 0;
u64 irqtime = 0;
u64 idle = 0;
u64 sched = 0;
u64 proc = 0;
unsigned long ktTime = 0;
unsigned long utTime = 0;
u64 tick_user = 0;


int sc_sum_time = 0 ;
int sc_max_time = 0 ;
int sc_min_time = SYSCALL_MIN_TIME ;
int sys_call_count = 0;
bool ifprint = 0;


int preempt_count = 0 ;
int sum_preemptTime = 0 ;
int preempt_start_print = 0 ;

/* Command-line option definitions */
const char argp_program_doc[] = "cpu watcher is in use ....\n";
static const struct argp_option opts[] = {
    { "time", 't', "TIME-SEC", 0, "Max Running Time(0 for infinite)" },
    { "period", 'i', "INTERVAL", 0, "Period interval in seconds" },
    {"libbpf_sar", 's', 0, 0, "Print sar_info (the data of cpu)" },
    {"cs_delay", 'c', 0, 0, "Print cs_delay (the data of cpu)" },
    {"syscall_delay", 'S', 0, 0, "Print syscall_delay (the data of syscall)" },
    {"preempt_time", 'p', 0, 0, "Print preempt_time (the data of preempt_schedule)" },
    {"schedule_delay", 'd', 0, 0, "Print schedule_delay (the data of cpu)" },
    {"mq_delay", 'm', 0, 0, "Print mq_delay(the data of proc)" },
    {"mutrace", 'x', 0, 0, "Print mutrace data(the data of cpu)" },
    {"ewma", 'E',0,0,"dynamic filte the data"},
    {"cycle", 'T',"CYCLE",0,"Periods of the ewma"},
    { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
    { 0 },
};

/*
 * argp callback: records each recognised option in `env`.
 * -t additionally arms alarm() when the parsed value is non-zero.
 * Returns 0 for handled keys and ARGP_ERR_UNKNOWN for everything else.
 */
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
    switch (key) {
    case 't':
        env.time = strtol(arg, NULL, 10);
        if (env.time) alarm(env.time);
        break;
    case 'i':
        env.period = strtol(arg, NULL, 10);
        break;
    case 's':
        env.SAR = true;
        break;
    case 'c':
        env.CS_DELAY = true;
        break;
    case 'S':
        env.SYSCALL_DELAY = true;
        break;
    case 'p':
        env.PREEMPT = true;
        break;
    case 'd':
        env.SCHEDULE_DELAY = true;
        break;
    case 'm':
        env.MQ_DELAY = true;
        break;
    case 'x':
        env.MUTRACE = true;
        break;
    case 'E':
        env.EWMA = true;
        break;
    case 'T':
        env.cycle = strtol(arg, NULL, 10);
        break;
    case 'h':
        argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
        break;
    default:
        return ARGP_ERR_UNKNOWN;
    }
    return 0;
}

static const struct argp argp = {
    .options = opts,
    .parser = parse_arg,
    .doc = argp_program_doc,
};



+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} + +static volatile bool exiting=false; +bool syscall_start_print = false; + +static void sig_handler(int sig) +{ + exiting = true; +} + +/*perf_event*/ +static int nr_cpus; +static int open_and_attach_perf_event(int freq, struct bpf_program *prog, + struct bpf_link *links[]) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .freq = 99, + .sample_period = freq, + .config = PERF_COUNT_SW_CPU_CLOCK, + }; + int i, fd; + for (i = 0; i < nr_cpus; i++) { + fd = syscall(__NR_perf_event_open, &attr, -1, i, -1, 0); + if (fd < 0) { + /* Ignore CPU that is offline */ + if (errno == ENODEV) + continue; + fprintf(stderr, "failed to init perf sampling: %s\n", + strerror(errno)); + return -1; + } + links[i] = bpf_program__attach_perf_event(prog, fd); + if (libbpf_get_error(links[i])) { + fprintf(stderr, "failed to attach perf event on cpu: " + "%d\n", i); + links[i] = NULL; + close(fd); + return -1; + } + } + return 0; +} + + +u64 find_ksym(const char* target_symbol) { + FILE *file = fopen("/proc/kallsyms", "r"); + if (file == NULL) { + perror("Failed to open /proc/kallsyms"); + return 1; + } + char symbol_name[99]; + u64 symbol_address = 0; + while (fscanf(file, "%llx %*c %s\n", &symbol_address, symbol_name) != EOF) { + if (strcmp(symbol_name, target_symbol) == 0) { + break; + } + } + fclose(file); + return symbol_address; +} + +static int print_all() +{ + int err,key=0; + err = bpf_map_lookup_elem(sarmap_fd, &key, &sar_ctrl); + if (err < 0) { + fprintf(stderr, "failed to lookup infos: %d\n", err); + return -1; + } + if(!sar_ctrl.sar_func) return 0; + if(sar_ctrl.prev_watcher == SAR_WACTHER + 1) { + printf(" time proc/s cswch/s runqlen irqTime/%% softirq/%% idle/%% kthread/%% sysc/%% utime/%% sys/%% \n"); + sar_ctrl.prev_watcher = SAR_WACTHER + 2; + err = bpf_map_update_elem(sarmap_fd, &key, &sar_ctrl, 0); + if(err < 0){ + 
fprintf(stderr, "Failed to update elem\n"); + } + }else if (sar_ctrl.prev_watcher == SAR_WACTHER){ + printf(" time proc/s cswch/s runqlen irqTime/us softirq/us idle/ms kthread/us sysc/ms utime/ms sys/ms \n"); + sar_ctrl.prev_watcher = SAR_WACTHER + 2; + err = bpf_map_update_elem(sarmap_fd, &key, &sar_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + } + } + int nprocs = get_nprocs(); + /*proc:*/ + int key_proc = 1; + int err_proc, fd_proc = bpf_map__fd(sar_skel->maps.countMap); + u64 total_forks; + err_proc = bpf_map_lookup_elem(fd_proc, &key_proc, &total_forks); + if (err_proc < 0) { + fprintf(stderr, "failed to lookup infos of total_forks: %d\n", err_proc); + return -1; + } + u64 __proc; + __proc = total_forks - proc; + proc = total_forks; + + /*cswch:*/ + int key_cswch = 0; + int err_cswch, fd_cswch = bpf_map__fd(sar_skel->maps.countMap); + u64 sched_total; + err_cswch = bpf_map_lookup_elem(fd_cswch, &key_cswch, &sched_total); + if (err_cswch < 0) { + fprintf(stderr, "failed to lookup infos of sched_total: %d\n", err_cswch); + return -1; + } + u64 __sched; + __sched = sched_total - sched; + sched = sched_total; + + /*runqlen:*/ + int key_runqlen = 0; + int err_runqlen, fd_runqlen = bpf_map__fd(sar_skel->maps.runqlen); + int runqlen; + err_runqlen = bpf_map_lookup_elem(fd_runqlen, &key_runqlen, &runqlen); + if (err_runqlen < 0) { + fprintf(stderr, "failed to lookup infos of runqlen: %d\n", err_runqlen); + return -1; + } + + /*irqtime:*/ + int key_irqtime = 0; + int err_irqtime, fd_irqtime = bpf_map__fd(sar_skel->maps.irq_Last_time); + u64 __irqtime; + __irqtime = irqtime; + err_irqtime = bpf_map_lookup_elem(fd_irqtime, &key_irqtime, &irqtime); + if (err_irqtime < 0) { + fprintf(stderr, "failed to lookup infos of irqtime: %d\n", err_irqtime); + return -1; + } + u64 dtairqtime = (irqtime - __irqtime); + + /*softirq:*/ + int key_softirq = 0; + int err_softirq, fd_softirq = bpf_map__fd(sar_skel->maps.softirqLastTime); + u64 __softirq; + 
__softirq = softirq; + err_softirq = bpf_map_lookup_elem(fd_softirq, &key_softirq, &softirq); + if (err_softirq < 0) { + fprintf(stderr, "failed to lookup infos of softirq: %d\n", err_softirq); + return -1; + } + u64 dtasoftirq = (softirq - __softirq); + + /*idle*/ + int key_idle = 0; + int err_idle, fd_idle = bpf_map__fd(sar_skel->maps.idleLastTime); + u64 __idle; + __idle = idle; + err_idle = bpf_map_lookup_elem(fd_idle, &key_idle, &idle); + if (err_idle < 0) { + fprintf(stderr, "failed to lookup infos of idle: %d\n", err_idle); + return -1; + } + u64 dtaidle = (idle - __idle); + + /*kthread*/ + int key_kthread = 0; + int err_kthread, fd_kthread = bpf_map__fd(sar_skel->maps.kt_LastTime); + unsigned long _ktTime=0; + _ktTime = ktTime; + err_kthread = bpf_map_lookup_elem(fd_kthread, &key_kthread,&ktTime); + if (err_kthread < 0) { + fprintf(stderr, "failed to lookup infos: %d\n", err_kthread); + return -1; + } + unsigned long dtaKT = ktTime -_ktTime; + + /*Uthread*/ + int key_uthread = 0; + int err_uthread, fd_uthread = bpf_map__fd(sar_skel->maps.ut_LastTime); + unsigned long _utTime=0; + _utTime = utTime; + err_uthread = bpf_map_lookup_elem(fd_uthread, &key_uthread,&utTime); + if (err_uthread < 0) { + fprintf(stderr, "failed to lookup infos: %d\n", err_uthread); + return -1; + } + unsigned long dtaUT = utTime -_utTime; + + /*sys*/ + int key_sys = 0; + int err_sys, fd_sys = bpf_map__fd(sar_skel->maps.tick_user); + u64 __tick_user =0 ;// 用于存储从映射中查找到的值 + __tick_user = tick_user; + //tick_user = 0; + err_sys = bpf_map_lookup_elem(fd_sys, &key_sys, &tick_user); + if (err_sys < 0) { + fprintf(stderr, "failed to lookup infos of sys: %d\n", err_sys); + return -1; + } + u64 dtaTickUser = tick_user - __tick_user; + u64 dtaUTRaw = dtaTickUser/(99.0000) * 1000000000; + u64 dtaSysc = abs(dtaUT - dtaUTRaw); + u64 dtaSys = dtaKT + dtaSysc ; + + if(env.enable_proc){ + time_t now = time(NULL); + struct tm *localTime = localtime(&now); + if (sar_ctrl.percent == true){ + 
printf("%02d:%02d:%02d %8llu %8llu %6d ",localTime->tm_hour, localTime->tm_min, localTime->tm_sec,__proc, __sched, runqlen); + // 大于百分之60的标红输出 + double values[7] = { + (double)dtairqtime / 10000000 / nprocs / env.period, + (double)dtasoftirq / 10000000 / nprocs / env.period, + (double)dtaidle / 10000000 / nprocs / env.period, + (double)dtaKT / 10000000 / nprocs / env.period, + (double)dtaSysc / 10000000 / nprocs / env.period, + (double)dtaUTRaw / 10000000 / nprocs / env.period, + (double)dtaSys / 10000000 / nprocs / env.period + }; + for (int i = 0; i < 7; i++) { + if (values[i] > 60.0) { + printf("\033[1;31m"); // 设置为红色 + } + printf("%10.2f ", values[i]); + printf("\033[0m"); // 重置为默认颜色 + } + printf("\n"); + }else{printf("%02d:%02d:%02d %8llu %8llu %6d %8llu %10llu %8llu %10lu %8llu %8llu %8llu\n", + localTime->tm_hour, localTime->tm_min, localTime->tm_sec, + __proc,__sched,runqlen,dtairqtime/1000,dtasoftirq/1000,dtaidle/1000000, + dtaKT/1000,dtaSysc / 1000000,dtaUTRaw/1000000,dtaSys / 1000000);} + } + else{ + env.enable_proc = true; + } + return 0; +} + +int count[25]={0};//定义一个count数组,用于汇总schedul()调度时间,以log2(时间间隔)为统计依据; +static int handle_event(void *ctx, void *data,unsigned long data_sz) +{ + const struct event *e = data; + printf("t1:%llu t2:%llu delay:%llu\n",e->t1,e->t2,e->delay); + int dly=(int)(e->delay),i=0; + while (dly > 1){ + dly /= 2; + i ++; + } + count[i]++; + return 0; +} +static int print_hstgram(int i,int max,int per_len) +{ + int cnt=count[i]; + if(per_len==1){ + while(cnt>0){ + printf("*"); + cnt--; + } + } + while(cnt-per_len>=0){ + printf("*"); + cnt-=per_len; + } + printf("\n"); + return per_len; +} +double my_pow(int n,int k)//实现pow函数 +{ + if (k > 0) + return n * my_pow(n, k - 1); + else if (k == 0) + return 1; + else + return 1.0 / my_pow(n, -k); +} +static void histogram() +{ + int log10[15]={0},max=0,per_len=1; + for(int i=0;i<10;i++){ + int tmp=count[i],cnt=0; + while (tmp >= 10){ + tmp /= 10; + cnt ++; + } + log10[cnt]++; + } + + 
for(int i=0;i<10;i++){//找log10里的最大值; + if(max0){ + per_len *=10 ; + max--; + } + + time_t now = time(NULL); + struct tm *localTime = localtime(&now); + printf("\nTime : %02d:%02d:%02d \n",localTime->tm_hour, localTime->tm_min, localTime->tm_sec); + printf("%-24s \t%-12s \t%-12s \n","cs_delay","Count","Distribution"); + printf("%d\t=>\t%-8d \t%-12d \t|",0,1,count[0]); + print_hstgram(0,max,per_len); + printf("%d\t=>\t%-8d \t%-12d \t|",2,3,count[1]); + print_hstgram(1,max,per_len); + for(int i=2;i<20;i++){ + printf("%d\t=>\t%-8d \t%-12d \t|",(int)my_pow(2,i),(int)my_pow(2,(i+1))-1,count[i]); + print_hstgram(i,max,per_len); + } + printf("per_len = %d\n",per_len); +} + + +struct ewma_info ewma_syscall_delay = {}; +static int syscall_delay_print(void *ctx, void *data,unsigned long data_sz) +{ + int err,key = 0; + struct sc_ctrl sc_ctrl ={}; + + err = bpf_map_lookup_elem(scmap_fd,&key,&sc_ctrl); + if (err < 0) { + fprintf(stderr, "failed to lookup infos: %d\n", err); + return -1; + } + if(!sc_ctrl.sc_func) return 0; + + const struct syscall_events *e = data; + if(e->delay<0||e->delay>1000000) return 0; + time_t now = time(NULL);// 获取当前时间 + struct tm *localTime = localtime(&now);// 将时间转换为本地时间结构 + + if(env.EWMA==0){ + printf("%02d:%02d:%02d %-8u %-15lld %-15lld\n", + localTime->tm_hour, localTime->tm_min, localTime->tm_sec, + e->pid,e->syscall_id,e->delay); + } + else{ + ewma_syscall_delay.cycle = env.cycle; + if(dynamic_filter(&ewma_syscall_delay,e->delay)){ + printf("%02d:%02d:%02d %-8u %-15lld %-15lld\n", + localTime->tm_hour, localTime->tm_min, localTime->tm_sec, + e->pid,e->syscall_id,e->delay); + } + } + + return 0; +} + + +//抢占时间输出 +static int preempt_print(void *ctx, void *data, unsigned long data_sz) +{ + const struct preempt_event *e = data; + printf("%-16s %-7d %-7d %-11llu\n", e->comm, e->prev_pid, e->next_pid, e->duration); + preempt_count++; + sum_preemptTime += e->duration; + return 0; +} + +static int attach(struct mutrace_bpf *mu_skel) +{ + int err; + 
ATTACH_UPROBE_CHECKED(mu_skel,pthread_mutex_lock,pthread_mutex_lock); + ATTACH_UPROBE_CHECKED(mu_skel,__pthread_mutex_trylock,__pthread_mutex_trylock); + ATTACH_URETPROBE_CHECKED(mu_skel,__pthread_mutex_trylock,ret_pthread_mutex_trylock); + ATTACH_UPROBE_CHECKED(mu_skel,pthread_mutex_unlock,pthread_mutex_unlock); + err = mutrace_bpf__attach(mu_skel); + CHECK_ERR(err, "Failed to attach BPF skeleton"); + return 0; + +} + + +//mutrace输出 +static int mutrace_print(void *ctx, void *data, unsigned long data_sz) { + int err,key = 0; + err = bpf_map_lookup_elem(mumap_fd,&key,&mu_ctrl); + if (err < 0) { + fprintf(stderr, "failed to lookup infos: %d\n", err); + return -1; + } + if(!mu_ctrl.mu_func) return 0; + if(mu_ctrl.prev_watcher == MUTEX_WATCHER ){ + printf("%s\n"," lock_ptr owner_pid owner_comm owner_prio contender_pid contender_comm contender_prio contender_count"); + mu_ctrl.prev_watcher = MUTEX_WATCHER + 9;//打印表头功能关 + err = bpf_map_update_elem(mumap_fd, &key, &mu_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + } + }else if (mu_ctrl.prev_watcher == MUTEX_WATCHER +1) { + printf("%s\n"," lock_ptr locked_total locked_max contended_total count last_owner last_owmer_name"); + mu_ctrl.prev_watcher = MUTEX_WATCHER + 9;//打印表头功能关 + err = bpf_map_update_elem(mumap_fd, &key, &mu_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + } + }else if (mu_ctrl.prev_watcher == MUTEX_WATCHER +2) { + printf("%s\n"," lock_ptr locked_total locked_max contended_total count last_owner last_owmer_name"); + mu_ctrl.prev_watcher = MUTEX_WATCHER + 9;//打印表头功能关 + err = bpf_map_update_elem(mumap_fd, &key, &mu_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + } + } + if(!mu_ctrl.mutex_detail&& (!mu_ctrl.umutex)){ + const struct mutex_contention_event *e = data; + if (e->owner_pid == 0 || e->contender_pid == 0||e->owner_pid == 1) { + return 0; + } + // 增加锁争用次数 + increment_lock_count(e->ptr); + uint64_t contention_count = 
get_lock_count(e->ptr); + printf("%15llu %15d %15s %15d %15d %15s %15d %15ld\n", e->ptr, e->owner_pid, e->owner_name, e->owner_prio,e->contender_pid, e->contender_name, e->contender_prio,contention_count); + } + return 0; +} + +static int kmutex_detail() { + int fd = bpf_map__fd(mu_skel->maps.kmutex_info_map); + u64 key, next_key; + struct mutex_info info; + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + int err = bpf_map_lookup_elem(fd, &next_key, &info); + if (err == 0 && info.contended_total != 0) { // 添加过滤条件 + printf(" %15llu %15lluns %15lluns %15lluns %15d %15d %20s\n", + next_key, info.locked_total, info.locked_max, info.contended_total, info.count, info.last_owner, info.last_name); + } + key = next_key; + } + return 0; +} + +static int umutex_detail() { + int fd = bpf_map__fd(mu_skel->maps.umutex_info_map); + u64 key, next_key; + struct mutex_info info; + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + int err = bpf_map_lookup_elem(fd, &next_key, &info); + if (err == 0 && info.contended_total != 0) { // 添加过滤条件 + printf(" %15llu %15llums %15llums %15llums %15d %15d %20s\n", + next_key, info.locked_total/1000000, info.locked_max/1000000, info.contended_total/1000000, info.count, info.last_owner, info.last_name); + } + key = next_key; + } + return 0; +} + +static int schedule_print() +{ + int err,key = 0; + err = bpf_map_lookup_elem(schedulemap_fd,&key,&sd_ctrl); + if (err < 0) { + fprintf(stderr, "failed to lookup infos: %d\n", err); + return -1; + } + if(!sd_ctrl.schedule_func) return 0; + + if(sd_ctrl.prev_watcher == SCHEDULE_WACTHER ){ + printf("%-8s %s\n", " TIME ", "avg_delay/μs max_delay/μs max_proc_name min_delay/μs min_proc_name"); + sd_ctrl.prev_watcher = SCHEDULE_WACTHER + 9;//打印表头功能关 + err = bpf_map_update_elem(schedulemap_fd, &key, &sd_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + } + } + else if(sd_ctrl.prev_watcher == SCHEDULE_WACTHER +1){ + // printf("sd_ctrl.prev_watcher = 
%d\n",sd_ctrl.prev_watcher); + printf("调度延时大于%dms的进程:\n",sd_ctrl.min_us/1000); + printf("%s\n","pid COMM schedule_delay/us"); + sd_ctrl.prev_watcher = SCHEDULE_WACTHER + 9;//打印表头功能关. + err = bpf_map_update_elem(schedulemap_fd, &key, &sd_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + } + } + + if(!sd_ctrl.min_us_set){ + struct sum_schedule info; + int err, fd = bpf_map__fd(sd_skel->maps.sys_schedule); + time_t now = time(NULL); + struct tm *localTime = localtime(&now); + int hour = localTime->tm_hour; + int min = localTime->tm_min; + int sec = localTime->tm_sec; + unsigned long long avg_delay; + err = bpf_map_lookup_elem(fd, &key, &info); + if (err < 0) { + fprintf(stderr, "failed to lookup infos: %d\n", err); + return -1; + } + avg_delay = info.sum_delay / info.sum_count; + if (!ifprint) { + ifprint=1; + }else{ + printf("%02d:%02d:%02d %-15lf %-15lf %10s %15lf %15s\n", + hour, min, sec, avg_delay / 1000.0, info.max_delay / 1000.0,info.proc_name_max,info.min_delay / 1000.0,info.proc_name_min); + } + } + else{ + struct proc_schedule info; + struct proc_id id_key; + struct proc_history prev_info; + int key = 0; + int err, fd1 = bpf_map__fd(sd_skel->maps.threshold_schedule),fd2 = bpf_map__fd(sd_skel->maps.proc_histories); + err = bpf_map_lookup_elem(fd1, &key, &info); + if (err < 0) { + fprintf(stderr, "failed to lookup infos: %d\n", err); + return -1; + } + if (info.delay / 1000 > sd_ctrl.min_us&&info.id.pid!=0) { + id_key.pid = info.id.pid; + id_key.cpu_id = info.id.cpu_id; + err = bpf_map_lookup_elem(fd2, &id_key, &prev_info); + if (err < 0) { + fprintf(stderr, "Failed to lookup proc_histories with PID %d and CPU ID %d: %d\n", id_key.pid, id_key.cpu_id, err); + return -1; + } + if (!entry_exists(info.id.pid, info.proc_name, info.delay / 1000)) { + printf("%-10d %-16s %15lld", info.id.pid, info.proc_name, info.delay / 1000); + add_entry(info.id.pid, info.proc_name, info.delay / 1000); + for (int i = 0; i < 2; i++) { + if 
(prev_info.last[i].pid != 0) { + printf(" Previous Process %d: PID=%-10d Name=%-16s ", i+1, prev_info.last[i].pid, prev_info.last[i].comm); + } + } + printf("\n"); + } + + } + } + + return 0; +} + + +static int mq_event(void *ctx, void *data,unsigned long data_sz) +{ + time_t now = time(NULL);// 获取当前时间 + struct tm *localTime = localtime(&now);// 将时间转换为本地时间结构 + const struct mq_events *e = data; + float send_delay,rcv_delay,delay; + if(!e->send_enter_time || !e->send_exit_time || !e->rcv_enter_time || !e->rcv_exit_time) { + printf("erro!\n"); + return 0; + } + send_delay = (e->send_exit_time - e->send_enter_time)/1000000.0; + rcv_delay = (e->rcv_exit_time - e->rcv_enter_time)/1000000.0; + if(e->send_enter_time < e->rcv_enter_time){ + delay = (e->rcv_exit_time - e->send_enter_time)/1000000.0; + }else{ + delay = (e->rcv_exit_time - e->send_enter_time)/1000000.0 + send_delay + rcv_delay; + } + printf("%02d:%02d:%02d %-8u %-8u %-8u \t%-16llu %-16llu %-16llu %-16llu\t%-15.5f %-15.5f %-15.5f\n", + localTime->tm_hour, localTime->tm_min, localTime->tm_sec, + e->mqdes,e->send_pid,e->rcv_pid, + e->send_enter_time,e->send_exit_time,e->rcv_enter_time,e->rcv_exit_time, + send_delay,rcv_delay,delay); + + return 0; +} + + +int main(int argc, char **argv) +{ + struct ring_buffer *rb = NULL; + struct bpf_map *cs_ctrl_map = NULL; + struct bpf_map *sar_ctrl_map = NULL; + struct bpf_map *sc_ctrl_map = NULL; + struct bpf_map *preempt_ctrl_map = NULL; + struct bpf_map *schedule_ctrl_map = NULL; + struct bpf_map *mq_ctrl_map = NULL; + struct bpf_map *mu_ctrl_map = NULL; + int key = 0; + int err; + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + const char* symbol_name = "total_forks"; + struct bpf_link *links[MAX_CPU_NR] = {}; + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + libbpf_set_print(libbpf_print_fn); + /* Cleaner handling of Ctrl-C */ + signal(SIGINT, sig_handler); + signal(SIGTERM, sig_handler); + + nr_cpus = libbpf_num_possible_cpus(); + if (nr_cpus 
< 0) { + fprintf(stderr, "failed to get # of possible cpus: '%s'!\n", + strerror(-nr_cpus)); + return 1; + } + if (nr_cpus > MAX_CPU_NR) { + fprintf(stderr, "the number of cpu cores is too big, please " + "increase MAX_CPU_NR's value and recompile"); + return 1; + } + + + if (env.CS_DELAY) + { + /* Load and verify BPF application */ + cs_skel = cs_delay_bpf__open(); + if (!cs_skel) + { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return 1; + } + /* Load & verify BPF programs */ + err = cs_delay_bpf__load(cs_skel); + if (err) + { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto cs_delay_cleanup; + } + + err = common_pin_map(&cs_ctrl_map,cs_skel->obj,"cs_ctrl_map",cs_ctrl_path); + if(err < 0){ + goto cs_delay_cleanup; + } + csmap_fd = bpf_map__fd(cs_ctrl_map); + struct cs_ctrl init_value = {false,CS_WACTHER}; + err = bpf_map_update_elem(csmap_fd, &key, &init_value, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + goto cs_delay_cleanup; + } + + /* Attach tracepoints */ + err = cs_delay_bpf__attach(cs_skel); + if (err) + { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto cs_delay_cleanup; + } + rb = ring_buffer__new(bpf_map__fd(cs_skel->maps.rb), handle_event, NULL, NULL); //ring_buffer__new() API,允许在不使用额外选项数据结构下指定回调 + if (!rb) { + err = -1; + fprintf(stderr, "Failed to create ring buffer\n"); + goto cs_delay_cleanup; + } + }else if (env.PREEMPT) { + preempt_skel = preempt_bpf__open(); + if (!preempt_skel) { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return 1; + } + + err = preempt_bpf__load(preempt_skel); + if (err) { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto preempt_cleanup; + } + + err = common_pin_map(&preempt_ctrl_map,preempt_skel->obj,"preempt_ctrl_map",preempt_ctrl_path); + if(err < 0){ + goto preempt_cleanup; + } + preemptmap_fd = bpf_map__fd(preempt_ctrl_map); + struct preempt_ctrl init_value = {false,PREEMPT_WACTHER}; + err = 
bpf_map_update_elem(preemptmap_fd, &key, &init_value, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + goto preempt_cleanup; + } + err = preempt_bpf__attach(preempt_skel); + if (err) { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto preempt_cleanup; + } + + rb = ring_buffer__new(bpf_map__fd(preempt_skel->maps.rb), preempt_print, NULL, NULL); + if (!rb) { + err = -1; + fprintf(stderr, "Failed to create ring buffer\n"); + goto preempt_cleanup; + } + }else if (env.SYSCALL_DELAY){ + /* Load and verify BPF application */ + sc_skel = sc_delay_bpf__open(); + if (!sc_skel) + { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return 1; + } + /* Load & verify BPF programs */ + err = sc_delay_bpf__load(sc_skel); + if (err) + { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto sc_delay_cleanup; + } + err = common_pin_map(&sc_ctrl_map,sc_skel->obj,"sc_ctrl_map",sc_ctrl_path); + if(err < 0){ + goto sc_delay_cleanup; + } + scmap_fd = bpf_map__fd(sc_ctrl_map); + struct sc_ctrl init_value = {false,SC_WACTHER}; + err = bpf_map_update_elem(scmap_fd, &key, &init_value, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + goto sc_delay_cleanup; + } + /* Attach tracepoints */ + err = sc_delay_bpf__attach(sc_skel); + if (err) + { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto sc_delay_cleanup; + } + printf("%-8s %-8s %-15s %-15s\n","Time","Pid","syscall_id","delay/ms"); + rb = ring_buffer__new(bpf_map__fd(sc_skel->maps.rb), syscall_delay_print, NULL, NULL); //ring_buffer__new() API,允许在不使用额外选项数据结构下指定回调 + if (!rb) { + err = -1; + fprintf(stderr, "Failed to create ring buffer\n"); + goto sc_delay_cleanup; + } + + + }else if(env.SCHEDULE_DELAY){ + + + sd_skel = schedule_delay_bpf__open(); + if (!sd_skel) { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return 1; + } + err = schedule_delay_bpf__load(sd_skel); + if (err) { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + 
goto schedule_cleanup; + } + err = common_pin_map(&schedule_ctrl_map,sd_skel->obj,"schedule_ctrl_map",schedule_ctrl_path); + if(err < 0){ + goto schedule_cleanup; + } + schedulemap_fd = bpf_map__fd(schedule_ctrl_map); + struct schedule_ctrl init_value = {false,false,10000,SCHEDULE_WACTHER}; + + err = bpf_map_update_elem(schedulemap_fd, &key, &init_value, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + goto schedule_cleanup; + } + err = schedule_delay_bpf__attach(sd_skel); + if (err) { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto schedule_cleanup; + } + }else if (env.SAR){ + /* Load and verify BPF application */ + sar_skel = sar_bpf__open(); + if (!sar_skel) + { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return 1; + } + sar_skel->rodata->forks_addr = (u64)find_ksym(symbol_name); + /* Load & verify BPF programs */ + err = sar_bpf__load(sar_skel); + if (err) + { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto sar_cleanup; + } + + /*perf_event加载*/ + err = open_and_attach_perf_event(env.freq, sar_skel->progs.tick_update, links); + if (err) + goto sar_cleanup; + + err = common_pin_map(&sar_ctrl_map,sar_skel->obj,"sar_ctrl_map",sar_ctrl_path); + if(err < 0){ + goto sar_cleanup; + } + sarmap_fd = bpf_map__fd(sar_ctrl_map); + struct sar_ctrl init_value = {false,false,SAR_WACTHER}; + err = bpf_map_update_elem(sarmap_fd, &key, &init_value, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + goto sar_cleanup; + } + + err = sar_bpf__attach(sar_skel); + if (err) + { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto sar_cleanup; + } + }else if(env.MQ_DELAY){ + /* Load and verify BPF application */ + mq_skel = mq_delay_bpf__open(); + if (!mq_skel) + { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return 1; + } + /* Load & verify BPF programs */ + err = mq_delay_bpf__load(mq_skel); + if (err) + { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + 
goto mq_delay_cleanup; + } + + err = common_pin_map(&mq_ctrl_map,mq_skel->obj,"mq_ctrl_map",mq_ctrl_path); + if(err < 0){ + goto mq_delay_cleanup; + } + mqmap_fd = bpf_map__fd(mq_ctrl_map); + struct mq_ctrl init_value = {false,MQ_WACTHER}; + err = bpf_map_update_elem(mqmap_fd, &key, &init_value, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + goto mq_delay_cleanup; + } + + /* Attach tracepoints */ + err = mq_delay_bpf__attach(mq_skel); + if (err) + { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto mq_delay_cleanup; + } + printf("%-8s %-8s %-8s %-8s \t%-16s %-16s %-16s %-16s\t%-15s %-15s %-15s\n","Time","Mqdes","SND_PID","RCV_PID","SND_Enter","SND_EXit","RCV_Enter","RCV_EXit","SND_Delay/ms","RCV_Delay/ms","Delay/ms"); + rb = ring_buffer__new(bpf_map__fd(mq_skel->maps.rb), mq_event, NULL, NULL); //ring_buffer__new() API,允许在不使用额外选项数据结构下指定回调 + if (!rb) { + err = -1; + fprintf(stderr, "Failed to create ring buffer\n"); + goto mq_delay_cleanup; + } + }else if (env.MUTRACE) { + mu_skel = mutrace_bpf__open(); + if (!mu_skel) { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return 1; + } + + err = mutrace_bpf__load(mu_skel); + if (err) { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto mutrace_cleanup; + } + err = common_pin_map(&mu_ctrl_map,mu_skel->obj,"mu_ctrl_map",mu_ctrl_path); + if(err < 0){ + goto mutrace_cleanup; + } + mumap_fd = bpf_map__fd(mu_ctrl_map); + struct mu_ctrl init_value = {false,false,false,MUTEX_WATCHER}; + + err = bpf_map_update_elem(mumap_fd, &key, &init_value, 0); + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + goto mutrace_cleanup; + } + //ctrl + if(err < 0){ + goto mutrace_cleanup; + } + //ctrl + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + goto mutrace_cleanup; + } + err = attach(mu_skel); + if (err) { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto mutrace_cleanup; + } + + rb = ring_buffer__new(bpf_map__fd(mu_skel->maps.rb), 
mutrace_print, NULL, NULL); + if (!rb) { + err = -1; + fprintf(stderr, "Failed to create ring buffer\n"); + goto mutrace_cleanup; + } + } + while (!exiting) { + if(env.SAR){ + sleep(env.period); + err = print_all(); + if (err == -EINTR) { + err = 0; + break; + } + if (err < 0) { + printf("Error polling perf buffer: %d\n", err); + break; + } + } + else if(env.CS_DELAY){ + sleep(1); + err = ring_buffer__poll(rb, 1000 /* timeout, s */); + if (err == -EINTR) { + err = 0; + break; + } + if (err < 0) { + printf("Error polling perf buffer: %d\n", err); + break; + } + histogram(); + } + else if(env.SYSCALL_DELAY){ + err = ring_buffer__poll(rb, 100 /* timeout, ms */); //ring_buffer__poll(),轮询打开ringbuf缓冲区。如果有事件,handle_event函数会执行 + /* Ctrl-C will cause -EINTR */ + if (err == -EINTR) { + err = 0; + break; + } + if (err < 0) { + printf("Error polling perf buffer: %d\n", err); + break; + } + // time_t now = time(NULL);// 获取当前时间 + // struct tm *localTime = localtime(&now);// 将时间转换为本地时间结构 + // printf("\n\nTime: %02d:%02d:%02d\n",localTime->tm_hour, localTime->tm_min, localTime->tm_sec); + // printf("----------------------------------------------------------------------------------------------------------\n"); + // sleep(1); + } + else if (env.PREEMPT) { + err = ring_buffer__poll(rb, 100 /* timeout, ms */); + if (err == -EINTR) { + err = 0; + break; + } + if (err < 0) { + printf("Error polling perf buffer: %d\n", err); + break; + } + time_t now = time(NULL); + struct tm *localTime = localtime(&now); + if (!preempt_start_print) { + preempt_start_print = 1; + } else { + printf("----------------------------------------------------------------------------------------------------------\n"); + printf("\nAverage_preempt_Time: %8d ns\n", sum_preemptTime / preempt_count); + } + printf("\nTime: %02d:%02d:%02d\n", localTime->tm_hour, localTime->tm_min, localTime->tm_sec); + printf("%-12s %-8s %-8s %11s\n", "COMM", "prev_pid", "next_pid", "duration_ns"); + preempt_count = 0; + sum_preemptTime 
= 0; + sleep(2); + } + else if (env.SCHEDULE_DELAY){ + err = schedule_print(); + if (err == -EINTR) { + err = 0; + break; + } + if (err < 0) { + break; + } + if(env.SCHEDULE_DELAY&&!sd_ctrl.min_us_set){ + sleep(1); + } + } + else if(env.MQ_DELAY){ + err = ring_buffer__poll(rb, 1000 /* timeout, s */); + if (err == -EINTR) { + err = 0; + break; + } + if (err < 0) { + printf("Error polling perf buffer: %d\n", err); + break; + } + } + else if (env.MUTRACE) { + err = ring_buffer__poll(rb, 100 /* timeout, ms */); + if (err == -EINTR) { + err = 0; + break; + } + if (err < 0) { + printf("Error polling perf buffer: %d\n", err); + break; + } + if(env.MUTRACE&&mu_ctrl.mutex_detail){ + err = kmutex_detail(); + sleep(1); + printf("-------------------------------------------------------------\n"); + }else if(env.MUTRACE&&mu_ctrl.umutex){ + err = umutex_detail(); + sleep(1); + printf("-------------------------------------------------------------\n"); + } + + } + else { + printf("正在开发中......\n-c 打印cs_delay:\t对内核函数schedule()的执行时长进行测试;\n-s sar工具;\n-y 打印sc_delay:\t系统调用运行延迟进行检测; \n-p 打印preempt_time:\t对抢占调度时间输出;\n"); + break; + } + } + +cs_delay_cleanup: + bpf_map__unpin(cs_ctrl_map, cs_ctrl_path); + ring_buffer__free(rb); + cs_delay_bpf__destroy(cs_skel); + return err < 0 ? -err : 0; + +sar_cleanup: + bpf_map__unpin(sar_ctrl_map, sar_ctrl_path); + sar_bpf__destroy(sar_skel); + return err < 0 ? -err : 0; + +sc_delay_cleanup: + bpf_map__unpin(sc_ctrl_map, sc_ctrl_path); + ring_buffer__free(rb); + sc_delay_bpf__destroy(sc_skel); + return err < 0 ? -err : 0; + +preempt_cleanup: + bpf_map__unpin(preempt_ctrl_map, preempt_ctrl_path); + ring_buffer__free(rb); + preempt_bpf__destroy(preempt_skel); + return err < 0 ? -err : 0; + +schedule_cleanup: + bpf_map__unpin(schedule_ctrl_map, schedule_ctrl_path); + schedule_delay_bpf__destroy(sd_skel); + return err < 0 ? 
-err : 0; + +mq_delay_cleanup: + bpf_map__unpin(mq_ctrl_map, mq_ctrl_path); + ring_buffer__free(rb); + mq_delay_bpf__destroy(mq_skel); + return err < 0 ? -err : 0; + +mutrace_cleanup: + ring_buffer__free(rb); + mutrace_bpf__destroy(mu_skel); + return err < 0 ? -err : 0; +} diff --git a/MagicEyes/src/cpu_watcher/docs/cpu_wacther_vis_guide.md b/MagicEyes/src/cpu_watcher/docs/cpu_wacther_vis_guide.md new file mode 100644 index 000000000..a20f821b9 --- /dev/null +++ b/MagicEyes/src/cpu_watcher/docs/cpu_wacther_vis_guide.md @@ -0,0 +1,168 @@ +# cpu_wacther的可视化 + +## 1.配置环境 + +在使用cpu_watcher可视化之前,请先配置docker、go的环境,具体配置方法可参考: + +### 1.1 docker + +先参考该链接进行docker-desktop的安装: + +* [在 Ubuntu 上安装 Docker Desktop |Docker 文档](https://docs.docker.com/desktop/install/ubuntu/#install-docker-desktop) + +在准备启动docker-desktop时,可能遇到打不开docker-desktop的情况,如下所示: + +![image1](image/image1.png) + +这是因为虚拟机暂时不支持虚拟化,可以先关闭虚拟机,重新编辑虚拟机设置,开启虚拟化引擎的两个选项,再开机配置kvm; + +* [在 Linux 上安装 Docker Desktop |Docker 文档](https://docs.docker.com/desktop/install/linux-install/) + +![image2](image/image2.png) + +### 1.2 go环境: + +本可视化功能对go的版本有要求,请安装go1.19+版本,具体安装流程可参考如下链接: + +* [go:快速升级Go版本,我几分钟就解决了_go 升级版本-CSDN博客](https://blog.csdn.net/m0_37482190/article/details/128673828) + +## 2.使用cpuwatcher 可视化 + +* 首先先进入lmp目录下的lmp/eBPF_Supermarket/CPU_Subsystem/cpu_watcher文件夹 + + ```BASH + cd lmp/eBPF_Supermarket/CPU_Subsystem/cpu_watcher + ``` + + 在该目录下 进行make编译 + + ```bash + make -j 20 + ``` + +* 在lmp目录下的eBPF_Visualization/eBPF_prometheus文件夹下 + +* 执行`make`指令,编译可视化的go语言工具 + + 在执行make指令时,如果出现如下报错,是因为go包管理代理网址无法访问`proxy.golang.org` + + ```bash + go: golang.org/x/exp@v0.0.0-20190731235908-ec7cb31e5a56: Get "https://proxy.golang.org/golang.org/x/exp/@v/v0.0.0-20190731235908-ec7cb31e5a56.mod": dial tcp 172.217.160.113:443: i/o timeout + ``` + + 只需要换一个国内能访问的2代理地址即可 + + ```bash + go env -w GOPROXY=https://goproxy.cn + ``` + +* 执行`make start_service`指令,配置下载docker镜像并启动grafana和prometheus服务 + +* 执行如下指令开始采集数据以及相关处理: + + ```bash + 
./data-visual collect /home/zhang/lmp/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher -s + ``` + +* 在网页打开网址:http://192.168.159.128:8090/metrics 此处为`localhost:8090/metrics`,便可以看到暴露在http网页中的数据; + + ![image3](image/image3.png) + +* 在网页打开网址:http://192.168.159.128:3000/ 即可进入grafana服务,使用初始密码登录(user:admin pswd: admin)进入管理界面: + + - 点击【Home-Connection-Add new connection】,选择Prometheus,建立与Prometheus服务器的连接: + + ![image4](image/image4.png) + + 这个172.17.0.1表示docker0网桥的 IPv4 地址。在 Docker 中,通常会将docker0的第一个 IP 地址分配给Docker主机自身。因此,172.17.0.1是 Docker主机上Docker守护进程的 IP 地址,所以在Grafana数据源这块设置成[http://172.17.0.1:9090](http://172.17.0.1:9090/) ,然后点击下面的【Save & test】按钮 + +- 进入可视化配置界面: + + ![image4.5](image/image4.5.png) + ![image5](image/image5.png) + +- 在下方处进行如图所示的配置,点击Run queries即可以可视化的方式监控avg_delay字段的数据: + + ![image6](image/image6.png) + +## 3.cpu_watcher各子工具可视化输出 + +本次可视化输出样例,是在对比系统正常运行和高负载运行时本工具的使用情况,使用stress加压工具对cpu进行持续5min的加压 + +```bash +stress --cpu 8 --timeout 300s +``` + +### 3.1 cpu_watcher -s + +**【irq Time】可视化输出结果** + +![image7](image/image7.png) + +**【softirq Time】可视化输出结果** + +![image8](image/image8.png) +**【cswch】可视化输出结果** + +![image9](image/image9.png) +**【proc】可视化输出结果** + +![image10](image/image10.png) + +**【Kthread】可视化输出结果** + +![image11](image/image11.png) + +**【idle】可视化输出结果** + +![image12](image/image12.png) + +**【sys】可视化输出结果** + +![image-20240411132742107](image/image-20240411132742107.png) + +**【sysc】可视化输出结果** + +![image-20240411132807253](image/image-20240411132807253.png) + +**【utime】可视化输出结果** + +![image-20240411132842070](image/image-20240411132842070.png) + +**【cpu处于不同状态对比图】可视化输出结果** + +![image-20240411132914396](image/image-20240411132914396.png) + +### 3.2 cpu_watcher -c + +**【cs_delay】可视化输出结果** + +![image-20240411133505763](image/image-20240411133505763.png) + +### 3.3 cpu_watcher -d + +**【schedule_delay】可视化输出结果** + +【max_delay】 + +![image-20240411133841698](image/image-20240411133841698.png) + +【avg_delay】 + 
+![image-20240411135159178](image/image-20240411135159178.png) + +【min_delay】 + +![image-20240411135335523](image/image-20240411135335523.png) + +### 3.4 cpu_watcher -p + +**【preempt】可视化输出结果** + +![image-20240411142421440](image/image-20240411142421440.png) + +### 3.5 cpu_watcher -S + +**【syscall_delay】可视化输出结果** + +![image-20240411144331888](image/image-20240411144331888.png) \ No newline at end of file diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411132742107.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411132742107.png new file mode 100644 index 000000000..dcfb6ad1d Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411132742107.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411132807253.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411132807253.png new file mode 100644 index 000000000..941b9006f Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411132807253.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411132842070.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411132842070.png new file mode 100644 index 000000000..1c32506ac Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411132842070.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411132914396.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411132914396.png new file mode 100644 index 000000000..7d156c222 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411132914396.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411133505763.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411133505763.png new file mode 100644 index 000000000..109b29e59 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411133505763.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411133841698.png 
b/MagicEyes/src/cpu_watcher/docs/image/image-20240411133841698.png new file mode 100644 index 000000000..84279fb8d Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411133841698.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411135159178.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411135159178.png new file mode 100644 index 000000000..ad7c56591 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411135159178.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411135335523.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411135335523.png new file mode 100644 index 000000000..5c46cc2f1 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411135335523.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411142421440.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411142421440.png new file mode 100644 index 000000000..b308e3268 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411142421440.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411144331888.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411144331888.png new file mode 100644 index 000000000..6e174c2f5 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411144331888.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411160509242.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411160509242.png new file mode 100644 index 000000000..bc7a81d97 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411160509242.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image-20240411170250839.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411170250839.png new file mode 100644 index 000000000..5b6bdc717 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411170250839.png differ diff --git 
a/MagicEyes/src/cpu_watcher/docs/image/image-20240411170311182.png b/MagicEyes/src/cpu_watcher/docs/image/image-20240411170311182.png new file mode 100644 index 000000000..a900381ba Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image-20240411170311182.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image1.png b/MagicEyes/src/cpu_watcher/docs/image/image1.png new file mode 100644 index 000000000..b0c02f418 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image1.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image10.png b/MagicEyes/src/cpu_watcher/docs/image/image10.png new file mode 100644 index 000000000..8c4a4bf93 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image10.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image11.png b/MagicEyes/src/cpu_watcher/docs/image/image11.png new file mode 100644 index 000000000..dc04b23f1 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image11.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image12.png b/MagicEyes/src/cpu_watcher/docs/image/image12.png new file mode 100644 index 000000000..bfb87edef Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image12.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image13.png b/MagicEyes/src/cpu_watcher/docs/image/image13.png new file mode 100644 index 000000000..566fd1c1e Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image13.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image2.png b/MagicEyes/src/cpu_watcher/docs/image/image2.png new file mode 100644 index 000000000..140a97b81 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image2.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image3.png b/MagicEyes/src/cpu_watcher/docs/image/image3.png new file mode 100644 index 000000000..52db1de49 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image3.png differ 
diff --git a/MagicEyes/src/cpu_watcher/docs/image/image4.5.png b/MagicEyes/src/cpu_watcher/docs/image/image4.5.png new file mode 100644 index 000000000..40aef3a11 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image4.5.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image4.png b/MagicEyes/src/cpu_watcher/docs/image/image4.png new file mode 100644 index 000000000..d186d225c Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image4.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image5.png b/MagicEyes/src/cpu_watcher/docs/image/image5.png new file mode 100644 index 000000000..48f6a4a74 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image5.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image6.png b/MagicEyes/src/cpu_watcher/docs/image/image6.png new file mode 100644 index 000000000..77baa7887 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image6.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image7.png b/MagicEyes/src/cpu_watcher/docs/image/image7.png new file mode 100644 index 000000000..cbf0a5915 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image7.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image8.png b/MagicEyes/src/cpu_watcher/docs/image/image8.png new file mode 100644 index 000000000..23c5d49fd Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image8.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image9.png b/MagicEyes/src/cpu_watcher/docs/image/image9.png new file mode 100644 index 000000000..e0b384f07 Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image9.png differ diff --git a/MagicEyes/src/cpu_watcher/docs/image/image_mq.png b/MagicEyes/src/cpu_watcher/docs/image/image_mq.png new file mode 100644 index 000000000..1dae1df8f Binary files /dev/null and b/MagicEyes/src/cpu_watcher/docs/image/image_mq.png differ diff --git 
a/MagicEyes/src/cpu_watcher/docs/libbpf_sar.md b/MagicEyes/src/cpu_watcher/docs/libbpf_sar.md new file mode 100644 index 000000000..4944fc264 --- /dev/null +++ b/MagicEyes/src/cpu_watcher/docs/libbpf_sar.md @@ -0,0 +1,81 @@ +# libbpf_sar功能介绍: + +libbpf_sar是cpu_watcher工具中的一个子工具,通过cpu_watcher -s使用; + +## 1.应用场景及意义 + +libbpf_sar是一个基于eBPF的按照指定时间间隔(默认为1s)来统计特定事件发生次数和特定事件占用CPU时间的工具。使用它可以帮助您查看事件发生速率和CPU资源利用率,并提供CPU负载画像以剖析负载的来源及其占用比例。 + +与传统工具相比,libbpf_sar可提供更为细致的指标,如: + +* 1.可把内核态时间剖析为内核线程执行时间和进程系统调用时间 + +* 2.部分linux发行版可能由于内核编译选项缺失而不能记录irq时间,本工具可以弥补这一缺陷,并且不需要对内核做出任何更改,可动态检测 + +* 3.可附加到指定进程上,对该进程占用的CPU资源进行实时监测 + +## 2.性能参数及观测意义 + +在 Linux 中,与 CPU 相关的状态可以分为很多类,如用户态、系统调用、硬中断以及软中断等,同时还有一些内核线程执行的状态,如 kthread,idle 进程。 + +同时,也有一些对 CPU 相当重要的事件,如新进程创建,进程切换计数,运行队列长度等,对这些细分事件进行准确的监测,有利于我们剖析出 CPU 负载根源,并定位出性能瓶颈。 + +libbpf_sar针对 CPU 上的关键事件所提供的具体性能参数及其含义如下表所示: + +libbpf_sar 实例采集信息及含义 + +| **性能指标** | **含义** | +| ------------ | -------------------------------- | +| Proc | 新创建的进程数 | +| Cswch | 进程切换计数 | +| runqlen | 运行队列长度 | +| irqTime | 硬中断时间 | +| Softirq | 软中断时间 | +| Idle | Idle 进程运行时间 | +| Sysc | 加上内核线程运行时间的系统态时间 | +| Utime | 用户态执行时间 | +| sys | 系统调用执行时间 | + +本实例采集到的信息将系统的 CPU 进行了精准分类,并且也统计了关键事件的触发频率,对于系统的性能分析有重要的意义 + +## 3.输出格式: + +```bash + time proc/s cswch/s irqTime/us softirq/us idle/ms kthread/us sysc/ms utime/ms sys/ms +15:55:43 48 1389 1646 8866 6811 3243 688 717 691 +15:55:44 31 1089 1587 7375 6759 1868 659 707 660 +15:55:45 47 1613 1685 8885 6792 3268 796 828 799 +15:55:46 0 2133 5938 7797 7643 8106 8 20 17 +15:55:47 1 3182 5128 14279 6644 4883 314 363 319 +15:55:48 0 1815 1773 11329 6753 4286 282 313 287 +15:55:49 31 1249 1605 9859 6752 4442 545 585 549 +15:55:50 47 1601 1712 11348 6765 6249 210 242 216 +15:55:51 0 1238 10591 12709 6802 13896 238 262 252 +15:55:52 0 1145 1658 10000 6863 4593 308 333 313 +15:55:53 0 1317 1587 9090 6798 4699 383 414 387 +15:55:54 31 1254 1531 9570 6755 4252 381 414 385 +15:55:55 47 1547 1624 10985 6769 6516 344 373 350 +15:55:56 0 1064 2187
9892 6851 4585 189 212 194 +``` + +* proc/s 列的数据表示 CPU 每秒新创建的进程数; +* cswch/s 列的数据表示 CPU 每秒进程切换的数量; +* runqlen 列的数据表示 CPU 运行队列的长度; +* irqTime/us 列的数据表示 CPU 处理硬中断的时间,以 us 计时; +* softirq/us 列的数据表示 CPU 每秒处理软中断的时间,以 us 计时; +* idle/ms 列的数据表示系统处于空闲态的时间; +* kthread/us 列的数据表示系统中内核线程执行的时间; + +* sysc/ms 表示系统中内核线程外加系统调用处理的总时间; +* utime/ms 表示进程在用户态执行的总时间; +* sys/ms 表示系统调用执行的总时间。 + +事件的统计方式是对每个CPU分开统计然后求和,因此有的项的占用时间可能超过1s。所有事件占用时间的和应当为1s * 总CPU核数。由于技术问题,输出的第一项可能偏差较大,可以不予理会。按Ctrl+C本程序将停止统计,并输出在程序运行的时间段内各表项的平均值,按照2s的采样频率显示各事件的CPU占用率。数据是带颜色的,蓝色表示CPU占比小于30%,绿色表示占比大于30%小于60%,红色表示占比大于60%。 + + + +## 4.数据可视化 + +![image-20240411160509242](image/image-20240411160509242.png) +![image-20240411170250839](image/image-20240411170250839.png) +![image-20240411170311182](image/image-20240411170311182.png) \ No newline at end of file diff --git a/MagicEyes/src/cpu_watcher/docs/mq_delay.md b/MagicEyes/src/cpu_watcher/docs/mq_delay.md new file mode 100644 index 000000000..cfcc1ba5b --- /dev/null +++ b/MagicEyes/src/cpu_watcher/docs/mq_delay.md @@ -0,0 +1,92 @@ +# mq_delay + +为了对进程间通过消息队列通信时,发送消息、接收消息以及处于等待状态所用时间进行监测,cpu_watcher工具增添mq_delay工具。 + +![image_mq](image/image_mq.png) + +以上是发送进程发送,接收进程接收的具体过程。本工具通过跟踪单个的消息块(struct msg_msg结构体)来监测发送时延、接收时延以及等待时延。 + +## 跟踪消息块过程: + +发送过程 + +* 用户程序将要发送的消息通过mq_send()函数或mq_timedsend()函数发送,mq_send/mq_timedsend函数调用mq_timedsend系统调用在内核实现具体的发送实现,此时将指向用户态消息缓冲区的指针u_msg_ptr传入内核态,此处我们第一次追踪到消息块。 +* 在mq_timedsend 系统调用中,会调用do_mq_timedsend()内核函数进行发送消息的操作,此处将u_msg_ptr指针作为传参传入do_mq_timedsend()函数; +* 在do_mq_timedsend()函数中,通过load_msg()函数将消息从用户空间加载到内核中,这里将u_msg_ptr指针作为传参; +* load_msg()函数中,通过copy_from_user()函数将u_msg_ptr指针指向的用户空间信息复制到分配的内核空间msg,并返回一个指向消息块所在内核空间的指针msg_ptr,此时我们便在内核中跟踪到了具体的消息块实体,后续操作都是围绕这个消息块指针展开的,包括接收程序也是对此指针进行copy_to_user操作; + +接收过程 + +* 用户程序通过mq_receive()或mq_timedreceive()函数,从消息队列中接收消息,mq_receive()或mq_timedreceive()函数调用mq_timedreceive系统调用在内核中实现具体的接收实现,此时将指向用户态缓冲区的指针u_msg_ptr传入内核,这里是我们本次跟踪最后一次遇到消息块。 +*
mq_timedreceive系统调用通过do_mq_timedreceive()函数找到要接收的消息块,并将其传入u_msg_ptr所指向的用户空间 +* do_mq_timedreceive()函数如果等到要接收的消息块,会通过store_msg()函数将消息块(发送时msg_ptr所指向的消息块)存储至u_msg_ptr所指向的用户空间。所以此时,我们在接收消息的内核处理函数中追踪到了具体的消息块。 + +此处还可拓展一些功能: + +* 对于发送消息块时,是否上等待队列,等待了多久? +* 对于接收消息块时,是否上等待队列,等待了多久? +* 对于处于非阻塞状态的进程,是否可以识别到,并及时统计出来? + +## 挂载点: + +发送过程: + +| 类型 | 名称 | +| --------- | -------------- | +| kprobe | do_mq_timedsend | +| kprobe | load_msg | +| kretprobe | load_msg | +| kretprobe | do_mq_timedsend | + +接收过程: + +| 类型 | 名称 | +| --------- | ----------------- | +| kprobe | do_mq_timedreceive | +| kprobe | store_msg | +| kretprobe | store_msg | +| kretprobe | do_mq_timedreceive | + +输出效果 + +```c +Time: 22:12:39 +----------------------------------------------------------------------------------------------------------------------- +Mqdes: 3 msg_len: 1152 msg_prio: 50 +SND_PID: 20945 SND_enter_time: 131725037824711 SND_exit_time: 131725037867085 +RCV_PID: 20984 RCV_enter_time: 131726555726321 RCV_exit_time: 131726555872719 +------------------------------------------------------------------------------- +SND_Delay/ms: 0.04 RCV_Delay/ms: 0.15 Delay/ms: 1518.04801 +----------------------------------------------------------------------------------------------------------------------- + + + +Time: 22:12:44 +----------------------------------------------------------------------------------------------------------------------- +Mqdes: 3 msg_len: 1152 msg_prio: 50 +SND_PID: 21007 SND_enter_time: 131730008219660 SND_exit_time: 131730008614901 +RCV_PID: 21035 RCV_enter_time: 131731465676396 RCV_exit_time: 131731465758821 +------------------------------------------------------------------------------- +SND_Delay/ms: 0.40 RCV_Delay/ms: 0.08 Delay/ms: 1457.53916 +----------------------------------------------------------------------------------------------------------------------- + + + +Time: 22:12:48
+----------------------------------------------------------------------------------------------------------------------- +Mqdes: 3 msg_len: 1152 msg_prio: 50 +SND_PID: 21069 SND_enter_time: 131733828139276 SND_exit_time: 131733828195905 +RCV_PID: 21098 RCV_enter_time: 131735705540405 RCV_exit_time: 131735705924036 +------------------------------------------------------------------------------- +SND_Delay/ms: 0.06 RCV_Delay/ms: 0.38 Delay/ms: 1877.78476 +----------------------------------------------------------------------------------------------------------------------- +``` + + + + + + + + + diff --git a/MagicEyes/src/cpu_watcher/docs/preempt_time.md b/MagicEyes/src/cpu_watcher/docs/preempt_time.md new file mode 100644 index 000000000..e35aaf291 --- /dev/null +++ b/MagicEyes/src/cpu_watcher/docs/preempt_time.md @@ -0,0 +1,44 @@ +## preempt_time工具介绍 + +​ preempt_time,统计每次系统中抢占调度所用的时间。 + +### 原理分析 + +​ 使用 btf raw tracepoint监控内核中的每次调度事件: + +```c +SEC("tp_btf/sched_switch") +``` + +​ btf raw tracepoint 跟常规 raw tracepoint 有一个 最主要的区别是: btf 版本可以直接在 ebpf 程序中访问内核内存, 不需要像常规 raw tracepoint 一样需要借助类似 `bpf_core_read` 或 `bpf_probe_read_kernel` 这样 的辅助函数才能访问内核内存。 + +```c +int BPF_PROG(sched_switch, bool preempt, struct task_struct *prev, struct task_struct *next) +``` + +​ 该事件为我们提供了关于抢占的参数preempt,我们可以通过判断preempt的值来决定是否记录本次调度信息。 + +​ 另一挂载点为kprobe:finish_task_switch,即本次调度切换完成进行收尾工作的函数,在此时通过ebpf map与之前记录的调度信息做差,即可得到本次抢占调度的时间: + +```c +SEC("kprobe/finish_task_switch") +``` + +### 输出效果 + +可以获取到抢占进程的`pid`与进程名,以及被抢占进程的`pid`,和本次抢占时间,单位纳秒 + +``` +COMM prev_pid next_pid duration_ns +node 14221 2589 3014 +kworker/u256:1 15144 13516 1277 +node 14221 2589 3115 +kworker/u256:1 15144 13516 1125 +kworker/u256:1 15144 13516 974 +node 14221 2589 2560 +kworker/u256:1 15144 13516 1132 +node 14221 2589 2717 +kworker/u256:1 15144 13516 1206 +kworker/u256:1 15144 13516 1131 +node 14221 2589 3355 +``` diff --git a/MagicEyes/src/cpu_watcher/docs/schedule_delay.md 
b/MagicEyes/src/cpu_watcher/docs/schedule_delay.md new file mode 100644 index 000000000..563cfec6b --- /dev/null +++ b/MagicEyes/src/cpu_watcher/docs/schedule_delay.md @@ -0,0 +1,57 @@ +## schedule_delay工具介绍 + +​ schedule_delay工具可以检测该系统当前的调度延迟。即从一个任务具备运行的条件,到真正执行(获得 CPU 的执行权)的这段时间。 + +​ 实时观测该指标可以帮助我们了解到当前操作系统的负载。 + +### 原理分析 + +​ 只需考虑,在何时一个任务会被加入运行队列等待运行。内核提供了两个函数实现这个功能: + +- 新建的进程通过调用`wake_up_new_task`,将新创建的任务加入runqueue等待调度。 +- 进程从睡眠状态被唤醒时触发,调用`ttwu_do_wakeup`函数,进入runqueue等待调度。 + +​ 关于这两个函数,内核提供了两个对应的`tracepoint`: + +| 内核函数 | 对应`tracepoint` | +| :--------------: | :--------------------: | +| wake_up_new_task | sched:sched_wakeup_new | +| ttwu_do_wakeup | sched:sched_wakeup | + +​ 在触发到这两个tracepoint的时候,记录这个进程的信息和进入运行队列的时间。 + +​ 除此之外,我们还应该考虑到,当一个进程**被迫离开cpu**时,其状态依然是`TASK_RUNNING`,所以在schedule时,我们还要做出判断,决定该进程是否要被记录。 + +| 内核函数 | 对应`tracepoint` | +| :------: | :----------------: | +| schedule | sched:sched_switch | + +​ 在触发到这个tracepoint时,记录此时即将要占用cpu的进程信息,通过ebpf map记录的进入运行队列的时间作差,即调度延迟。在这里还需要判断上一个进程是否要被记录。 + +```c + if(prev_state == TASK_RUNNING)//判断退出cpu进程的状态 +``` + +​ 最后要注意的是,为了避免map溢出,我们还需要在进程退出的时候,删除map中记录的数据。 + +| 内核函数 | 对应`tracepoint` | +| :------: | :----------------------: | +| do_exit | sched:sched_process_exit | + +### 输出效果 + +​ 我们可以检测到系统从加载ebpf程序到当前的平均、最大、最小调度时延: + +``` + TIME avg_delay/μs max_delay/μs min_delay/μs +17:31:28 35.005000 97.663000 9.399000 +17:31:29 326.518000 12618.465000 7.994000 +17:31:30 455.837000 217053.545000 6.462000 +17:31:31 422.582000 217053.545000 6.462000 +17:31:32 382.627000 217053.545000 6.462000 +17:31:33 360.499000 217053.545000 6.462000 +17:31:34 364.805000 217053.545000 6.462000 +17:31:35 362.039000 217053.545000 6.462000 +17:31:36 373.751000 217053.545000 6.462000 +``` + diff --git a/MagicEyes/src/cpu_watcher/grafana_cpu_watcher_dashboard.json b/MagicEyes/src/cpu_watcher/grafana_cpu_watcher_dashboard.json new file mode 100644 index 000000000..9177bf611 --- /dev/null +++ 
b/MagicEyes/src/cpu_watcher/grafana_cpu_watcher_dashboard.json @@ -0,0 +1,1177 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"avg_delay/us\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "1", + "legendFormat": 
"__auto", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"max_delay/us\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "1", + "legendFormat": "__auto", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"min_delay/μs\"}", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "1", + "legendFormat": "__auto", + "range": true, + "refId": "C", + "useBackend": false + } + ], + "title": "schedule_delay", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", 
+ "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"irqTime/us\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "irqTime", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"duration_ns\"}", + "fullMetaSearch": 
false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "preempt_delay", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"proc/s\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "1", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "procs", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"sys/ms\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "sys", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"softirq/us\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "softirqTime", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"utime/ms\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "utime", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ 
+ { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"idle/ms\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "1", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "idle", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"kthread/us\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + 
"title": "kthread", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"cswch/s\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "cswch", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 48 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ddlnxe6bsiha8c" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "bpf_metrics{bpf_out_data=\"delay/ms\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "1", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "sys_delay", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": {}, + "timezone": "browser", + "title": "cpu_watcher_vis", + "uid": "cdlnxoxcy4zr4b", + "version": 4, + "weekStart": "" +} \ No newline at end of file diff --git a/MagicEyes/src/cpu_watcher/include/cpu_watcher.h b/MagicEyes/src/cpu_watcher/include/cpu_watcher.h new file mode 100644 index 000000000..7ffb2b3fa --- /dev/null +++ b/MagicEyes/src/cpu_watcher/include/cpu_watcher.h @@ -0,0 +1,305 @@ +// Copyright 2023 The LMP Authors. 
/* Short aliases used by the structures below. */
typedef unsigned long long u64;
typedef unsigned int u32;
typedef __kernel_mqd_t mqd_t;
/* Defined empty, so a `__user` annotation expands to nothing in this header. */
#define __user
#define MAX_CPU_NR 128
#define TASK_COMM_LEN 20
#define SYSCALL_MIN_TIME 1E7
#define MAX_SYSCALL_COUNT 100
#define MAX_ENTRIES 102400 // map capacity

/*----------------------------------------------*/
/*        Macro definitions for map structs     */
/*----------------------------------------------*/
/// @brief Declare an eBPF array map with the given name and key/value types.
/// @param name name of the new map
/// @param type1 key type (only its size is used)
/// @param type2 value type (only its size is used)
/// @param MAX_ENTRIES map capacity; this parameter has the same spelling as
///        the MAX_ENTRIES constant above, and inside the macro body it is the
///        argument, not the constant.
#define BPF_ARRAY(name, type1, type2, MAX_ENTRIES) \
    struct { \
        __uint(type, BPF_MAP_TYPE_ARRAY); \
        __uint(key_size, sizeof(type1)); \
        __uint(value_size, sizeof(type2)); \
        __uint(max_entries, MAX_ENTRIES); \
    } name SEC(".maps")
/// @brief Declare an eBPF hash map with the given name and key/value types.
/// @param name name of the new hash map
/// @param type1 key type (only its size is used)
/// @param type2 value type (only its size is used)
/// @param MAX_ENTRIES hash map capacity
#define BPF_HASH(name, type1, type2, MAX_ENTRIES) \
    struct { \
        __uint(type, BPF_MAP_TYPE_HASH); \
        __uint(key_size, sizeof(type1)); \
        __uint(value_size, sizeof(type2)); \
        __uint(max_entries, MAX_ENTRIES); \
    } name SEC(".maps")
/// @brief Declare an eBPF per-CPU array map with the given name and key/value types.
/// @param name name of the new map
/// @param type1 key type (only its size is used)
/// @param type2 value type (only its size is used)
/// @param MAX_ENTRIES map capacity
#define BPF_PERCPU_ARRAY(name, type1, type2, MAX_ENTRIES) \
    struct { \
        __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); \
        __uint(key_size, sizeof(type1)); \
        __uint(value_size, sizeof(type2)); \
        __uint(max_entries, MAX_ENTRIES); \
    } name SEC(".maps")
/// @brief Declare an eBPF per-CPU hash map with the given name and key/value types.
/// @param name name of the new map
/// @param type1 key type (only its size is used)
/// @param type2 value type (only its size is used)
/// @param MAX_ENTRIES map capacity
#define BPF_PERCPU_HASH(name, type1, type2, MAX_ENTRIES) \
    struct { \
        __uint(type, BPF_MAP_TYPE_PERCPU_HASH); \
        __uint(key_size, sizeof(type1)); \
        __uint(value_size, sizeof(type2)); \
        __uint(max_entries, MAX_ENTRIES); \
    } name SEC(".maps")

/*----------------------------------------------*/
/*          cs_delay structure                  */
/*----------------------------------------------*/
#ifndef __CS_DELAY_H
#define __CS_DELAY_H
/* NOTE(review): presumably t1/t2 are the start/end timestamps and delay their
 * difference; the producer is not visible here, confirm against the BPF side. */
struct event {
    u64 t1;
    u64 t2;
    u64 delay;
};
#endif /* __CS_DELAY_H */

/*----------------------------------------------*/
/*          syscall_delay structures            */
/*----------------------------------------------*/
struct syscall_flags {
    u64 start_time;
    int syscall_id;
};

struct syscall_events { // one per process
    int pid, count;
    char comm[TASK_COMM_LEN];
    u64 delay;
    u64 syscall_id;
};
/*----------------------------------------------*/
/*          preempt_event structure             */
/*----------------------------------------------*/
struct preempt_event {
    pid_t prev_pid;
    pid_t next_pid;
    unsigned long long duration;
    char comm[TASK_COMM_LEN];
};
/*----------------------------------------------*/
/*       schedule_delay related structures      */
/*----------------------------------------------*/
// Identifies a distinct process
struct proc_id {
    int pid;
    int cpu_id;
};
// Scheduling information for that process
struct schedule_event {
    int pid;
    int count; // number of times scheduled
    unsigned long long enter_time;
};
// Scheduling information aggregated over the whole system
struct sum_schedule {
    unsigned long long sum_count;
    unsigned long long sum_delay;
    unsigned long long max_delay;
    unsigned long long min_delay;
    char proc_name_max[TASK_COMM_LEN];
    char proc_name_min[TASK_COMM_LEN];
};

struct proc_schedule {
    struct proc_id id;
    unsigned long long delay;
    char proc_name[TASK_COMM_LEN];
};

struct proc_info {
    pid_t pid;
    char comm[TASK_COMM_LEN];
};

struct proc_history {
    struct proc_info last[2]; // info about the last two scheduled processes
};

/*----------------------------------------------*/
/*          mutrace related structures          */
/*----------------------------------------------*/
struct mutex_info {
    u64 locked_total; // total time the lock was held
    u64 locked_max; // longest time the lock was held
    u64 contended_total; // total time the lock was contended
    int count; // total number of times the lock was contended
    pid_t last_owner; // ID of the thread that last held the lock
    char last_name[TASK_COMM_LEN];
    u64 acquire_time; // timestamp of each acquisition, kept for later calculations
    u64 ptr; // address
};

struct mutex_contention_event {
    u64 ptr; // lock address
    pid_t owner_pid; // pid of the holder
    pid_t contender_pid; // pid of the contender
    char contender_name[TASK_COMM_LEN];
    char owner_name[TASK_COMM_LEN];
    int owner_prio;
    int contender_prio;
};

struct trylock_info {
    void *__mutex;
    u64 start_time;
};

/*----------------------------------------------*/
/*          mq_delay related structures         */
/*----------------------------------------------*/
struct mq_events {
    int send_pid;
    int rcv_pid;
    mqd_t mqdes;
    size_t msg_len;
    unsigned int msg_prio;
    u64 send_enter_time;
    u64 send_exit_time;
    u64 rcv_enter_time;
    u64 rcv_exit_time;
};
struct send_events {
    int send_pid;
    u64 Key_msg_ptr;
    mqd_t mqdes;
    size_t msg_len;
    unsigned int msg_prio;
    const char *u_msg_ptr;
    const void *src;
    u64 send_enter_time;
    u64 send_exit_time;
};
struct rcv_events {
    int rcv_pid;
    u64 Key_msg_ptr;
    mqd_t mqdes;
    size_t msg_len;
    unsigned int msg_prio;
    const char *u_msg_ptr;
    const void *dest;
    u64 rcv_enter_time;
    u64 rcv_exit_time;
};
/*----------------------------------------------*/
/*          cswch_args structure                */
/*----------------------------------------------*/
/* NOTE(review): the field list looks like a context-switch tracepoint record
 * (note the 16-byte comm arrays, unlike TASK_COMM_LEN above) - confirm against
 * the tracepoint format before changing any field. */
struct cswch_args {
    u64 pad;
    char prev_comm[16];
    pid_t prev_pid;
    int prev_prio;
    long prev_state;
    char next_comm[16];
    pid_t next_pid;
    int next_prio;
};

/*----------------------------------------------*/
/*          softirq structure                   */
/*----------------------------------------------*/
struct __softirq_info {
    u64 pad;
    u32 vec;
};

/*----------------------------------------------*/
/*          hard IRQ structure                  */
/*----------------------------------------------*/
struct __irq_info {
    u64 pad;
    u32 irq;
};

/*----------------------------------------------*/
/*     structure needed for CPU idle time       */
/*----------------------------------------------*/
struct idleStruct {
    u64 pad;
    unsigned int state;
    unsigned int cpu_id;
};

/*----------------------------------------------*/
/*          control section                     */
/*----------------------------------------------*/
/* One control struct per tool; each carries an enable flag for the tool plus
 * prev_watcher. NOTE(review): prev_watcher presumably holds one of the
 * *_WACTHER / MUTEX_WATCHER identifiers - confirm against the loader. */
struct sar_ctrl{
    bool sar_func;
    bool percent;
    int prev_watcher;
};

struct cs_ctrl{
    bool cs_func;
    int prev_watcher;
};

struct sc_ctrl{
    bool sc_func;
    int prev_watcher;
};

struct preempt_ctrl{
    bool preempt_func;
    int prev_watcher;
};

struct schedule_ctrl{
    bool schedule_func;
    bool min_us_set;
    int min_us;
    int prev_watcher;
};

struct mq_ctrl{
    bool mq_func;
    int prev_watcher;
};

struct mu_ctrl{
    bool mu_func;
    bool mutex_detail;
    bool umutex;
    int prev_watcher;
};
+// You may obtain a copy of the License at +// +// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com +#ifndef CPU_WATCHER_HELPER_H +#define CPU_WATCHER_HELPER_H + +#include +#include "cpu_watcher.h" + +#define SAR_WACTHER 10 +#define CS_WACTHER 20 +#define SC_WACTHER 30 +#define PREEMPT_WACTHER 40 +#define SCHEDULE_WACTHER 50 +#define MQ_WACTHER 60 +#define MUTEX_WATCHER 70 +#define HASH_SIZE 1024 + +/*----------------------------------------------*/ +/* ewma算法 */ +/*----------------------------------------------*/ +//滑动窗口周期,用于计算alpha +#define CYCLE 10 +//阈值容错空间; +#define TOLERANCE 1.0 +struct ewma_info{ + double previousEWMA; + int count; + int cycle;//cycle是滑动窗口周期大小 +}; + +double calculateEWMA(double previousEWMA, double dataPoint, double alpha) { + return alpha * dataPoint + (1 - alpha) * previousEWMA;//当前时间点的ewma +} + +bool dynamic_filter(struct ewma_info *ewma_syscall_delay, double dataPoint) { + double alpha,threshold; + if(ewma_syscall_delay->cycle==0) alpha = 2.0 /(CYCLE + 1); // 计算 alpha + else alpha = 2.0 /(ewma_syscall_delay->cycle + 1); + + if(ewma_syscall_delay->previousEWMA == 0) {//初始化ewma算法,则赋值previousEWMA = dataPoint 并打印 + ewma_syscall_delay->previousEWMA = dataPoint; + return 1; + } + if(ewma_syscall_delay->count <30){ + ewma_syscall_delay->previousEWMA = calculateEWMA(ewma_syscall_delay->previousEWMA,dataPoint,alpha);//计算 + return 1; + } + else{ + ewma_syscall_delay->previousEWMA = calculateEWMA(ewma_syscall_delay->previousEWMA,dataPoint,alpha);//计算 + threshold = ewma_syscall_delay->previousEWMA * TOLERANCE; + if(dataPoint >= 
threshold) return 1; + } + return 0; +} + +/*----------------------------------------------*/ +/* bpf file system */ +/*----------------------------------------------*/ +const char *sar_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/sar_ctrl_map"; +const char *cs_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/cs_ctrl_map"; +const char *sc_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/sc_ctrl_map"; +const char *preempt_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/preempt_ctrl_map"; +const char *schedule_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/schedule_ctrl_map"; +const char *mq_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/mq_ctrl_map"; +const char *mu_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/mu_ctrl_map"; + +int common_pin_map(struct bpf_map **bpf_map, const struct bpf_object *obj, const char *map_name, const char *ctrl_path) +{ + int ret; + + *bpf_map = bpf_object__find_map_by_name(obj, map_name);//查找具有指定名称的 BPF 映射 + if (!*bpf_map) { + fprintf(stderr, "Failed to find BPF map\n"); + return -1; + } + // 用于防止上次没有成功 unpin 掉这个 map + bpf_map__unpin(*bpf_map, ctrl_path); + ret = bpf_map__pin(*bpf_map, ctrl_path); + if (ret){ + fprintf(stderr, "Failed to pin BPF map\n"); + return -1; + }//找到pin上 + + return 0; +} + +int update_sar_ctrl_map(struct sar_ctrl sar_ctrl){ + int err,key = 0; + int srcmap_fd; + + srcmap_fd = bpf_obj_get(sar_ctrl_path); + if (srcmap_fd < 0) { + fprintf(stderr,"Failed to open sar_ctrl_map file\n"); + return srcmap_fd; + } + err = bpf_map_update_elem(srcmap_fd,&key,&sar_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update sar_ctrl_map elem\n"); + return err; + } + + return 0; +} + +int update_cs_ctrl_map(struct cs_ctrl cs_ctrl){ + int err,key = 0; + int srcmap_fd; + + srcmap_fd = bpf_obj_get(cs_ctrl_path); + if (srcmap_fd < 0) { + fprintf(stderr,"Failed to open cs_ctrl_map file\n"); + return srcmap_fd; + } + err = bpf_map_update_elem(srcmap_fd,&key,&cs_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update cs_ctrl_map elem\n"); + return err; + } + + return 0; 
+} + +int update_sc_ctrl_map(struct sc_ctrl sc_ctrl){ + int err,key = 0; + int srcmap_fd; + + srcmap_fd = bpf_obj_get(sc_ctrl_path); + if (srcmap_fd < 0) { + fprintf(stderr,"Failed to open sc_ctrl_map file\n"); + return srcmap_fd; + } + err = bpf_map_update_elem(srcmap_fd,&key,&sc_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update sc_ctrl_map elem\n"); + return err; + } + + return 0; +} + +int update_preempt_ctrl_map(struct preempt_ctrl preempt_ctrl){ + int err,key = 0; + int srcmap_fd; + + srcmap_fd = bpf_obj_get(preempt_ctrl_path); + if (srcmap_fd < 0) { + fprintf(stderr,"Failed to open preempt_ctrl_map file\n"); + return srcmap_fd; + } + err = bpf_map_update_elem(srcmap_fd,&key,&preempt_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update preempt_ctrl_map elem\n"); + return err; + } + + return 0; +} + +int update_schedule_ctrl_map(struct schedule_ctrl schedule_ctrl){ + int err,key = 0; + int srcmap_fd; + + srcmap_fd = bpf_obj_get(schedule_ctrl_path); + if (srcmap_fd < 0) { + fprintf(stderr,"Failed to open schedule_ctrl_map file\n"); + return srcmap_fd; + } + err = bpf_map_update_elem(srcmap_fd,&key,&schedule_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update schedule_ctrl_map elem\n"); + return err; + } + + return 0; +} + +int update_mq_ctrl_map(struct mq_ctrl mq_ctrl){ + int err,key = 0; + int srcmap_fd; + + srcmap_fd = bpf_obj_get(mq_ctrl_path); + if (srcmap_fd < 0) { + fprintf(stderr,"Failed to open mq_ctrl_map file\n"); + return srcmap_fd; + } + err = bpf_map_update_elem(srcmap_fd,&key,&mq_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed to update mq_ctrl_map elem\n"); + return err; + } + + return 0; +} + +int update_mu_ctrl_map(struct mu_ctrl mu_ctrl){ + int err,key = 0; + int srcmap_fd; + + srcmap_fd = bpf_obj_get(mu_ctrl_path); + if (srcmap_fd < 0) { + fprintf(stderr,"Failed to open mq_ctrl_map file\n"); + return srcmap_fd; + } + err = bpf_map_update_elem(srcmap_fd,&key,&mu_ctrl, 0); + if(err < 0){ + fprintf(stderr, "Failed 
to update mq_ctrl_map elem\n"); + return err; + } + + return 0; +} +/*----------------------------------------------*/ +/* mutex_count */ +/*----------------------------------------------*/ + +typedef struct { + uint64_t ptr; + uint64_t count; +} lock_count_t; + +lock_count_t lock_counts[HASH_SIZE]; + +static uint64_t hash(uint64_t ptr) { + return ptr % HASH_SIZE; +} + +static void increment_lock_count(uint64_t ptr) { + uint64_t h = hash(ptr); + while (lock_counts[h].ptr != 0 && lock_counts[h].ptr != ptr) { + h = (h + 1) % HASH_SIZE; + } + if (lock_counts[h].ptr == 0) { + lock_counts[h].ptr = ptr; + lock_counts[h].count = 1; + } else { + lock_counts[h].count++; + } +} + +static uint64_t get_lock_count(uint64_t ptr) { + uint64_t h = hash(ptr); + while (lock_counts[h].ptr != 0 && lock_counts[h].ptr != ptr) { + h = (h + 1) % HASH_SIZE; + } + if (lock_counts[h].ptr == 0) { + return 0; + } else { + return lock_counts[h].count; + } +} + +/*----------------------------------------------*/ +/* hash */ +/*----------------------------------------------*/ + + +struct output_entry { + int pid; + char comm[16]; + long long delay; +}; + + +struct output_entry seen_entries[MAX_ENTRIES]; +int seen_count = 0; + + +bool entry_exists(int pid, const char *comm, long long delay) { + for (int i = 0; i < seen_count; i++) { + if (seen_entries[i].pid == pid && + strcmp(seen_entries[i].comm, comm) == 0 && + seen_entries[i].delay == delay) { + return true; + } + } + return false; +} + + +void add_entry(int pid, const char *comm, long long delay) { + if (seen_count < MAX_ENTRIES) { + seen_entries[seen_count].pid = pid; + strncpy(seen_entries[seen_count].comm, comm, sizeof(seen_entries[seen_count].comm)); + seen_entries[seen_count].delay = delay; + seen_count++; + } +} +/*----------------------------------------------*/ +/* uprobe */ +/*----------------------------------------------*/ +static const char object[] = "/usr/lib/x86_64-linux-gnu/libc.so.6"; + +#define __ATTACH_UPROBE(skel, 
sym_name, prog_name, is_retprobe) \ + do \ + { \ + LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts, \ + .retprobe = is_retprobe, \ + .func_name = #sym_name); \ + skel->links.prog_name = bpf_program__attach_uprobe_opts( \ + skel->progs.prog_name, \ + -1, \ + object, \ + 0, \ + &uprobe_opts); \ + } while (false) + +#define __CHECK_PROGRAM(skel, prog_name) \ + do \ + { \ + if (!skel->links.prog_name) \ + { \ + fprintf(stderr, "[%s] no program attached for" #prog_name "\n", strerror(errno)); \ + return -errno; \ + } \ + } while (false) + +#define __ATTACH_UPROBE_CHECKED(skel, sym_name, prog_name, is_retprobe) \ + do \ + { \ + __ATTACH_UPROBE(skel, sym_name, prog_name, is_retprobe); \ + __CHECK_PROGRAM(skel, prog_name); \ + } while (false) + +#define ATTACH_UPROBE(skel, sym_name, prog_name) __ATTACH_UPROBE(skel, sym_name, prog_name, false) +#define ATTACH_URETPROBE(skel, sym_name, prog_name) __ATTACH_UPROBE(skel, sym_name, prog_name, true) + +#define ATTACH_UPROBE_CHECKED(skel, sym_name, prog_name) __ATTACH_UPROBE_CHECKED(skel, sym_name, prog_name, false) +#define ATTACH_URETPROBE_CHECKED(skel, sym_name, prog_name) __ATTACH_UPROBE_CHECKED(skel, sym_name, prog_name, true) + +#define CHECK_ERR(cond, info) \ + if (cond) \ + { \ + fprintf(stderr, "[%s]" info "\n", strerror(errno)); \ + return -1; \ + } + +#define warn(...) 
# Build the cpu_watcher test helpers: the stress driver (test_cpuwatcher) and
# the POSIX message-queue sender/receiver pair.
CC = gcc
CFLAGS = -Wall -Wextra
LDFLAGS =
# Libraries belong in LDLIBS so they come after the sources on the link line.
LDLIBS = -lrt

.PHONY: all clean

all: test_cpuwatcher sender receiver

sender: mq_test_sender.c
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LDLIBS)

receiver: mq_test_receiver.c
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LDLIBS)

# test_cpuwatcher creates pthreads, so compile and link it with -pthread.
test_cpuwatcher: test_cpuwatcher.c
	$(CC) $(CFLAGS) -pthread $(LDFLAGS) -o $@ $<

# Also remove the scratch directories stress-ng leaves behind. By default it
# creates them (tmp-stress-ng-*) in the directory it was started from, so the
# glob is anchored to the current directory, never to the filesystem root.
clean:
	$(RM) test_cpuwatcher sender receiver
	$(RM) -r ./tmp-stress-ng*
/* waitpid() for run_command(); the other headers this file needs are the
 * ones it already includes. */
#include <sys/wait.h>

#define gettid() syscall(__NR_gettid)

/* Duration of one sched_yield() stress run started by
 * start_schedule_stress_test(); matches the 15 s stress-ng timeout. */
#define STRESS_TEST_SECONDS 15

/* Which tests were selected on the command line. */
static struct env {
   bool sar_test;
   bool cs_delay_test;
   bool sc_delay_test;
   bool mq_delay_test;
   bool preempt_test;
   bool schedule_test;
   bool mutrace_test;
} env = {
   .sar_test = false,
   .cs_delay_test = false,
   .sc_delay_test = false,
   .mq_delay_test = false,
   .preempt_test = false,
   .schedule_test = false,
   .mutrace_test = false,
};

const char argp_program_doc[] ="To test cpu_watcher.\n";

static const struct argp_option opts[] = {
   { "sar", 's', NULL, 0, "To test sar", 0 },
   { "cs_delay", 'c', NULL, 0, "To test cs_delay", 0 },
   { "sc_delay", 'S', NULL, 0, "To test sc_delay", 0 },
   { "mq_delay", 'm', NULL, 0, "To test mq_delay", 0 },
   { "preempt_delay", 'p', NULL, 0, "To test preempt_delay", 0 },
   { "schedule_delay", 'd', NULL, 0, "To test schedule_delay", 0 },
   { "mu_trace", 'x', NULL, 0, "To test mutrace", 0 },
   { "all", 'a', NULL, 0, "To test all", 0 },
   { NULL, 'h', NULL, OPTION_HIDDEN, "show the full help", 0 },
   {},
};

/* argp callback: translate one command-line option into env flags. */
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
   (void)arg;
   switch (key) {
      case 'a':
         /* "all" really selects every test, including mutrace. */
         env.sar_test = true;
         env.cs_delay_test = true;
         env.mq_delay_test = true;
         env.preempt_test = true;
         env.sc_delay_test = true;
         env.schedule_test = true;
         env.mutrace_test = true;
         break;
      case 's':
         env.sar_test = true;
         break;
      case 'c':
         env.cs_delay_test = true;
         break;
      case 'S':
         env.sc_delay_test = true;
         break;
      case 'm':
         env.mq_delay_test = true;
         break;
      case 'p':
         env.preempt_test = true;
         break;
      case 'd':
         env.schedule_test = true;
         break;
      case 'x':
         env.mutrace_test = true;
         break;
      case 'h':
         argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
         break;
      default:
         return ARGP_ERR_UNKNOWN;
   }

   return 0;
}

/* Set to 1 to make the sched_yield() workers return. */
static int stress_stop;

/* Worker thread: give up the CPU in a tight loop until asked to stop. */
void *schedule_stress_test(void *arg) {
   (void)arg;
   while (!__atomic_load_n(&stress_stop, __ATOMIC_RELAXED)) {
      sched_yield();
   }
   return NULL;
}

/*
 * Run num_threads sched_yield() workers for STRESS_TEST_SECONDS seconds,
 * then stop and join them. The run is bounded so that the tests selected
 * after this one still get executed; previously the workers never exited,
 * so pthread_join() blocked forever and free() was unreachable.
 */
void start_schedule_stress_test(int num_threads) {
   pthread_t *threads;
   int started = 0;

   if (num_threads <= 0)
      return;

   threads = malloc(num_threads * sizeof(pthread_t));
   if (!threads) {
      perror("malloc");
      return;
   }

   __atomic_store_n(&stress_stop, 0, __ATOMIC_RELAXED);
   for (int i = 0; i < num_threads; i++) {
      int rc = pthread_create(&threads[started], NULL, schedule_stress_test, NULL);
      if (rc != 0) {
         fprintf(stderr, "pthread_create failed: %d\n", rc);
         break;
      }
      started++;
   }

   if (started > 0)
      sleep(STRESS_TEST_SECONDS);
   __atomic_store_n(&stress_stop, 1, __ATOMIC_RELAXED);

   /* Join only the threads that were actually created. */
   for (int i = 0; i < started; i++) {
      pthread_join(threads[i], NULL);
   }
   free(threads);
}

/* Thread body that prints its tid, sleeps for 3 seconds and exits. */
void *func(void *arg)
{
   (void)arg;
   int tpid;
   tpid = gettid();
   printf("新线程pid:%d,睡眠3s后退出\n",tpid);
   sleep(3);
   printf("新线程退出\n");
   return NULL;
}

/* Print this process's PID and wait for the user to enter a number, giving
 * them time to point cpu_watcher at the PID before the test starts. */
void input_pid() {
   int stop;
   int pid = getpid();
   printf("test_proc进程的PID:【%d】\n", pid);
   printf("输入任意数字继续程序的运行:");
   if (scanf("%d", &stop) != 1)
      stop = 0; /* EOF or non-numeric input: just carry on */
   printf("程序开始执行...\n");
   printf("\n");
}

/*
 * Run path with argv/envp in a child process and wait for it to finish.
 * Using fork() keeps this test driver alive, so the remaining selected tests
 * still run (a bare execve() replaced the driver itself).
 * Returns the wait status of the child, or -1 if fork()/waitpid() failed.
 */
static int run_command(const char *path, char *const argv[], char *const envp[])
{
   int status = 0;
   pid_t pid;

   fflush(NULL); /* do not let the child inherit unflushed stdio buffers */
   pid = fork();
   if (pid < 0) {
      perror("fork");
      return -1;
   }
   if (pid == 0) {
      execve(path, argv, envp);
      perror("execve");
      _exit(127);
   }
   if (waitpid(pid, &status, 0) < 0) {
      perror("waitpid");
      return -1;
   }
   return status;
}

/*
 * Mixed stress-ng load shared by the SAR and MUTRACE tests: 4 sequential
 * read/write hdd workers and 4 io workers for 15 seconds. label is the test
 * name printed in front of the description. The printed description and
 * command now match the argv that is really executed (no UDP workers).
 */
static void run_stress_ng_mix(const char *label)
{
   char *argvv[] = { "/usr/bin/stress-ng", "--hdd", "4", "--hdd-opts", "wr-seq,rd-seq", "--io", "4",  "--timeout", "15s", "--metrics-brief", NULL };
   char *envp[] = { "PATH=/bin", NULL };

   printf("%s功能测试逻辑:系统上执行混合压力测试,包括4个顺序读写硬盘线程、4个IO操作线程,持续15秒,观察加压前后的变化\n", label);
   printf("执行指令 stress-ng --hdd 4 --hdd-opts wr-seq,rd-seq --io 4 --timeout 15s --metrics-brief\n");
   run_command("/usr/bin/stress-ng", argvv, envp);
   printf("\n");
}

int main(int argc, char **argv){
   int err;
   static const struct argp argp = {
      .options = opts,
      .parser = parse_arg,
      .doc = argp_program_doc,
   };

   err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
   if (err)
      return err;

   if(env.sar_test){
      printf("SAR_TEST----------------------------------------------\n");
      /* SAR test: mixed stress-ng load; compare readings before and after. */
      run_stress_ng_mix("SAR");
   }

   if(env.cs_delay_test){
      printf("CS_DELAY_TEST----------------------------------------------\n");
      /* CS_DELAY test: threads calling sched_yield() in a loop to simulate
       * a high scheduling load. */
      start_schedule_stress_test(10); /* 10 stress threads */
   }

   if(env.sc_delay_test){
      printf("SC_DELAY_TEST----------------------------------------------\n");
      /* SC_DELAY test: issue a large number of system calls. */
      const int num_iterations = 1000000; /* number of loop iterations */
      for (int i = 0; i < num_iterations; i++) {
         getpid();                 /* process ID */
         getppid();                /* parent process ID */
         time(NULL);               /* current time */
         syscall(SYS_gettid);      /* thread ID */
      }
      printf("系统调用压力测试完成。\n");
   }

   if(env.mq_delay_test){
      /* mq_delay test: run the message-queue sender and receiver. */
      input_pid();
      /* NOTE(review): the receiver opens the queue without O_CREAT and may
       * race with the backgrounded sender creating it - confirm this is
       * acceptable for the test. */
      if (system("./sender & ./receiver") == -1)
         perror("system");
      sleep(60);
      /* The former system("^Z") was dropped: "^Z" is not a shell command. */
   }

   if(env.preempt_test){
      printf("PREEMPT_TEST----------------------------------------------\n");
      /* PREEMPT test: same sched_yield() load as the CS_DELAY test. */
      start_schedule_stress_test(10); /* 10 stress threads */
   }

   if(env.schedule_test){
      printf("SCHEDULE_TEST----------------------------------------------\n");
      /* Schedule-delay test: run sysbench with 32 threads for 10 seconds. */
      char *argvv[] = { "/usr/bin/sysbench", "--threads=32", "--time=10", "cpu", "run", NULL };
      char *envp[] = { "PATH=/bin", NULL };
      printf("调度延迟测试逻辑:\n");
      printf("执行指令 sysbench --threads=32 --time=10 cpu run\n");
      run_command("/usr/bin/sysbench", argvv, envp);
      printf("\n");
   }

   if(env.mutrace_test){
      printf("MUTRACE_TEST----------------------------------------------\n");
      /* MUTRACE test: same mixed stress-ng load as the SAR test. */
      run_stress_ng_mix("MUTRACE");
   }

   return 0;
}