From 0f3c45b165017653eaa3fb00294f464ac80aca58 Mon Sep 17 00:00:00 2001 From: gaoyixiang1 <1739037263@qq.com> Date: Thu, 18 Jan 2024 18:36:53 +0800 Subject: [PATCH 01/24] =?UTF-8?q?C++=E7=AC=A6=E5=8F=B7=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Stack_Analyser/libbpf/stack_analyzer.cc | 609 +++++++++++------- 1 file changed, 377 insertions(+), 232 deletions(-) diff --git a/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc b/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc index e36129134..6c4a761c5 100644 --- a/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc +++ b/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc @@ -22,6 +22,9 @@ #include #include #include +#include +#include +#include #include "rapidjson/document.h" #include "rapidjson/filewritestream.h" @@ -29,7 +32,9 @@ #include "symbol.h" #include "clipp.h" -extern "C" { + +extern "C" +{ #include #include #include @@ -49,15 +54,41 @@ extern "C" { // 模板用来统一调用多个类有同样但未被抽象的接口 // 虚函数用来规范接口来被统一调用 -class StackCollector { + + +std::string demangle(const char *symbol) { + size_t size = 0; + int status = 0; + char *demangled = abi::__cxa_demangle(symbol, NULL, &size, &status); + + if (status == 0 && demangled != NULL) { + std::string result; + for(size_t i=0;i *sortedCountList(void) { - if (value_fd < 0) { + std::vector *sortedCountList(void) + { + if (value_fd < 0) + { return NULL; } std::vector *D = new std::vector(); - for (psid prev = {0}, id; !bpf_map_get_next_key(value_fd, &prev, &id); prev = id) { + for (psid prev = {0}, id; !bpf_map_get_next_key(value_fd, &prev, &id); prev = id) + { bpf_map_lookup_elem(value_fd, &id, data_buf); CountItem d(id.pid, id.ksid, id.usid, data_value()); D->insert(std::lower_bound(D->begin(), D->end(), d), d); @@ -93,8 +128,8 @@ class StackCollector { int comm_fd = -1; // pid-进程名表的文件描述符 int trace_fd = -1; // 栈id-栈轨迹表的文件描述符 - void *data_buf = NULL; // 
用于存储单个指标值的缓冲区 - + void *data_buf = NULL; // 用于存储单个指标值的缓冲区 + bool showDelta = true; /// @brief 将缓冲区的数据解析为特定值 @@ -107,31 +142,33 @@ class StackCollector { /// @return 字符串 virtual std::string data_str(uint64_t f) { return "value:" + std::to_string(f); }; - #define declareEBPF(eBPFName) \ +#define declareEBPF(eBPFName) \ struct eBPFName *skel = NULL; public: - std::string name; // 标识类名 + std::string name; // 标识类名 + + int pid = -1; // 用于设置ebpf程序跟踪的pid + int cpu = -1; // 用于设置ebpf程序跟踪的cpu + int err = 0; // 用于保存错误代码 - int pid = -1; // 用于设置ebpf程序跟踪的pid - int cpu = -1; // 用于设置ebpf程序跟踪的cpu - int err = 0; // 用于保存错误代码 - - bool ustack = true; // 是否跟踪用户栈 - bool kstack = true; // 是否跟踪内核栈 - uint64_t min = 0; - uint64_t max = __UINT64_MAX__; // 设置采集指标最大值,最小值 + bool ustack = true; // 是否跟踪用户栈 + bool kstack = true; // 是否跟踪内核栈 + uint64_t min = 0; + uint64_t max = __UINT64_MAX__; // 设置采集指标最大值,最小值 - bool clear = false; // 清除已输出的指标积累量 + bool clear = false; // 清除已输出的指标积累量 int self_pid; - StackCollector() { + StackCollector() + { self_pid = getpid(); data_buf = new uint64_t(0); }; - virtual ~StackCollector() { + virtual ~StackCollector() + { delete (uint64_t *)data_buf; }; @@ -139,52 +176,60 @@ class StackCollector { /// @param 无 /// @return 成功则返回0,否则返回负数 virtual int load(void) = 0; - #define defaultLoad \ - int load(void) override { \ - StackProgLoadOpen( \ - skel->bss->apid = pid \ - ); \ - return 0; \ +#define defaultLoad \ + int load(void) override \ + { \ + StackProgLoadOpen( \ + skel->bss->apid = pid); \ + return 0; \ }; /// @brief 将ebpf程序挂载到跟踪点上 /// @param 无 /// @return 成功则返回0,否则返回负数 virtual int attach(void) = 0; - #define defaultAttach \ - int attach(void) override { \ - err = skel->attach(skel); \ - CHECK_ERR(err, "Failed to attach BPF skeleton");\ - return 0; \ +#define defaultAttach \ + int attach(void) override \ + { \ + err = skel->attach(skel); \ + CHECK_ERR(err, "Failed to attach BPF skeleton"); \ + return 0; \ }; /// @brief 断开ebpf的跟踪点和处理函数间的连接 /// @param 无 virtual void 
detach(void) = 0; - #define defaultDetach \ - void detach(void) override {\ - if (skel) { \ - skel->detach(skel); \ - } \ +#define defaultDetach \ + void detach(void) override \ + { \ + if (skel) \ + { \ + skel->detach(skel); \ + } \ }; /// @brief 卸载ebpf程序 /// @param 无 virtual void unload(void) = 0; - #define defaultUnload \ - void unload(void) override {\ - if (skel) { \ - skel->destroy(skel);\ - } \ - skel = NULL; \ +#define defaultUnload \ + void unload(void) override \ + { \ + if (skel) \ + { \ + skel->destroy(skel); \ + } \ + skel = NULL; \ }; /// @brief 清除count map的数据 /// @param 无 - void check_clear_count(void) { - if(!showDelta) return; + void check_clear_count(void) + { + if (!showDelta) + return; uint c = MAX_ENTRIES; - for (psid prev = {0}, id; c && !bpf_map_get_next_key(value_fd, &prev, &id); c--, prev = id) { + for (psid prev = {0}, id; c && !bpf_map_get_next_key(value_fd, &prev, &id); c--, prev = id) + { bpf_map_delete_elem(value_fd, &id); } } @@ -194,7 +239,8 @@ class StackCollector { void print_list(void) { auto D = sortedCountList(); - for (auto id : *D) { + for (auto id : *D) + { printf("pid:%-6d\tusid:%-6d\tksid:%-6d\t%s\n", id.pid, id.usid, id.ksid, data_str(id.val).c_str()); } delete D; @@ -210,11 +256,12 @@ class StackCollector { CHECK_ERR_VALUE(comm_fd < 0, nullptr, "comm map open failure"); // std::filebuf DataFileBuf; // const std::string DataFileName = name + "_stack_data.log"; - // CHECK_ERR(DataFileBuf.open(DataFileName, std::ios::app) == nullptr, "data file open failed"); + // CHECK_ERR(DataFileBuf.open(DataFileName, std::ios::app) == nullptr, "data file open failed"); // std::ostream DataText(&DataFileBuf); auto DataTextP = new std::ostringstream(); auto &DataText = *DataTextP; - for (psid prev = {}, id; !bpf_map_get_next_key(value_fd, &prev, &id); prev = id) { + for (psid prev = {}, id; !bpf_map_get_next_key(value_fd, &prev, &id); prev = id) + { { char cmd[COMM_LEN]; bpf_map_lookup_elem(comm_fd, &id.pid, cmd); @@ -222,26 +269,49 @@ 
class StackCollector { } symbol sym; uint64_t ip[MAX_STACKS]; - if (id.usid >= 0) { + if (id.usid >= 0) + { bpf_map_lookup_elem(trace_fd, &id.usid, ip); std::string *s = 0, symbol; elf_file file; - uint64_t *p = ip + MAX_STACKS -1; - for(; !*p; p--); - for (; p >= ip; p--) { + uint64_t *p = ip + MAX_STACKS - 1; + for (; !*p; p--) + ; + for (; p >= ip; p--) + { uint64_t &addr = *p; sym.reset(addr); - if (g_symbol_parser.find_symbol_in_cache(id.pid, addr, symbol)) { + if (g_symbol_parser.find_symbol_in_cache(id.pid, addr, symbol)) + { s = &symbol; + if ((*s)[0] == '_' && (*s)[1] == 'Z')//代表是C++符号,则调用demangle解析 + { + *s = demangle(symbol.c_str()); + } DataText << *s << ';'; - } else if (g_symbol_parser.get_symbol_info(id.pid, sym, file) && - g_symbol_parser.find_elf_symbol(sym, file, id.pid, id.pid)) { + } + else if (g_symbol_parser.get_symbol_info(id.pid, sym, file) && + g_symbol_parser.find_elf_symbol(sym, file, id.pid, id.pid)) + { std::stringstream ss(""); ss << "+0x" << std::hex << (addr - sym.ip); + if (sym.name[0] == '_' && sym.name[1] == 'Z')//代表是C++符号,则调用demangle解析 + { + sym.name = demangle(sym.name.c_str()); + } + // int sym_len = sym.name.length(); + // if (sym.name[sym_len-1] == ')' && sym.name[sym_len-2] == '(')//代表函数名加了括号 + // { + // DataText << ""; + // }else{ + // sym.name+="()"; + // } sym.name += ss.str(); DataText << sym.name << ';'; g_symbol_parser.putin_symbol_cache(id.pid, addr, sym.name); - } else { + } + else + { std::stringstream ss(""); ss << "0x" << std::hex << addr; auto addr_str = ss.str(); @@ -249,20 +319,39 @@ class StackCollector { g_symbol_parser.putin_symbol_cache(id.pid, addr, addr_str); } } - } else { + } + else + { DataText << "[MISSING USER STACK];"; } DataText << "---------;"; - if (id.ksid >= 0) { + if (id.ksid >= 0) + { bpf_map_lookup_elem(trace_fd, &id.ksid, ip); uint64_t *p = ip + MAX_STACKS - 1; - for(; !*p; p--); - for (; p >= ip; p--) { + for (; !*p; p--) + ; + for (; p >= ip; p--) + { uint64_t &addr = *p; sym.reset(addr); 
- if (g_symbol_parser.find_kernel_symbol(sym)) { + if (g_symbol_parser.find_kernel_symbol(sym)) + { + if (sym.name[0] == '_' && sym.name[1] == 'Z')//代表是C++符号,则调用demangle解析 + { + sym.name = demangle(sym.name.c_str()); + } + // int sym_len = sym.name.length(); + // if (sym.name[sym_len-1] == ')' && sym.name[sym_len-2] == '(')//代表函数名加了括号 + // { + // DataText << ""; + // }else{ + // sym.name+="()"; + // } DataText << sym.name << ';'; - } else { + } + else + { std::stringstream ss(""); ss << "0x" << std::hex << addr; auto addr_str = ss.str(); @@ -270,7 +359,9 @@ class StackCollector { g_symbol_parser.putin_symbol_cache(pid, addr, addr_str); } } - } else { + } + else + { DataText << "[MISSING KERNEL STACK];"; } bpf_map_lookup_elem(value_fd, &id, data_buf); @@ -280,7 +371,8 @@ class StackCollector { } }; -class OnCPUStackCollector : public StackCollector { +class OnCPUStackCollector : public StackCollector +{ private: declareEBPF(on_cpu_count_bpf); const char *online_cpus_file = "/sys/devices/system/cpu/online"; @@ -292,7 +384,8 @@ class OnCPUStackCollector : public StackCollector { public: unsigned long long freq = 49; - OnCPUStackCollector() { + OnCPUStackCollector() + { name = "on_cpu"; err = parse_cpu_mask_file(online_cpus_file, &online_mask, &num_online_cpus); CHECK_ERR_EXIT(err, "Fail to get online CPU numbers"); @@ -302,19 +395,19 @@ class OnCPUStackCollector : public StackCollector { std::string data_str(uint64_t f) override { return "counts:" + std::to_string(f); }; - int load(void) override { + int load(void) override + { FILE *fp = popen("cat /proc/kallsyms | grep \" avenrun\"", "r"); CHECK_ERR(!fp, "Failed to draw flame graph"); unsigned long *load_a; fscanf(fp, "%p", &load_a); pclose(fp); StackProgLoadOpen( - skel->bss->load_a = load_a - ) - return 0; + skel->bss->load_a = load_a) return 0; }; - int attach(void) override { + int attach(void) override + { attr = { .type = PERF_TYPE_SOFTWARE, // hardware event can't be used .size = sizeof(attr), @@ -324,13 
+417,16 @@ class OnCPUStackCollector : public StackCollector { .freq = 1, // use freq instead of period }; pefds = (int *)malloc(num_cpus * sizeof(int)); - for (int i = 0; i < num_cpus; i++) { + for (int i = 0; i < num_cpus; i++) + { pefds[i] = -1; } links = (struct bpf_link **)calloc(num_cpus, sizeof(struct bpf_link *)); - for (int cpu = 0; cpu < num_cpus; cpu++) { + for (int cpu = 0; cpu < num_cpus; cpu++) + { /* skip offline/not present CPUs */ - if (cpu >= num_online_cpus || !online_mask[cpu]) { + if (cpu >= num_online_cpus || !online_mask[cpu]) + { continue; } /* Set up performance monitoring on a CPU/Core */ @@ -344,17 +440,23 @@ class OnCPUStackCollector : public StackCollector { return 0; } - void detach(void) override { - if (links) { - for (int cpu = 0; cpu < num_cpus; cpu++) { + void detach(void) override + { + if (links) + { + for (int cpu = 0; cpu < num_cpus; cpu++) + { bpf_link__destroy(links[cpu]); } free(links); links = NULL; } - if (pefds) { - for (int i = 0; i < num_cpus; i++) { - if (pefds[i] >= 0) { + if (pefds) + { + for (int i = 0; i < num_cpus; i++) + { + if (pefds[i] >= 0) + { close(pefds[i]); } } @@ -366,20 +468,24 @@ class OnCPUStackCollector : public StackCollector { defaultUnload; }; -class OffCPUStackCollector : public StackCollector{ +class OffCPUStackCollector : public StackCollector +{ private: declareEBPF(off_cpu_count_bpf); + protected: std::string data_str(uint64_t f) override { return "time(ms):" + std::to_string(f); }; defaultLoad; defaultAttach; defaultDetach; defaultUnload; + public: OffCPUStackCollector() { name = "off-cpu"; }; }; -class MemoryStackCollector : public StackCollector { +class MemoryStackCollector : public StackCollector +{ private: declareEBPF(mem_count_bpf); @@ -388,15 +494,22 @@ class MemoryStackCollector : public StackCollector { public: char *object = (char *)"libc.so.6"; - - MemoryStackCollector() { kstack = false; name = "memory"; showDelta = false; }; - int load(void) override { + MemoryStackCollector() 
+ { + kstack = false; + name = "memory"; + showDelta = false; + }; + + int load(void) override + { StackProgLoadOpen(); return 0; }; - int attach(void) override { + int attach(void) override + { ATTACH_UPROBE_CHECKED(skel, malloc, malloc_enter); ATTACH_URETPROBE_CHECKED(skel, malloc, malloc_exit); ATTACH_UPROBE_CHECKED(skel, calloc, calloc_enter); @@ -414,12 +527,14 @@ class MemoryStackCollector : public StackCollector { return 0; }; - void detach(void) override { + void detach(void) override + { skel->detach(skel); - #define destoryBPFLinkIfExist(name) \ - if(skel->links.name) { \ - bpf_link__destroy(skel->links.name); \ - } +#define destoryBPFLinkIfExist(name) \ + if (skel->links.name) \ + { \ + bpf_link__destroy(skel->links.name); \ + } destoryBPFLinkIfExist(malloc_enter); destoryBPFLinkIfExist(malloc_exit); destoryBPFLinkIfExist(calloc_enter); @@ -433,21 +548,25 @@ class MemoryStackCollector : public StackCollector { }; defaultUnload; - }; -class IOStackCollector : public StackCollector { +class IOStackCollector : public StackCollector +{ private: declareEBPF(io_count_bpf); + protected: - std::string data_str(uint64_t f) override { + std::string data_str(uint64_t f) override + { const std::string IOScale[] = {"counts", "size(B)", "aver(B/1)"}; return IOScale[DataType] + ":" + std::to_string(f); }; - uint64_t data_value() override { + uint64_t data_value() override + { io_tuple *p = (io_tuple *)data_buf; - switch (DataType) { + switch (DataType) + { case AVE: return p->size / p->count; case SIZE: @@ -462,33 +581,37 @@ class IOStackCollector : public StackCollector { public: io_mod DataType = io_mod::COUNT; - IOStackCollector() { + IOStackCollector() + { delete (uint64_t *)data_buf; data_buf = new io_tuple{0}; name = "io"; }; - - ~IOStackCollector() override { + ~IOStackCollector() override + { delete (io_tuple *)data_buf; }; defaultLoad; defaultAttach; defaultDetach; - defaultUnload; - + defaultUnload; }; -class ReadaheadStackCollector : public StackCollector { 
+class ReadaheadStackCollector : public StackCollector +{ private: declareEBPF(pre_count_bpf); + protected: - std::string data_str(uint64_t f) override { - return "rest_pages:" + std::to_string(f); + std::string data_str(uint64_t f) override + { + return "rest_pages:" + std::to_string(f); }; - uint64_t data_value() override { + uint64_t data_value() override + { ra_tuple *p = (ra_tuple *)data_buf; return p->expect - p->truth; }; @@ -499,121 +622,124 @@ class ReadaheadStackCollector : public StackCollector { defaultDetach; defaultUnload; - ReadaheadStackCollector() { + ReadaheadStackCollector() + { delete (uint64_t *)data_buf; data_buf = new ra_tuple{0}; name = "readahead"; showDelta = false; }; - ~ReadaheadStackCollector() override { + ~ReadaheadStackCollector() override + { delete (ra_tuple *)data_buf; } }; -namespace MainConfig { - int run_time = __INT_MAX__; // 运行时间 - unsigned delay = 5; // 设置输出间隔 - display_t d_mode = display_t::NO_OUTPUT; // 设置显示模式 +namespace MainConfig +{ + int run_time = __INT_MAX__; // 运行时间 + unsigned delay = 5; // 设置输出间隔 + display_t d_mode = display_t::NO_OUTPUT; // 设置显示模式 std::string command = ""; int32_t target_pid = -1; std::string server_address = "127.0.0.1:12345"; }; -std::vector StackCollectorList; -void endCollect(void) { +std::vector StackCollectorList; +void endCollect(void) +{ signal(SIGINT, SIG_IGN); - for(auto Item : StackCollectorList) { - if(MainConfig::run_time > 0) { + for (auto Item : StackCollectorList) + { + if (MainConfig::run_time > 0) + { Item->format(); } Item->detach(); Item->unload(); } - if (MainConfig::command.length()) { + if (MainConfig::command.length()) + { kill(MainConfig::target_pid, SIGTERM); } } uint64_t optbuff; -int main(int argc, char *argv[]) { - auto MainOption = ( - ( - ((clipp::option("-p", "--pid") & clipp::value("pid of sampled process, default -1 for all", MainConfig::target_pid)) % "set pid of process to monitor") | - ((clipp::option("-c", "--command") & clipp::value("to be sampled command to 
run, default none", MainConfig::command)) % "set command for monitoring the whole life") - ), - (clipp::option("-d", "--delay") & clipp::value("delay time(seconds) to output, default 5", MainConfig::delay)) % "set the interval to output", - clipp::option("-l", "--realtime-list").set(MainConfig::d_mode, LIST_OUTPUT) % "output in console, default false", - clipp::option("-t", "--timeout") & clipp::value("run time, default nearly infinite", MainConfig::run_time) % "set the total simpling time", - clipp::option("-s", "--server") & clipp::value("server address, default 127.0.0.1:12345", MainConfig::server_address) % "set the server address" - ); - - auto SubOption = ( - clipp::option("-U", "--user-stack-only").call([]{ - StackCollectorList.back()->kstack = false; - }) % "only sample user stacks", - clipp::option("-K", "--kernel-stack-only").call([]{ - StackCollectorList.back()->ustack = false; - }) % "only sample kernel stacks", - (clipp::option("-m", "--max-value") & clipp::value("max threshold of sampled value", optbuff).call([]{ - StackCollectorList.back()->max = optbuff; - })) % "set the max threshold of sampled value", - (clipp::option("-n", "--min-value") & clipp::value("min threshold of sampled value", optbuff).call([]{ - StackCollectorList.back()->min = optbuff; - })) % "set the min threshold of sampled value" - ); - - auto OnCpuOption = clipp::option("on-cpu").call([]{ - StackCollectorList.push_back(new OnCPUStackCollector()); - }) % "sample the call stacks of on-cpu processes" & ( - clipp::option("-F", "--frequency") & clipp::value("sampling frequency", optbuff).call([]{ - static_cast(StackCollectorList.back())->freq = optbuff; - }) % "sampling at a set frequency", - SubOption - ); - - auto OffCpuOption = clipp::option("off-cpu").call([]{ - StackCollectorList.push_back(new OffCPUStackCollector()); - }) % "sample the call stacks of off-cpu processes" & SubOption; - - auto MemoryOption = clipp::option("mem").call([]{ - StackCollectorList.push_back(new 
MemoryStackCollector()); - }) % "sample the memory usage of call stacks" & SubOption; - - auto IOOption = clipp::option("io").call([]{ - StackCollectorList.push_back(new IOStackCollector()); - }) % "sample the IO data volume of call stacks" & ( - (clipp::option("--mod") & ( - clipp::option("count").call([]{ - static_cast(StackCollectorList.back())->DataType = COUNT; - }) % "Counting the number of I/O operations" | - clipp::option("ave").call([]{ - static_cast(StackCollectorList.back())->DataType = AVE; - }) % "Counting the ave of I/O operations" | - clipp::option("size").call([]{ - static_cast(StackCollectorList.back())->DataType = SIZE; - }) % "Counting the size of I/O operations" - )) % "set the statistic mod", - SubOption - ); - - auto ReadaheadOption = clipp::option("ra").call([]{ - StackCollectorList.push_back(new ReadaheadStackCollector()); - }) % "sample the readahead hit rate of call stacks" & SubOption; - - auto cli = ( - MainOption, - clipp::option("-v", "--version").call([] { - std::cout << "verion 2.0\n\n"; - }) % "show version", - OnCpuOption, - OffCpuOption, - MemoryOption, - IOOption, - ReadaheadOption - ) % "statistic call trace relate with some metrics"; - - if (!clipp::parse(argc, argv, cli)) { +int main(int argc, char *argv[]) +{ + auto MainOption = (( + ((clipp::option("-p", "--pid") & clipp::value("pid of sampled process, default -1 for all", MainConfig::target_pid)) % "set pid of process to monitor") | + ((clipp::option("-c", "--command") & clipp::value("to be sampled command to run, default none", MainConfig::command)) % "set command for monitoring the whole life")), + (clipp::option("-d", "--delay") & clipp::value("delay time(seconds) to output, default 5", MainConfig::delay)) % "set the interval to output", + clipp::option("-l", "--realtime-list").set(MainConfig::d_mode, LIST_OUTPUT) % "output in console, default false", + clipp::option("-t", "--timeout") & clipp::value("run time, default nearly infinite", MainConfig::run_time) % "set the 
total simpling time", + clipp::option("-s", "--server") & clipp::value("server address, default 127.0.0.1:12345", MainConfig::server_address) % "set the server address"); + + auto SubOption = (clipp::option("-U", "--user-stack-only").call([] + { StackCollectorList.back()->kstack = false; }) % + "only sample user stacks", + clipp::option("-K", "--kernel-stack-only").call([] + { StackCollectorList.back()->ustack = false; }) % + "only sample kernel stacks", + (clipp::option("-m", "--max-value") & clipp::value("max threshold of sampled value", optbuff).call([] + { StackCollectorList.back()->max = optbuff; })) % + "set the max threshold of sampled value", + (clipp::option("-n", "--min-value") & clipp::value("min threshold of sampled value", optbuff).call([] + { StackCollectorList.back()->min = optbuff; })) % + "set the min threshold of sampled value"); + + auto OnCpuOption = clipp::option("on-cpu").call([] + { StackCollectorList.push_back(new OnCPUStackCollector()); }) % + "sample the call stacks of on-cpu processes" & + (clipp::option("-F", "--frequency") & clipp::value("sampling frequency", optbuff).call([] + { static_cast(StackCollectorList.back())->freq = optbuff; }) % + "sampling at a set frequency", + SubOption); + + auto OffCpuOption = clipp::option("off-cpu").call([] + { StackCollectorList.push_back(new OffCPUStackCollector()); }) % + "sample the call stacks of off-cpu processes" & + SubOption; + + auto MemoryOption = clipp::option("mem").call([] + { StackCollectorList.push_back(new MemoryStackCollector()); }) % + "sample the memory usage of call stacks" & + SubOption; + + auto IOOption = clipp::option("io").call([] + { StackCollectorList.push_back(new IOStackCollector()); }) % + "sample the IO data volume of call stacks" & + ((clipp::option("--mod") & (clipp::option("count").call([] + { static_cast(StackCollectorList.back())->DataType = COUNT; }) % + "Counting the number of I/O operations" | + clipp::option("ave").call([] + { 
static_cast(StackCollectorList.back())->DataType = AVE; }) % + "Counting the ave of I/O operations" | + clipp::option("size").call([] + { static_cast(StackCollectorList.back())->DataType = SIZE; }) % + "Counting the size of I/O operations")) % + "set the statistic mod", + SubOption); + + auto ReadaheadOption = clipp::option("ra").call([] + { StackCollectorList.push_back(new ReadaheadStackCollector()); }) % + "sample the readahead hit rate of call stacks" & + SubOption; + + auto cli = (MainOption, + clipp::option("-v", "--version").call([] + { std::cout << "verion 2.0\n\n"; }) % + "show version", + OnCpuOption, + OffCpuOption, + MemoryOption, + IOOption, + ReadaheadOption) % + "statistic call trace relate with some metrics"; + + if (!clipp::parse(argc, argv, cli)) + { std::cout << clipp::make_man_page(cli, argv[0]) << '\n'; return 0; } @@ -621,32 +747,42 @@ int main(int argc, char *argv[]) { uint64_t eventbuff = 1; int child_exec_event_fd = eventfd(0, EFD_CLOEXEC); CHECK_ERR(child_exec_event_fd < 0, "failed to create event fd"); - if(MainConfig::command.length()) { + if (MainConfig::command.length()) + { MainConfig::target_pid = fork(); - switch(MainConfig::target_pid) { - case -1: { - std::cout << "command create failed." << std::endl; - return -1; - } case 0: { - const auto bytes = read(child_exec_event_fd, &eventbuff, sizeof(eventbuff)); - CHECK_ERR( bytes < 0, "failed to read from fd %ld", bytes) - else CHECK_ERR(bytes != sizeof(eventbuff), "read unexpected size %ld", bytes); - printf("child exec %s\n", MainConfig::command.c_str()); - CHECK_ERR_EXIT(execl("/bin/bash", "bash", "-c", MainConfig::command.c_str(), NULL), "failed to execute child command"); - break; - } default: { - printf("create child %d\n", MainConfig::target_pid); - break; - } + switch (MainConfig::target_pid) + { + case -1: + { + std::cout << "command create failed." 
<< std::endl; + return -1; + } + case 0: + { + const auto bytes = read(child_exec_event_fd, &eventbuff, sizeof(eventbuff)); + CHECK_ERR(bytes < 0, "failed to read from fd %ld", bytes) + else CHECK_ERR(bytes != sizeof(eventbuff), "read unexpected size %ld", bytes); + printf("child exec %s\n", MainConfig::command.c_str()); + CHECK_ERR_EXIT(execl("/bin/bash", "bash", "-c", MainConfig::command.c_str(), NULL), "failed to execute child command"); + break; + } + default: + { + printf("create child %d\n", MainConfig::target_pid); + break; + } } } - for(auto Item = StackCollectorList.begin(); Item != StackCollectorList.end(); ) { + for (auto Item = StackCollectorList.begin(); Item != StackCollectorList.end();) + { (*Item)->pid = MainConfig::target_pid; - if ((*Item)->load()) { + if ((*Item)->load()) + { goto err; } - if ((*Item)->attach()) { + if ((*Item)->attach()) + { goto err; } Item++; @@ -658,26 +794,31 @@ int main(int argc, char *argv[]) { Item = StackCollectorList.erase(Item); } - if(MainConfig::command.length()) { + if (MainConfig::command.length()) + { printf("wake up child\n"); write(child_exec_event_fd, &eventbuff, sizeof(eventbuff)); } printf("display mode: %d\n", MainConfig::d_mode); - // 创建 socket + // 创建 socket bool ToRemote = true; int clientSocket = socket(AF_INET, SOCK_STREAM, 0); - if (clientSocket == -1) { + if (clientSocket == -1) + { std::cerr << "Error creating socket" << std::endl; // return -1; ToRemote = false; - } else { + } + else + { // 服务器地址信息 sockaddr_in serverAddress; serverAddress.sin_family = AF_INET; auto ColonPos = MainConfig::server_address.find(':'); - if(ColonPos < 0) { + if (ColonPos < 0) + { std::cerr << "server address err" << std::endl; return 0; } @@ -686,7 +827,8 @@ int main(int argc, char *argv[]) { serverAddress.sin_port = htons(std::stoi(PortAddr)); inet_pton(AF_INET, IPAddr.c_str(), &serverAddress.sin_addr); // 连接到服务器 - if (connect(clientSocket, (struct sockaddr*)&serverAddress, sizeof(serverAddress)) == -1) { + if 
(connect(clientSocket, (struct sockaddr *)&serverAddress, sizeof(serverAddress)) == -1) + { std::cerr << "Error connecting to server" << std::endl; close(clientSocket); // return -1; @@ -694,32 +836,36 @@ int main(int argc, char *argv[]) { } } - - for(; MainConfig::run_time > 0 && (MainConfig::target_pid < 0 || !kill(MainConfig::target_pid, 0)); MainConfig::run_time -= MainConfig::delay) { - sleep(MainConfig::delay); // 模拟实时性 + for (; MainConfig::run_time > 0 && (MainConfig::target_pid < 0 || !kill(MainConfig::target_pid, 0)); MainConfig::run_time -= MainConfig::delay) + { + sleep(MainConfig::delay); // 模拟实时性 time_t timep; ::time(&timep); printf("%s", ctime(&timep)); - for(auto Item : StackCollectorList) { + for (auto Item : StackCollectorList) + { Item->detach(); // if(MainConfig::d_mode == display_t::LIST_OUTPUT) { // Item->print_list(); // } auto StreamData = Item->format(); - if(!StreamData) { + if (!StreamData) + { continue; } auto dataToSend = StreamData->str(); - if(ToRemote) { + if (ToRemote) + { // 发送数据到服务器 struct diy_header AHeader = { - .len = dataToSend.size() - }; + .len = dataToSend.size()}; strcpy(AHeader.name, Item->name.c_str()); send(clientSocket, &AHeader, sizeof(AHeader), 0); send(clientSocket, dataToSend.c_str(), AHeader.len, 0); - } else { + } + else + { Item->print_list(); std::ofstream fout; fout.open(Item->name + "_stack_data.txt", std::ios::out | std::ios::app); @@ -730,7 +876,6 @@ int main(int argc, char *argv[]) { Item->attach(); } - } // 关闭连接 close(clientSocket); From 7335b077579030d4344556ff994dabc1e5571fea Mon Sep 17 00:00:00 2001 From: gaoyixiang1 <1739037263@qq.com> Date: Wed, 24 Jan 2024 19:42:42 +0800 Subject: [PATCH 02/24] fix conflicts --- .../Stack_Analyser/libbpf/stack_analyzer.cc | 569 ++++-------------- 1 file changed, 130 insertions(+), 439 deletions(-) diff --git a/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc b/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc index 92c9b7263..b6dc50f2b 100644 --- 
a/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc +++ b/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc @@ -20,16 +20,11 @@ #include #include #include -#include -#include #include -#include -#include #include "symbol.h" #include "clipp.h" - extern "C" { #include @@ -46,7 +41,39 @@ extern "C" #include "bpf/pre_count.skel.h" } -std::string GetLocalDateTime(void) { +std::string demangleCppSym(std::string symbol) +{ + size_t size = 0; + int status = 0; + char *demangled = abi::__cxa_demangle(symbol.c_str(), NULL, &size, &status); + + if (status == 0 && demangled != NULL) + // 去除参数列表及括号 + { + char *func_name = demangled; + for (auto i = size - 1; i; i--) + { + if (demangled[i] == ' ') + { + for (char *p = demangled + i; *p; p++) + { + *p = p[1]; + } + } + } + std::string FuncName(func_name); + free(demangled); + return FuncName; + } + else + // 解码失败,返回原始符号 + { + return symbol; + } +} + +std::string getLocalDateTime(void) +{ auto t = time(NULL); auto localTm = localtime(&t); char buff[32]; @@ -57,29 +84,6 @@ std::string GetLocalDateTime(void) { // 模板用来统一调用多个类有同样但未被抽象的接口 // 虚函数用来规范接口来被统一调用 - - -std::string demangle(const char *symbol) { - size_t size = 0; - int status = 0; - char *demangled = abi::__cxa_demangle(symbol, NULL, &size, &status); - - if (status == 0 && demangled != NULL) { - std::string result; - for(size_t i=0;i *D = new std::vector(); - for (psid prev = {0}, id; !bpf_map_get_next_key(value_fd, &prev, &id); prev = id) - { - bpf_map_lookup_elem(value_fd, &id, data_buf); - CountItem d(id.pid, id.ksid, id.usid, data_value()); - auto keys = new psid[MAX_ENTRIES]; - auto vals = new char[MAX_ENTRIES*count_size]; + auto vals = new char[MAX_ENTRIES * count_size]; uint32_t count = MAX_ENTRIES; psid next_key; int err; - if(showDelta) { + if (showDelta) + { err = bpf_map_lookup_and_delete_batch(value_fd, NULL, &next_key, keys, vals, &count, NULL); - } else { + } + else + { err = bpf_map_lookup_batch(value_fd, NULL, &next_key, keys, vals, &count, 
NULL); } - if(err == EFAULT) { + if (err == EFAULT) + { return NULL; } auto D = new std::vector(); - for(uint32_t i = 0; i < count; i++) { - CountItem d(keys[i].pid, keys[i].ksid, keys[i].usid, data_value(vals + count_size*i)); + for (uint32_t i = 0; i < count; i++) + { + CountItem d(keys[i].pid, keys[i].ksid, keys[i].usid, data_value(vals + count_size * i)); D->insert(std::lower_bound(D->begin(), D->end(), d), d); } delete[] keys; @@ -152,8 +154,6 @@ class StackCollector int comm_fd = -1; // pid-进程名表的文件描述符 int trace_fd = -1; // 栈id-栈轨迹表的文件描述符 - void *data_buf = NULL; // 用于存储单个指标值的缓冲区 - size_t count_size = sizeof(uint32_t); bool showDelta = true; @@ -161,12 +161,12 @@ class StackCollector /// @brief 将缓冲区的数据解析为特定值 /// @param 无 /// @return 解析出的值 - virtual uint64_t data_value(void *data) { return *(uint32_t *)data; }; + virtual double data_value(void *data) { return *(uint32_t *)data; }; /// @brief 为特定值添加注解 /// @param f 特定值 /// @return 字符串 - virtual std::string data_str(uint64_t f) = 0; + virtual std::string data_str(void) = 0; #define declareEBPF(eBPFName) \ struct eBPFName *skel = NULL; @@ -190,14 +190,6 @@ class StackCollector StackCollector() { self_pid = getpid(); - - data_buf = new uint64_t(0); - }; - - virtual ~StackCollector() - { - delete (uint64_t *)data_buf; - }; /// @brief 负责ebpf程序的加载、参数设置和打开操作 @@ -249,198 +241,74 @@ class StackCollector skel = NULL; \ }; - - /// @brief 清除count map的数据 - /// @param 无 - void check_clear_count(void) - { - if (!showDelta) - return; - uint c = MAX_ENTRIES; - for (psid prev = {0}, id; c && !bpf_map_get_next_key(value_fd, &prev, &id); c--, prev = id) - { - bpf_map_delete_elem(value_fd, &id); - } - } - - /// @brief 打印count列表 - /// @param 无 - void print_list(void) - { - auto D = sortedCountList(); - for (auto id : *D) - { - printf("pid:%-6d\tusid:%-6d\tksid:%-6d\t%s\n", id.pid, id.usid, id.ksid, data_str(id.val).c_str()); - } - delete D; - } - - /// @brief 将表中的栈数据保存为火焰图 - /// @param 无 - /// @return 表未成功打开则返回负数 - std::ostringstream 
*format(void) + operator std::string() { - CHECK_ERR_VALUE(value_fd < 0, nullptr, "count map open failure"); - CHECK_ERR_VALUE(trace_fd < 0, nullptr, "trace map open failure"); - CHECK_ERR_VALUE(comm_fd < 0, nullptr, "comm map open failure"); - // std::filebuf DataFileBuf; - // const std::string DataFileName = name + "_stack_data.log"; - // CHECK_ERR(DataFileBuf.open(DataFileName, std::ios::app) == nullptr, "data file open failed"); - // std::ostream DataText(&DataFileBuf); - auto DataTextP = new std::ostringstream(); - auto &DataText = *DataTextP; - for (psid prev = {}, id; !bpf_map_get_next_key(value_fd, &prev, &id); prev = id) - { - { - char cmd[COMM_LEN]; - bpf_map_lookup_elem(comm_fd, &id.pid, cmd); - DataText << std::string(cmd) << ':' << std::to_string(id.pid) << ';'; - } - symbol sym; - uint64_t ip[MAX_STACKS]; - if (id.usid >= 0) - { - bpf_map_lookup_elem(trace_fd, &id.usid, ip); - std::string *s = 0, symbol; - elf_file file; - uint64_t *p = ip + MAX_STACKS - 1; - for (; !*p; p--) - ; - for (; p >= ip; p--) - { - uint64_t &addr = *p; - sym.reset(addr); - if (g_symbol_parser.find_symbol_in_cache(id.pid, addr, symbol)) - { - s = &symbol; - if ((*s)[0] == '_' && (*s)[1] == 'Z')//代表是C++符号,则调用demangle解析 - { - *s = demangle(symbol.c_str()); - } - DataText << *s << ';'; - } - else if (g_symbol_parser.get_symbol_info(id.pid, sym, file) && - g_symbol_parser.find_elf_symbol(sym, file, id.pid, id.pid)) - { - std::stringstream ss(""); - ss << "+0x" << std::hex << (addr - sym.ip); - if (sym.name[0] == '_' && sym.name[1] == 'Z')//代表是C++符号,则调用demangle解析 - { - sym.name = demangle(sym.name.c_str()); - } - // int sym_len = sym.name.length(); - // if (sym.name[sym_len-1] == ')' && sym.name[sym_len-2] == '(')//代表函数名加了括号 - // { - // DataText << ""; - // }else{ - // sym.name+="()"; - // } - sym.name += ss.str(); - DataText << sym.name << ';'; - g_symbol_parser.putin_symbol_cache(id.pid, addr, sym.name); - } - else - { - std::stringstream ss(""); - ss << "0x" << std::hex << 
addr; - auto addr_str = ss.str(); - DataText << addr_str << ';'; - g_symbol_parser.putin_symbol_cache(id.pid, addr, addr_str); - } - } - } - else - { - DataText << "[MISSING USER STACK];"; - } - DataText << "---------;"; - if (id.ksid >= 0) - { - bpf_map_lookup_elem(trace_fd, &id.ksid, ip); - uint64_t *p = ip + MAX_STACKS - 1; - for (; !*p; p--) - ; - for (; p >= ip; p--) - { - uint64_t &addr = *p; - sym.reset(addr); - if (g_symbol_parser.find_kernel_symbol(sym)) - { - if (sym.name[0] == '_' && sym.name[1] == 'Z')//代表是C++符号,则调用demangle解析 - { - sym.name = demangle(sym.name.c_str()); - } - // int sym_len = sym.name.length(); - // if (sym.name[sym_len-1] == ')' && sym.name[sym_len-2] == '(')//代表函数名加了括号 - // { - // DataText << ""; - // }else{ - // sym.name+="()"; - // } - DataText << sym.name << ';'; - } - else - { - std::stringstream ss(""); - ss << "0x" << std::hex << addr; - auto addr_str = ss.str(); - DataText << addr_str << ';'; - g_symbol_parser.putin_symbol_cache(pid, addr, addr_str); - } - } - } - else - { - DataText << "[MISSING KERNEL STACK];"; - - operator std::string() { std::ostringstream oss; - oss << "time:"; { - oss << GetLocalDateTime() << '\n'; + oss << "time:"; + { + oss << getLocalDateTime() << '\n'; } std::map> traces; - oss << "counts:\n"; { + oss << "counts:\n"; + { auto D = sortedCountList(); - if(!D) return oss.str(); - oss << "pid\tusid\tksid\t" << data_str(1).c_str() << '\n'; + if (!D) + return oss.str(); + oss << "pid\tusid\tksid\t" << data_str() << '\n'; uint64_t trace[MAX_STACKS], *p; - for (auto id : *D) { + for (auto id : *D) + { oss << id.pid << '\t' << id.usid << '\t' << id.ksid << '\t' << id.val << '\n'; - if(id.usid > 0 && traces.find(id.usid) == traces.end()) { + if (id.usid > 0 && traces.find(id.usid) == traces.end()) + { bpf_map_lookup_elem(trace_fd, &id.usid, trace); - for(p = trace + MAX_STACKS - 1; !*p; p--); - for (; p >= trace; p--) { + for (p = trace + MAX_STACKS - 1; !*p; p--) + ; + for (; p >= trace; p--) + { uint64_t 
&addr = *p; symbol sym; sym.reset(addr); elf_file file; - std::string symbol; - if (g_symbol_parser.find_symbol_in_cache(id.pid, addr, symbol)); + if (g_symbol_parser.find_symbol_in_cache(id.pid, addr, sym.name)) + ; else if (g_symbol_parser.get_symbol_info(id.pid, sym, file) && - g_symbol_parser.find_elf_symbol(sym, file, id.pid, id.pid)) { + g_symbol_parser.find_elf_symbol(sym, file, id.pid, id.pid)) + { + if (sym.name[0] == '_' && sym.name[1] == 'Z') + // 代表是C++符号,则调用demangle解析 + { + sym.name = demangleCppSym(sym.name); + } std::stringstream ss(""); ss << "+0x" << std::hex << (addr - sym.ip); sym.name += ss.str(); - symbol = sym.name; g_symbol_parser.putin_symbol_cache(id.pid, addr, sym.name); - } else { + } + else + { std::stringstream ss(""); ss << "0x" << std::hex << addr; - symbol = ss.str(); - g_symbol_parser.putin_symbol_cache(id.pid, addr, symbol); + sym.name = ss.str(); + g_symbol_parser.putin_symbol_cache(id.pid, addr, sym.name); } - traces[id.usid].push_back(symbol); + traces[id.usid].push_back(sym.name); } } - if(id.ksid > 0 && traces.find(id.ksid) == traces.end()) { + if (id.ksid > 0 && traces.find(id.ksid) == traces.end()) + { bpf_map_lookup_elem(trace_fd, &id.ksid, trace); - for(p = trace + MAX_STACKS - 1; !*p; p--); - for (; p >= trace; p--) { + for (p = trace + MAX_STACKS - 1; !*p; p--) + ; + for (; p >= trace; p--) + { uint64_t &addr = *p; symbol sym; sym.reset(addr); - if (g_symbol_parser.find_kernel_symbol(sym)); - else { + if (g_symbol_parser.find_kernel_symbol(sym)) + ; + else + { std::stringstream ss(""); ss << "0x" << std::hex << addr; sym.name = ss.str(); @@ -449,22 +317,26 @@ class StackCollector traces[id.ksid].push_back(sym.name); } } - } delete D; } - oss << "traces:\n"; { + oss << "traces:\n"; + { oss << "sid\ttrace\n"; - for(auto i : traces) { + for (auto i : traces) + { oss << i.first << "\t"; - for(auto s : i.second) { - oss << s << ','; + for (auto s : i.second) + { + oss << s << ';'; } oss << "\b \n"; } } - oss << "groups:\n"; { 
- if(tgid_fd < 0) { + oss << "groups:\n"; + { + if (tgid_fd < 0) + { return oss.str(); } auto keys = new uint32_t[MAX_ENTRIES]; @@ -472,18 +344,22 @@ class StackCollector uint32_t count = MAX_ENTRIES; uint32_t next_key; int err = bpf_map_lookup_batch(tgid_fd, NULL, &next_key, keys, vals, &count, NULL); - if(err == EFAULT) { + if (err == EFAULT) + { return oss.str(); } oss << "pid\ttgid\n"; - for(uint32_t i = 0; i < count; i++) { + for (uint32_t i = 0; i < count; i++) + { oss << keys[i] << '\t' << vals[i] << '\n'; } delete[] keys; delete[] vals; } - oss << "commands:\n"; { - if(comm_fd < 0) { + oss << "commands:\n"; + { + if (comm_fd < 0) + { return oss.str(); } auto keys = new uint32_t[MAX_ENTRIES]; @@ -491,11 +367,13 @@ class StackCollector uint32_t count = MAX_ENTRIES; uint32_t next_key; int err = bpf_map_lookup_batch(comm_fd, NULL, &next_key, keys, vals, &count, NULL); - if(err == EFAULT) { + if (err == EFAULT) + { return oss.str(); } oss << "pid\tcommand\n"; - for(uint32_t i = 0; i < count; i++) { + for (uint32_t i = 0; i < count; i++) + { oss << keys[i] << '\t' << vals[i] << '\n'; } delete[] keys; @@ -504,7 +382,6 @@ class StackCollector oss << "OK\n"; return oss.str(); } - }; class OnCPUStackCollector : public StackCollector @@ -529,7 +406,8 @@ class OnCPUStackCollector : public StackCollector CHECK_ERR_EXIT(num_cpus <= 0, "Fail to get the number of processors"); }; - std::string data_str(uint64_t f) override { return std::to_string(f) + "Count:" + std::to_string(freq) + "HZ:5s"; }; + double data_value(void *data) override { return 1. 
* *(uint32_t *)data * 1000 / freq; } + std::string data_str(void) override { return "ThisTimeOnCpu/ms"; }; int load(void) override { @@ -610,7 +488,7 @@ class OffCPUStackCollector : public StackCollector declareEBPF(off_cpu_count_bpf); protected: - std::string data_str(uint64_t f) override { return std::to_string(f) + "ms:5s"; }; + std::string data_str(void) override { return "OffCpuThisTime/ms"; }; defaultLoad; defaultAttach; defaultDetach; @@ -626,7 +504,7 @@ class MemoryStackCollector : public StackCollector declareEBPF(mem_count_bpf); protected: - std::string data_str(uint64_t f) override { return std::to_string(f) + "LeakByte"; }; + std::string data_str(void) override { return "LeakMomery/Byte"; }; public: char *object = (char *)"libc.so.6"; @@ -692,28 +570,19 @@ class IOStackCollector : public StackCollector declareEBPF(io_count_bpf); protected: - std::string data_str(uint64_t f) override + std::string data_str(void) override { - const std::string IOScale[] = {"counts", "size(B)", "aver(B/1)"}; - return IOScale[DataType] + ":" + std::to_string(f); + static const std::string IOScale[] = {"IOCountThisTime/1", "IOSizeThisTime/Byte", "AverageIOSizeThisTime/Byte"}; + return IOScale[DataType]; }; - uint64_t data_value() override + double data_value(void *data) override { - io_tuple *p = (io_tuple *)data_buf; + io_tuple *p = (io_tuple *)data; switch (DataType) { - - std::string data_str(uint64_t f) override { - const std::string IOScale[] = {"Count", "Byte", "Byte:Count"}; - return std::to_string(f) + IOScale[DataType] + ":5s"; - }; - - uint64_t data_value(void *data) override { - io_tuple *p = (io_tuple *)data; - switch (DataType) { case AVE: - return p->size / p->count; + return 1. 
* p->size / p->count; case SIZE: return p->size; case COUNT: @@ -728,17 +597,6 @@ class IOStackCollector : public StackCollector IOStackCollector() { - delete (uint64_t *)data_buf; - data_buf = new io_tuple{0}; - name = "io"; - }; - - ~IOStackCollector() override - { - delete (io_tuple *)data_buf; - }; - - IOStackCollector() { count_size = sizeof(io_tuple); name = "io"; }; @@ -755,19 +613,13 @@ class ReadaheadStackCollector : public StackCollector declareEBPF(pre_count_bpf); protected: - std::string data_str(uint64_t f) override + std::string data_str(void) override { - return "rest_pages:" + std::to_string(f); + return "TotalUnusedReadaheadPages/Page"; }; - uint64_t data_value() override + double data_value(void *data) override { - ra_tuple *p = (ra_tuple *)data_buf; - std::string data_str(uint64_t f) override { - return std::to_string(f) + "UnusedPage"; - }; - - uint64_t data_value(void *data) override { ra_tuple *p = (ra_tuple *)data; return p->expect - p->truth; }; @@ -780,18 +632,10 @@ class ReadaheadStackCollector : public StackCollector ReadaheadStackCollector() { - delete (uint64_t *)data_buf; - data_buf = new ra_tuple{0}; - ReadaheadStackCollector() { name = "readahead"; count_size = sizeof(ra_tuple); showDelta = false; }; - - ~ReadaheadStackCollector() override - { - delete (ra_tuple *)data_buf; - } }; namespace MainConfig @@ -811,9 +655,6 @@ void endCollect(void) { if (MainConfig::run_time > 0) { - Item->format(); - for(auto Item : StackCollectorList) { - if(MainConfig::run_time > 0) { std::cout << std::string(*Item) << std::endl; } Item->detach(); @@ -833,8 +674,7 @@ int main(int argc, char *argv[]) ((clipp::option("-c", "--command") & clipp::value("to be sampled command to run, default none", MainConfig::command)) % "set command for monitoring the whole life")), (clipp::option("-d", "--delay") & clipp::value("delay time(seconds) to output, default 5", MainConfig::delay)) % "set the interval to output", clipp::option("-l", 
"--realtime-list").set(MainConfig::d_mode, LIST_OUTPUT) % "output in console, default false", - clipp::option("-t", "--timeout") & clipp::value("run time, default nearly infinite", MainConfig::run_time) % "set the total simpling time", - clipp::option("-s", "--server") & clipp::value("server address, default 127.0.0.1:12345", MainConfig::server_address) % "set the server address"); + clipp::option("-t", "--timeout") & clipp::value("run time, default nearly infinite", MainConfig::run_time) % "set the total simpling time"); auto SubOption = (clipp::option("-U", "--user-stack-only").call([] { StackCollectorList.back()->kstack = false; }) % @@ -900,83 +740,6 @@ int main(int argc, char *argv[]) if (!clipp::parse(argc, argv, cli)) { -int main(int argc, char *argv[]) { - auto MainOption = ( - ( - ((clipp::option("-p", "--pid") & clipp::value("pid of sampled process, default -1 for all", MainConfig::target_pid)) % "set pid of process to monitor") | - ((clipp::option("-c", "--command") & clipp::value("to be sampled command to run, default none", MainConfig::command)) % "set command for monitoring the whole life") - ), - (clipp::option("-d", "--delay") & clipp::value("delay time(seconds) to output, default 5", MainConfig::delay)) % "set the interval to output", - clipp::option("-l", "--realtime-list").set(MainConfig::d_mode, LIST_OUTPUT) % "output in console, default false", - clipp::option("-t", "--timeout") & clipp::value("run time, default nearly infinite", MainConfig::run_time) % "set the total simpling time" - ); - - auto SubOption = ( - clipp::option("-U", "--user-stack-only").call([]{ - StackCollectorList.back()->kstack = false; - }) % "only sample user stacks", - clipp::option("-K", "--kernel-stack-only").call([]{ - StackCollectorList.back()->ustack = false; - }) % "only sample kernel stacks", - (clipp::option("-m", "--max-value") & clipp::value("max threshold of sampled value", optbuff).call([]{ - StackCollectorList.back()->max = optbuff; - })) % "set the max 
threshold of sampled value", - (clipp::option("-n", "--min-value") & clipp::value("min threshold of sampled value", optbuff).call([]{ - StackCollectorList.back()->min = optbuff; - })) % "set the min threshold of sampled value" - ); - - auto OnCpuOption = clipp::option("on-cpu").call([]{ - StackCollectorList.push_back(new OnCPUStackCollector()); - }) % "sample the call stacks of on-cpu processes" & ( - clipp::option("-F", "--frequency") & clipp::value("sampling frequency", optbuff).call([]{ - static_cast(StackCollectorList.back())->freq = optbuff; - }) % "sampling at a set frequency", - SubOption - ); - - auto OffCpuOption = clipp::option("off-cpu").call([]{ - StackCollectorList.push_back(new OffCPUStackCollector()); - }) % "sample the call stacks of off-cpu processes" & SubOption; - - auto MemoryOption = clipp::option("mem").call([]{ - StackCollectorList.push_back(new MemoryStackCollector()); - }) % "sample the memory usage of call stacks" & SubOption; - - auto IOOption = clipp::option("io").call([]{ - StackCollectorList.push_back(new IOStackCollector()); - }) % "sample the IO data volume of call stacks" & ( - (clipp::option("--mod") & ( - clipp::option("count").call([]{ - static_cast(StackCollectorList.back())->DataType = COUNT; - }) % "Counting the number of I/O operations" | - clipp::option("ave").call([]{ - static_cast(StackCollectorList.back())->DataType = AVE; - }) % "Counting the ave of I/O operations" | - clipp::option("size").call([]{ - static_cast(StackCollectorList.back())->DataType = SIZE; - }) % "Counting the size of I/O operations" - )) % "set the statistic mod", - SubOption - ); - - auto ReadaheadOption = clipp::option("ra").call([]{ - StackCollectorList.push_back(new ReadaheadStackCollector()); - }) % "sample the readahead hit rate of call stacks" & SubOption; - - auto cli = ( - MainOption, - clipp::option("-v", "--version").call([] { - std::cout << "verion 2.0\n\n"; - }) % "show version", - OnCpuOption, - OffCpuOption, - MemoryOption, - IOOption, - 
ReadaheadOption - ) % "statistic call trace relate with some metrics"; - - if (!clipp::parse(argc, argv, cli)) { std::cout << clipp::make_man_page(cli, argv[0]) << '\n'; return 0; } @@ -1037,90 +800,18 @@ int main(int argc, char *argv[]) { write(child_exec_event_fd, &eventbuff, sizeof(eventbuff)); } - printf("display mode: %d\n", MainConfig::d_mode); - - // 创建 socket - bool ToRemote = true; - int clientSocket = socket(AF_INET, SOCK_STREAM, 0); - if (clientSocket == -1) - { - std::cerr << "Error creating socket" << std::endl; - // return -1; - ToRemote = false; - } - else - { - // 服务器地址信息 - sockaddr_in serverAddress; - serverAddress.sin_family = AF_INET; - auto ColonPos = MainConfig::server_address.find(':'); - if (ColonPos < 0) - { - std::cerr << "server address err" << std::endl; - return 0; - } - auto IPAddr = MainConfig::server_address.substr(0, ColonPos); - auto PortAddr = MainConfig::server_address.substr(ColonPos + 1); - serverAddress.sin_port = htons(std::stoi(PortAddr)); - inet_pton(AF_INET, IPAddr.c_str(), &serverAddress.sin_addr); - // 连接到服务器 - if (connect(clientSocket, (struct sockaddr *)&serverAddress, sizeof(serverAddress)) == -1) - { - std::cerr << "Error connecting to server" << std::endl; - close(clientSocket); - // return -1; - ToRemote = false; - } - } + // printf("display mode: %d\n", MainConfig::d_mode); for (; MainConfig::run_time > 0 && (MainConfig::target_pid < 0 || !kill(MainConfig::target_pid, 0)); MainConfig::run_time -= MainConfig::delay) { - sleep(MainConfig::delay); // 模拟实时性 - time_t timep; - ::time(&timep); - printf("%s", ctime(&timep)); - + sleep(MainConfig::delay); for (auto Item : StackCollectorList) { - Item->detach(); - // if(MainConfig::d_mode == display_t::LIST_OUTPUT) { - // Item->print_list(); - // } - auto StreamData = Item->format(); - if (!StreamData) - { - continue; - } - auto dataToSend = StreamData->str(); - if (ToRemote) - { - // 发送数据到服务器 - struct diy_header AHeader = { - .len = dataToSend.size()}; - 
strcpy(AHeader.name, Item->name.c_str()); - send(clientSocket, &AHeader, sizeof(AHeader), 0); - send(clientSocket, dataToSend.c_str(), AHeader.len, 0); - } - else - { - Item->print_list(); - std::ofstream fout; - fout.open(Item->name + "_stack_data.txt", std::ios::out | std::ios::app); - fout << dataToSend; - } - delete StreamData; - Item->check_clear_count(); - - // printf("display mode: %d\n", MainConfig::d_mode); - - for(; MainConfig::run_time > 0 && (MainConfig::target_pid < 0 || !kill(MainConfig::target_pid, 0)); MainConfig::run_time -= MainConfig::delay) { - sleep(MainConfig::delay); - for(auto Item : StackCollectorList) { Item->detach(); std::cout << std::string(*Item) << std::endl; Item->attach(); } } - + atexit(endCollect); } \ No newline at end of file From 7a38410ee90e3c433cde1a8a30a07ead26a88758 Mon Sep 17 00:00:00 2001 From: gaoyixiang1 <1739037263@qq.com> Date: Sat, 27 Jan 2024 09:50:15 +0800 Subject: [PATCH 03/24] add stack_count and improve doc --- .../Stack_Analyser/libbpf/Makefile | 2 +- .../Stack_Analyser/libbpf/include/sa_common.h | 6 +++ .../Stack_Analyser/libbpf/include/sa_user.h | 9 +++- .../Stack_Analyser/libbpf/stack_analyzer.cc | 48 +++++++++++++++++-- ...77\347\224\250\346\226\271\346\263\225.md" | 6 +-- 5 files changed, 61 insertions(+), 10 deletions(-) diff --git a/eBPF_Supermarket/Stack_Analyser/libbpf/Makefile b/eBPF_Supermarket/Stack_Analyser/libbpf/Makefile index 6553fff2f..5561e438f 100644 --- a/eBPF_Supermarket/Stack_Analyser/libbpf/Makefile +++ b/eBPF_Supermarket/Stack_Analyser/libbpf/Makefile @@ -38,7 +38,7 @@ INCLUDES := -I$(OUTPUT) -I./libbpf-bootstrap/libbpf/include/uapi -I$(dir $(VMLIN CFLAGS := -g -Wall ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS) -APPS = on_cpu_count off_cpu_count mem_count io_count pre_count +APPS = on_cpu_count off_cpu_count mem_count io_count pre_count stack_count SYMBOL = elf symbol TARGETS = stack_analyzer diff --git a/eBPF_Supermarket/Stack_Analyser/libbpf/include/sa_common.h 
b/eBPF_Supermarket/Stack_Analyser/libbpf/include/sa_common.h index a142fa261..b15ac747c 100644 --- a/eBPF_Supermarket/Stack_Analyser/libbpf/include/sa_common.h +++ b/eBPF_Supermarket/Stack_Analyser/libbpf/include/sa_common.h @@ -46,4 +46,10 @@ typedef struct { __u64 size; } io_tuple; +typedef struct { + __u32 tgid; + comm name; + __u64 count; +} stack_tuple; + #endif \ No newline at end of file diff --git a/eBPF_Supermarket/Stack_Analyser/libbpf/include/sa_user.h b/eBPF_Supermarket/Stack_Analyser/libbpf/include/sa_user.h index 4e0ffdd27..7d51562da 100644 --- a/eBPF_Supermarket/Stack_Analyser/libbpf/include/sa_user.h +++ b/eBPF_Supermarket/Stack_Analyser/libbpf/include/sa_user.h @@ -41,6 +41,7 @@ typedef enum { MOD_MEM, // 内存模式 MOD_IO, // io模式 MOD_RA, // 预读取分析模式 + MOD_STACK, //调用栈次数统计模式 MOD_NUM // 该枚举类值的总数 } StackCollectMode; @@ -49,7 +50,8 @@ char StackCollectModeName[MOD_NUM][16] = { "off_cpu", "memory", "io", - "readahead" + "readahead", + "stackcount", }; typedef enum { @@ -63,6 +65,11 @@ typedef enum { AVE } io_mod; +typedef enum { + COUNTS, + CPU +} stack_mod; + /// @brief 获取epbf程序中指定表的文件描述符 /// @param name 表的名字 #define OPEN_MAP(name) bpf_map__fd(skel->maps.name) diff --git a/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc b/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc index b6dc50f2b..aa28e612f 100644 --- a/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc +++ b/eBPF_Supermarket/Stack_Analyser/libbpf/stack_analyzer.cc @@ -39,6 +39,7 @@ extern "C" #include "bpf/mem_count.skel.h" #include "bpf/io_count.skel.h" #include "bpf/pre_count.skel.h" +#include "bpf/stack_count.skel.h" } std::string demangleCppSym(std::string symbol) @@ -96,6 +97,7 @@ class StackCollector double val; CountItem(int32_t p, int32_t k, int32_t u, double v) { + pid = p; ksid = k; usid = u; @@ -173,7 +175,7 @@ class StackCollector public: std::string name; // 标识类名 - + int pid = -1; // 用于设置ebpf程序跟踪的pid int cpu = -1; // 用于设置ebpf程序跟踪的cpu int err = 0; // 用于保存错误代码 @@ 
-184,7 +186,6 @@ class StackCollector uint64_t max = __UINT64_MAX__; // 设置采集指标最大值,最小值 bool clear = false; // 清除已输出的指标积累量 - int self_pid; StackCollector() @@ -384,6 +385,7 @@ class StackCollector } }; + class OnCPUStackCollector : public StackCollector { private: @@ -417,7 +419,7 @@ class OnCPUStackCollector : public StackCollector fscanf(fp, "%p", &load_a); pclose(fp); StackProgLoadOpen( - skel->bss->load_a = load_a) return 0; + skel->bss->load_a = load_a ) return 0; }; int attach(void) override @@ -638,6 +640,40 @@ class ReadaheadStackCollector : public StackCollector }; }; +class StackCountStackCollector : public StackCollector +{ +private: + declareEBPF(stack_count_bpf); + + +protected: + std::string data_str(void) override + { + return "Calling Counts"; + }; + double data_value(void *data) override + { + stack_tuple *p = (stack_tuple *)data; + return p->count; + }; + +public: + stack_mod DataType = stack_mod::COUNTS; + + StackCountStackCollector() + { + count_size = sizeof(stack_tuple); + name = "stackcount"; + }; + + defaultLoad; + defaultAttach; + defaultDetach; + defaultUnload; + +}; + + namespace MainConfig { int run_time = __INT_MAX__; // 运行时间 @@ -726,6 +762,9 @@ int main(int argc, char *argv[]) { StackCollectorList.push_back(new ReadaheadStackCollector()); }) % "sample the readahead hit rate of call stacks" & SubOption; + auto StackCountOption = clipp::option("stackcount").call([] + { StackCollectorList.push_back(new StackCountStackCollector()); }) % + "sample the counts of calling stacks" & SubOption; auto cli = (MainOption, clipp::option("-v", "--version").call([] @@ -735,7 +774,8 @@ int main(int argc, char *argv[]) OffCpuOption, MemoryOption, IOOption, - ReadaheadOption) % + ReadaheadOption, + StackCountOption) % "statistic call trace relate with some metrics"; if (!clipp::parse(argc, argv, cli)) diff --git "a/eBPF_Supermarket/Stack_Analyser/libbpf/\346\241\206\346\236\266\344\275\277\347\224\250\346\226\271\346\263\225.md" 
"b/eBPF_Supermarket/Stack_Analyser/libbpf/\346\241\206\346\236\266\344\275\277\347\224\250\346\226\271\346\263\225.md" index 96af9c47b..4c68a9bb6 100644 --- "a/eBPF_Supermarket/Stack_Analyser/libbpf/\346\241\206\346\236\266\344\275\277\347\224\250\346\226\271\346\263\225.md" +++ "b/eBPF_Supermarket/Stack_Analyser/libbpf/\346\241\206\346\236\266\344\275\277\347\224\250\346\226\271\346\263\225.md" @@ -8,7 +8,7 @@ # 用户侧 -1. 在 `include/stack_analyzer.h` 中的 `MOD` 枚举类型定义中新增一个功能模块的标识 +1. 在 `include/stack_analyzer.h` 中的 `MOD` 枚举类型定义中新增一个功能模块的标识,将该名称添加到`MOD_NUM`前面即可 2. 在 `stack_analyzer.cc` 中创建一个 `bpf_loader` 的子类,在其中重写以下函数: @@ -24,9 +24,7 @@ 6. 自定义eBPF程序清除函数 `void remove(void)`,将eBPF程序清除 -3. 在 main 函数中添加新增子命令和对应参数解析语句,将子命令解析为新增子功能对应的标识符并设置给 `env::mod` - -4. 在 `bpf_loader arr[]` 中添加 包装子类构造函数的匿名函数,添加顺序需和其在 MOD 枚举类型中对应的 标识 的顺序一致 +3. 在 main 函数中添加新增子命令和对应参数解析语句 # 编译侧 From 34680f24b021b052897215d88889058eef1fef1a Mon Sep 17 00:00:00 2001 From: gaoyixiang1 <45355878+gaoyixiang1@users.noreply.github.com> Date: Sat, 27 Jan 2024 10:28:36 +0800 Subject: [PATCH 04/24] add stack_count.bpf.c --- eBPF_Supermarket/Stack_Analyser/libbpf/bpf/stack_count.bpf.c | 1 + 1 file changed, 1 insertion(+) create mode 100644 eBPF_Supermarket/Stack_Analyser/libbpf/bpf/stack_count.bpf.c diff --git a/eBPF_Supermarket/Stack_Analyser/libbpf/bpf/stack_count.bpf.c b/eBPF_Supermarket/Stack_Analyser/libbpf/bpf/stack_count.bpf.c new file mode 100644 index 000000000..b4785957b --- /dev/null +++ b/eBPF_Supermarket/Stack_Analyser/libbpf/bpf/stack_count.bpf.c @@ -0,0 +1 @@ +s From 33188fd84f187eeda9e776ae8334c4c3f00becea Mon Sep 17 00:00:00 2001 From: gaoyixiang1 <45355878+gaoyixiang1@users.noreply.github.com> Date: Sat, 27 Jan 2024 10:31:16 +0800 Subject: [PATCH 05/24] Update stack_count.bpf.c --- .../libbpf/bpf/stack_count.bpf.c | 73 ++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/eBPF_Supermarket/Stack_Analyser/libbpf/bpf/stack_count.bpf.c 
b/eBPF_Supermarket/Stack_Analyser/libbpf/bpf/stack_count.bpf.c index b4785957b..719c9cb24 100644 --- a/eBPF_Supermarket/Stack_Analyser/libbpf/bpf/stack_count.bpf.c +++ b/eBPF_Supermarket/Stack_Analyser/libbpf/bpf/stack_count.bpf.c @@ -1 +1,72 @@ -s +#include "vmlinux.h" +#include +#include +#include + + +#include "sa_ebpf.h" +#include "task.h" + +DeclareCommonMaps(stack_tuple); +DeclareCommonVar(); + +//传进来的参数 +int apid = 0; +// int acpu = 0; + +const char LICENSE[] SEC("license") = "GPL"; + +static int handle(struct trace_event_raw_sys_enter *ctx) +{ + struct task_struct* curr = (struct task_struct*)bpf_get_current_task(); //利用bpf_get_current_task()获得当前的进程tsk + ignoreKthread(curr); + // u32 cpu_id = bpf_get_smp_processor_id(); + // if(cpu_id != acpu){ + // return 0; + // } + stack_tuple key = {}; + u32 pid = get_task_ns_pid(curr); //利用帮助函数获得当前进程的pid + if ((apid >= 0 && pid != apid) || !pid || pid == self_pid) + return 0; + + u32 tgid = get_task_ns_tgid(curr); //利用帮助函数获取进程的tgid + bpf_map_update_elem(&pid_tgid, &pid, &tgid, BPF_ANY); //将pid_tgid表中的pid选项更新为tgid,若没有该表项,则创建 + comm *p = bpf_map_lookup_elem(&pid_comm, &pid); //p指向pid_comm哈希表中的pid表项对应的value + if (!p) //如果p不为空,获取当前进程名保存至name中,如果pid_comm当中不存在pid name项,则更新 + { + comm name; + bpf_get_current_comm(&name, COMM_LEN); + bpf_map_update_elem(&pid_comm, &pid, &name, BPF_NOEXIST); + p = &name; + } + key.name = *p; + u32 *t = bpf_map_lookup_elem(&pid_tgid, &pid); + if(!t){ + key.tgid = 0xffffffff; + }else{ + key.tgid = *t; + } + + psid apsid = { + .pid = pid, + .usid = u ? USER_STACK : -1, + .ksid = k ? 
KERNEL_STACK : -1, + }; + stack_tuple *d = bpf_map_lookup_elem(&psid_count, &apsid); //d指向psid_count表当中的apsid表项的值 + + if(!d) { + stack_tuple nd = {.count = 1, .name = key.name,.tgid = key.tgid}; + bpf_map_update_elem(&psid_count, &apsid, &nd, BPF_NOEXIST); + } else { + d->count++; + } + return 0; + +} + +#define io_sec_tp(name) \ + SEC("tp/syscalls/sys_enter_" #name) \ + int prog_t_##name(struct trace_event_raw_sys_enter *ctx) { return handle(ctx); } + +io_sec_tp(write); +io_sec_tp(read); From 2f3de07863ed132021a4c6e3ae7152eb81a02a6e Mon Sep 17 00:00:00 2001 From: zhangzihengya Date: Thu, 1 Feb 2024 17:36:29 +0800 Subject: [PATCH 06/24] add schedule_image --- .../CPU_Subsystem/eBPF_proc_image/Makefile | 2 +- .../CPU_Subsystem/eBPF_proc_image/README.md | 6 +- .../eBPF_proc_image/bpf/resource_image.bpf.c | 18 +- .../eBPF_proc_image/bpf/schedule_image.bpf.c | 178 ++++++++++++++++++ .../eBPF_proc_image/include/helpers.h | 3 +- .../eBPF_proc_image/include/proc_image.h | 14 ++ .../eBPF_proc_image/proc_image.c | 133 ++++++++++--- 7 files changed, 318 insertions(+), 36 deletions(-) create mode 100644 eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/schedule_image.bpf.c diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/Makefile b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/Makefile index a80365a6c..a7a74ab5c 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/Makefile +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/Makefile @@ -41,7 +41,7 @@ INCLUDES := -I$(OUTPUT) -I../libbpf/include/uapi -I$(LIBBLAZESYM_INC) -I./includ CFLAGS := -g -Wall ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS) -APPS = resource_image lock_image syscall_image keytime_image +APPS = resource_image lock_image syscall_image keytime_image schedule_image TARGETS = proc_image # Get Clang's default includes on this system. 
We'll explicitly add these dirs diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/README.md b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/README.md index 9fd6a1ff6..65a9dd56b 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/README.md +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/README.md @@ -35,6 +35,7 @@ proc_image 工具的参数信息: | -l, --lock | 采集进程持有的用户态锁信息,包括用户态互斥锁、用户态读写锁(可持续开发) | | -q, --quote | 在参数周围添加引号(") | | -k, --keytime | 采集进程关键时间点的相关信息,包括execve、exit、fork、vfork、pthread_create | +| -S, --schedule | 采集进程的调度信息 | | -a, --all | 启动所有的采集进程数据的功能 | | -h, --help | 显示帮助信息 | @@ -44,10 +45,11 @@ tools文件夹中的eBPF程序是按照进程生命周期中数据的类型分 | 工具 | 描述 | | --------------- | ------------------------------- | -| lifecycle_image | 对进程上下CPU进行画像 | +| resource_image | 对进程的资源使用情况进行画像 | | lock_image | 对进程/线程持有锁的区间进行画像 | | keytime_image | 对进程的关键时间点进行画像 | -| newlife_image | 对新创建进程或线程进行画像 | +| syscall_image | 对进程的系统调用序列进行画像 | +| schedule_image | 对进程的调度信息进行画像 | ## 五、test_proc 测试程序 diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/resource_image.bpf.c b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/resource_image.bpf.c index af98ece6c..1f19c5a5d 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/resource_image.bpf.c +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/resource_image.bpf.c @@ -16,29 +16,29 @@ // // eBPF kernel-mode code that collects process resource usage -#include "vmlinux.h" +#include #include #include #include #include #include "proc_image.h" +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + const volatile pid_t target_pid = -1; const volatile int target_cpu_id = -1; const volatile pid_t ignore_tgid = -1; -char LICENSE[] SEC("license") = "Dual BSD/GPL"; - struct { __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, 7000); + __uint(max_entries, 10240); __type(key, struct proc_id); __type(value, struct start_rsc); } start SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, 7000); + 
__uint(max_entries, 10240); __type(key, struct proc_id); __type(value, struct total_rsc); } total SEC(".maps"); @@ -57,7 +57,7 @@ int kprobe__finish_task_switch(struct pt_regs *ctx) if(prev_tgid!=ignore_tgid && (target_pid==-1 || (target_pid!=0 && prev_pid==target_pid) || (target_pid==0 && prev_pid==target_pid && prev_cpu==target_cpu_id))){ - struct proc_id prev_pd = {0}; + struct proc_id prev_pd = {}; prev_pd.pid = prev_pid; if(prev_pid == 0) prev_pd.cpu_id = prev_cpu; @@ -68,7 +68,7 @@ int kprobe__finish_task_switch(struct pt_regs *ctx) } if(bpf_map_lookup_elem(&total,&prev_pd) == NULL){ - struct total_rsc prev_total = {0}; + struct total_rsc prev_total = {}; long unsigned int memused; // #if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 2, 0) @@ -128,8 +128,8 @@ int kprobe__finish_task_switch(struct pt_regs *ctx) if(next_tgid!=ignore_tgid && (target_pid==-1 || (target_pid!=0 && next_pid==target_pid) || (target_pid==0 && next_pid==target_pid && next_cpu==target_cpu_id))){ - struct proc_id next_pd = {0}; - struct start_rsc next_start={0}; + struct proc_id next_pd = {}; + struct start_rsc next_start={}; next_pd.pid = next_pid; if(next_pid == 0) next_pd.cpu_id = next_cpu; diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/schedule_image.bpf.c b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/schedule_image.bpf.c new file mode 100644 index 000000000..1ceea420b --- /dev/null +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/bpf/schedule_image.bpf.c @@ -0,0 +1,178 @@ +// Copyright 2023 The LMP Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// author: zhangziheng0525@163.com +// +// eBPF kernel-mode code that collects process schedule information + +#include +#include +#include +#include +#include "proc_image.h" + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + +const volatile pid_t target_pid = -1; +const volatile int target_cpu_id = -1; +const volatile pid_t ignore_tgid = -1; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10240); + __type(key, struct proc_id); + __type(value,struct schedule_event); +} proc_schedule SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10240); + __type(key, struct proc_id); + __type(value,bool); +} enable_add SEC(".maps"); + +SEC("tp_btf/sched_wakeup") +int BPF_PROG(sched_wakeup, struct task_struct *p) +{ + pid_t pid = BPF_CORE_READ(p,pid); + int tgid = BPF_CORE_READ(p,tgid); + int cpu = bpf_get_smp_processor_id(); + + if(tgid!=ignore_tgid && (target_pid==-1 || (target_pid!=0 && pid==target_pid) || + (target_pid==0 && pid==target_pid && cpu==target_cpu_id))){ + struct schedule_event *schedule_event; + struct proc_id pd = {}; + u64 current_time = bpf_ktime_get_ns(); + + pd.pid = pid; + if(pid == 0) pd.cpu_id = cpu; + schedule_event = bpf_map_lookup_elem(&proc_schedule,&pd); + if(!schedule_event){ + struct schedule_event schedule_event = {}; + bool e_add = false; + + schedule_event.pid = pid; + // 提前将 count 值赋值为 1,避免输出时进程还没有被调度,导致除数出现 0 的情况 + schedule_event.count = 1; + schedule_event.enter_time = current_time; + + bpf_map_update_elem(&enable_add,&pd,&e_add,BPF_ANY); + 
bpf_map_update_elem(&proc_schedule,&pd,&schedule_event,BPF_ANY); + }else{ + schedule_event->enter_time = current_time; + } + } + + return 0; +} + +SEC("tp_btf/sched_wakeup_new") +int BPF_PROG(sched_wakeup_new, struct task_struct *p) +{ + pid_t pid = BPF_CORE_READ(p,pid); + int tgid = BPF_CORE_READ(p,tgid); + int cpu = bpf_get_smp_processor_id(); + + if(tgid!=ignore_tgid && (target_pid==-1 || (target_pid!=0 && pid==target_pid) || + (target_pid==0 && pid==target_pid && cpu==target_cpu_id))){ + struct schedule_event *schedule_event; + struct proc_id pd = {}; + u64 current_time = bpf_ktime_get_ns(); + + pd.pid = pid; + if(pid == 0) pd.cpu_id = cpu; + schedule_event = bpf_map_lookup_elem(&proc_schedule,&pd); + if(!schedule_event){ + struct schedule_event schedule_event = {}; + bool e_add = false; + + schedule_event.pid = pid; + schedule_event.count = 1; + schedule_event.enter_time = current_time; + + bpf_map_update_elem(&enable_add,&pd,&e_add,BPF_ANY); + bpf_map_update_elem(&proc_schedule,&pd,&schedule_event,BPF_ANY); + }else{ + schedule_event->enter_time = current_time; + } + } + + return 0; +} + +SEC("tp_btf/sched_switch") +int BPF_PROG(sched_switch, bool preempt, struct task_struct *prev, struct task_struct *next) +{ + pid_t prev_pid = BPF_CORE_READ(prev,pid); + int prev_tgid = BPF_CORE_READ(prev,tgid); + int prev_cpu = bpf_get_smp_processor_id(); + unsigned int prev_state = BPF_CORE_READ(prev,__state); + pid_t next_pid = BPF_CORE_READ(next,pid); + int next_tgid = BPF_CORE_READ(next,tgid); + int next_cpu = prev_cpu; + u64 current_time = bpf_ktime_get_ns(); + + if(prev_tgid!=ignore_tgid && prev_state==TASK_RUNNING && (target_pid==-1 || (target_pid!=0 && prev_pid==target_pid) || + (target_pid==0 && prev_pid==target_pid && prev_cpu==target_cpu_id))){ + struct schedule_event *schedule_event; + struct proc_id pd = {}; + + pd.pid = prev_pid; + if(prev_pid == 0) pd.cpu_id = prev_cpu; + schedule_event = bpf_map_lookup_elem(&proc_schedule,&pd); + if(!schedule_event){ + struct 
schedule_event schedule_event = {}; + bool e_add = false; + + schedule_event.pid = prev_pid; + schedule_event.count = 1; + schedule_event.enter_time = current_time; + + bpf_map_update_elem(&enable_add,&pd,&e_add,BPF_ANY); + bpf_map_update_elem(&proc_schedule,&pd,&schedule_event,BPF_ANY); + }else{ + schedule_event->enter_time = current_time; + } + } + + if(next_tgid!=ignore_tgid && (target_pid==-1 || (target_pid!=0 && next_pid==target_pid) || + (target_pid==0 && next_pid==target_pid && next_cpu==target_cpu_id))){ + struct schedule_event *schedule_event; + bool * e_add; + struct proc_id pd = {}; + u64 this_delay; + + pd.pid = next_pid; + if(next_pid == 0) pd.cpu_id = next_cpu; + schedule_event = bpf_map_lookup_elem(&proc_schedule,&pd); + if(!schedule_event) + return 0; + + e_add = bpf_map_lookup_elem(&enable_add,&pd); + if(!e_add) return 0; + // 因为 count 值初值赋值为了 1,避免多加一次 + if(*e_add) schedule_event->count++; + else *e_add = true; + this_delay = current_time-schedule_event->enter_time; + + schedule_event->prio = BPF_CORE_READ(next,prio); + schedule_event->sum_delay += this_delay; + if(this_delay > schedule_event->max_delay) + schedule_event->max_delay = this_delay; + if(schedule_event->min_delay==0 || this_delay < schedule_event->min_delay) + schedule_event->min_delay = this_delay; + } + + return 0; +} \ No newline at end of file diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/helpers.h b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/helpers.h index 3b10c911e..92325cabe 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/helpers.h +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/helpers.h @@ -70,4 +70,5 @@ typedef unsigned int u32; #define RESOURCE_IMAGE 1 #define SYSCALL_IMAGE 2 #define LOCK_IMAGE 3 -#define KEYTIME_IMAGE 4 \ No newline at end of file +#define KEYTIME_IMAGE 4 +#define SCHEDULE_IMAGE 5 \ No newline at end of file diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/proc_image.h
b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/proc_image.h index 7baa6965a..f0e651e3a 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/proc_image.h +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/include/proc_image.h @@ -26,6 +26,8 @@ #define FULL_MAX_ARGS_ARR 440 #define LAST_ARG (FULL_MAX_ARGS_ARR - ARGSIZE) +#define TASK_RUNNING 0x00000000 + // resource_image struct proc_id{ int pid; @@ -101,4 +103,16 @@ struct keytime_event{ char char_info[FULL_MAX_ARGS_ARR]; }; +// schedule_image +struct schedule_event{ + int pid; + int prio; + int count; + long long unsigned int enter_time; + long long unsigned int sum_delay; + long long unsigned int max_delay; + long long unsigned int min_delay; +}; + + #endif /* __PROCESS_H */ \ No newline at end of file diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/proc_image.c b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/proc_image.c index 4a032dbec..322e3fd17 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/proc_image.c +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/proc_image.c @@ -33,6 +33,7 @@ #include "syscall_image.skel.h" #include "lock_image.skel.h" #include "keytime_image.skel.h" +#include "schedule_image.skel.h" #include "helpers.h" static int prev_image = 0; @@ -44,7 +45,8 @@ static struct env { int cpu_id; int time; bool enable_myproc; - bool enable_output; + bool output_resourse; + bool output_schedule; bool create_thread; bool exit_thread; bool enable_resource; @@ -61,12 +63,14 @@ static struct env { bool quote; int max_args; bool enable_keytime; + bool enable_schedule; } env = { .pid = -1, .cpu_id = -1, .time = 0, .enable_myproc = false, - .enable_output = false, + .output_resourse = false, + .output_schedule = false, .create_thread = false, .exit_thread = false, .enable_resource = false, @@ -83,6 +87,7 @@ static struct env { .quote = false, .max_args = DEFAULT_MAXARGS, .enable_keytime = false, + .enable_schedule = false, }; static struct timespec 
prevtime; @@ -113,6 +118,7 @@ static const struct argp_option opts[] = { { "lock", 'l', NULL, 0, "Collects lock information about processes" }, { "quote", 'q', NULL, 0, "Add quotemarks (\") around arguments" }, { "keytime", 'k', NULL, 0, "Collects keytime information about processes" }, + { "schedule", 'S', NULL, 0, "Collects schedule information about processes" }, { NULL, 'h', NULL, OPTION_HIDDEN, "show the full help" }, {}, }; @@ -154,6 +160,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) env.enable_syscall = true; env.enable_lock = true; env.enable_keytime = true; + env.enable_schedule = true; break; case 'r': env.enable_resource = true; @@ -176,6 +183,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'k': env.enable_keytime = true; break; + case 'S': + env.enable_schedule = true; + break; case 'h': argp_state_help(state, stderr, ARGP_HELP_STD_HELP); break; @@ -255,7 +265,44 @@ static int print_resource(struct bpf_map *map) // 获取当前高精度时间 clock_gettime(CLOCK_REALTIME, &prevtime); - env.enable_output = false; + env.output_resourse = false; + + return 0; +} + +static int print_schedule(struct bpf_map *map) +{ + struct proc_id lookup_key = {-1}, next_key; + int err, fd = bpf_map__fd(map); + struct schedule_event event; + time_t now = time(NULL); + struct tm *localTime = localtime(&now); + int hour = localTime->tm_hour; + int min = localTime->tm_min; + int sec = localTime->tm_sec; + u64 avg_delay; + + while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) { + if(prev_image != SCHEDULE_IMAGE){ + printf("SCHEDULE-------------------------------------------------------------------------------------------------\n"); + printf("%-8s %-6s %-4s %-13s %-13s %-13s\n","TIME","PID","PRIO","AVG_DELAY(ns)","MAX_DELAY(ns)","MIN_DELAY(ns)"); + prev_image = SCHEDULE_IMAGE; + } + + err = bpf_map_lookup_elem(fd, &next_key, &event); + if (err < 0) { + fprintf(stderr, "failed to lookup infos: %d\n", err); + return -1; + } + + 
avg_delay = event.sum_delay/event.count; + printf("%02d:%02d:%02d %-6d %-4d %-13lld %-13lld %-13lld\n", + hour,min,sec,event.pid,event.prio,avg_delay,event.max_delay,event.min_delay); + + lookup_key = next_key; + } + + env.output_schedule = false; return 0; } @@ -500,7 +547,8 @@ static int keytime_attach(struct keytime_image_bpf *skel) void *enable_function(void *arg) { env.create_thread = true; sleep(1); - env.enable_output = true; + if(env.enable_resource) env.output_resourse = true; + if(env.enable_schedule) env.output_schedule = true; env.create_thread = false; env.exit_thread = true; @@ -521,6 +569,7 @@ int main(int argc, char **argv) struct ring_buffer *lock_rb = NULL; struct keytime_image_bpf *keytime_skel = NULL; struct ring_buffer *keytime_rb = NULL; + struct schedule_image_bpf *schedule_skel = NULL; pthread_t thread_enable; int err; static const struct argp argp = { @@ -563,7 +612,6 @@ int main(int argc, char **argv) fprintf(stderr, "Failed to attach BPF resource skeleton\n"); goto cleanup; } - } if(env.enable_syscall){ @@ -664,26 +712,52 @@ int main(int argc, char **argv) } } + if(env.enable_schedule){ + schedule_skel = schedule_image_bpf__open(); + if(!schedule_skel) { + fprintf(stderr, "Failed to open BPF schedule skeleton\n"); + return 1; + } + + schedule_skel->rodata->target_pid = env.pid; + schedule_skel->rodata->target_cpu_id = env.cpu_id; + if(!env.enable_myproc) schedule_skel->rodata->ignore_tgid = env.ignore_tgid; + + err = schedule_image_bpf__load(schedule_skel); + if (err) { + fprintf(stderr, "Failed to load and verify BPF schedule skeleton\n"); + goto cleanup; + } + + err = schedule_image_bpf__attach(schedule_skel); + if (err) { + fprintf(stderr, "Failed to attach BPF schedule skeleton\n"); + goto cleanup; + } + } + /* 处理事件 */ while (!exiting) { - // 等待新线程结束,回收资源 - if(env.exit_thread){ - env.exit_thread = false; - if (pthread_join(thread_enable, NULL) != 0) { - perror("pthread_join"); - exit(EXIT_FAILURE); - } - } - - // 创建新线程,设置 
env.enable_output - if(!env.create_thread){ - if (pthread_create(&thread_enable, NULL, enable_function, NULL) != 0) { - perror("pthread_create"); - exit(EXIT_FAILURE); - } - } - - if(env.enable_resource && env.enable_output){ + if(env.enable_resource || env.enable_schedule){ + // 等待新线程结束,回收资源 + if(env.exit_thread){ + env.exit_thread = false; + if (pthread_join(thread_enable, NULL) != 0) { + perror("pthread_join"); + exit(EXIT_FAILURE); + } + } + + // 创建新线程,设置 output + if(!env.create_thread){ + if (pthread_create(&thread_enable, NULL, enable_function, NULL) != 0) { + perror("pthread_create"); + exit(EXIT_FAILURE); + } + } + } + + if(env.enable_resource && env.output_resourse){ err = print_resource(resource_skel->maps.total); /* Ctrl-C will cause -EINTR */ if (err == -EINTR) { @@ -733,6 +807,18 @@ int main(int argc, char **argv) break; } } + + if(env.enable_schedule && env.output_schedule){ + err = print_schedule(schedule_skel->maps.proc_schedule); + /* Ctrl-C will cause -EINTR */ + if (err == -EINTR) { + err = 0; + break; + } + if (err < 0) { + break; + } + } } /* 卸载BPF程序 */ @@ -744,6 +830,7 @@ int main(int argc, char **argv) lock_image_bpf__destroy(lock_skel); ring_buffer__free(keytime_rb); keytime_image_bpf__destroy(keytime_skel); + schedule_image_bpf__destroy(schedule_skel); return err < 0 ? 
-err : 0; } \ No newline at end of file From 866580e20c0eaf0225d4e1fd74d6ff65f26e8ec4 Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Thu, 1 Feb 2024 20:53:58 +0800 Subject: [PATCH 07/24] =?UTF-8?q?=E4=BF=AE=E6=94=B9action?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/kvm_watcher.yml | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/.github/workflows/kvm_watcher.yml b/.github/workflows/kvm_watcher.yml index 45d8286bc..6a7a0dd09 100644 --- a/.github/workflows/kvm_watcher.yml +++ b/.github/workflows/kvm_watcher.yml @@ -22,30 +22,5 @@ jobs: - name: Install dependencies run: | - sudo apt install clang libelf1 libelf-dev zlib1g-dev - sudo apt install libbpf-dev - sudo apt install linux-tools-$(uname -r) - sudo apt install linux-cloud-tools-$(uname -r) - sudo apt-get update && sudo apt-get install -y qemu-kvm - - name: Download Cirros image - run: | - wget http://download.cirros-cloud.net/0.5.1/cirros-0.5.1-x86_64-disk.img - - name: Load KVM module - run: | - sudo modprobe kvm && sudo modprobe kvm-intel - - name: Run QEMU to start VM - run: | - sudo qemu-system-x86_64 -enable-kvm -cpu host -m 2048 -drive file=cirros-0.5.1-x86_64-disk.img,format=qcow2 -boot c -nographic & - sleep 5 - - name: Run kvm_watcher - run: | - cd eBPF_Supermarket/kvm_watcher/ - make - sudo ./kvm_watcher -w -t 2 - sudo ./kvm_watcher -e -t 2 -s - sudo ./kvm_watcher -n -t 2 - sudo ./kvm_watcher -d -t 2 - sudo ./kvm_watcher -f -m -t 2 - sudo ./kvm_watcher -c -t 2 - make clean + make test From 279a13dd4cc85d2afcf939ee62da989cd6942798 Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Thu, 1 Feb 2024 21:03:15 +0800 Subject: [PATCH 08/24] =?UTF-8?q?=E4=BF=AE=E6=94=B9action=E5=92=8Cmakefile?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/kvm_watcher.yml | 3 ++- eBPF_Supermarket/kvm_watcher/Makefile | 4 ++-- 2 files changed, 4 
insertions(+), 3 deletions(-) diff --git a/.github/workflows/kvm_watcher.yml b/.github/workflows/kvm_watcher.yml index 6a7a0dd09..49b22a6c0 100644 --- a/.github/workflows/kvm_watcher.yml +++ b/.github/workflows/kvm_watcher.yml @@ -20,7 +20,8 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Install dependencies + - name: Test program execution run: | + cd eBPF_Supermarket/kvm_watcher/ make test diff --git a/eBPF_Supermarket/kvm_watcher/Makefile b/eBPF_Supermarket/kvm_watcher/Makefile index 95ed7f3d2..f58a30f11 100644 --- a/eBPF_Supermarket/kvm_watcher/Makefile +++ b/eBPF_Supermarket/kvm_watcher/Makefile @@ -7,7 +7,7 @@ ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \ | sed 's/riscv64/riscv/' \ | sed 's/loongarch64/loongarch/') APP = src/kvm_watcher -OPTIONS = -f -w -n -d -i '-e -s' +OPTIONS = -f -w -n -d -c '-e -s' # 共同规则1 define common_rules1 @@ -25,7 +25,7 @@ define common_rules2 clang -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) -I/usr/include/x86_64-linux-gnu -I. -c $@.bpf.c -o $@.bpf.o bpftool gen skeleton $@.bpf.o > $@.skel.h clang -g -O2 -Wall -I . 
-c $@.c -o $@.o - clang -Wall -O2 -g $@.o -static -lbpf -lelf -lz -o $(notdir $@) + clang -Wall -O2 -g $@.o -static -lbpf -lelf -lz -lzstd -o $(notdir $@) # 6.5内核编译需要lzstd库 endef # 判断是否已安装 qemu-system-x86_64 From dc7a146c0c71f20bf6bf4fd2693d822b2e93fa87 Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Thu, 1 Feb 2024 21:10:50 +0800 Subject: [PATCH 09/24] update .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 5bd7586e6..05c5ae246 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,6 @@ tags *.iml nohup.out + +#Virtual machine image file +eBPF_Supermarket/kvm_watcher/*.img From 0454955a4b4d259bc32fd28c5405007a5bcda657 Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Thu, 1 Feb 2024 21:11:27 +0800 Subject: [PATCH 10/24] update makefile --- eBPF_Supermarket/kvm_watcher/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eBPF_Supermarket/kvm_watcher/Makefile b/eBPF_Supermarket/kvm_watcher/Makefile index f58a30f11..b330f8503 100644 --- a/eBPF_Supermarket/kvm_watcher/Makefile +++ b/eBPF_Supermarket/kvm_watcher/Makefile @@ -30,7 +30,7 @@ endef # 判断是否已安装 qemu-system-x86_64 ifeq (,$(shell which qemu-system-x86_64)) - INSTALL_QEMU = sudo apt update && sudo apt install qemu-system-x86_64 + INSTALL_QEMU = sudo apt update && sudo apt install qemu-kvm endif bpf: $(APP) From b9e1c74b5bff4b6a21897c9425e16e06d3237d88 Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Thu, 1 Feb 2024 21:23:32 +0800 Subject: [PATCH 11/24] update readme --- eBPF_Supermarket/kvm_watcher/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eBPF_Supermarket/kvm_watcher/README.md b/eBPF_Supermarket/kvm_watcher/README.md index 0ab70f979..796ea58cd 100755 --- a/eBPF_Supermarket/kvm_watcher/README.md +++ b/eBPF_Supermarket/kvm_watcher/README.md @@ -122,7 +122,7 @@ graph TD; B --> C[加载 KVM 模块]; C --> D[下载 CirrOs 镜像]; D --> E[使用 QEMU 启动虚拟机]; - E --> F[编译并运行程序]; + E --> F[编译运行kvm_watcher]; F --> G[结束虚拟机进程]; 
``` From a835769277059db7f17a8b7596666247aeee484b Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Fri, 2 Feb 2024 00:25:15 +0800 Subject: [PATCH 12/24] update .gitignore --- .gitignore | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 05c5ae246..547d3f08e 100644 --- a/.gitignore +++ b/.gitignore @@ -52,5 +52,11 @@ tags nohup.out -#Virtual machine image file +# Virtual machine image file eBPF_Supermarket/kvm_watcher/*.img + +eBPF_Supermarket/kvm_watcher/**/*.o +eBPF_Supermarket/kvm_watcher/**/*.skel.h +eBPF_Supermarket/kvm_watcher/**/vmlinux.h +eBPF_Supermarket/kvm_watcher/kvm_watcher +eBPF_Supermarket/kvm_watcher/**/temp* \ No newline at end of file From f1c470c016abec654e0a229c926406f987d1fc40 Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Fri, 2 Feb 2024 00:25:59 +0800 Subject: [PATCH 13/24] update kvm_watcher.h --- .../kvm_watcher/include/kvm_watcher.h | 102 ++++++++++-------- 1 file changed, 60 insertions(+), 42 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h index bb7b1a9fb..c810547ea 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h @@ -31,9 +31,9 @@ #define MICROSECONDS_IN_SECOND 1000000 #define OUTPUT_INTERVAL_SECONDS 0.5 -#define OUTPUT_INTERVAL(us) usleep((unsigned int)(us * MICROSECONDS_IN_SECOND)) +#define OUTPUT_INTERVAL(us) usleep((__u32)(us * MICROSECONDS_IN_SECOND)) -#define OPTIONS_LIST "-w, -p, -d, -f, -c, or -e" +#define OPTIONS_LIST "-w, -p, -d, -f, -c, -i, or -e" #define PFERR_PRESENT_BIT 0 #define PFERR_WRITE_BIT 1 @@ -83,25 +83,33 @@ } while (0) #define CHECK_PID(vm_pid) \ - unsigned pid = bpf_get_current_pid_tgid() >> 32; \ + __u32 pid = bpf_get_current_pid_tgid() >> 32; \ if ((vm_pid) > 0 && pid != (vm_pid)) { \ return 0; \ } struct ExitReason { - int number; + __u32 number; const char *name; }; struct reason_info { - unsigned long long 
time; - unsigned long reason; - int count; + __u64 time; + __u64 reason; + __u32 count; +}; + +struct dirty_page_info { + __u64 gfn; + __u64 rel_gfn; + __u16 slot_id; + __u16 pad; + __u32 pid; }; struct process { - unsigned pid; - unsigned tid; + __u32 pid; + __u32 tid; char comm[TASK_COMM_LEN]; }; @@ -113,75 +121,85 @@ enum EventType { MARK_PAGE_DIRTY, PAGE_FAULT, IRQCHIP, + IRQ_INJECT, } event_type; struct common_event { struct process process; - unsigned long long time; + __u64 time; // 成员特定于每个事件类型的数据 union { struct { - unsigned long long dur_hlt_ns; + __u64 dur_hlt_ns; bool waited; - unsigned vcpu_id; + __u32 vcpu_id; bool valid; // VCPU_WAKEUP 特有成员 } vcpu_wakeup_data; struct { - unsigned reason_number; - unsigned long long duration_ns; - int count; - int total; + __u32 reason_number; + __u64 duration_ns; + __u32 count; + __u32 total; // EXIT 特有成员 } exit_data; struct { bool grow; - unsigned int new; - unsigned int old; - unsigned vcpu_id; + __u32 new; + __u32 old; + __u32 vcpu_id; // HALT_POLL 特有成员 } halt_poll_data; struct { - unsigned long npages; - unsigned long userspace_addr; - unsigned long long rel_gfn; - unsigned long long gfn; - short slot_id; + __u64 npages; + __u64 userspace_addr; + __u64 rel_gfn; + __u64 gfn; + __u16 slot_id; // MARK_PAGE_DIRTY 特有成员 } mark_page_dirty_data; struct { - unsigned long long delay; - unsigned long long error_code; - unsigned long long addr; - unsigned long long pfn; - unsigned long long hva; - unsigned count; - short memslot_id; + __u64 delay; + __u64 error_code; + __u64 addr; + __u64 pfn; + __u64 hva; + __u32 count; + __u16 memslot_id; // PAGE_FAULT 特有成员 } page_fault_data; struct { - unsigned long long delay; - int ret; - int irqchip_type; + __u64 delay; + __u32 ret; + __u32 irqchip_type; /*pic*/ - unsigned char chip; - unsigned pin; - unsigned char elcr; - unsigned char imr; + __u16 chip; + __u32 pin; + __u16 elcr; + __u16 imr; /*ioapic*/ - unsigned long long ioapic_bits; - unsigned int irq_nr; + __u64 ioapic_bits; + 
__u32 irq_nr; /*msi*/ - unsigned long long address; - unsigned long long data; + __u64 address; + __u64 data; // IRQCHIP 特有成员 } irqchip_data; + + struct { + __u64 delay; + bool soft; + __u32 irq_nr; + __u32 vcpu_id; + __u64 injections; + // IRQ_INJECT 特有成员 + } irq_inject_data; }; }; From f159624173e59abefb900a66650ae5ba22b694ff Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Fri, 2 Feb 2024 00:28:21 +0800 Subject: [PATCH 14/24] update Makefile --- eBPF_Supermarket/kvm_watcher/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eBPF_Supermarket/kvm_watcher/Makefile b/eBPF_Supermarket/kvm_watcher/Makefile index b330f8503..88030700d 100644 --- a/eBPF_Supermarket/kvm_watcher/Makefile +++ b/eBPF_Supermarket/kvm_watcher/Makefile @@ -7,7 +7,7 @@ ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \ | sed 's/riscv64/riscv/' \ | sed 's/loongarch64/loongarch/') APP = src/kvm_watcher -OPTIONS = -f -w -n -d -c '-e -s' +OPTIONS = -f -w -n -d -c '-e -s' -i # 共同规则1 define common_rules1 From ee85360bfa68b03f6d9404834c09c5b920d19c5c Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Fri, 2 Feb 2024 00:34:34 +0800 Subject: [PATCH 15/24] =?UTF-8?q?virq=20inject=E5=BB=B6=E6=97=B6=E4=BF=A1?= =?UTF-8?q?=E6=81=AF=E7=BB=9F=E8=AE=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kvm_watcher/include/kvm_irq.h | 42 +++++++++++++ .../kvm_watcher/src/kvm_watcher.bpf.c | 10 ++++ .../kvm_watcher/src/kvm_watcher.c | 60 +++++++++++++++---- 3 files changed, 101 insertions(+), 11 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_irq.h b/eBPF_Supermarket/kvm_watcher/include/kvm_irq.h index aed9de1db..3a7921872 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_irq.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_irq.h @@ -31,6 +31,13 @@ struct { __type(value, u64); } irq_set_delay SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 8192); + __type(key, u32); + __type(value, u64); +} 
irq_inject_delay SEC(".maps"); + static int entry_kvm_pic_set_irq(int irq, pid_t vm_pid) { CHECK_PID(vm_pid); if (irq < 0 || irq >= PIC_NUM_PINS) { @@ -149,4 +156,39 @@ static int exit_kvm_set_msi_irq( return 0; } +static int entry_vmx_inject_irq(struct kvm_vcpu *vcpu, pid_t vm_pid) { + CHECK_PID(vm_pid); + u32 irq_nr; + bool rei; + bpf_probe_read_kernel(&irq_nr, sizeof(u32), &vcpu->arch.interrupt.nr); + u64 ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&irq_inject_delay, &irq_nr, &ts, BPF_ANY); + return 0; +} + +static int exit_vmx_inject_irq(struct kvm_vcpu *vcpu, void *rb, + struct common_event *e) { + u32 irq_nr; + bpf_probe_read_kernel(&irq_nr, sizeof(u32), &vcpu->arch.interrupt.nr); + u64 *ts = bpf_map_lookup_elem(&irq_inject_delay, &irq_nr); + if (!ts) { + return 0; + } + u64 time = bpf_ktime_get_ns(); + u64 delay = time - *ts; + bpf_map_delete_elem(&irq_inject_delay, &irq_nr); + bool soft; + bpf_probe_read_kernel(&soft, sizeof(bool), &vcpu->arch.interrupt.soft); + RESERVE_RINGBUF_ENTRY(rb, e); + e->time = *ts; + e->process.pid = bpf_get_current_pid_tgid() >> 32; + bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); + e->irq_inject_data.delay=delay; + e->irq_inject_data.irq_nr=irq_nr; + e->irq_inject_data.soft=soft; + bpf_probe_read_kernel(&e->irq_inject_data.vcpu_id,sizeof(u32),&vcpu->vcpu_id); + bpf_probe_read_kernel(&e->irq_inject_data.injections,sizeof(u64),&vcpu->stat.irq_injections); + bpf_ringbuf_submit(e, 0); + return 0; +} #endif /* __KVM_IRQ_H */ \ No newline at end of file diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c index e9a588a6f..a6cc51222 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c @@ -129,3 +129,13 @@ int BPF_PROG(fexit_kvm_set_msi_irq, struct kvm *kvm, struct kvm_lapic_irq *irq) { return exit_kvm_set_msi_irq(kvm, routing_entry, &rb, e); } + +SEC("fentry/vmx_inject_irq") +int 
BPF_PROG(fentry_vmx_inject_irq, struct kvm_vcpu *vcpu,bool reinjected) { + return entry_vmx_inject_irq(vcpu, vm_pid); +} + +SEC("fexit/vmx_inject_irq") +int BPF_PROG(fexit_vmx_inject_irq, struct kvm_vcpu *vcpu,bool reinjected) { + return exit_vmx_inject_irq(vcpu,&rb, e); +} \ No newline at end of file diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index 4fdd81129..81dbfd4c2 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -29,7 +29,8 @@ #include #include "../include/kvm_watcher.h" #include "kvm_watcher.skel.h" -// 定义具体的退出原因 + +// 定义具体的退出原因 arch/x86/include/uapi/asm/vmx.h struct ExitReason exitReasons[] = {{0, "EXCEPTION_NMI"}, {1, "EXTERNAL_INTERRUPT"}, {2, "TRIPLE_FAULT"}, @@ -177,7 +178,7 @@ void printExitInfo(Node *head) { "-----------------------------------------------------------------" "----------\n"); printf("%-21s %-18s %-8s %-8s %-13s \n", "EXIT_REASON", "COMM", "PID", - "COUNT", "AVG_DURATION(ns)"); + "COUNT", "AVG_DURATION(us)"); while (current != NULL) { printf("%-2d/%-18s %-33s %-13.4f \n", current->data.exit_reason, getExitReasonName(current->data.exit_reason), current->data.info, @@ -214,17 +215,18 @@ int doesVmProcessExist(pid_t pid) { return 0; // VmProcess with the given PID not found } -// 结构用于保存键值对 +// 定义键值对结构体 struct KeyValPair { - unsigned long long key; + struct dirty_page_info key; unsigned int value; }; -// 比较函数,用于 qsort +// 比较函数,用于排序 int compare(const void *a, const void *b) { - return ((struct KeyValPair *)b)->value - ((struct KeyValPair *)a)->value; + return (((struct KeyValPair *)b)->value - ((struct KeyValPair *)a)->value); } -// 保存脏页信息到./temp/dirty_temp文件中 + +// 保存脏页信息到文件 int save_count_dirtypagemap_to_file(struct bpf_map *map) { const char *directory = "./temp"; const char *filename = "./temp/dirty_temp"; @@ -246,7 +248,8 @@ int save_count_dirtypagemap_to_file(struct bpf_map *map) { } int count_dirty_fd = 
bpf_map__fd(map); - unsigned long long lookup_key = -1, next_key; + struct dirty_page_info lookup_key = {}; + struct dirty_page_info next_key = {}; unsigned int dirty_counts; // 保存键值对到数组 @@ -276,14 +279,21 @@ int save_count_dirtypagemap_to_file(struct bpf_map *map) { free(pairs); return -1; } + + // 更新 lookup_key + lookup_key = next_key; } // 对数组进行排序 qsort(pairs, size, sizeof(struct KeyValPair), compare); // 输出到文件 + fprintf(output, "%-10s %-10s %-10s %-10s %s\n", "PID", "GFN", "REL_GFN", + "SLOT_ID", "COUNTS"); for (size_t i = 0; i < size; i++) { - fprintf(output, "%llx %d\n", pairs[i].key, pairs[i].value); + fprintf(output, "%-10d %-10llx %-10llx %-10d %u\n", pairs[i].key.pid, + pairs[i].key.gfn, pairs[i].key.rel_gfn, pairs[i].key.slot_id, + pairs[i].value); } fclose(output); @@ -300,6 +310,7 @@ static struct env { bool execute_page_fault; bool mmio_page_fault; bool execute_irqchip; + bool execute_irq_inject; int monitoring_time; pid_t vm_pid; enum EventType event_type; @@ -311,6 +322,7 @@ static struct env { .execute_mark_page_dirty = false, .execute_page_fault = false, .execute_irqchip = false, + .execute_irq_inject = false, .mmio_page_fault = false, .monitoring_time = 0, .vm_pid = -1, @@ -333,6 +345,8 @@ static const struct argp_option opts[] = { "Monitoring the data of kvmmmu page fault."}, {"kvm_irqchip", 'c', NULL, 0, "Monitor the irqchip setting information in KVM VM."}, + {"irq_inject(x86)", 'i', NULL, 0, + "Monitor the virq injection information in KVM VM "}, {"stat", 's', NULL, 0, "Display statistical data.(The -e option must be specified.)"}, {"mmio", 'm', NULL, 0, @@ -370,6 +384,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) { case 'c': SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_irqchip); break; + case 'i': + SET_OPTION_AND_CHECK_USAGE(option_selected, env.execute_irq_inject); + break; case 's': if (env.execute_exit) { env.ShowStats = true; @@ -448,7 +465,10 @@ static int determineEventType(struct env *env) { 
env->event_type = PAGE_FAULT; } else if (env->execute_irqchip) { env->event_type = IRQCHIP; - } else { + } else if (env->execute_irq_inject) + { + env->event_type = IRQ_INJECT; + }else { env->event_type = NONE_TYPE; // 或者根据需要设置一个默认的事件类型 } return 0; @@ -501,7 +521,7 @@ static int handle_event(void *ctx, void *data, size_t data_sz) { case MARK_PAGE_DIRTY: { // 使用 e->mark_page_dirty_data 访问 MARK_PAGE_DIRTY 特有成员 printf( - "%-18.6f %-15s %-6d/%-8d %-10llx %-10llx %-10lu %-15lx %d \n", + "%-18.6f %-15s %-6d/%-8d %-10llx %-10llx %-10llu %-15llx %d \n", timestamp_ms, e->process.comm, e->process.pid, e->process.tid, e->mark_page_dirty_data.gfn, e->mark_page_dirty_data.rel_gfn, e->mark_page_dirty_data.npages, @@ -612,6 +632,16 @@ static int handle_event(void *ctx, void *data, size_t data_sz) { default: break; } + break; /* IRQCHIP is fully handled by the inner switch above; without this break it falls through into IRQ_INJECT and prints irq_inject_data from the wrong union member */ + case IRQ_INJECT: { + printf( + "%-18.6f %-15s %-10d %-10lld %#-10x %-10d %-10lld %-10s\n", + timestamp_ms, e->process.comm, e->process.pid, + e->irq_inject_data.delay, e->irq_inject_data.irq_nr, + e->irq_inject_data.vcpu_id, e->irq_inject_data.injections, + e->irq_inject_data.soft ? "Soft/INTn" : "IRQ"); + break; + } } default: // 处理未知事件类型 @@ -654,6 +684,11 @@ static int print_event_head(struct env *env) { printf("%-18s %-15s %-10s %-10s %-14s %-10s %-10s\n", "TIME(ms)", "COMM", "PID", "DELAY", "CHIP/PIN", "DST/VEC", "OTHERS"); break; + case IRQ_INJECT: + printf("%-18s %-15s %-10s %-10s %-10s %-10s %-10s %-10s\n", + "TIME(ms)", "COMM", "PID", "DELAY", "IRQ_NR", "VCPU_ID", + "INJECTIONS", "TYPE"); + break; default: // Handle default case or display an error message break; @@ -695,6 +730,10 @@ static void set_disable_load(struct kvm_watcher_bpf *skel) { env.execute_irqchip ? true : false); bpf_program__set_autoload(skel->progs.fexit_kvm_set_msi_irq, env.execute_irqchip ? true : false); + bpf_program__set_autoload(skel->progs.fentry_vmx_inject_irq, + env.execute_irq_inject ?
true : false); + bpf_program__set_autoload(skel->progs.fexit_vmx_inject_irq, + env.execute_irq_inject ? true : false); } int main(int argc, char **argv) { From 85a90a3703f99b6394506ec8cef5b333814c842f Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Fri, 2 Feb 2024 00:48:08 +0800 Subject: [PATCH 16/24] update makefile --- eBPF_Supermarket/kvm_watcher/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eBPF_Supermarket/kvm_watcher/Makefile b/eBPF_Supermarket/kvm_watcher/Makefile index 88030700d..b330f8503 100644 --- a/eBPF_Supermarket/kvm_watcher/Makefile +++ b/eBPF_Supermarket/kvm_watcher/Makefile @@ -7,7 +7,7 @@ ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \ | sed 's/riscv64/riscv/' \ | sed 's/loongarch64/loongarch/') APP = src/kvm_watcher -OPTIONS = -f -w -n -d -c '-e -s' -i +OPTIONS = -f -w -n -d -c '-e -s' # 共同规则1 define common_rules1 From 846e9def075b957c6f829de536e024afc33eee3a Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Fri, 2 Feb 2024 00:49:14 +0800 Subject: [PATCH 17/24] update kvm_exit.h --- eBPF_Supermarket/kvm_watcher/include/kvm_exits.h | 1 + 1 file changed, 1 insertion(+) diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h b/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h index 38106ef08..d41e5cc3f 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_exits.h @@ -69,6 +69,7 @@ static int trace_kvm_exit(struct exit *ctx, pid_t vm_pid) { if (count) { (*count)++; reas.count = *count; + bpf_map_update_elem(&counts, &reason, count, BPF_ANY); } else { u32 new_count = 1; reas.count = new_count; From 5f3c91a63928dc29148ca4a3bd2adfdba55f2678 Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Fri, 2 Feb 2024 00:49:30 +0800 Subject: [PATCH 18/24] =?UTF-8?q?dirty=20page=E5=8A=9F=E8=83=BD=E5=AE=8C?= =?UTF-8?q?=E5=96=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kvm_watcher/include/kvm_vcpu.h | 31 +++++++++++++------ 1 
file changed, 21 insertions(+), 10 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h b/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h index ffc1eb224..fa2c75d34 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h @@ -42,8 +42,8 @@ struct halt_poll_ns { struct { __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, 8192); - __type(key, u64); + __uint(max_entries, 128 * 1024); + __type(key, struct dirty_page_info); __type(value, u32); } count_dirty_map SEC(".maps"); @@ -114,14 +114,6 @@ static int trace_mark_page_dirty_in_slot(struct kvm *kvm, bpf_probe_read_kernel(&flags, sizeof(memslot->flags), &memslot->flags); if (slot && (flags & KVM_MEM_LOG_DIRTY_PAGES)) { // 检查memslot是否启用了脏页追踪 - gfn_t gfnum = gfn; - u32 *count = bpf_map_lookup_elem(&count_dirty_map, &gfnum); - if (count) { - *count += 1; - } else { - u32 init_count = 1; - bpf_map_update_elem(&count_dirty_map, &gfnum, &init_count, BPF_ANY); - } u32 tid = bpf_get_current_pid_tgid(); unsigned long base_gfn; RESERVE_RINGBUF_ENTRY(rb, e); @@ -140,9 +132,28 @@ static int trace_mark_page_dirty_in_slot(struct kvm *kvm, &memslot->userspace_addr); bpf_probe_read_kernel(&e->mark_page_dirty_data.slot_id, sizeof(memslot->id), &memslot->id); + short int s_id; + bpf_probe_read_kernel(&s_id, + sizeof(memslot->id), &memslot->id); bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); + struct dirty_page_info dirty_page_info = {.gfn = gfn, + .slot_id = s_id, + .rel_gfn = gfn - base_gfn, + .pid = pid}; + u32 *count; + count = bpf_map_lookup_elem(&count_dirty_map, &dirty_page_info); + if (count) { + *count += 1; + bpf_map_update_elem(&count_dirty_map, &dirty_page_info, count, + BPF_ANY); + } else { + u32 init_count = 1; + bpf_map_update_elem(&count_dirty_map, &dirty_page_info, &init_count, + BPF_ANY); + } bpf_ringbuf_submit(e, 0); } + return 0; } #endif /* __KVM_VCPU_H */ From bd54bd81e7a55ec0a5cdc62c8182c780ecc0cddc Mon Sep 17 
00:00:00 2001 From: nanshuaibo Date: Fri, 2 Feb 2024 01:01:02 +0800 Subject: [PATCH 19/24] =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kvm_watcher/include/kvm_irq.h | 12 +++++---- .../kvm_watcher/include/kvm_vcpu.h | 13 ++++----- .../kvm_watcher/include/kvm_watcher.h | 6 ++--- .../kvm_watcher/src/kvm_watcher.bpf.c | 6 ++--- .../kvm_watcher/src/kvm_watcher.c | 27 +++++++++---------- 5 files changed, 31 insertions(+), 33 deletions(-) diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_irq.h b/eBPF_Supermarket/kvm_watcher/include/kvm_irq.h index 3a7921872..6fa41d5df 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_irq.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_irq.h @@ -183,11 +183,13 @@ static int exit_vmx_inject_irq(struct kvm_vcpu *vcpu, void *rb, e->time = *ts; e->process.pid = bpf_get_current_pid_tgid() >> 32; bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); - e->irq_inject_data.delay=delay; - e->irq_inject_data.irq_nr=irq_nr; - e->irq_inject_data.soft=soft; - bpf_probe_read_kernel(&e->irq_inject_data.vcpu_id,sizeof(u32),&vcpu->vcpu_id); - bpf_probe_read_kernel(&e->irq_inject_data.injections,sizeof(u64),&vcpu->stat.irq_injections); + e->irq_inject_data.delay = delay; + e->irq_inject_data.irq_nr = irq_nr; + e->irq_inject_data.soft = soft; + bpf_probe_read_kernel(&e->irq_inject_data.vcpu_id, sizeof(u32), + &vcpu->vcpu_id); + bpf_probe_read_kernel(&e->irq_inject_data.injections, sizeof(u64), + &vcpu->stat.irq_injections); bpf_ringbuf_submit(e, 0); return 0; } diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h b/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h index fa2c75d34..7bba6477d 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_vcpu.h @@ -132,14 +132,11 @@ static int trace_mark_page_dirty_in_slot(struct kvm *kvm, &memslot->userspace_addr); 
bpf_probe_read_kernel(&e->mark_page_dirty_data.slot_id, sizeof(memslot->id), &memslot->id); - short int s_id; - bpf_probe_read_kernel(&s_id, - sizeof(memslot->id), &memslot->id); + short int s_id; + bpf_probe_read_kernel(&s_id, sizeof(memslot->id), &memslot->id); bpf_get_current_comm(&e->process.comm, sizeof(e->process.comm)); - struct dirty_page_info dirty_page_info = {.gfn = gfn, - .slot_id = s_id, - .rel_gfn = gfn - base_gfn, - .pid = pid}; + struct dirty_page_info dirty_page_info = { + .gfn = gfn, .slot_id = s_id, .rel_gfn = gfn - base_gfn, .pid = pid}; u32 *count; count = bpf_map_lookup_elem(&count_dirty_map, &dirty_page_info); if (count) { @@ -153,7 +150,7 @@ static int trace_mark_page_dirty_in_slot(struct kvm *kvm, } bpf_ringbuf_submit(e, 0); } - + return 0; } #endif /* __KVM_VCPU_H */ diff --git a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h index c810547ea..55d55343b 100644 --- a/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h +++ b/eBPF_Supermarket/kvm_watcher/include/kvm_watcher.h @@ -82,10 +82,10 @@ e = _tmp; \ } while (0) -#define CHECK_PID(vm_pid) \ +#define CHECK_PID(vm_pid) \ __u32 pid = bpf_get_current_pid_tgid() >> 32; \ - if ((vm_pid) > 0 && pid != (vm_pid)) { \ - return 0; \ + if ((vm_pid) > 0 && pid != (vm_pid)) { \ + return 0; \ } struct ExitReason { diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c index a6cc51222..0564e4704 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.bpf.c @@ -131,11 +131,11 @@ int BPF_PROG(fexit_kvm_set_msi_irq, struct kvm *kvm, } SEC("fentry/vmx_inject_irq") -int BPF_PROG(fentry_vmx_inject_irq, struct kvm_vcpu *vcpu,bool reinjected) { +int BPF_PROG(fentry_vmx_inject_irq, struct kvm_vcpu *vcpu, bool reinjected) { return entry_vmx_inject_irq(vcpu, vm_pid); } SEC("fexit/vmx_inject_irq") -int BPF_PROG(fexit_vmx_inject_irq, struct 
kvm_vcpu *vcpu,bool reinjected) { - return exit_vmx_inject_irq(vcpu,&rb, e); +int BPF_PROG(fexit_vmx_inject_irq, struct kvm_vcpu *vcpu, bool reinjected) { + return exit_vmx_inject_irq(vcpu, &rb, e); } \ No newline at end of file diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index 81dbfd4c2..804eb3a3f 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -465,10 +465,9 @@ static int determineEventType(struct env *env) { env->event_type = PAGE_FAULT; } else if (env->execute_irqchip) { env->event_type = IRQCHIP; - } else if (env->execute_irq_inject) - { - env->event_type = IRQ_INJECT; - }else { + } else if (env->execute_irq_inject) { + env->event_type = IRQ_INJECT; + } else { env->event_type = NONE_TYPE; // 或者根据需要设置一个默认的事件类型 } return 0; @@ -632,15 +631,15 @@ static int handle_event(void *ctx, void *data, size_t data_sz) { default: break; } - case IRQ_INJECT: { - printf( - "%-18.6f %-15s %-10d %-10lld %#-10x %-10d %-10lld %-10s\n", - timestamp_ms, e->process.comm, e->process.pid, - e->irq_inject_data.delay, e->irq_inject_data.irq_nr, - e->irq_inject_data.vcpu_id, e->irq_inject_data.injections, - e->irq_inject_data.soft ? "Soft/INTn" : "IRQ"); - break; - } + break; + } + case IRQ_INJECT: { + printf("%-18.6f %-15s %-10d %-10lld %#-10x %-10d %-10lld %-10s\n", + timestamp_ms, e->process.comm, e->process.pid, + e->irq_inject_data.delay, e->irq_inject_data.irq_nr, + e->irq_inject_data.vcpu_id, e->irq_inject_data.injections, + e->irq_inject_data.soft ? 
"Soft/INTn" : "IRQ"); + break; } default: // 处理未知事件类型 @@ -807,7 +806,7 @@ int main(int argc, char **argv) { goto cleanup; } while (!exiting) { - //OUTPUT_INTERVAL(OUTPUT_INTERVAL_SECONDS); // 输出间隔 + // OUTPUT_INTERVAL(OUTPUT_INTERVAL_SECONDS); // 输出间隔 err = ring_buffer__poll(rb, RING_BUFFER_TIMEOUT_MS /* timeout, ms */); /* Ctrl-C will cause -EINTR */ if (err == -EINTR) { From 3bd81cc707f568943bb053711b40b1c511597b76 Mon Sep 17 00:00:00 2001 From: zhangzihengya Date: Fri, 2 Feb 2024 15:18:19 +0800 Subject: [PATCH 20/24] add schedule_develop.md --- .../eBPF_proc_image/docs/schedule_develop.md | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/schedule_develop.md diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/schedule_develop.md b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/schedule_develop.md new file mode 100644 index 000000000..b1d8d760c --- /dev/null +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/schedule_develop.md @@ -0,0 +1,31 @@ +# schedule功能类开发文档 + +**参考资料:** + +- [Linux 的调度延迟 - 原理与观测 - 知乎 (zhihu.com)](https://zhuanlan.zhihu.com/p/462728452) +- [linux 内核抢占那些事 - 知乎 (zhihu.com)](https://zhuanlan.zhihu.com/p/166032722) + +**调度延迟的计算得分两种情况:** + +1. 任务因等待 event 进入休眠态([Voluntary Switch](https://zhuanlan.zhihu.com/p/402423877)),那么就是从被唤醒("wakeup/wakeup_new" 的时间点),到获得 CPU (任务切换时的 *"next_pid"*)的间隔。 +2. 
任务因 [Involuntary Switch](https://zhuanlan.zhihu.com/p/402423877) 让出 CPU(任务切换时作为 *"prev_pid"*),到再次获得 CPU (之后的某次任务切换时作为*"next_pid"*)所经历的时间。在这期间,任务始终在 runqueue 上,始终是 runnable 的状态,所以有 "prev_state" 是否为 *TASK_RUNNING* 的判断。 + +**内核中提供了三个接口来唤醒进程:** + +- wake_up_new_task:用来唤醒新进程,fork出来的进程/线程; +- wake_up_process:唤醒处于TASK_NORMAL状态的进程; +- wake_up_state:唤醒指定状态的进程; + +后两个接口最终都会调用try_to_wake_up接口: + +``` +try_to_wake_up-->ttwu_queue-->ttwu_do_activate-->ttwu_do_wakeup +``` + +**相关挂载点:** + +``` +ttwu_do_wakeup() --> trace_sched_wakeup +wake_up_new_task() --> trace_sched_wakeup_new +__schedule() --> trace_sched_switch +``` \ No newline at end of file From 11b7d6fbb1a0677f98f30784a5df0923d51adfe3 Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Fri, 2 Feb 2024 17:55:08 +0800 Subject: [PATCH 21/24] update kvm_watcher.c --- eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index 804eb3a3f..a4ae9d307 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -826,6 +826,9 @@ int main(int argc, char **argv) { if (err < 0) { printf("Save count dirty page map to file fail: %d\n", err); goto cleanup; + }else{ + printf("Save count dirty page map to file success!\n"); + goto cleanup; } } cleanup: From 6f60116c9779182fc3b357f039f7a28d062d7675 Mon Sep 17 00:00:00 2001 From: nanshuaibo Date: Fri, 2 Feb 2024 17:57:27 +0800 Subject: [PATCH 22/24] update kvm_watcher.c --- eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c index a4ae9d307..127b5c69c 100644 --- a/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c +++ b/eBPF_Supermarket/kvm_watcher/src/kvm_watcher.c @@ -827,7 +827,7 @@ int main(int argc, char **argv) { printf("Save count dirty page map
to file fail: %d\n", err); goto cleanup; }else{ - printf("Save count dirty page map to file success!\n"); + printf("\nSave count dirty page map to file success!\n"); goto cleanup; } } From 9044654657d07059c36456b2c0b8082578d5cad3 Mon Sep 17 00:00:00 2001 From: Y_y_s <78297703+Monkey857@users.noreply.github.com> Date: Tue, 6 Feb 2024 10:21:36 +0800 Subject: [PATCH 23/24] =?UTF-8?q?kvm=5Fwatcher=E9=A1=B9=E7=9B=AE=EF=BC=9A?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0kvm=5Fexit=E4=BA=8B=E4=BB=B6=E7=9B=91?= =?UTF-8?q?=E6=B5=8B=E5=8A=9F=E8=83=BD=20(#673)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add kvmexit watcher * Update kvmexit.py --- .../kvm_watcher/kvm_exit_bcc/kvmexit.py | 378 ++++++++++++++++++ .../kvm_exit_bcc/kvmexit_example.txt | 250 ++++++++++++ 2 files changed, 628 insertions(+) create mode 100644 eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py create mode 100644 eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit_example.txt diff --git a/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py b/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py new file mode 100644 index 000000000..dd157488f --- /dev/null +++ b/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python +# +# kvmexit.py +# +# Display the exit_reason and its statistics of each vm exit +# for all vcpus of all virtual machines. For example: +# $./kvmexit.py +# PID TID KVM_EXIT_REASON COUNT +# 1273551 1273568 EXIT_REASON_MSR_WRITE 6 +# 1274253 1274261 EXIT_REASON_EXTERNAL_INTERRUPT 1 +# 1274253 1274261 EXIT_REASON_HLT 12 +# ... +# +# Besides, we also allow users to specify one pid, tid(s), or one +# pid and its vcpu. See kvmexit_example.txt for more examples. +# +# @PID: each vitual machine's pid in the user space. +# @TID: the user space's thread of each vcpu of that virtual machine. +# @KVM_EXIT_REASON: the reason why the vm exits. +# @COUNT: the counts of the @KVM_EXIT_REASONS. 
+# +# REQUIRES: Linux 4.7+ (BPF_PROG_TYPE_TRACEPOINT support) +# +# Copyright (c) 2024 YYS. All rights reserved. +# Original code © 2024 ByteDance Inc. All rights reserved. +# Author(s): +# YYS +# 以下代码段是根据Fei Li的实现进行的修改 +# 原始代码链接:https://github.com/iovisor/bcc/blob/master/tools/kvmexit.py + + +from __future__ import print_function +from time import sleep +from bcc import BPF +import argparse +import multiprocessing +import os +import subprocess + +# +# Process Arguments +# +def valid_args_list(args): + args_list = args.split(",") + for arg in args_list: + try: + int(arg) + except: + raise argparse.ArgumentTypeError("must be valid integer") + return args_list + +# arguments +examples = """examples: + ./kvmexit # Display kvm_exit_reason and its statistics in real-time until Ctrl-C + ./kvmexit 5 # Display in real-time after sleeping 5s + ./kvmexit -p 3195281 # Collpase all tids for pid 3195281 with exit reasons sorted in descending order + ./kvmexit -p 3195281 20 # Collpase all tids for pid 3195281 with exit reasons sorted in descending order, and display after sleeping 20s + ./kvmexit -p 3195281 -v 0 # Display only vcpu0 for pid 3195281, descending sort by default + ./kvmexit -p 3195281 -a # Display all tids for pid 3195281 + ./kvmexit -t 395490 # Display only for tid 395490 with exit reasons sorted in descending order + ./kvmexit -t 395490 20 # Display only for tid 395490 with exit reasons sorted in descending order after sleeping 20s + ./kvmexit -T '395490,395491' # Display for a union like {395490, 395491} +""" +parser = argparse.ArgumentParser( + description="Display kvm_exit_reason and its statistics at a timed interval", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("duration", nargs="?", default=99999999, type=int, help="show delta for next several seconds") +parser.add_argument("-p", "--pid", type=int, help="trace this PID only") +exgroup = parser.add_mutually_exclusive_group() +exgroup.add_argument("-t", 
"--tid", type=int, help="trace this TID only") +exgroup.add_argument("-T", "--tids", type=valid_args_list, help="trace a comma separated series of tids with no space in between") +exgroup.add_argument("-v", "--vcpu", type=int, help="trace this vcpu only") +exgroup.add_argument("-a", "--alltids", action="store_true", help="trace all tids for this pid") +args = parser.parse_args() +duration = int(args.duration) + +# +# Setup BPF +# + +# load BPF program +bpf_text = """ +#include + +#define REASON_NUM 76 +#define TGID_NUM 1024 + +struct exit_count { + u64 exit_ct[REASON_NUM]; +}; +BPF_PERCPU_ARRAY(init_value, struct exit_count, 1); +BPF_TABLE("percpu_hash", u64, struct exit_count, pcpu_kvm_stat, TGID_NUM); + +struct cache_info { + u64 cache_pid_tgid; + struct exit_count cache_exit_ct; +}; +BPF_PERCPU_ARRAY(pcpu_cache, struct cache_info, 1); + +TRACEPOINT_PROBE(kvm, kvm_exit) { + int cache_miss = 0; + int zero = 0; + u32 er = args->exit_reason; + if (er >= REASON_NUM) { + return 0; + } + + u64 cur_pid_tgid = bpf_get_current_pid_tgid(); + u32 tgid = cur_pid_tgid >> 32; + u32 pid = cur_pid_tgid; + + if (THREAD_FILTER) + return 0; + + struct exit_count *tmp_info = NULL, *initial = NULL; + struct cache_info *cache_p; + cache_p = pcpu_cache.lookup(&zero); + if (cache_p == NULL) { + return 0; + } + + if (cache_p->cache_pid_tgid == cur_pid_tgid) { + //a. If the cur_pid_tgid hit this physical cpu consecutively, save it to pcpu_cache + tmp_info = &cache_p->cache_exit_ct; + } else { + //b. If another pid_tgid matches this pcpu for the last hit, OR it is the first time to hit this physical cpu. + cache_miss = 1; + + // b.a Try to load the last cache struct if exists. 
+ tmp_info = pcpu_kvm_stat.lookup(&cur_pid_tgid); + + // b.b If it is the first time for the cur_pid_tgid to hit this pcpu, employ a + // per_cpu array to initialize pcpu_kvm_stat's exit_count with each exit reason's count is zero + if (tmp_info == NULL) { + initial = init_value.lookup(&zero); + if (initial == NULL) { + return 0; + } + + pcpu_kvm_stat.update(&cur_pid_tgid, initial); + tmp_info = pcpu_kvm_stat.lookup(&cur_pid_tgid); + // To pass the verifier + if (tmp_info == NULL) { + return 0; + } + } + } + + if (er < REASON_NUM) { + tmp_info->exit_ct[er]++; + if (cache_miss == 1) { + if (cache_p->cache_pid_tgid != 0) { + // b.*.a Let's save the last hit cache_info into kvm_stat. + pcpu_kvm_stat.update(&cache_p->cache_pid_tgid, &cache_p->cache_exit_ct); + } + // b.* As the cur_pid_tgid meets current pcpu_cache_array for the first time, save it. + cache_p->cache_pid_tgid = cur_pid_tgid; + bpf_probe_read(&cache_p->cache_exit_ct, sizeof(*tmp_info), tmp_info); + } + return 0; + } + + return 0; +} +""" + +# format output +exit_reasons = ( + "EXCEPTION_NMI", + "EXTERNAL_INTERRUPT", + "TRIPLE_FAULT", + "INIT_SIGNAL", + "SIPI_SIGNAL ", + "N/A", + "N/A", + "INTERRUPT_WINDOW", + "NMI_WINDOW", + "TASK_SWITCH", + "CPUID", + "N/A", + "HLT", + "INVD", + "INVLPG", + "RDPMC", + "RDTSC", + "N/A", + "VMCALL", + "VMCLEAR", + "VMLAUNCH", + "VMPTRLD", + "VMPTRST", + "VMREAD", + "VMRESUME", + "VMWRITE", + "VMOFF", + "VMON", + "CR_ACCESS", + "DR_ACCESS", + "IO_INSTRUCTION", + "MSR_READ", + "MSR_WRITE", + "INVALID_STATE", + "MSR_LOAD_FAIL", + "N/A", + "MWAIT_INSTRUCTION", + "MONITOR_TRAP_FLAG", + "N/A", + "MONITOR_INSTRUCTION", + "PAUSE_INSTRUCTION", + "MCE_DURING_VMENTRY", + "N/A", + "TPR_BELOW_THRESHOLD", + "APIC_ACCESS", + "EOI_INDUCED", + "GDTR_IDTR", + "LDTR_TR", + "EPT_VIOLATION", + "EPT_MISCONFIG", + "INVEPT", + "RDTSCP", + "PREEMPTION_TIMER", + "INVVPID", + "WBINVD", + "XSETBV", + "APIC_WRITE", + "RDRAND", + "INVPCID", + "VMFUNC", + "ENCLS", + "RDSEED", + "PML_FULL", + "XSAVES", 
+ "XRSTORS", + "N/A", + "N/A", + "UMWAIT", + "TPAUSE", + "N/A", + "N/A", + "N/A", + "N/A", + "N/A", + "BUS_LOCK", + "NOTIFY " +) + +# +# Do some checks +# +try: + # Currently, only adapte on intel architecture + cmd = "cat /proc/cpuinfo | grep vendor_id | head -n 1" + arch_info = subprocess.check_output(cmd, shell=True).strip() + if b"Intel" in arch_info: + pass + else: + raise Exception("Currently we only support Intel architecture, please do expansion if needs more.") + + # Check if kvm module is loaded + if os.access("/dev/kvm", os.R_OK | os.W_OK): + pass + else: + raise Exception("Please insmod kvm module to use kvmexit tool.") +except Exception as e: + raise Exception("Failed to do precondition check, due to: %s." % e) + +def find_tid(tgt_dir, tgt_vcpu): + for tid in os.listdir(tgt_dir): + path = tgt_dir + "/" + tid + "/comm" + fp = open(path, "r") + comm = fp.read() + if (comm.find(tgt_vcpu) != -1): + return tid + return -1 + +# set process/thread filter +thread_context = "" +header_format = "" +need_collapse = not args.alltids +if args.tid is not None: + thread_context = "TID %s" % args.tid + thread_filter = 'pid != %s' % args.tid +elif args.tids is not None: + thread_context = "TIDS %s" % args.tids + thread_filter = "pid != " + " && pid != ".join(args.tids) + header_format = "TIDS " +elif args.pid is not None: + thread_context = "PID %s" % args.pid + thread_filter = 'tgid != %s' % args.pid + if args.vcpu is not None: + thread_context = "PID %s VCPU %s" % (args.pid, args.vcpu) + # transfer vcpu to tid + tgt_dir = '/proc/' + str(args.pid) + '/task' + tgt_vcpu = "CPU " + str(args.vcpu) + args.tid = find_tid(tgt_dir, tgt_vcpu) + if args.tid == -1: + raise Exception("There's no v%s for PID %d." 
% (tgt_vcpu, args.pid)) + thread_filter = 'pid != %s' % args.tid + elif args.alltids: + thread_context = "PID %s and its all threads" % args.pid + header_format = "TID " +else: + thread_context = "all threads" + thread_filter = '0' + header_format = "PID TID " +bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter) +b = BPF(text=bpf_text) + + +# header +print("Display kvm exit reasons and statistics for %s" % thread_context, end="") +if duration < 99999999: + print(" after sleeping %d secs." % duration) +else: + print("... Hit Ctrl-C to end.") + +try: + sleep(duration) +except KeyboardInterrupt: + print() + + +# Currently, sort multiple tids in descending order is not supported. +if (args.pid or args.tid): + ct_reason = [] + if args.pid: + tgid_exit = [0 for i in range(len(exit_reasons))] + +# output +print("%s%-35s %s" % (header_format, "KVM_EXIT_REASON", "COUNT")) + +pcpu_kvm_stat = b["pcpu_kvm_stat"] +pcpu_cache = b["pcpu_cache"] +for k, v in pcpu_kvm_stat.items(): + tgid = k.value >> 32 + pid = k.value & 0xffffffff + for i in range(0, len(exit_reasons)): + sum1 = 0 + for inner_cpu in range(0, multiprocessing.cpu_count()): + cachePIDTGID = pcpu_cache[0][inner_cpu].cache_pid_tgid + # Take priority to check if it is in cache + if cachePIDTGID == k.value: + sum1 += pcpu_cache[0][inner_cpu].cache_exit_ct.exit_ct[i] + # If not in cache, find from kvm_stat + else: + sum1 += v[inner_cpu].exit_ct[i] + if sum1 == 0: + continue + + if (args.pid and args.pid == tgid and need_collapse): + tgid_exit[i] += sum1 + elif (args.tid and args.tid == pid): + ct_reason.append((sum1, i)) + elif not need_collapse or args.tids: + print("%-8u %-35s %-8u" % (pid, exit_reasons[i], sum1)) + else: + print("%-8u %-8u %-35s %-8u" % (tgid, pid, exit_reasons[i], sum1)) + + # Display only for the target tid in descending sort + if (args.tid and args.tid == pid): + ct_reason.sort(reverse=True) + for i in range(0, len(ct_reason)): + if ct_reason[i][0] == 0: + continue + print("%-35s %-8u" % 
+ (exit_reasons[ct_reason[i][1]], ct_reason[i][0])) + break + + +# Aggregate all tids' counts for this args.pid in descending sort +if args.pid and need_collapse: + for i in range(0, len(exit_reasons)): + ct_reason.append((tgid_exit[i], i)) + ct_reason.sort(reverse=True) + for i in range(0, len(ct_reason)): + if ct_reason[i][0] == 0: + continue + print("%-35s %-8u" % (exit_reasons[ct_reason[i][1]], ct_reason[i][0])) diff --git a/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit_example.txt b/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit_example.txt new file mode 100644 index 000000000..3ee773bbe --- /dev/null +++ b/eBPF_Supermarket/kvm_watcher/kvm_exit_bcc/kvmexit_example.txt @@ -0,0 +1,250 @@ +Demonstrations of kvm exit reasons, the Linux eBPF/bcc version. + + +Considering virtual machines' frequent exits can cause performance problems, +this tool aims to locate the frequent exit reasons and then find solutions +to reduce or even avoid the exit, by displaying the detailed exit reasons and +the counts of each vm exit for all vms running on one physical machine. + + +Features of this tool +===================== + +- Although there is a patch: [KVM: x86: add full vm-exit reason debug entries] + (https://patchwork.kernel.org/project/kvm/patch/1555939499-30854-1-git-send-email-pizhenwei@bytedance.com/) + trying to fill more vm-exit reason debug entries, just as the comments said, + the code allocates lots of memory that may never be consumed, misses some + arch-specific kvm causes, and can not do kernel aggregation. Instead bcc, as + a user space tool, can implement all these functions more easily and flexibly. +- The bcc python logic could provide nice kernel aggregation and custom output, + like collapsing all tids for one pid (i.e. one vm's qemu process id) with exit + reasons sorted in descending order. For more information, see the following + #USAGE message. +- The bpf in-kernel percpu_array and percpu_cache further improve performance.
+ For more information, see the following #Help to understand. + + +Limited +======= + +In view of the hardware-assisted virtualization technology of +different architectures, currently we only adapt on vmx in intel. +And the amd feature is on the road.. + + +Example output: +=============== + +# ./kvmexit.py +Display kvm exit reasons and statistics for all threads... Hit Ctrl-C to end. +PID TID KVM_EXIT_REASON COUNT +^C1273551 1273568 EXIT_REASON_HLT 12 +1273551 1273568 EXIT_REASON_MSR_WRITE 6 +1274253 1274261 EXIT_REASON_EXTERNAL_INTERRUPT 1 +1274253 1274261 EXIT_REASON_HLT 12 +1274253 1274261 EXIT_REASON_MSR_WRITE 4 + +# ./kvmexit.py 6 +Display kvm exit reasons and statistics for all threads after sleeping 6 secs. +PID TID KVM_EXIT_REASON COUNT +1273903 1273922 EXIT_REASON_EXTERNAL_INTERRUPT 175 +1273903 1273922 EXIT_REASON_CPUID 10 +1273903 1273922 EXIT_REASON_HLT 6043 +1273903 1273922 EXIT_REASON_IO_INSTRUCTION 24 +1273903 1273922 EXIT_REASON_MSR_WRITE 15025 +1273903 1273922 EXIT_REASON_PAUSE_INSTRUCTION 11 +1273903 1273922 EXIT_REASON_EOI_INDUCED 12 +1273903 1273922 EXIT_REASON_EPT_VIOLATION 6 +1273903 1273922 EXIT_REASON_EPT_MISCONFIG 380 +1273903 1273922 EXIT_REASON_PREEMPTION_TIMER 194 +1273551 1273568 EXIT_REASON_EXTERNAL_INTERRUPT 18 +1273551 1273568 EXIT_REASON_HLT 989 +1273551 1273568 EXIT_REASON_IO_INSTRUCTION 10 +1273551 1273568 EXIT_REASON_MSR_WRITE 2205 +1273551 1273568 EXIT_REASON_PAUSE_INSTRUCTION 1 +1273551 1273568 EXIT_REASON_EOI_INDUCED 5 +1273551 1273568 EXIT_REASON_EPT_MISCONFIG 61 +1273551 1273568 EXIT_REASON_PREEMPTION_TIMER 14 + +# ./kvmexit.py -p 1273795 5 +Display kvm exit reasons and statistics for PID 1273795 after sleeping 5 secs. 
+KVM_EXIT_REASON COUNT +MSR_WRITE 13467 +HLT 5060 +PREEMPTION_TIMER 345 +EPT_MISCONFIG 264 +EXTERNAL_INTERRUPT 169 +EPT_VIOLATION 18 +PAUSE_INSTRUCTION 6 +IO_INSTRUCTION 4 +EOI_INDUCED 2 + +# ./kvmexit.py -p 1273795 5 -a +Display kvm exit reasons and statistics for PID 1273795 and its all threads after sleeping 5 secs. +TID KVM_EXIT_REASON COUNT +1273819 EXTERNAL_INTERRUPT 64 +1273819 HLT 2802 +1273819 IO_INSTRUCTION 4 +1273819 MSR_WRITE 7196 +1273819 PAUSE_INSTRUCTION 2 +1273819 EOI_INDUCED 2 +1273819 EPT_VIOLATION 6 +1273819 EPT_MISCONFIG 162 +1273819 PREEMPTION_TIMER 194 +1273820 EXTERNAL_INTERRUPT 78 +1273820 HLT 2054 +1273820 MSR_WRITE 5199 +1273820 EPT_VIOLATION 2 +1273820 EPT_MISCONFIG 77 +1273820 PREEMPTION_TIMER 102 + +# ./kvmexit.py -p 1273795 -v 0 +Display kvm exit reasons and statistics for PID 1273795 VCPU 0... Hit Ctrl-C to end. +KVM_EXIT_REASON COUNT +^CMSR_WRITE 2076 +HLT 795 +PREEMPTION_TIMER 86 +EXTERNAL_INTERRUPT 20 +EPT_MISCONFIG 10 +PAUSE_INSTRUCTION 2 +IO_INSTRUCTION 2 +EPT_VIOLATION 1 +EOI_INDUCED 1 + +# ./kvmexit.py -p 1273795 -v 0 4 +Display kvm exit reasons and statistics for PID 1273795 VCPU 0 after sleeping 4 secs. +KVM_EXIT_REASON COUNT +MSR_WRITE 4726 +HLT 1827 +PREEMPTION_TIMER 78 +EPT_MISCONFIG 67 +EXTERNAL_INTERRUPT 28 +IO_INSTRUCTION 4 +EOI_INDUCED 2 +PAUSE_INSTRUCTION 2 + +# ./kvmexit.py -p 1273795 -v 4 4 +Traceback (most recent call last): + File "tools/kvmexit.py", line 306, in + raise Exception("There's no v%s for PID %d." % (tgt_vcpu, args.pid)) + Exception: There's no vCPU 4 for PID 1273795. + +# ./kvmexit.py -t 1273819 10 +Display kvm exit reasons and statistics for TID 1273819 after sleeping 10 secs. +KVM_EXIT_REASON COUNT +MSR_WRITE 13318 +HLT 5274 +EPT_MISCONFIG 263 +PREEMPTION_TIMER 171 +EXTERNAL_INTERRUPT 109 +IO_INSTRUCTION 8 +PAUSE_INSTRUCTION 5 +EOI_INDUCED 4 +EPT_VIOLATION 2 + +# ./kvmexit.py -T '1273820,1273819' +Display kvm exit reasons and statistics for TIDS ['1273820', '1273819']... Hit Ctrl-C to end. 
+TIDS KVM_EXIT_REASON COUNT +^C1273819 EXTERNAL_INTERRUPT 300 +1273819 HLT 13718 +1273819 IO_INSTRUCTION 26 +1273819 MSR_WRITE 37457 +1273819 PAUSE_INSTRUCTION 13 +1273819 EOI_INDUCED 13 +1273819 EPT_VIOLATION 53 +1273819 EPT_MISCONFIG 654 +1273819 PREEMPTION_TIMER 958 +1273820 EXTERNAL_INTERRUPT 212 +1273820 HLT 9002 +1273820 MSR_WRITE 25495 +1273820 PAUSE_INSTRUCTION 2 +1273820 EPT_VIOLATION 64 +1273820 EPT_MISCONFIG 396 +1273820 PREEMPTION_TIMER 268 + + +Help to understand +================== + +We use a PERCPU_ARRAY: pcpuArrayA and a percpu_hash: hashA to collaboratively +store each kvm exit reason and its count. The reason is there exists a rule when +one vcpu exits and re-enters, it tends to continue to run on the same physical +cpu (pcpu as follows) as the last cycle, which is also called 'cache hit'. Thus +we turn to use a PERCPU_ARRAY to record the 'cache hit' situation to speed +things up; and for other cases, then use a percpu_hash. + +BTW, we originally use a common hash to do this, with a u64(exit_reason) +key and a struct exit_info {tgid_pid, exit_reason} value. But due to +the big lock in bpf_hash, each updating is quite performance consuming. + +Now imagine here is a pid_tgidA (vcpu A) exits and is going to run on +pcpuArrayA, the BPF code flow is as follows: + + pid_tgidA keeps running on the same pcpu + // \\ + // \\ + // Y N \\ + // \\ + a. cache_hit b. cache_miss +(cacheA's pid_tgid matches pid_tgidA) || + | || + | || + "increase percpu exit_ct and return" || + [*Note*] || + pid_tgidA ever been exited on pcpuArrayA? + // \\ + // \\ + // \\ + // Y N \\ + // \\ + b.a load_last_hashA b.b initialize_hashA_with_zero + \ / + \ / + \ / + "increase percpu exit_ct" + || + || + is another pid_tgid been running on pcpuArrayA? 
+ // \\ + // Y N \\ + // \\ + b.*.a save_theLastHit_hashB do_nothing + \\ // + \\ // + \\ // + b.* save_to_pcpuArrayA + + +[*Note*] we do not update the table in "a." above, in case the vcpu hits the same +pcpu again the next time it exits; instead we only update once this pcpu is no longer +hit by the same tgidpid (vcpu), which is in "b.*.a" and "b.*". + + +USAGE message: +============== + +# ./kvmexit.py -h +usage: kvmexit.py [-h] [-p PID [-v VCPU | -a] ] [-t TID | -T 'TID1,TID2'] [duration] + +Display kvm_exit_reason and its statistics at a timed interval + +optional arguments: + -h, --help show this help message and exit + -p PID, --pid PID display process with this PID only, collpase all tids with exit reasons sorted in descending order + -v VCPU, --v VCPU display this VCPU only for this PID + -a, --alltids display all TIDS for this PID + -t TID, --tid TID display thread with this TID only with exit reasons sorted in descending order + -T 'TID1,TID2', --tids 'TID1,TID2' + display threads for a union like {395490, 395491} + duration duration of display, after sleeping several seconds + +examples: + ./kvmexit # Display kvm_exit_reason and its statistics in real-time until Ctrl-C + ./kvmexit 5 # Display in real-time after sleeping 5s + ./kvmexit -p 3195281 # Collpase all tids for pid 3195281 with exit reasons sorted in descending order + ./kvmexit -p 3195281 20 # Collpase all tids for pid 3195281 with exit reasons sorted in descending order, and display after sleeping 20s + ./kvmexit -p 3195281 -v 0 # Display only vcpu0 for pid 3195281, descending sort by default + ./kvmexit -p 3195281 -a # Display all tids for pid 3195281 + ./kvmexit -t 395490 # Display only for tid 395490 with exit reasons sorted in descending order + ./kvmexit -t 395490 20 # Display only for tid 395490 with exit reasons sorted in descending order after sleeping 20s + ./kvmexit -T '395490,395491' # Display for a union like {395490, 395491} \ No newline at end of file From
3cd317ba520c52b9abf40ed9d2025f0cd5d1c7e3 Mon Sep 17 00:00:00 2001 From: helight Date: Tue, 6 Feb 2024 10:31:03 +0800 Subject: [PATCH 24/24] Update CODEOWNERS --- CODEOWNERS | 1 - 1 file changed, 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index 0c0ae2c79..5fbf2e9a6 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -5,5 +5,4 @@ /eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image @helight @LinkinPF @chenamy2017 @zhangzihengya /eBPF_Supermarket/Stack_Analyser @helight @LinkinPF @chenamy2017 @GorilaMond /eBPF_Supermarket/kvm_watcher @helight @LinkinPF @chenamy2017 @nanshuaibo -/eBPF_Supermarket/Stack_Analyser @helight @LinkinPF @chenamy2017 @nanshuaibo /eBPF_Supermarket/Memory_Subsystem @helight @LinkinPF @chenamy2017