Skip to content

Commit

Permalink
mem_watcher:添加直接回收追踪的功能,修改流水线 (#888)
Browse files Browse the repository at this point in the history
* mem_watcher:添加跟踪虚拟内存管理的功能

* mem_watcher:添加直接回收追踪的功能

* mem_watcher:添加直接回收追踪的功能

* mem_watcher:添加直接回收追踪的功能

* mem_watcher:添加直接回收追踪的功能

* mem_watcher:添加直接回收追踪的功能

* mem_watcher:添加直接回收追踪的功能
  • Loading branch information
syxl-time authored Sep 13, 2024
1 parent a4e8e61 commit ad04464
Show file tree
Hide file tree
Showing 6 changed files with 242 additions and 9 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/ebpf_mem_watcher.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ jobs:
- name: Run mem_watcher
continue-on-error: true
run: |
cd eBPF_Supermarket/Memory_Subsystem/mem_watcher/
cd eBPF_Supermarket/Memory_Subsystem/vmlinux/x86
bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h
cd ../../mem_watcher
make
sudo ./mem_watcher -f -i 10
sudo timeout 20 ./mem_watcher
2 changes: 1 addition & 1 deletion eBPF_Supermarket/Memory_Subsystem/mem_watcher/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ INCLUDES := -I$(OUTPUT) -I../../libbpf/include/uapi -I$(dir $(VMLINUX)) -I$(LIBB
CFLAGS := -g -Wall
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)

APPS = paf pr procstat sysstat memleak fraginfo vmasnap oomkiller
APPS = paf pr procstat sysstat memleak fraginfo vmasnap drsnoop oomkiller

TARGETS= mem_watcher
CARGO ?= $(shell which cargo)
Expand Down
90 changes: 90 additions & 0 deletions eBPF_Supermarket/Memory_Subsystem/mem_watcher/bpf/drsnoop.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include "mem_watcher.h"

char LICENSE[] SEC("license") = "Dual BSD/GPL";

// Define BPF maps
// In-flight direct-reclaim records. The key is the u64 returned by
// bpf_get_current_pid_tgid(); the value is the struct val_t snapshot taken by
// the direct_reclaim_begin handler. The direct_reclaim_end handler looks the
// record up to compute the latency and then deletes it.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 256 * 1024);
__type(key, u64);
__type(value, struct val_t);
} start SEC(".maps");

// Ring buffer through which finished events are delivered to user space: the
// direct_reclaim_end handler reserves and submits one struct data_t per event.
// For a ring buffer map, max_entries is the buffer size in bytes (256 KiB).
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 256 * 1024);
} rb SEC(".maps");

// Single-slot array (key 0) holding one u64. The direct_reclaim_begin handler
// interprets that value as a kernel address and reads the VM statistics
// counters from it, so user space is expected to store the address here before
// the programs start firing.
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
__type(key, u32);
__type(value, u64);
} vm_stat_map SEC(".maps");

// Minimal local mirror of the direct_reclaim_end tracepoint record, declaring
// only the nr_reclaimed field that the end handler reads.
// preserve_access_index makes accesses to it CO-RE relocatable; the "___x"
// suffix follows libbpf's "flavor" naming convention, where everything from
// the triple underscore on is ignored when matching against the kernel type.
struct trace_event_raw_mm_vmscan_direct_reclaim_end_template___x {
long unsigned int nr_reclaimed;
} __attribute__((preserve_access_index));

/*
 * Tracepoint handler for vmscan:mm_vmscan_direct_reclaim_begin.
 *
 * Records, for the current task, its pid/tgid, command name, the start
 * timestamp and a snapshot of the VM statistics counters, and stores the
 * record in the `start` map keyed by pid/tgid so that the matching
 * direct_reclaim_end handler can compute the reclaim latency.
 *
 * Always returns 0.
 */
SEC("tracepoint/vmscan/mm_vmscan_direct_reclaim_begin")
int trace_mm_vmscan_direct_reclaim_begin(void *ctx) {
    struct val_t entry = {};
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u64 *addr_slot;
    __u32 slot = 0; /* same key that user space uses for vm_stat_map */

    /* Without a command name nothing is recorded for this reclaim. */
    if (bpf_get_current_comm(&entry.name, sizeof(entry.name)) != 0)
        return 0;

    entry.id = pid_tgid;
    entry.ts = bpf_ktime_get_ns();

    /* Snapshot the counters located at the address stored in vm_stat_map. */
    addr_slot = bpf_map_lookup_elem(&vm_stat_map, &slot);
    if (!addr_slot) {
        bpf_printk("vm_stat address not found in map\n");
    } else {
        bpf_probe_read_kernel(&entry.vm_stat, sizeof(entry.vm_stat), (const void *)*addr_slot);
    }

    bpf_map_update_elem(&start, &pid_tgid, &entry, BPF_ANY);
    return 0;
}

/*
 * Tracepoint handler for vmscan:mm_vmscan_direct_reclaim_end.
 *
 * Pairs the end of a direct reclaim with the record stored by the begin
 * handler (looked up in `start` by pid/tgid), builds a struct data_t event
 * carrying the latency, the number of reclaimed pages and the begin-time
 * snapshot, and submits it to the `rb` ring buffer.
 *
 * The `start` entry is removed on every path that found it, including the
 * path where the ring buffer is full, so stale records do not pile up.
 *
 * Always returns 0.
 */
SEC("tracepoint/vmscan/mm_vmscan_direct_reclaim_end")
int trace_mm_vmscan_direct_reclaim_end(void *ctx) {
    struct trace_event_raw_mm_vmscan_direct_reclaim_end_template___x *args = ctx;

    u64 id = bpf_get_current_pid_tgid();
    struct val_t *valp;
    struct data_t *data;
    u64 ts = bpf_ktime_get_ns();

    valp = bpf_map_lookup_elem(&start, &id);
    if (!valp) {
        bpf_printk("No start record found for PID %llu\n", id >> 32);
        return 0;
    }

    data = bpf_ringbuf_reserve(&rb, sizeof(*data), 0);
    if (!data) {
        bpf_printk("Failed to reserve space in ringbuf\n");
        /* The event is dropped; drop its start record as well. */
        bpf_map_delete_elem(&start, &id);
        return 0;
    }

    data->id = valp->id;
    /*
     * Reserved ring-buffer memory is not zeroed, so every field must be
     * written before submit. The low 32 bits of the helper result are the UID.
     */
    data->uid = bpf_get_current_uid_gid() & 0xffffffff;
    data->delta = ts - valp->ts; /* reclaim latency in nanoseconds */
    data->ts = ts / 1000;        /* end time in microseconds */
    bpf_probe_read_kernel(&data->name, sizeof(data->name), valp->name);
    bpf_probe_read_kernel(&data->vm_stat, sizeof(data->vm_stat), valp->vm_stat);
    data->nr_reclaimed = BPF_CORE_READ(args, nr_reclaimed);

    bpf_ringbuf_submit(data, 0);
    bpf_map_delete_elem(&start, &id);

    return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pid_t user_pid = 0;
SEC("kprobe/finish_task_switch")
int BPF_KPROBE(finish_task_switch, struct task_struct *prev) {
struct procstat_event *e;
struct mm_rss_stat rss = {};
struct percpu_counter rss = {};
struct mm_struct *mms;
long long *t;
pid_t pid = bpf_get_current_pid_tgid() >> 32;
Expand Down Expand Up @@ -65,7 +65,7 @@ int BPF_KPROBE(finish_task_switch, struct task_struct *prev) {
e->nvcsw = BPF_CORE_READ(prev, nvcsw);
e->nivcsw = BPF_CORE_READ(prev, nivcsw);

rss = BPF_CORE_READ(prev, mm, rss_stat);
rss = *BPF_CORE_READ(prev, mm, rss_stat);
t = (long long *)(rss.count);
e->rssfile = *t;
e->rssanon = *(t + 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,36 @@ struct find_event_t {
unsigned long long vm_end;
};

/* drsnoop.h */
/* Kernel symbol table consulted to locate the VM statistics array. */
#define KALLSYMS_PATH "/proc/kallsyms"
/* Symbol names looked up in KALLSYMS_PATH for the statistics array. */
#define VM_STAT_SYMBOL "vm_stat"
#define VM_ZONE_STAT_SYMBOL "vm_zone_stat"

/* Number of counters held in the vm_stat[] arrays of val_t and data_t. */
#define NR_VM_ZONE_STAT_ITEMS 5
/* Size of the command-name buffers in val_t and data_t. */
#define TASK_COMM_LEN 16
/* Index into vm_stat[] used for the free-page counter. */
#define NR_FREE_PAGES 0

/* NOTE(review): hard-codes 4 KiB pages; confirm for other page sizes. */
#define PAGE_SHIFT 12
/* Converts a page count to KiB by shifting left by (PAGE_SHIFT - 10). */
#define K(x) ((x) << (PAGE_SHIFT - 10))

// Define structures used in maps and tracepoints
// val_t: record stored in the `start` map while a direct reclaim is in flight.
struct val_t {
unsigned long long id; // value of bpf_get_current_pid_tgid() for the task
unsigned long long ts; // start time, from bpf_ktime_get_ns() (nanoseconds)
char name[TASK_COMM_LEN]; // command name of the task
unsigned long long vm_stat[NR_VM_ZONE_STAT_ITEMS]; // counter snapshot taken at start
};

// data_t: event passed from the BPF program to user space through the ring
// buffer, one per completed direct reclaim.
struct data_t {
unsigned long long id; // pid/tgid value copied from the start record
unsigned long uid; // presumably the UID of the reclaiming task; confirm against the producer
unsigned long long nr_reclaimed; // nr_reclaimed field of the end tracepoint
unsigned long long delta; // end time minus start time, in nanoseconds
unsigned long long ts; // end time (bpf_ktime_get_ns() / 1000, i.e. microseconds)
char name[TASK_COMM_LEN]; // command name copied from the start record
unsigned long long vm_stat[NR_VM_ZONE_STAT_ITEMS]; // counter snapshot copied from the start record
};
};

/* OOM Killer Event */
struct event {
uint32_t triggered_pid; // 触发 OOM 的进程 PID
Expand Down
120 changes: 116 additions & 4 deletions eBPF_Supermarket/Memory_Subsystem/mem_watcher/mem_watcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
#include "fraginfo.skel.h"
#include "memleak.skel.h"
#include "vmasnap.skel.h"
#include "drsnoop.skel.h"

#include "mem_watcher.h"
#include "fraginfo.h"
#include "oomkiller.skel.h"
Expand Down Expand Up @@ -263,6 +265,7 @@ static struct env
bool memleak; // 是否启用内核态/用户态内存泄漏检测
bool fraginfo; // 是否启用内存碎片信息
bool vmasnap; // 是否启用虚拟内存区域信息
bool drsnoop;
bool kernel_trace; // 是否启用内核态跟踪
bool print_time; // 是否打印地址申请时间
int interval; // 打印间隔,单位为秒
Expand All @@ -281,6 +284,7 @@ static struct env
.memleak = false, // 默认关闭内存泄漏检测
.fraginfo = false, // 默认关闭内存碎片信息
.vmasnap = false, // 默认关闭虚拟内存区域信息
.drsnoop = false,
.kernel_trace = true, // 默认启用内核态跟踪
.print_time = false, // 默认不打印地址申请时间
.rss = false, // 默认不打印进程页面信息
Expand Down Expand Up @@ -328,7 +332,10 @@ static const struct argp_option opts[] = {
{0, 0, 0, 0, "vmasnap:", 13},
{"vmasnap", 'v', 0, 0, "print vmasnap (虚拟内存区域信息)"},

{0, 0, 0, 0, "oomkiller:", 14}, // 新增的 oomkiller 选项
{0, 0, 0, 0, "drsnoop:", 14},
{"drsnoop", 'b', 0, 0, "print drsnoop (直接回收追踪信息)"},

{0, 0, 0, 0, "oomkiller:", 15}, // 新增的 oomkiller 选项
{"oomkiller", 'o', 0, 0, "print oomkiller (内存不足时被杀死的进程信息)"},

{NULL, 'h', NULL, OPTION_HIDDEN, "show the full help"},
Expand Down Expand Up @@ -374,6 +381,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 'l':
env.memleak = true;
break;
case 'b':
env.drsnoop = true;
break;
case 'm':
env.print_time = true;
break;
Expand Down Expand Up @@ -411,10 +421,12 @@ static void print_frame(const char *name, uintptr_t input_addr, uintptr_t addr,
static void show_stack_trace(__u64 *stack, int stack_sz, pid_t pid);
static int print_outstanding_allocs(struct memleak_bpf *skel);
static int print_outstanding_combined_allocs(struct memleak_bpf *skel, pid_t pid);
static int get_vm_stat_addr(__u64 *addr);
static int handle_event_paf(void *ctx, void *data, size_t data_sz);
static int handle_event_pr(void *ctx, void *data, size_t data_sz);
static int handle_event_procstat(void *ctx, void *data, size_t data_sz);
static int handle_event_sysstat(void *ctx, void *data, size_t data_sz);
static int handle_event_drsnoop(void *ctx, void *data, size_t data_sz);
static int attach_uprobes(struct memleak_bpf *skel);
static void print_flag_modifiers(int flag);
static int process_paf(struct paf_bpf *skel_paf);
Expand All @@ -424,6 +436,7 @@ static int process_sysstat(struct sysstat_bpf *skel_sysstat);
static int process_memleak(struct memleak_bpf *skel_memleak, struct env);
static int process_fraginfo(struct fraginfo_bpf *skel_fraginfo);
static int process_vmasnap(struct vmasnap_bpf *skel_vmasnap);
static int process_drsnoop(struct drsnoop_bpf *skel_drsnoop);
static int process_oomkiller(struct oomkiller_bpf *skel_oomkiller); // 新增的oomkiller处理函数原型
static int handle_event_oomkiller(void *ctx, void *data, size_t data_sz); // 新增的oomkiller事件处理函数
static __u64 adjust_time_to_program_start_time(__u64 first_query_time);
Expand All @@ -444,6 +457,7 @@ int main(int argc, char **argv)
struct fraginfo_bpf *skel_fraginfo;
struct vmasnap_bpf *skel_vmasnap;
struct oomkiller_bpf *skel_oomkiller;
struct drsnoop_bpf *skel_drsnoop;

err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
if (err)
Expand Down Expand Up @@ -505,6 +519,10 @@ int main(int argc, char **argv)
{
PROCESS_SKEL(skel_oomkiller, oomkiller); // 使用处理 oomkiller 的函数
}
else if (env.drsnoop)
{
PROCESS_SKEL(skel_drsnoop, drsnoop);
}

return 0;
}
Expand Down Expand Up @@ -884,6 +902,31 @@ void disable_kernel_tracepoints(struct memleak_bpf *skel)
bpf_program__set_autoload(skel->progs.memleak__mm_page_free, false);
}

/*
 * Look up the kernel address of the VM statistics array in KALLSYMS_PATH.
 *
 * The first symbol named VM_STAT_SYMBOL or VM_ZONE_STAT_SYMBOL wins.
 *
 * @param addr  receives the symbol address on success; untouched on failure.
 * @return 0 on success, -1 if the file cannot be opened, the symbol is not
 *         listed, or its address is reported as 0 (which is what the kernel
 *         prints when kallsyms addresses are hidden, e.g. via kptr_restrict).
 */
static int get_vm_stat_addr(__u64 *addr)
{
	FILE *file = fopen(KALLSYMS_PATH, "r");
	char line[256];
	int ret = -1;

	if (!file) {
		perror("fopen");
		return -1;
	}

	while (fgets(line, sizeof(line), file)) {
		unsigned long address;
		char symbol[256];

		/* Line format: "<hex address> <type> <symbol> [module]". */
		if (sscanf(line, "%lx %*s %255s", &address, symbol) != 2)
			continue;
		if (strcmp(symbol, VM_STAT_SYMBOL) != 0 &&
		    strcmp(symbol, VM_ZONE_STAT_SYMBOL) != 0)
			continue;

		if (address == 0) {
			/* A zero address is useless to the BPF side: fail loudly
			 * instead of handing it on as a valid pointer. */
			fprintf(stderr, "%s reports address 0 for %s (check kernel.kptr_restrict)\n",
				KALLSYMS_PATH, symbol);
			break;
		}

		*addr = address;
		ret = 0;
		break;
	}

	fclose(file);
	return ret; /* -1 also covers "symbol not found" */
}

// static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
// {
// return vfprintf(stderr, format, args);
Expand Down Expand Up @@ -1029,6 +1072,26 @@ static int handle_event_sysstat(void *ctx, void *data, size_t data_sz)
return 0;
}

static int handle_event_drsnoop(void *ctx, void *data, size_t data_sz)
{
const struct data_t *e = data;
struct tm *tm;
char ts[32];
time_t t;

time(&t);
tm = localtime(&t);
strftime(ts, sizeof(ts), "%H:%M:%S", tm);

__u64 delta_us = e->delta / 1000;
__u64 delta_ms = delta_us / 1000;
__u64 fractional_us = delta_us % 1000;

printf("%-8s %-16s %-7llu %-9llu %llu.%02llu\n", ts, e->name, e->id >> 32, K(e->vm_stat[NR_FREE_PAGES]), delta_ms, fractional_us);

return 0;
}

static int handle_event_oomkiller(void *ctx, void *data, size_t data_sz)
{
const struct event *e = data; // 假设事件结构为 struct event
Expand Down Expand Up @@ -1057,7 +1120,6 @@ static int handle_event_oomkiller(void *ctx, void *data, size_t data_sz)
return 0;
}


int attach_uprobes(struct memleak_bpf *skel)
{
ATTACH_UPROBE_CHECKED(skel, malloc, malloc_enter);
Expand Down Expand Up @@ -1542,6 +1604,56 @@ static int process_vmasnap(struct vmasnap_bpf *skel_vmasnap)
}

vmasnap_cleanup:
vmasnap_bpf__destroy(skel_vmasnap);
return 0;
vmasnap_bpf__destroy(skel_vmasnap);
return 0;
}

/*
 * Run the drsnoop (direct reclaim tracing) tool on an opened skeleton.
 *
 * Steps: resolve the kernel address of the VM statistics array, load the BPF
 * object, publish the address through vm_stat_map, attach the programs, then
 * poll the ring buffer and print events until polling stops.
 *
 * The skeleton is destroyed on every path, including early failures, and the
 * ring buffer is freed only if it was created.
 *
 * @param skel_drsnoop  opened (not yet loaded) drsnoop skeleton; consumed.
 * @return 0 on success, a positive value on failure.
 */
static int process_drsnoop(struct drsnoop_bpf *skel_drsnoop) {
	/* NULL so the cleanup path is safe before ring_buffer__new() runs. */
	struct ring_buffer *rb = NULL;
	__u64 vm_stat_addr = 0;
	__u32 key = 0; // Key for the vm_stat_map
	int err;

	if (get_vm_stat_addr(&vm_stat_addr) != 0) {
		fprintf(stderr, "Failed to get vm_stat or vm_zone_stat address\n");
		err = -1; /* reported to the caller as 1 */
		goto drsnoop_cleanup;
	}

	err = drsnoop_bpf__load(skel_drsnoop);
	if (err) {
		fprintf(stderr, "Failed to load and verify BPF skeleton\n");
		err = -1; /* reported to the caller as 1 */
		goto drsnoop_cleanup;
	}

	// Update BPF map with the address of vm_stat
	err = bpf_map_update_elem(bpf_map__fd(skel_drsnoop->maps.vm_stat_map), &key, &vm_stat_addr, BPF_ANY);
	if (err) {
		fprintf(stderr, "Failed to update BPF map: %s\n", strerror(errno));
		goto drsnoop_cleanup;
	}

	err = drsnoop_bpf__attach(skel_drsnoop);
	if (err) {
		fprintf(stderr, "Failed to attach BPF skeleton\n");
		goto drsnoop_cleanup;
	}

	rb = ring_buffer__new(bpf_map__fd(skel_drsnoop->maps.rb), handle_event_drsnoop, NULL, NULL);
	if (!rb) {
		err = -1;
		fprintf(stderr, "Failed to create ring buffer\n");
		goto drsnoop_cleanup;
	}

	printf("%-8s %-16s %-7s %-9s %-7s\n", "TIME", "COMM", "PID", "FREE(KB)", "LAT(ms)");

	POLL_RING_BUFFER(rb, 1000, err);

drsnoop_cleanup:
	/* Cleanup: ring_buffer__free() accepts NULL. */
	ring_buffer__free(rb);
	drsnoop_bpf__destroy(skel_drsnoop);

	return err < 0 ? -err : 0;
}

0 comments on commit ad04464

Please sign in to comment.