Skip to content

Commit

Permalink
Merge branch 'linuxkerneltravel:develop' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
albertxu216 committed Feb 29, 2024
2 parents dd2d974 + b9c2551 commit da88c55
Show file tree
Hide file tree
Showing 27 changed files with 1,312 additions and 143 deletions.
28 changes: 2 additions & 26 deletions .github/workflows/kvm_watcher.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,8 @@ jobs:
steps:
- uses: actions/checkout@v3

- name: Install dependencies
run: |
sudo apt install clang libelf1 libelf-dev zlib1g-dev
sudo apt install libbpf-dev
sudo apt install linux-tools-$(uname -r)
sudo apt install linux-cloud-tools-$(uname -r)
sudo apt-get update && sudo apt-get install -y qemu-kvm
- name: Download Cirros image
run: |
wget http://download.cirros-cloud.net/0.5.1/cirros-0.5.1-x86_64-disk.img
- name: Load KVM module
run: |
sudo modprobe kvm && sudo modprobe kvm-intel
- name: Run QEMU to start VM
run: |
sudo qemu-system-x86_64 -enable-kvm -cpu host -m 2048 -drive file=cirros-0.5.1-x86_64-disk.img,format=qcow2 -boot c -nographic &
sleep 5
- name: Run kvm_watcher
- name: Test program execution
run: |
cd eBPF_Supermarket/kvm_watcher/
make
sudo ./kvm_watcher -w -t 2
sudo ./kvm_watcher -e -t 2 -s
sudo ./kvm_watcher -n -t 2
sudo ./kvm_watcher -d -t 2
sudo ./kvm_watcher -f -m -t 2
sudo ./kvm_watcher -c -t 2
make clean
make test
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,12 @@ tags
*.iml

nohup.out

# Virtual machine image file
eBPF_Supermarket/kvm_watcher/*.img

eBPF_Supermarket/kvm_watcher/**/*.o
eBPF_Supermarket/kvm_watcher/**/*.skel.h
eBPF_Supermarket/kvm_watcher/**/vmlinux.h
eBPF_Supermarket/kvm_watcher/kvm_watcher
eBPF_Supermarket/kvm_watcher/**/temp*
1 change: 0 additions & 1 deletion CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,4 @@
/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image @helight @LinkinPF @chenamy2017 @zhangzihengya
/eBPF_Supermarket/Stack_Analyser @helight @LinkinPF @chenamy2017 @GorilaMond
/eBPF_Supermarket/kvm_watcher @helight @LinkinPF @chenamy2017 @nanshuaibo
/eBPF_Supermarket/Stack_Analyser @helight @LinkinPF @chenamy2017 @nanshuaibo
/eBPF_Supermarket/Memory_Subsystem @helight @LinkinPF @chenamy2017
2 changes: 1 addition & 1 deletion eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ INCLUDES := -I$(OUTPUT) -I../libbpf/include/uapi -I$(LIBBLAZESYM_INC) -I./includ
CFLAGS := -g -Wall
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)

APPS = resource_image lock_image syscall_image keytime_image
APPS = resource_image lock_image syscall_image keytime_image schedule_image
TARGETS = proc_image

# Get Clang's default includes on this system. We'll explicitly add these dirs
Expand Down
6 changes: 4 additions & 2 deletions eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ proc_image 工具的参数信息:
| -l, --lock | 采集进程持有的用户态锁信息,包括用户态互斥锁、用户态读写锁(可持续开发) |
| -q, --quote | 在参数周围添加引号(") |
| -k, --keytime | 采集进程关键时间点的相关信息,包括execve、exit、fork、vfork、pthread_create |
| -S, --schedule | 采集进程的调度信息 |
| -a, --all | 启动所有的采集进程数据的功能 |
| -h, --help | 显示帮助信息 |

Expand All @@ -44,10 +45,11 @@ tools文件夹中的eBPF程序是按照进程生命周期中数据的类型分

| 工具 | 描述 |
| --------------- | ------------------------------- |
| lifecycle_image | 对进程上下CPU进行画像 |
| resource_image | 对进程的资源使用情况进行画像 |
| lock_image | 对进程/线程持有锁的区间进行画像 |
| keytime_image | 对进程的关键时间点进行画像 |
| newlife_image | 对新创建进程或线程进行画像 |
| syscall_image | 对进程的系统调用序列进行画像 |
| schedule_image | 对进程的调度信息进行画像 |

## 五、test_proc 测试程序

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,29 @@
//
// eBPF kernel-mode code that collects process resource usage

#include "vmlinux.h"
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#include <linux/version.h>
#include "proc_image.h"

char LICENSE[] SEC("license") = "Dual BSD/GPL";

const volatile pid_t target_pid = -1;
const volatile int target_cpu_id = -1;
const volatile pid_t ignore_tgid = -1;

char LICENSE[] SEC("license") = "Dual BSD/GPL";

struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 7000);
__uint(max_entries, 10240);
__type(key, struct proc_id);
__type(value, struct start_rsc);
} start SEC(".maps");

struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 7000);
__uint(max_entries, 10240);
__type(key, struct proc_id);
__type(value, struct total_rsc);
} total SEC(".maps");
Expand All @@ -57,7 +57,7 @@ int kprobe__finish_task_switch(struct pt_regs *ctx)

if(prev_tgid!=ignore_tgid && (target_pid==-1 || (target_pid!=0 && prev_pid==target_pid) ||
(target_pid==0 && prev_pid==target_pid && prev_cpu==target_cpu_id))){
struct proc_id prev_pd = {0};
struct proc_id prev_pd = {};
prev_pd.pid = prev_pid;
if(prev_pid == 0) prev_pd.cpu_id = prev_cpu;

Expand All @@ -68,7 +68,7 @@ int kprobe__finish_task_switch(struct pt_regs *ctx)
}

if(bpf_map_lookup_elem(&total,&prev_pd) == NULL){
struct total_rsc prev_total = {0};
struct total_rsc prev_total = {};
long unsigned int memused;

// #if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 2, 0)
Expand Down Expand Up @@ -128,8 +128,8 @@ int kprobe__finish_task_switch(struct pt_regs *ctx)

if(next_tgid!=ignore_tgid && (target_pid==-1 || (target_pid!=0 && next_pid==target_pid) ||
(target_pid==0 && next_pid==target_pid && next_cpu==target_cpu_id))){
struct proc_id next_pd = {0};
struct start_rsc next_start={0};
struct proc_id next_pd = {};
struct start_rsc next_start={};

next_pd.pid = next_pid;
if(next_pid == 0) next_pd.cpu_id = next_cpu;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// Copyright 2023 The LMP Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// author: [email protected]
//
// eBPF kernel-mode code that collects process schedule information

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#include "proc_image.h"

// License string emitted into the "license" section of the BPF object.
char LICENSE[] SEC("license") = "Dual BSD/GPL";

// Filter settings consulted by every handler in this file.
// NOTE(review): presumably overwritten by the user-space loader before the
// object is loaded — confirm against the loader code.
//
// target_pid: -1 records every task, 0 records only the pid-0 task running on
// target_cpu_id, any other value records only the task with that pid.
const volatile pid_t target_pid = -1;
// CPU id that is only compared when target_pid is 0.
const volatile int target_cpu_id = -1;
// Tasks whose tgid equals this value are never recorded.
const volatile pid_t ignore_tgid = -1;

// Hash map holding one struct schedule_event (scheduling-delay statistics) per
// task. The key is a struct proc_id; the handlers fill in its pid and, for
// pid 0 only, its cpu_id.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 10240);
__type(key, struct proc_id);
__type(value,struct schedule_event);
} proc_schedule SEC(".maps");

// Per-task boolean, keyed exactly like proc_schedule. It is stored as false
// when a task's statistics entry is created (count is pre-set to 1 there) and
// flipped to true by the sched_switch handler the first time the task is
// switched in; only switch-ins after that increment count.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 10240);
__type(key, struct proc_id);
__type(value,bool);
} enable_add SEC(".maps");

// sched_wakeup handler: records the time at which task p was woken, so that
// the sched_switch handler can later compute how long the task waited before
// it was switched in.
//
// Tasks rejected by the ignore_tgid / target_pid / target_cpu_id filters are
// left untouched. Always returns 0.
SEC("tp_btf/sched_wakeup")
int BPF_PROG(sched_wakeup, struct task_struct *p)
{
    pid_t pid = BPF_CORE_READ(p, pid);
    int tgid = BPF_CORE_READ(p, tgid);
    int cpu = bpf_get_smp_processor_id();

    // Filter: drop the ignored thread group first, then apply the pid/cpu selection.
    if (tgid == ignore_tgid)
        return 0;
    if (!(target_pid == -1 || (target_pid != 0 && pid == target_pid) ||
          (target_pid == 0 && pid == target_pid && cpu == target_cpu_id)))
        return 0;

    struct proc_id key = {};
    u64 now = bpf_ktime_get_ns();

    key.pid = pid;
    // pid 0 is additionally keyed by the CPU id.
    if (pid == 0)
        key.cpu_id = cpu;

    struct schedule_event *stats = bpf_map_lookup_elem(&proc_schedule, &key);
    if (stats) {
        // Known task: only refresh the timestamp.
        stats->enter_time = now;
        return 0;
    }

    // First sighting of this task: create its statistics entry and its flag.
    struct schedule_event fresh = {};
    bool counted = false;

    fresh.pid = pid;
    // count is pre-set to 1 so that, if the statistics are printed before the
    // task has been scheduled, the divisor is never 0.
    fresh.count = 1;
    fresh.enter_time = now;

    bpf_map_update_elem(&enable_add, &key, &counted, BPF_ANY);
    bpf_map_update_elem(&proc_schedule, &key, &fresh, BPF_ANY);

    return 0;
}

// sched_wakeup_new handler: same bookkeeping as the sched_wakeup handler, for
// the sched_wakeup_new tracepoint. It records the wake-up time of task p so
// the sched_switch handler can later compute how long the task waited before
// it was switched in.
//
// Tasks rejected by the ignore_tgid / target_pid / target_cpu_id filters are
// left untouched. Always returns 0.
SEC("tp_btf/sched_wakeup_new")
int BPF_PROG(sched_wakeup_new, struct task_struct *p)
{
    pid_t pid = BPF_CORE_READ(p, pid);
    int tgid = BPF_CORE_READ(p, tgid);
    int cpu = bpf_get_smp_processor_id();

    // Filter: drop the ignored thread group first, then apply the pid/cpu selection.
    if (tgid == ignore_tgid)
        return 0;
    if (!(target_pid == -1 || (target_pid != 0 && pid == target_pid) ||
          (target_pid == 0 && pid == target_pid && cpu == target_cpu_id)))
        return 0;

    struct proc_id key = {};
    u64 now = bpf_ktime_get_ns();

    key.pid = pid;
    // pid 0 is additionally keyed by the CPU id.
    if (pid == 0)
        key.cpu_id = cpu;

    struct schedule_event *stats = bpf_map_lookup_elem(&proc_schedule, &key);
    if (stats) {
        // Known task: only refresh the timestamp.
        stats->enter_time = now;
        return 0;
    }

    // First sighting of this task: create its statistics entry and its flag.
    struct schedule_event fresh = {};
    bool counted = false;

    fresh.pid = pid;
    // count starts at 1; the sched_switch handler skips the increment on the
    // first switch-in to compensate.
    fresh.count = 1;
    fresh.enter_time = now;

    bpf_map_update_elem(&enable_add, &key, &counted, BPF_ANY);
    bpf_map_update_elem(&proc_schedule, &key, &fresh, BPF_ANY);

    return 0;
}

// sched_switch handler: maintains the per-task scheduling-delay statistics.
//
//  * prev (the task being switched out): if its state is still TASK_RUNNING,
//    the current time is stored as its enter_time, creating the statistics
//    entry if it does not exist yet.
//  * next (the task being switched in): the time elapsed since its enter_time
//    is folded into sum_delay / max_delay / min_delay, its prio is refreshed
//    and its count is advanced.
//
// Each side is subject to the ignore_tgid / target_pid / target_cpu_id
// filters. Always returns 0.
SEC("tp_btf/sched_switch")
int BPF_PROG(sched_switch, bool preempt, struct task_struct *prev, struct task_struct *next)
{
    pid_t prev_pid = BPF_CORE_READ(prev, pid);
    int prev_tgid = BPF_CORE_READ(prev, tgid);
    // Both tasks are compared against the CPU this handler runs on.
    int cpu = bpf_get_smp_processor_id();
    unsigned int prev_state = BPF_CORE_READ(prev, __state);
    pid_t next_pid = BPF_CORE_READ(next, pid);
    int next_tgid = BPF_CORE_READ(next, tgid);
    u64 now = bpf_ktime_get_ns();

    // ---- outgoing task -------------------------------------------------
    if (prev_tgid != ignore_tgid && prev_state == TASK_RUNNING &&
        (target_pid == -1 || (target_pid != 0 && prev_pid == target_pid) ||
         (target_pid == 0 && prev_pid == target_pid && cpu == target_cpu_id))) {
        struct proc_id prev_key = {};
        struct schedule_event *prev_stats;

        prev_key.pid = prev_pid;
        if (prev_pid == 0)
            prev_key.cpu_id = cpu;

        prev_stats = bpf_map_lookup_elem(&proc_schedule, &prev_key);
        if (prev_stats) {
            prev_stats->enter_time = now;
        } else {
            // First sighting of this task: create its entry and its flag.
            struct schedule_event fresh = {};
            bool counted = false;

            fresh.pid = prev_pid;
            fresh.count = 1;
            fresh.enter_time = now;

            bpf_map_update_elem(&enable_add, &prev_key, &counted, BPF_ANY);
            bpf_map_update_elem(&proc_schedule, &prev_key, &fresh, BPF_ANY);
        }
    }

    // ---- incoming task -------------------------------------------------
    if (next_tgid == ignore_tgid)
        return 0;
    if (!(target_pid == -1 || (target_pid != 0 && next_pid == target_pid) ||
          (target_pid == 0 && next_pid == target_pid && cpu == target_cpu_id)))
        return 0;

    struct proc_id next_key = {};

    next_key.pid = next_pid;
    if (next_pid == 0)
        next_key.cpu_id = cpu;

    // Without a recorded enter_time there is nothing to measure.
    struct schedule_event *next_stats = bpf_map_lookup_elem(&proc_schedule, &next_key);
    if (!next_stats)
        return 0;

    bool *counted = bpf_map_lookup_elem(&enable_add, &next_key);
    if (!counted)
        return 0;

    // count was initialised to 1, so the first switch-in only arms the flag
    // instead of incrementing, to avoid counting once too often.
    if (*counted)
        next_stats->count++;
    else
        *counted = true;

    u64 waited = now - next_stats->enter_time;

    next_stats->prio = BPF_CORE_READ(next, prio);
    next_stats->sum_delay += waited;
    if (waited > next_stats->max_delay)
        next_stats->max_delay = waited;
    // min_delay == 0 is treated as "not set yet".
    if (next_stats->min_delay == 0 || waited < next_stats->min_delay)
        next_stats->min_delay = waited;

    return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# schedule功能类开发文档

**参考资料:**

- [Linux 的调度延迟 - 原理与观测 - 知乎 (zhihu.com)](https://zhuanlan.zhihu.com/p/462728452)
- [linux 内核抢占那些事 - 知乎 (zhihu.com)](https://zhuanlan.zhihu.com/p/166032722)

**调度延迟的计算得分两种情况:**

1. 任务因等待 event 进入休眠态([Voluntary Switch](https://zhuanlan.zhihu.com/p/402423877)),那么就是从被唤醒("wakeup/wakeup_new" 的时间点),到获得 CPU (任务切换时的 *"next_pid"*)的间隔。
2. 任务因 [Involuntary Switch](https://zhuanlan.zhihu.com/p/402423877) 让出 CPU,那么就是从让出 CPU(任务切换时作为 *"prev_pid"*)的时间点,到再次获得 CPU(之后的某次任务切换时作为 *"next_pid"*)所经历的时间。在这期间,任务始终在 runqueue 上,始终是 runnable 的状态,所以有 "prev_state" 是否为 *TASK_RUNNING* 的判断。

**内核中提供了三个接口来唤醒进程:**

- wake_up_new_task:用来唤醒新进程,fork出来的进程/线程;
- wake_up_process:唤醒处于TASK_NORMAL状态的进程;
- wake_up_state:唤醒指定状态的进程;

后两个接口最终都会调用try_to_wake_up接口:

```
try_to_wake_up-->ttwu_queue-->ttwu_do_activate-->ttwu_do_wakeup
```

**相关挂载点:**

```
ttwu_do_wakeup() --> trace_sched_wakeup
wake_up_new_task() --> trace_sched_wakeup_new
__schedule() --> trace_sched_switch
```
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,5 @@ typedef unsigned int u32;
#define RESOURCE_IMAGE 1
#define SYSCALL_IMAGE 2
#define LOCK_IMAGE 3
#define KEYTIME_IMAGE 4
#define KEYTIME_IMAGE 4
#define SCHEDULE_IMAGE 5
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#define FULL_MAX_ARGS_ARR 440
#define LAST_ARG (FULL_MAX_ARGS_ARR - ARGSIZE)

// Task-state value meaning "runnable"; the schedule_image sched_switch handler
// compares the outgoing task's task_struct.__state against it.
#define TASK_RUNNING 0x00000000

// resource_image
struct proc_id{
int pid;
Expand Down Expand Up @@ -101,4 +103,16 @@ struct keytime_event{
char char_info[FULL_MAX_ARGS_ARR];
};

// schedule_image
// Scheduling-delay statistics of one task, as stored in the proc_schedule map
// of schedule_image. All time values come from bpf_ktime_get_ns() (nanoseconds).
struct schedule_event {
    int pid;                        // pid of the task this record belongs to
    int prio;                       // task prio sampled at the latest switch-in
    int count;                      // number of recorded switch-ins; created as 1
    unsigned long long enter_time;  // timestamp of the latest wake-up or runnable switch-out
    unsigned long long sum_delay;   // sum of all measured delays
    unsigned long long max_delay;   // largest measured delay
    unsigned long long min_delay;   // smallest measured delay; 0 until the first measurement
};


#endif /* __PROCESS_H */
Loading

0 comments on commit da88c55

Please sign in to comment.