Skip to content

Commit

Permalink
[eBPF] Add profiler debugging function (#4497)
Browse files Browse the repository at this point in the history
Debug eBPF on-cpu profiler, when the debug function is enabled, the profiler data is sent to the agent log.

```
Continuous profiler debug: profiler data output to agent log
Usage:
    deepflow-ebpfctl cpdbg {on|off} [OPTIONS]
Options:
    '-t, --timeout':      set profiler debug timout. Unit: second
For example:
    deepflow-ebpfctl cpdbg on --timeout 60
    deepflow-ebpfctl cpdbg off
```
  • Loading branch information
yinjiping authored Oct 17, 2023
1 parent 7746589 commit 6714362
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 25 deletions.
5 changes: 5 additions & 0 deletions agent/src/ebpf/user/ctrl.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ struct datadump_msg {
char comm[16];
};

struct cpdbg_msg {
bool enable; // Whether to enable the datadump ?
int timeout;
};

int sockopt_ctl(void *arg);
int ctrl_init(void);
int sockopt_register(struct tracer_sockopts *sockopts);
Expand Down
100 changes: 88 additions & 12 deletions agent/src/ebpf/user/ctrl_tracer.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,27 @@ static void socktrace_help(void)
fprintf(stderr, "Usage:\n" " %s tracer show\n", DF_BPF_NAME);
}

static void cpdbg_help(void)
{
fprintf(stderr,
"Continuous profiler debug: profiler data output to agent log\n");
fprintf(stderr, "Usage:\n" " %s cpdbg {on|off} [OPTIONS]\n",
DF_BPF_NAME);
fprintf(stderr, "Options:\n");
fprintf(stderr,
" '-t, --timeout': set profiler debug timout. Unit: second\n");
fprintf(stderr, "For example:\n");
fprintf(stderr, " %s cpdbg on --timeout 60\n", DF_BPF_NAME);
fprintf(stderr, " %s cpdbg off\n", DF_BPF_NAME);
}

static void datadump_help(void)
{
fprintf(stderr, "Usage:\n" " %s datadump {on|off} [OPTIONS]\n", DF_BPF_NAME);
fprintf(stderr, " %s datadump set pid PID comm COMM proto PROTO_NUM\n", DF_BPF_NAME);
fprintf(stderr, "Usage:\n" " %s datadump {on|off} [OPTIONS]\n",
DF_BPF_NAME);
fprintf(stderr,
" %s datadump set pid PID comm COMM proto PROTO_NUM\n",
DF_BPF_NAME);
fprintf(stderr, "PROTO_NUM:\n");
fprintf(stderr, " 0: PROTO_ALL\n");
fprintf(stderr, " 1: PROTO_ORTHER\n");
Expand All @@ -98,16 +115,20 @@ static void datadump_help(void)
fprintf(stderr, "PID:\n");
fprintf(stderr, " 0: all process/thread\n");
fprintf(stderr, "COMM:\n");
fprintf(stderr, " '': The process name or thread name is not restricted.\n");
fprintf(stderr,
" '': The process name or thread name is not restricted.\n");
fprintf(stderr, "Options:\n");
fprintf(stderr, " '-O, --only-stdout': dump to stdout only.\n");
fprintf(stderr, " '-t, --timeout': set datadump timout. Unit: second\n");
fprintf(stderr,
" '-t, --timeout': set datadump timout. Unit: second\n");
fprintf(stderr, "For example:\n");
fprintf(stderr, " %s datadump set pid 4567 comm curl proto 0\n",
DF_BPF_NAME);
fprintf(stderr, " %s datadump set pid 4567 comm '' proto 20\n", DF_BPF_NAME);
fprintf(stderr, " %s datadump set pid 4567 comm '' proto 20\n",
DF_BPF_NAME);
fprintf(stderr, " %s datadump on --timeout 60\n", DF_BPF_NAME);
fprintf(stderr, " %s datadump on --only-stdout --timeout 60\n", DF_BPF_NAME);
fprintf(stderr, " %s datadump on --only-stdout --timeout 60\n",
DF_BPF_NAME);
fprintf(stderr, " %s datadump off\n", DF_BPF_NAME);
}

Expand Down Expand Up @@ -446,12 +467,10 @@ static int socktrace_do_cmd(struct df_bpf_obj *obj, df_bpf_cmd_t cmd,
sk_trace_params->kern_trace_map_used);
printf("datadump_enable:\t%s\n",
sk_trace_params->datadump_enable ? "true" : "false");
printf("datadump_pid:\t%d\n",
sk_trace_params->datadump_pid);
printf("datadump_pid:\t%d\n", sk_trace_params->datadump_pid);
printf("datadump_proto:\t%d\n",
sk_trace_params->datadump_proto);
printf("datadump_comm:\t%s\n",
sk_trace_params->datadump_comm);
printf("datadump_comm:\t%s\n", sk_trace_params->datadump_comm);
printf("datadump_file_path:\t%s\n\n",
sk_trace_params->datadump_file_path);

Expand Down Expand Up @@ -496,7 +515,8 @@ static int datadump_do_cmd(struct df_bpf_obj *obj, df_bpf_cmd_t cmd,
if (conf->cmd == DF_BPF_CMD_ON) {
msg.enable = true;
if (msg.timeout == 0) {
printf("Miss --timeout setting.\n");
printf
("Miss --timeout setting.\n");
return ETR_NOTSUPP;
}
printf("Set datadump on, timeout %ds ",
Expand Down Expand Up @@ -562,6 +582,54 @@ static int datadump_do_cmd(struct df_bpf_obj *obj, df_bpf_cmd_t cmd,
return ETR_OK;
}

static int cpdbg_do_cmd(struct df_bpf_obj *obj, df_bpf_cmd_t cmd,
struct df_bpf_conf *conf)
{
switch (conf->cmd) {
case DF_BPF_CMD_ON:
case DF_BPF_CMD_OFF:
{
struct cpdbg_msg msg;
msg.timeout = conf->timeout;
if (conf->cmd == DF_BPF_CMD_ON
|| conf->cmd == DF_BPF_CMD_OFF) {
if (conf->argc != 0) {
obj->help();
return ETR_NOTSUPP;
}
if (conf->cmd == DF_BPF_CMD_ON) {
msg.enable = true;
if (msg.timeout == 0) {
printf
("Miss --timeout setting.\n");
return ETR_NOTSUPP;
}
printf("Set cpdbg on, timeout %ds ",
msg.timeout);
} else {
msg.enable = false;
printf("Set cpdbg off ");
}
} else {
obj->help();
return ETR_NOTSUPP;
}

if (df_bpf_setsockopt(SOCKOPT_SET_CPDBG_ADD, &msg,
sizeof(msg)) == 0) {
printf("Success.\n");
} else {
printf("Failed.\n");
}

break;
}
default:
return ETR_NOTSUPP;
}
return ETR_OK;
}

static int tracer_do_cmd(struct df_bpf_obj *obj, df_bpf_cmd_t cmd,
struct df_bpf_conf *conf)
{
Expand Down Expand Up @@ -613,13 +681,19 @@ struct df_bpf_obj datadump_obj = {
.do_cmd = datadump_do_cmd,
};

struct df_bpf_obj cpdbg_obj = {
.name = "cpdbg",
.help = cpdbg_help,
.do_cmd = cpdbg_do_cmd,
};

static void usage(void)
{
fprintf(stderr,
"Usage:\n"
" " DF_BPF_NAME " [OPTIONS] OBJECT { COMMAND | help }\n"
"Parameters:\n"
" OBJECT := { tracer socktrace datadump }\n"
" OBJECT := { tracer socktrace datadump cpdbg }\n"
" COMMAND := { show list set }\n"
"Options:\n"
" -v, --verbose\n"
Expand All @@ -634,6 +708,8 @@ static struct df_bpf_obj *df_bpf_obj_get(const char *name)
return &socktrace_obj;
} else if (strcmp(name, "datadump") == 0) {
return &datadump_obj;
} else if (strcmp(name, "cpdbg") == 0) {
return &cpdbg_obj;
}

return NULL;
Expand Down
122 changes: 110 additions & 12 deletions agent/src/ebpf/user/profile/perf_profiler.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ static void print_cp_tracer_status(struct bpf_tracer *t);
*/
static atomic64_t process_lost_count;

/* Continuous Profiler debug lock */
static pthread_mutex_t cpdbg_mutex;
static bool cpdbg_enable;
static uint32_t cpdbg_start_time;
static uint32_t cpdbg_timeout;

static u64 get_process_lost_count(void)
{
return atomic64_read(&process_lost_count);
Expand Down Expand Up @@ -322,19 +328,60 @@ static int init_stack_trace_msg_hash(stack_trace_msg_hash_t * h,
nbuckets, hash_memory_size);
}

static inline bool is_cpdbg_timeout(void)
{
uint32_t passed_sec;
passed_sec = get_sys_uptime() - cpdbg_start_time;
if (passed_sec > cpdbg_timeout) {
cpdbg_start_time = 0;
cpdbg_enable = false;
ebpf_info("\n\ncpdbg is finished, use time: %us.\n\n",
cpdbg_timeout);
cpdbg_timeout = 0;
return true;
}

return false;
}

static void print_cp_data(stack_trace_msg_t * msg)
{
char *cid;
if (strlen((char *)msg->container_id) == 0)
cid = "null";
else
cid = (char *)msg->container_id;

ebpf_info
("netns_id %lu container_id %s process_name %s pid %u stime %lu "
"u_stack_id %lu k_statck_id %lu cpu %u count %u comm %s tiemstamp"
" %lu datalen %u data %s\n",
msg->netns_id, cid,
msg->process_name, msg->pid, msg->stime, msg->u_stack_id,
msg->k_stack_id, msg->cpu, msg->count, msg->comm, msg->time_stamp,
msg->data_len, msg->data);
}

static void cpdbg_process(stack_trace_msg_t * msg)
{
pthread_mutex_lock(&cpdbg_mutex);
if (unlikely(cpdbg_enable)) {
if (!is_cpdbg_timeout())
print_cp_data(msg);

}
pthread_mutex_unlock(&cpdbg_mutex);
}

static int push_and_free_msg_kvp_cb(stack_trace_msg_hash_kv * kv, void *ctx)
{
stack_trace_msg_kv_t *msg_kv = (stack_trace_msg_kv_t *) kv;
if (msg_kv->msg_ptr != 0) {
stack_trace_msg_t *msg = (stack_trace_msg_t *) msg_kv->msg_ptr;
#ifdef CP_DEBUG
ebpf_debug
("tiemstamp %lu pid %u stime %lu u_stack_id %lu k_statck_id"
"%lu cpu %u count %u comm %s datalen %u data %s\n",
msg->time_stamp, msg->pid, msg->stime, msg->u_stack_id,
msg->k_stack_id, msg->cpu, msg->count, msg->comm,
msg->data_len, msg->data);
#endif

/* continuous profiler debug */
cpdbg_process(msg);

tracer_callback_t fun = profiler_tracer->process_fn;
/*
* Execute callback function to hand over the data to the
Expand Down Expand Up @@ -523,7 +570,8 @@ static void aggregate_stack_traces(struct bpf_tracer *t,

char *trace_str =
resolve_and_gen_stack_trace_str(t, v, stack_map_name,
stack_str_hash, matched, info_p);
stack_str_hash, matched,
info_p);
if (trace_str) {
/*
* append process/thread name to stack string
Expand Down Expand Up @@ -557,8 +605,8 @@ static void aggregate_stack_traces(struct bpf_tracer *t,
("stack_trace_msg_hash_add_del() failed.\n");
clib_mem_free(msg);
} else {
__sync_fetch_and_add(&msg_hash->
hash_elems_count, 1);
__sync_fetch_and_add
(&msg_hash->hash_elems_count, 1);
}
}

Expand Down Expand Up @@ -936,6 +984,48 @@ static bool check_kallsyms_addr_is_zero(void)
return (count == check_num);
}

static int cpdbg_sockopt_get(sockoptid_t opt, const void *conf, size_t size,
void **out, size_t * outsize)
{
return 0;
}

static int cpdbg_sockopt_set(sockoptid_t opt, const void *conf, size_t size)
{
struct cpdbg_msg *msg = (struct cpdbg_msg *)conf;
pthread_mutex_lock(&cpdbg_mutex);
if (msg->enable) {
cpdbg_start_time = get_sys_uptime();
cpdbg_timeout = msg->timeout;
}

if (cpdbg_enable && !msg->enable) {
cpdbg_timeout = 0;
cpdbg_start_time = 0;
}

cpdbg_enable = msg->enable;
if (cpdbg_enable) {
ebpf_info("cpdbg enable timeout %ds\n", cpdbg_timeout);
} else {
ebpf_info("cpdbg disable");
}

pthread_mutex_unlock(&cpdbg_mutex);

return 0;
}

static struct tracer_sockopts cpdbg_sockopts = {
.version = SOCKOPT_VERSION,
.set_opt_min = SOCKOPT_SET_CPDBG_ADD,
.set_opt_max = SOCKOPT_SET_CPDBG_OFF,
.set = cpdbg_sockopt_set,
.get_opt_min = SOCKOPT_GET_CPDBG_SHOW,
.get_opt_max = SOCKOPT_GET_CPDBG_SHOW,
.get = cpdbg_sockopt_get,
};

/*
* start continuous profiler
* @freq sample frequency, Hertz. (e.g. 99 profile stack traces at 99 Hertz)
Expand Down Expand Up @@ -971,6 +1061,11 @@ int start_continuous_profiler(int freq, tracer_callback_t callback)

atomic64_init(&process_lost_count);

/*
* Initialize cpdbg
*/
pthread_mutex_init(&cpdbg_mutex, NULL);

profiler_stop = 0;
start_time = gettime(CLOCK_MONOTONIC, TIME_TYPE_SEC);

Expand Down Expand Up @@ -1049,6 +1144,9 @@ int start_continuous_profiler(int freq, tracer_callback_t callback)
if (tracer == NULL)
return (-1);

if (sockopt_register(&cpdbg_sockopts) != ETR_OK)
return (-1);

tracer->state = TRACER_RUNNING;
return (0);
}
Expand Down Expand Up @@ -1202,7 +1300,7 @@ int stop_continuous_profiler(void)
return (0);
}

void process_stack_trace_data_for_flame_graph(stack_trace_msg_t *val)
void process_stack_trace_data_for_flame_graph(stack_trace_msg_t * val)
{
return;
}
Expand Down
9 changes: 8 additions & 1 deletion agent/src/ebpf/user/tracer.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,14 @@ enum {
SOCKOPT_SET_DATADUMP_ON,
SOCKOPT_SET_DATADUMP_OFF,
/* get */
SOCKOPT_GET_DATADUMP_SHOW
SOCKOPT_GET_DATADUMP_SHOW,

/* set */
SOCKOPT_SET_CPDBG_ADD = 700,
SOCKOPT_SET_CPDBG_ON,
SOCKOPT_SET_CPDBG_OFF,
/* get */
SOCKOPT_GET_CPDBG_SHOW
};

struct mem_block_head {
Expand Down

0 comments on commit 6714362

Please sign in to comment.