Skip to content

Commit

Permalink
Update and add more code comments (linuxkerneltravel#538)
Browse files Browse the repository at this point in the history
* [doc] update test_bench and README

* [doc] update and add more code comments

* [chore] update miscs
  • Loading branch information
jyf111 authored Oct 14, 2023
1 parent 2b9dfad commit b65280e
Show file tree
Hide file tree
Showing 30 changed files with 799 additions and 674 deletions.
3 changes: 1 addition & 2 deletions eBPF_Supermarket/User_Function_Tracer/.clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@ IndentWidth: 2
ColumnLimit: 100
DerivePointerAlignment: false
PointerAlignment: Right
Cpp11BracedListStyle: false
IncludeCategories:
- Regex: '"vmlinux.h"' # let vmlinux.h come first
Priority: -1
- Regex: '"utrace.*h"'
Priority: 0
- Regex: '<getopt.h>'
Priority: -1
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
- Regex: '^(<|"(gtest|isl|json)/)'
Expand Down
1 change: 1 addition & 0 deletions eBPF_Supermarket/User_Function_Tracer/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ doc/
vmlinux/vmlinux.h
*.out
*.data/
utrace.data
9 changes: 6 additions & 3 deletions eBPF_Supermarket/User_Function_Tracer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@ cmake_minimum_required(VERSION 3.16)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

project(utrace
DESCRIPTION "User Function Tracer"
project(eBPF-utrace
DESCRIPTION "eBPF Based User Function Tracer"
LANGUAGES C
)

SET(CMAKE_C_COMPILER /usr/bin/clang)
# we use abi::__cxa_demangle(), which is only available on clang
SET(CMAKE_C_COMPILER /usr/bin/clang CACHE STRING "C compiler" FORCE)

list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake)

Expand All @@ -27,6 +28,7 @@ file(GLOB
"src/*.c"
"src/*.h"
"test/*.c"
"test/*.cpp"
)

add_custom_target(format
Expand All @@ -35,6 +37,7 @@ add_custom_target(format
VERBATIM
)

# doc target
add_custom_target(doc
COMMAND doxygen ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile
COMMENT "Generating docs."
Expand Down
12 changes: 11 additions & 1 deletion eBPF_Supermarket/User_Function_Tracer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@
`eBPF-utrace` is an eBPF-based user function tracer targeted for C/C++ programs. It offers function-level insights into program execution **without** requiring
recompilation, and can be used for debugging or performance analysis.

### Overview

[![eBPF-utrace-over.md.png](https://z1.ax1x.com/2023/10/07/pPjEPPJ.md.png)](https://imgse.com/i/pPjEPPJ)

### Screenshot

[![eBPF-utrace-screen.md.png](https://z1.ax1x.com/2023/10/11/pPzXQW6.md.png)](https://imgse.com/i/pPzXQW6)

[![flame-graph.jpg](https://z1.ax1x.com/2023/10/11/pPzXMJx.md.jpg)](https://imgse.com/i/pPzXMJx)

## Getting Started

### Install Dependencies
Expand Down Expand Up @@ -67,7 +77,7 @@ rather than using approximate sampling methods.

## Overhead

It brings an overhead of around **10us** on a native Linux machine/WSL2 and 35us on a virtual machine for each traced function.
It brings an overhead of around **10us** on a native Linux machine/WSL2 and 20us on a virtual machine for each traced function.

You can verify this by running `test/bench.cpp` yourself.

Expand Down
103 changes: 59 additions & 44 deletions eBPF_Supermarket/User_Function_Tracer/src/demangle.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,59 +14,59 @@
//
// author: [email protected]
//
// demangle mangled C++ symbols
// demangle and simplify mangled C++ symbols

#include "demangle.h"

#include <stdio.h> // for perror
#include <stdlib.h>
#include <string.h>

#include "util.h"

// simplify the demangled symbol name
/**
* @brief simplify the demangled symbol name
*/
static char *simplify(char *name) {
size_t len = strlen(name);
if (!len) return name;

// remove function template "<...>"
// remove all function templates, i.e., "<...>"
for (size_t i = 0; i < len; i++) {
if (name[i] == '<') {
if (name[i + 1] == '<' && i >= 8 &&
strncmp(name + i - 8, "operator", 8) == 0) { // skip operator<<
i++;
if (i >= 8 && !strncmp(name + i - 8, "operator", 8)) { // skip `operator<` and `operator<<`
if (name[i + 1] == '<') ++i;
// remove useless extra blanks
size_t j = i + 1;
while (name[j] == ' ') ++j;
memmove(name + i + 1, name + j, len - j + 1);
len -= j - i - 1;
continue;
}
size_t j = i;
int nested = 1;
while (j + 1 < len) {
++j;
if (name[j] == '<') {
++nested;
} else if (name[j] == '>') {
--nested;
if (!nested) break;
} else {
size_t j = i;
int nested = 1;
while (j + 1 < len) {
++j;
if (name[j] == '<') {
++nested;
} else if (name[j] == '>') {
--nested;
if (!nested) break;
}
}
memmove(name + i, name + j + 1, len - j);
len -= j - i + 1;
}
memmove(name + i, name + j + 1, len - j);
len -= j - i + 1;
}
}

// remove function cv-qualifier
// remove the last function cv-qualifier
for (size_t i = len - 1; i > 0; i--) {
if (name[i] == ')') break;
if (name[i] == ' ' && name[i - 1] == ')') {
name[i] = '\0';
len = i;
name[len = i] = '\0';
break;
}
}

// remove lambda function parameters, i.e., {lambda(...)}
// remove all lambda function parameters, i.e., "{lambda(...)}"
for (size_t i = 0; i < len; i++) {
if (strncmp(name + i, "{lambda", 7) == 0) {
i += 7; // name[i] == '('
Expand All @@ -83,11 +83,10 @@ static char *simplify(char *name) {
}
memmove(name + i, name + j + 1, len - j);
len -= j - i + 1;
break;
}
}

// remove function parameters, i.e., the last "(...)"
// remove all function parameters, i.e., "(...)"
for (size_t i = len - 1; i > 0; i--) {
if (name[i] == ')') {
size_t j = i;
Expand All @@ -103,48 +102,64 @@ static char *simplify(char *name) {
}
memmove(name + i, name + j + 1, len - j);
len -= j - i + 1;
// remove function return type
for (j = i; j > 0; j--) {
if (name[j] == ' ') {
if (j != 8 || strncmp(name, "operator", 8)) {
for (j = i - 1; j > 0; j--) {
if (name[j] == ':' && name[j + 1] == ':') { // there may be nested lambdas or namespaces
i = j - 1;
while (i > 0 && name[i] != ')' && name[i] != ':') --i;
if (name[i] == ')') {
if (!strncmp(name + i + 1, " const", 6)) { // remove function cv-qualifier
memmove(name + i + 1, name + j, len - j + 1);
len -= j - i - 1;
}
break;
}
} else if (name[j] == ' ') { // remove function return type at the beginning
if (!(j == 8 && !strncmp(name, "operator", 8))) {
memmove(name, name + j + 1, len - j);
len -= j + 1;
}
i = 0;
break;
}
}
break;
++i;
}
}

// remove trailing space
while (len >= 1 && name[len - 1] == ' ') --len;

// remove trailing spaces
while (len >= 1 && name[len - 1] == ' ') name[--len] = '\0';
if (name[0] == '(' && name[len - 1] == ')') {
memmove(name, name + 1, len);
name[len - 2] = '\0';
return simplify(name);
}
int st = 0;
while (name[st] == '*' || name[st] == '&') ++st;
if (st) memmove(name, name + st, len - st + 1);
return name;
}

char *demangle(const char *mangled_name) {
const char *GLOBAL_PREFIX = "_GLOBAL__sub_I_";
const size_t LEN = 15;

char *demangled_name;
size_t demangled_len;
int status;
size_t offset = 0;

// handle symbols starting with GLOBAL_PREFIX introduced by <iostream>
if (strncmp(mangled_name, GLOBAL_PREFIX, LEN) == 0) offset = LEN;
// handle symbols starting with `GLOBAL_PREFIX` introduced by <iostream>
const char *GLOBAL_PREFIX = "_GLOBAL__sub_I_";
const size_t LEN = 15;
if (!strncmp(mangled_name, GLOBAL_PREFIX, LEN)) offset = LEN;

// ensure mangled_name is really mangled (start with "_Z")
if (strncmp(mangled_name + offset, "_Z", 2) == 0) {
// ensure `mangled_name` is really mangled (start with "_Z")
if (!strncmp(mangled_name + offset, "_Z", 2)) {
demangled_name = __cxa_demangle(mangled_name + offset, NULL, NULL, &status);
if (!status) {
demangled_name = simplify(demangled_name);
demangled_len = strlen(demangled_name);
if (offset > 0) {
if (offset > 0) { // concat `GLOBAL_PREFIX` with `demangled_name`
demangled_name = realloc(demangled_name, demangled_len + 1 + LEN);
if (!demangled_name) die("realloc");
memmove(demangled_name + LEN, demangled_name, demangled_len + 1);
memmove(demangled_name + LEN, demangled_name, demangled_len + 1); // keep the last '\0'
memcpy(demangled_name, GLOBAL_PREFIX, LEN);
}
return demangled_name;
Expand Down
15 changes: 6 additions & 9 deletions eBPF_Supermarket/User_Function_Tracer/src/demangle.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,21 @@
//
// author: [email protected]
//
// demangle mangled C++ symbols
// demangle and simplify mangled C++ symbols

#ifndef UTRACE_DEMANGLE_H
#define UTRACE_DEMANGLE_H

#include <stddef.h>
#include <stddef.h> // for size_t

// defined in libstdc++
extern char *__cxa_demangle(const char *mangled_name, char *output_buffer, size_t *length,
int *status);

/**
* @brief 还原重整符号
* @param[in] mangled_name 符号
* @return 还原后的符号
* @details 对于未重整的符号,调用strdup()
* 对于重整过的符号(以"_Z"起始),调用abi::__cxa_demangle()
* @retval 指向堆内存
* @brief demangle and simplify the `mangled_name`
* @return the demangled name, allocated on the heap
*/
char *demangle(const char *mangled_name);

#endif // UTRACE_DEMANGLE_H
#endif // UTRACE_DEMANGLE_H
10 changes: 5 additions & 5 deletions eBPF_Supermarket/User_Function_Tracer/src/elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
//
// author: [email protected]
//
// 解析ELF格式以遍历ELF中的各个节以及符号节中的各个条目
// Use the gelf library to parse each section in an ELF file

#include "elf.h"

Expand All @@ -31,7 +31,6 @@ bool elf_head_init(struct elf_head *elf, const char *filename) {
if (!elf->e) return false;

if (elf_kind(elf->e) != ELF_K_ELF) return false;

if (!gelf_getehdr(elf->e, &elf->ehdr)) return false;

return true;
Expand All @@ -47,7 +46,7 @@ void elf_head_free(struct elf_head *elf) {
size_t get_entry_address(const char *filename) {
struct elf_head elf;
if (!elf_head_init(&elf, filename)) return 0;
size_t entry = elf.ehdr.e_entry;
size_t entry = elf.ehdr.e_entry; // the entry address is recorded in ELF header
elf_head_free(&elf);
return entry;
}
Expand All @@ -64,13 +63,14 @@ bool elf_section_next(struct elf_section *elf_s, struct elf_head *elf) {

void elf_sym_entry_begin(struct elf_sym_entry *elf_e, struct elf_section *elf_s) {
elf_e->i = 0;
elf_e->nentries = elf_s->shdr.sh_size / elf_s->shdr.sh_entsize;
elf_e->nentries =
elf_s->shdr.sh_size / elf_s->shdr.sh_entsize; // number of entries in this section
elf_e->sym_data = elf_getdata(elf_s->scn, NULL);
elf_e->str_idx = elf_s->shdr.sh_link;
}

bool elf_sym_entry_next(struct elf_sym_entry *elf_e, struct elf_section *elf_s) {
(void)elf_s;
(void)elf_s; // keep all functions' prototypes consistent
if (elf_e->i >= elf_e->nentries) return false;
gelf_getsym(elf_e->sym_data, elf_e->i, &elf_e->sym);
elf_e->i++;
Expand Down
Loading

0 comments on commit b65280e

Please sign in to comment.