Skip to content

Commit

Permalink
[eBPF] Modify the storage method of Java perf files (#4536)
Browse files Browse the repository at this point in the history
* [eBPF] Modify the storage method of Java perf files

Previously, we stored Java perf map files in the '/tmp' directory of the target POD container, which caused a few issues:

- The target POD's '/tmp' directory might have size limitations, and storing perf files there could exceed those limits, leading to abnormalities in the target POD.
- Java perf map files would persist indefinitely, potentially being seen as invasive behavior.

We have made the following changes:

- We modified the storage location, moving the perf map files to the '/deepflow' directory of the target POD.
- After generating Java perf map files, we immediately transfer the files to the '/tmp' directory of the 'deepflow-agent' POD.
- No files are left behind in the target POD.

Additionally, you can use the 'deepflow-jattach clean' command within the 'deepflow-agent' POD to remove the previously resident 'perf-PID.log' and 'perf-PID.map' files in the customer's POD.

* [eBPF] Add comments for 'deepflow-ebpfctl cpdbg ...'

* [eBPF] Add java symbols write space limit

'java_syms_space_limit': The maximum space occupied by the Java symbol files
in the target POD. Its valid range is [2, 10], which means it falls within
the interval of 2Mi to 10Mi. If the configuration value is outside this
range, the default value of 10 (10Mi) will be used.

The size of the Java perf-PID.map file, excluding the additional space occupied
by 'agent.so' and log files, represents the actual limit of space occupied in the
target POD's root path ('/'). The actual writing limit is controlled by the global
variable 'g_java_syms_write_bytes_max', and the size (in bytes) of the Java perf-PID.map
file will not exceed it.

* [eBPF] Make the Java agent configurable and run successfully on the host

HotSpot JVM does not support agent unloading. However, you
may "attach" the same library multiple times with different
arguments. The library will not be loaded again, but
Agent_OnAttach will still be called multiple times with
different arguments.

We have achieved flexibility and configurability for the Java agent.

* [eBPF] Ensure that the socket tracer has completed before the profiler parses

The profiler's processing depends on probe interfaces provided by the socket tracer, such as process exit events. We want to ensure that everything is ready before the profiler performs address translation.

* [eBPF] Add a configurable option 'java_update_delay'

  @java_update_delay To allow Java to run for an extended period and gather
  more symbol information, we delay symbol retrieval when encountering unknown symbols.
  The recommended range for values is [60, 86400]; the default value is 300.

* [eBPF] Adjust the quota for Java symbol space occupancy

* [eBPF] Delete old perf map files in the customer's POD

* [eBPF] Adjust Java symbols delay update time

* [eBPF] Add configuration options for Java ebpf profiler
  • Loading branch information
yinjiping authored Oct 25, 2023
1 parent d9b346d commit 60cc1b9
Show file tree
Hide file tree
Showing 20 changed files with 585 additions and 231 deletions.
11 changes: 11 additions & 0 deletions agent/src/ebpf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -465,11 +465,22 @@ extern "C" {
/*
* start continuous profiler
* @freq sample frequency, Hertz. (e.g. 99 profile stack traces at 99 Hertz)
* @java_syms_space_limit The maximum space occupied by the Java symbol files
* in the '/' directory of the target POD container. The recommended range for
* values is [2, 100], which means it falls within the interval of 2Mi to 100Mi.
* If the configuration value is outside this range, the default value of
* 10 (10Mi) will be used.
* @java_syms_update_delay To allow Java to run for an extended period and gather
* more symbol information, we delay symbol retrieval when encountering unknown
* symbols. The unit of measurement used is seconds.
* The recommended range for values is [5, 3600]; the default value is 60.
* @callback Profile data processing callback interface
* @returns 0 on success, < 0 on error
*/
pub fn start_continuous_profiler(
freq: c_int,
java_syms_space_limit: c_int,
java_syms_update_delay: c_int,
callback: extern "C" fn(_data: *mut stack_profile_data),
) -> c_int;

Expand Down
4 changes: 2 additions & 2 deletions agent/src/ebpf/samples/rust/profiler/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ fn main() {

// Used to test our DeepFlow products, written as 97 frequency, so that
// it will not affect the sampling test of deepflow agent (using 99Hz).
if start_continuous_profiler(97, continuous_profiler_callback) != 0 {
if start_continuous_profiler(97, 10, 300, continuous_profiler_callback) != 0 {
println!("start_continuous_profiler() error.");
::std::process::exit(1);
}
Expand All @@ -179,7 +179,7 @@ fn main() {
std::thread::sleep(Duration::from_secs(1));
}

thread::sleep(Duration::from_secs(65));
thread::sleep(Duration::from_secs(150));
stop_continuous_profiler();
print!(
"====== capture count {}, sum {}\n",
Expand Down
5 changes: 2 additions & 3 deletions agent/src/ebpf/user/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "common.h"
#include "log.h"
#include "string.h"
#include "profile/java/config.h"

#define MAXLINE 1024

Expand Down Expand Up @@ -946,7 +947,7 @@ int exec_command(const char *cmd, const char *args)
{
FILE *fp;
int rc = 0;
char cmd_buf[64];
char cmd_buf[PERF_PATH_SZ * 2];
snprintf(cmd_buf, sizeof(cmd_buf), "%s %s", cmd, args);
fp = popen(cmd_buf, "r");
if (NULL == fp) {
Expand All @@ -968,8 +969,6 @@ int exec_command(const char *cmd, const char *args)
cmd_buf, strerror(errno));
} else {
if (WIFEXITED(rc)) {
ebpf_info("'%s' normal termination, exit status %d\n",
cmd_buf, WEXITSTATUS(rc));
return WEXITSTATUS(rc);
} else if (WIFSIGNALED(rc)) {
ebpf_info
Expand Down
21 changes: 20 additions & 1 deletion agent/src/ebpf/user/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,9 @@ enum {
* date the Java symbol table. This is done The purpose is to avoid freque-
* nt updates of the java symbol table.
*/
#define JAVA_SYMS_TABLE_UPDATE_PERIOD 300 // 300 seconds
#define JAVA_SYMS_UPDATE_DELAY_DEF 60 // 60 seconds
#define JAVA_SYMS_UPDATE_DELAY_MIN 5 // 5 seconds
#define JAVA_SYMS_UPDATE_DELAY_MAX 3600 // 3600 seconds

/* Profiler - maximum data push interval time (in nanosecond). */
#define MAX_PUSH_MSG_TIME_INTERVAL 1000000000ULL /* 1 seconds */
Expand Down Expand Up @@ -205,4 +207,21 @@ enum {
* check cycle time (unit is milliseconds).
*/
#define CHECK_KERN_ADAPT_PERIOD 100 // 100 ticks(1 seconds)

/*
* The maximum space occupied by the Java symbol files in the target POD.
* Its valid range is [2, 100], which means it falls within the interval
* of 2Mi to 100Mi. If the configuration value is outside this range, the
* default value of 10 (10Mi) will be used.
*/
#define JAVA_POD_WRITE_FILES_SPACE_MIN 2097152 // 2Mi
#define JAVA_POD_WRITE_FILES_SPACE_MAX 104857600 // 100Mi
#define JAVA_POD_WRITE_FILES_SPACE_DEF 10485760 // 10Mi
/*
* The `df_java_agent_musl.so` and `df_java_agent.so` files will also be
* placed in the target POD for loading operations. They occupy less than
* 300Ki of space.
*/
#define JAVA_POD_EXTRA_SPACE_MMA 307200 // 300Ki

#endif /* DF_EBPF_CONFIG_H */
28 changes: 25 additions & 3 deletions agent/src/ebpf/user/profile/attach.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,41 @@

#include "../common.h"
#include "../log.h"
#include "java/config.h"
#include "java/df_jattach.h"
#include "attach.h"

extern int g_java_syms_write_bytes_max;

/*
* Run the DF_JAVA_ATTACH_CMD helper for process @pid so that it generates the
* Java symbol files, then, when is_same_mntns(pid) is false, copy the "map"
* and "log" files back via copy_file_from_target_ns() and call
* clear_target_ns().
*
* @pid Process ID handed to the helper command and to the copy/cleanup calls.
*
* Returns nothing. Returns early, without running the helper, when
* get_nspid(pid) yields a negative value.
*/
void gen_java_symbols_file(int pid)
{
int target_ns_pid = get_nspid(pid);
if (target_ns_pid < 0) {
return;
}

char args[32];
snprintf(args, sizeof(args), "%d", pid);
char args[PERF_PATH_SZ * 2];
/*
* Build the helper's argument string:
* "<pid> <g_java_syms_write_bytes_max>,<map path format>,<log path format>".
* The two path formats depend on the result of is_same_mntns(pid).
* NOTE(review): presumably is_same_mntns() reports whether @pid shares the
* agent's mount namespace -- confirm against its definition.
*/
if (!is_same_mntns(pid)) {
snprintf(args, sizeof(args), "%d %d,%s,%s", pid,
g_java_syms_write_bytes_max,
PERF_MAP_FILE_FMT, PERF_MAP_LOG_FILE_FMT);
} else {
snprintf(args, sizeof(args), "%d %d,%s,%s", pid,
g_java_syms_write_bytes_max,
DF_AGENT_LOCAL_PATH_FMT ".map",
DF_AGENT_LOCAL_PATH_FMT ".log");
}

/* The return value of exec_command() is not checked here. */
exec_command(DF_JAVA_ATTACH_CMD, args);
if (!is_same_mntns(pid)) {
/*
* A non-zero result from either copy is only logged as a warning;
* clear_target_ns() is called regardless of the copy outcome.
*/
if (copy_file_from_target_ns(pid, target_ns_pid, "map") ||
copy_file_from_target_ns(pid, target_ns_pid, "log"))
ebpf_warning("Copy pid %d files failed\n", pid);
clear_target_ns(pid, target_ns_pid);
}
}

clear_target_ns_so(pid, target_ns_pid);
/*
* Thin wrapper that forwards @pid to clear_local_perf_files().
*
* @pid Process ID passed unchanged to clear_local_perf_files().
*/
void clean_local_java_symbols_files(int pid)
{
clear_local_perf_files(pid);
}
1 change: 1 addition & 0 deletions agent/src/ebpf/user/profile/attach.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@

#define DF_JAVA_ATTACH_CMD "/usr/bin/deepflow-jattach"
void gen_java_symbols_file(int pid);
void clean_local_java_symbols_files(int pid);
#endif /* ATTACH_H */
Loading

0 comments on commit 60cc1b9

Please sign in to comment.