diff --git a/agent/Cargo.toml b/agent/Cargo.toml index c1ebfed50b2..dcfb7b921f9 100644 --- a/agent/Cargo.toml +++ b/agent/Cargo.toml @@ -137,6 +137,8 @@ dunce = "0.1.1" tonic-build = "0.8.0" [features] +enterprise = ["off_cpu"] +off_cpu = [] [[bench]] name = "common" diff --git a/agent/src/config/config.rs b/agent/src/config/config.rs index 70f5b703442..75bbb1e365f 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -326,9 +326,6 @@ pub struct OnCpuProfile { pub frequency: u16, pub cpu: u16, pub regex: String, - pub java_symbol_file_max_space_limit: u8, - #[serde(with = "humantime_serde")] - pub java_symbol_file_refresh_defer_interval: Duration, } impl Default for OnCpuProfile { @@ -338,8 +335,25 @@ impl Default for OnCpuProfile { frequency: 99, cpu: 0, regex: "^deepflow-.*".to_string(), - java_symbol_file_max_space_limit: 10, - java_symbol_file_refresh_defer_interval: Duration::from_secs(600), + } + } +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[serde(default, rename_all = "kebab-case")] +pub struct OffCpuProfile { + pub disabled: bool, + pub regex: String, + #[serde(rename = "minblock", with = "humantime_serde")] + pub min_block: Duration, +} + +impl Default for OffCpuProfile { + fn default() -> Self { + OffCpuProfile { + disabled: false, + regex: "^deepflow-.*".to_string(), + min_block: Duration::from_micros(50), } } } @@ -364,7 +378,11 @@ pub struct EbpfYamlConfig { pub io_event_collect_mode: usize, #[serde(with = "humantime_serde")] pub io_event_minimal_duration: Duration, + pub java_symbol_file_max_space_limit: u8, + #[serde(with = "humantime_serde")] + pub java_symbol_file_refresh_defer_interval: Duration, pub on_cpu_profile: OnCpuProfile, + pub off_cpu_profile: OffCpuProfile, } impl Default for EbpfYamlConfig { @@ -385,7 +403,10 @@ impl Default for EbpfYamlConfig { go_tracing_timeout: 120, io_event_collect_mode: 1, io_event_minimal_duration: Duration::from_millis(1), + java_symbol_file_max_space_limit: 10, + 
java_symbol_file_refresh_defer_interval: Duration::from_secs(600), on_cpu_profile: OnCpuProfile::default(), + off_cpu_profile: OffCpuProfile::default(), } } } @@ -747,24 +768,20 @@ impl YamlConfig { if c.ebpf.max_trace_entries < 100000 || c.ebpf.max_trace_entries > 2000000 { c.ebpf.max_trace_entries = 524288; } - if c.ebpf.on_cpu_profile.java_symbol_file_max_space_limit < 2 - || c.ebpf.on_cpu_profile.java_symbol_file_max_space_limit > 100 + if c.ebpf.java_symbol_file_max_space_limit < 2 + || c.ebpf.java_symbol_file_max_space_limit > 100 { - c.ebpf.on_cpu_profile.java_symbol_file_max_space_limit = 10 - } - if c.ebpf - .on_cpu_profile - .java_symbol_file_refresh_defer_interval - < Duration::from_secs(5) - || c.ebpf - .on_cpu_profile - .java_symbol_file_refresh_defer_interval - > Duration::from_secs(3600) + c.ebpf.java_symbol_file_max_space_limit = 10 + } + if c.ebpf.java_symbol_file_refresh_defer_interval < Duration::from_secs(5) + || c.ebpf.java_symbol_file_refresh_defer_interval > Duration::from_secs(3600) { - c.ebpf - .on_cpu_profile - .java_symbol_file_refresh_defer_interval = Duration::from_secs(600) + c.ebpf.java_symbol_file_refresh_defer_interval = Duration::from_secs(600) } + c.ebpf.off_cpu_profile.min_block = c.ebpf.off_cpu_profile.min_block.clamp( + Duration::from_micros(1), + Duration::from_micros(u32::MAX as u64 - 2), + ); if c.guard_interval < Duration::from_secs(1) || c.guard_interval > Duration::from_secs(3600) { diff --git a/agent/src/ebpf/mod.rs b/agent/src/ebpf/mod.rs index ec8aee1ab1c..674819ff7e3 100644 --- a/agent/src/ebpf/mod.rs +++ b/agent/src/ebpf/mod.rs @@ -373,9 +373,9 @@ pub struct SK_TRACE_STATS { #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct stack_profile_data { - pub profiler_type : u8, // Profiler type, such as 1(PROFILER_TYPE_ONCPU). - pub timestamp: u64, // Timestamp of the stack trace data(unit: nanoseconds). - pub pid: u32, // User-space process-ID. + pub profiler_type: u8, // Profiler type, such as 1(PROFILER_TYPE_ONCPU). 
+ pub timestamp: u64, // Timestamp of the stack trace data(unit: nanoseconds). + pub pid: u32, // User-space process-ID. /* * Identified within the eBPF program in kernel space. * If the current is a process and not a thread this field(tid) is filled @@ -657,7 +657,22 @@ extern "C" { ) -> c_int; pub fn enable_oncpu_profiler() -> c_int; + pub fn disable_oncpu_profiler() -> c_int; + + cfg_if::cfg_if! { + if #[cfg(feature = "off_cpu")] { + pub fn set_offcpu_profiler_regex(pattern: *const c_char) -> c_int; + + pub fn enable_offcpu_profiler() -> c_int; + + pub fn disable_offcpu_profiler() -> c_int; + + pub fn set_offcpu_minblock_time( + block_time: c_uint, + ) -> c_int; + } + } } #[no_mangle] diff --git a/agent/src/ebpf_dispatcher/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher/ebpf_dispatcher.rs index 32718a86373..6428ea66a75 100644 --- a/agent/src/ebpf_dispatcher/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher/ebpf_dispatcher.rs @@ -37,10 +37,7 @@ use crate::common::proc_event::{BoxedProcEvents, EventType, ProcEvent}; use crate::common::{FlowAclListener, FlowAclListenerId, TaggedFlow}; use crate::config::handler::{CollectorAccess, EbpfAccess, EbpfConfig, LogParserAccess}; use crate::config::FlowAccess; -use crate::ebpf::{ - self, set_allow_port_bitmap, set_bypass_port_bitmap, set_profiler_cpu_aggregation, - set_profiler_regex, set_protocol_ports_bitmap, start_continuous_profiler, -}; +use crate::ebpf; use crate::exception::ExceptionHandler; use crate::flow_generator::{flow_map::Config, AppProto, FlowMap}; use crate::integration_collector::Profile; @@ -461,14 +458,14 @@ impl EbpfCollector { let white_list = &config.ebpf.kprobe_whitelist; if !white_list.port_list.is_empty() { if let Some(b) = parse_u16_range_list_to_bitmap(&white_list.port_list, false) { - set_allow_port_bitmap(b.get_raw_ptr()); + ebpf::set_allow_port_bitmap(b.get_raw_ptr()); } } let black_list = &config.ebpf.kprobe_blacklist; if !black_list.port_list.is_empty() { if let Some(b) = 
parse_u16_range_list_to_bitmap(&black_list.port_list, false) { - set_bypass_port_bitmap(b.get_raw_ptr()); + ebpf::set_bypass_port_bitmap(b.get_raw_ptr()); } } @@ -509,7 +506,9 @@ impl EbpfCollector { all_proto_map.remove(&protocol.to_lowercase()); let l7_protocol = L7Protocol::from(protocol.clone()); let ports = CString::new(port_range.as_str()).unwrap(); - if set_protocol_ports_bitmap(u8::from(l7_protocol) as i32, ports.as_ptr()) != 0 { + if ebpf::set_protocol_ports_bitmap(u8::from(l7_protocol) as i32, ports.as_ptr()) + != 0 + { warn!( "Ebpf set_protocol_ports_bitmap error: {} {}", protocol, port_range @@ -522,7 +521,9 @@ impl EbpfCollector { for protocol in all_proto_map.iter() { let l7_protocol = L7Protocol::from(protocol.clone()); let ports = CString::new(all_port.as_str()).unwrap(); - if set_protocol_ports_bitmap(u8::from(l7_protocol) as i32, ports.as_ptr()) != 0 { + if ebpf::set_protocol_ports_bitmap(u8::from(l7_protocol) as i32, ports.as_ptr()) + != 0 + { warn!( "Ebpf set_protocol_ports_bitmap error: {} {}", protocol, all_port @@ -543,14 +544,30 @@ impl EbpfCollector { return Err(Error::EbpfRunningError); } - let on_cpu_profile_config = &config.ebpf.on_cpu_profile; - if !on_cpu_profile_config.disabled { - if start_continuous_profiler( - on_cpu_profile_config.frequency as i32, - on_cpu_profile_config.java_symbol_file_max_space_limit as i32, - on_cpu_profile_config - .java_symbol_file_refresh_defer_interval - .as_secs() as i32, + let ebpf_conf = &config.ebpf; + let on_cpu = &ebpf_conf.on_cpu_profile; + let off_cpu = &ebpf_conf.off_cpu_profile; + + let profiler_enabled = + !on_cpu.disabled || (cfg!(feature = "off_cpu") && !off_cpu.disabled); /* start the continuous profiler when at least one of on-CPU / feature-gated off-CPU profiling is enabled */ + if profiler_enabled { + if !on_cpu.disabled { + ebpf::enable_oncpu_profiler(); + } else { + ebpf::disable_oncpu_profiler(); + } + + #[cfg(feature = "off_cpu")] + if !off_cpu.disabled { + ebpf::enable_offcpu_profiler(); + } else { + ebpf::disable_offcpu_profiler(); + } + + if ebpf::start_continuous_profiler( + 
on_cpu.frequency as i32, + ebpf_conf.java_symbol_file_max_space_limit as i32, + ebpf_conf.java_symbol_file_refresh_defer_interval.as_secs() as i32, Self::ebpf_on_cpu_callback, ) != 0 { @@ -558,15 +575,29 @@ impl EbpfCollector { return Err(Error::EbpfInitError); } - set_profiler_regex( - CString::new(on_cpu_profile_config.regex.as_bytes()) - .unwrap() - .as_c_str() - .as_ptr(), - ); + if !on_cpu.disabled { + ebpf::set_profiler_regex( + CString::new(on_cpu.regex.as_bytes()) + .unwrap() + .as_c_str() + .as_ptr(), + ); + + // CPUID will not be included in the aggregation of stack trace data. + ebpf::set_profiler_cpu_aggregation(on_cpu.cpu as i32); + } + + #[cfg(feature = "off_cpu")] + if !off_cpu.disabled { + ebpf::set_offcpu_profiler_regex( + CString::new(off_cpu.regex.as_bytes()) + .unwrap() + .as_c_str() + .as_ptr(), + ); - // CPUID will not be included in the aggregation of stack trace data. - set_profiler_cpu_aggregation(on_cpu_profile_config.cpu as i32); + ebpf::set_offcpu_minblock_time(off_cpu.min_block.as_micros() as u32); + } } ebpf::bpf_tracer_finish(); diff --git a/server/agent_config/config.go b/server/agent_config/config.go index 33e714cfbb7..bb3710d7513 100644 --- a/server/agent_config/config.go +++ b/server/agent_config/config.go @@ -243,30 +243,36 @@ type EbpfKprobePortlist struct { } type OnCpuProfile struct { - Disabled *bool `yaml:"disabled,omitempty"` - Frequency *int `yaml:"frequency,omitempty"` - Cpu *int `yaml:"cpu,omitempty"` - Regex *string `yaml:"regex,omitempty"` - JavaSymbolFileMaxSpaceLimit *int `yaml:"java-symbol-file-max-space-limit,omitempty"` - JavaSymbolFileRefreshDeferInterval *string `yaml:"java-symbol-file-refresh-defer-interval,omitempty"` + Disabled *bool `yaml:"disabled,omitempty"` + Frequency *int `yaml:"frequency,omitempty"` + Cpu *int `yaml:"cpu,omitempty"` + Regex *string `yaml:"regex,omitempty"` +} + +type OffCpuProfile struct { + Disabled *bool `yaml:"disabled,omitempty"` + Regex *string `yaml:"regex,omitempty"` + MinBlock 
*string `yaml:"minblock,omitempty"` } type EbpfConfig struct { - Disabled *bool `yaml:"disabled,omitempty"` - GlobalEbpfPpsThreshold *int `yaml:"global-ebpf-pps-threshold,omitempty"` - UprobeProcessNameRegexs *EbpfUprobeProcessNameRegexsConfig `yaml:"uprobe-process-name-regexs,omitempty"` - KprobeWhitelist *EbpfKprobePortlist `yaml:"kprobe-whitelist,omitempty"` - KprobeBlacklist *EbpfKprobePortlist `yaml:"kprobe-blacklist,omitempty"` - ThreadNum *int `yaml:"thread-num,omitempty"` - PerfPagesCount *int `yaml:"perf-pages-count,omitempty"` - RingSize *int `yaml:"ring-size,omitempty"` - MaxSocketEntries *int `yaml:"max-socket-entries,omitempty"` - MaxTraceEntries *int `yaml:"max-trace-entries,omitempty"` - SocketMapMaxReclaim *int `yaml:"socket-map-max-reclaim,omitempty"` - GoTracingTimeout *int `yaml:"go-tracing-timeout,omitempty"` - IOEventCollectMode *int `yaml:"io-event-collect-mode,omitempty"` - IOEventMinimalDuration *string `yaml:"io-event-minimal-duration,omitempty"` - OnCpuProfile *OnCpuProfile `yaml:"on-cpu-profile,omitempty"` + Disabled *bool `yaml:"disabled,omitempty"` + GlobalEbpfPpsThreshold *int `yaml:"global-ebpf-pps-threshold,omitempty"` + UprobeProcessNameRegexs *EbpfUprobeProcessNameRegexsConfig `yaml:"uprobe-process-name-regexs,omitempty"` + KprobeWhitelist *EbpfKprobePortlist `yaml:"kprobe-whitelist,omitempty"` + KprobeBlacklist *EbpfKprobePortlist `yaml:"kprobe-blacklist,omitempty"` + ThreadNum *int `yaml:"thread-num,omitempty"` + PerfPagesCount *int `yaml:"perf-pages-count,omitempty"` + RingSize *int `yaml:"ring-size,omitempty"` + MaxSocketEntries *int `yaml:"max-socket-entries,omitempty"` + MaxTraceEntries *int `yaml:"max-trace-entries,omitempty"` + SocketMapMaxReclaim *int `yaml:"socket-map-max-reclaim,omitempty"` + GoTracingTimeout *int `yaml:"go-tracing-timeout,omitempty"` + IOEventCollectMode *int `yaml:"io-event-collect-mode,omitempty"` + IOEventMinimalDuration *string `yaml:"io-event-minimal-duration,omitempty"` + 
JavaSymbolFileRefreshDeferInterval *string `yaml:"java-symbol-file-refresh-defer-interval,omitempty"` + OnCpuProfile *OnCpuProfile `yaml:"on-cpu-profile,omitempty"` + OffCpuProfile *OffCpuProfile `yaml:"off-cpu-profile,omitempty"` } type OsProcRegex struct {