From d5e26dd2618cf3dbac2c10594d06d75aab3a38e1 Mon Sep 17 00:00:00 2001 From: YUAN Chao <107982607+yuanchaoa@users.noreply.github.com> Date: Fri, 11 Oct 2024 09:25:13 +0800 Subject: [PATCH] fix: process matcher error (#8264) --- agent/src/config/config.rs | 12 +++- .../platform_synchronizer/linux_process.rs | 63 +++++++++---------- server/agent_config/README-CH.md | 3 +- server/agent_config/README.md | 7 ++- server/agent_config/template.yaml | 4 +- 5 files changed, 49 insertions(+), 40 deletions(-) diff --git a/agent/src/config/config.rs b/agent/src/config/config.rs index 92b6f352647..4dc243b458d 100644 --- a/agent/src/config/config.rs +++ b/agent/src/config/config.rs @@ -321,6 +321,7 @@ pub enum ProcessMatchType { ProcessName, ParentProcessName, Tag, + CmdWithArgs, } impl From<&str> for ProcessMatchType { @@ -329,6 +330,7 @@ impl From<&str> for ProcessMatchType { OS_PROC_REGEXP_MATCH_TYPE_CMD => Self::Cmd, OS_PROC_REGEXP_MATCH_TYPE_PARENT_PROC_NAME => Self::ParentProcessName, OS_PROC_REGEXP_MATCH_TYPE_TAG => Self::Tag, + OS_PROC_REGEXP_MATCH_TYPE_CMD_WITH_ARGS => Self::CmdWithArgs, _ => Self::ProcessName, } } @@ -459,10 +461,17 @@ impl ProcessMatcher { match self.match_type { ProcessMatchType::Cmd => { + if match_replace_fn(&self.match_regex, &self.action, &process_data.cmd, &replace) { + Some(process_data) + } else { + None + } + } + ProcessMatchType::CmdWithArgs => { if match_replace_fn( &self.match_regex, &self.action, - &process_data.cmd.join(" "), + &process_data.cmd_with_args.join(" "), &replace, ) { Some(process_data) @@ -3353,6 +3362,7 @@ pub const OS_PROC_REGEXP_MATCH_TYPE_CMD: &'static str = "cmdline"; pub const OS_PROC_REGEXP_MATCH_TYPE_PROC_NAME: &'static str = "process_name"; pub const OS_PROC_REGEXP_MATCH_TYPE_PARENT_PROC_NAME: &'static str = "parent_process_name"; pub const OS_PROC_REGEXP_MATCH_TYPE_TAG: &'static str = "tag"; +pub const OS_PROC_REGEXP_MATCH_TYPE_CMD_WITH_ARGS: &'static str = "cmdline_with_args"; pub const OS_PROC_REGEXP_MATCH_ACTION_ACCEPT: &'static str = "accept"; pub const OS_PROC_REGEXP_MATCH_ACTION_DROP: &'static str = "drop"; diff --git a/agent/src/platform/platform_synchronizer/linux_process.rs b/agent/src/platform/platform_synchronizer/linux_process.rs index f202cdc7ad3..be313008a88 100644 --- a/agent/src/platform/platform_synchronizer/linux_process.rs +++ b/agent/src/platform/platform_synchronizer/linux_process.rs @@ -54,7 +54,8 @@ pub struct ProcessData { pub pid: u64, pub ppid: u64, pub process_name: String, // raw process name - pub cmd: Vec, + pub cmd: String, + pub cmd_with_args: Vec, pub user_id: u32, pub user: String, pub start_time: Duration, // the process start timestamp @@ -150,39 +151,31 @@ impl TryFrom<&Process> for ProcessData { type Error = ProcError; // will not set the username fn try_from(proc: &Process) -> Result { - let (cmd, uid) = (proc.cmdline()?, proc.uid()?); - let proc_name = if cmd.len() == 0 { - return Err(ProcError::Other(format!("pid {} cmd is nil", proc.pid))); + let (cmd, cmd_with_args, uid, status) = + (proc.exe()?, proc.cmdline()?, proc.uid()?, proc.status()?); + let command = if let Some(f) = cmd.file_name() { + f.to_string_lossy().to_string() } else { - let buf = PathBuf::from(&cmd[0]); - if let Some(f) = buf.file_name() { - f.to_string_lossy().to_string() - } else { - return Err(ProcError::Other(format!("pid {} cmd parse fail", proc.pid))); - } + return Err(ProcError::Other(format!("pid {} cmd parse fail", proc.pid))); + }; + let (ppid, start_time) = if let Ok(stat) = proc.stat().as_ref() { + let z = stat.starttime().unwrap_or_default(); + (stat.ppid as u64, Duration::from_secs(z.timestamp() as u64)) + } else { + error!("pid {} get stat fail", proc.pid); + (0, Duration::ZERO) }; Ok(ProcessData { - name: proc_name.clone(), + name: status.name.clone(), pid: proc.pid as u64, - ppid: if let Ok(stat) = proc.stat().as_ref() { - stat.ppid as u64 - } else { - error!("pid {} get stat fail", proc.pid); - 0 - }, - process_name: proc_name, - cmd, + ppid, + process_name: status.name.clone(), + cmd: command, + cmd_with_args, user_id: uid, user: "".to_string(), - start_time: { - if let Ok(stat) = proc.stat() { - let z = stat.starttime().unwrap_or_default(); - Duration::from_secs(z.timestamp() as u64) - } else { - Duration::ZERO - } - }, + start_time, os_app_tags: vec![], netns_id: get_proc_netns(proc)? as u32, container_id: get_container_id(proc).unwrap_or("".to_string()), @@ -197,7 +190,7 @@ impl From<&ProcessData> for ProcessInfo { name: Some(p.name.clone()), pid: Some(p.pid), process_name: Some(p.process_name.clone()), - cmdline: Some(p.cmd.join(" ")), + cmdline: Some(p.cmd_with_args.join(" ")), user: Some(p.user.clone()), start_time: Some(u32::try_from(p.start_time.as_secs()).unwrap_or_default()), os_app_tags: { @@ -223,7 +216,7 @@ impl From<&ProcessData> for trident::ProcessInfo { name: Some(p.name.clone()), pid: Some(p.pid), process_name: Some(p.process_name.clone()), - cmdline: Some(p.cmd.join(" ")), + cmdline: Some(p.cmd_with_args.join(" ")), user: Some(p.user.clone()), start_time: Some(u32::try_from(p.start_time.as_secs()).unwrap_or_default()), os_app_tags: { @@ -558,7 +551,8 @@ mod test { pid: 999, ppid: 0, process_name: "root".into(), - cmd: vec!["root".into()], + cmd: "root".into(), + cmd_with_args: vec!["root".into()], user_id: 0, user: "u".into(), start_time: Duration::ZERO, @@ -574,7 +568,8 @@ mod test { pid: 99, ppid: 999, process_name: "parent".into(), - cmd: vec!["parent".into()], + cmd: "parent".into(), + cmd_with_args: vec!["parent".into()], user_id: 0, user: "u".into(), start_time: Duration::ZERO, @@ -590,7 +585,8 @@ mod test { pid: 9999, ppid: 99, process_name: "child".into(), - cmd: vec!["child".into()], + cmd: "child".into(), + cmd_with_args: vec!["child".into()], user_id: 0, user: "u".into(), start_time: Duration::ZERO, @@ -606,7 +602,8 @@ mod test { pid: 777, ppid: 98, process_name: "other".into(), - cmd: vec!["other".into()], + cmd: "other".into(), + cmd_with_args: vec!["other".into()], user_id: 0, user: "u".into(), start_time: Duration::ZERO, diff --git a/server/agent_config/README-CH.md b/server/agent_config/README-CH.md index c6b53e4c1da..f16ca49b3db 100644 --- a/server/agent_config/README-CH.md +++ b/server/agent_config/README-CH.md @@ -1748,7 +1748,7 @@ will be accepted (essentially will auto append `- match_regex: .*` at the end). Configuration Item: - match_regex: The regexp use for match the process, default value is `.*` - match_type: regexp match field, default value is `process_name`, options are - [process_name, cmdline, parent_process_name, tag] + [process_name, cmdline, cmdline_with_args, parent_process_name, tag] - ignore: Whether to ignore when regex match, default value is `false` - rewrite_name: The name will replace the process name or cmd use regexp replace. Default value `""` means no replacement. @@ -1837,6 +1837,7 @@ inputs: | cmdline | | | parent_process_name | | | tag | | +| cmdline_with_args | | **模式**: | Key | Value | diff --git a/server/agent_config/README.md b/server/agent_config/README.md index 922d9e0b56f..84592ecfa96 100644 --- a/server/agent_config/README.md +++ b/server/agent_config/README.md @@ -1765,7 +1765,7 @@ will be accepted (essentially will auto append `- match_regex: .*` at the end). Configuration Item: - match_regex: The regexp use for match the process, default value is `.*` - match_type: regexp match field, default value is `process_name`, options are - [process_name, cmdline, parent_process_name, tag] + [process_name, cmdline, cmdline_with_args, parent_process_name, tag] - ignore: Whether to ignore when regex match, default value is `false` - rewrite_name: The name will replace the process name or cmd use regexp replace. Default value `""` means no replacement. @@ -1854,6 +1854,7 @@ inputs: | cmdline | | | parent_process_name | | | tag | | +| cmdline_with_args | | **Schema**: | Key | Value | @@ -2327,7 +2328,7 @@ Calico: cali.* Cilium lxc.* Kube-OVN [0-9a-f]+_h$ ``` -When the `tap_interface_regex` is not configured, it indicates +When the `tap_interface_regex` is not configured, it indicates that network card traffic is not being collected #### Bond Interfaces {#inputs.cbpf.af_packet.bond_interfaces} @@ -4485,7 +4486,7 @@ inputs: **Description**: -When there are multiple deepflow-agents in the same K8s cluster, +When there are multiple deepflow-agents in the same K8s cluster, only one deepflow-agent will be enabled to collect K8s resources. #### K8s Namespace {#inputs.resources.kubernetes.kubernetes_namespace} diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml index 064f8fd7bb8..1de05a047d1 100644 --- a/server/agent_config/template.yaml +++ b/server/agent_config/template.yaml @@ -1119,7 +1119,7 @@ inputs: # Configuration Item: # - match_regex: The regexp use for match the process, default value is `.*` # - match_type: regexp match field, default value is `process_name`, options are - # [process_name, cmdline, parent_process_name, tag] + # [process_name, cmdline, cmdline_with_args, parent_process_name, tag] # - ignore: Whether to ignore when regex match, default value is `false` # - rewrite_name: The name will replace the process name or cmd use regexp replace. # Default value `""` means no replacement. @@ -1176,7 +1176,7 @@ inputs: # ch: 匹配类型 # unit: # range: [] - # enum_options: [process_name, cmdline, parent_process_name, tag] + # enum_options: [process_name, cmdline, cmdline_with_args, parent_process_name, tag] # modification: agent_restart # ee_feature: false # description: