From e1be30efd924a6611daa3c2342d435e78d7a95ad Mon Sep 17 00:00:00 2001 From: Vladimir Zhuk <52405651+vladimir-dd@users.noreply.github.com> Date: Thu, 26 Sep 2024 21:57:50 +0200 Subject: [PATCH] fix(datadog grok): enable the DOTALL mode by default (#1022) * fix(datadog grok): support multiline logs * fix test * add changelog * correct comments * Improve the changelog comment. Co-authored-by: Bruce Guenter * simplify pattern string concatenation * cargo fmt --------- Co-authored-by: Bruce Guenter Co-authored-by: Tess Neau --- changelog.d/1022.breaking.md | 2 ++ src/datadog/grok/parse_grok.rs | 29 ++++++++++++++-------------- src/datadog/grok/parse_grok_rules.rs | 20 +++++++++++-------- src/stdlib/parse_groks.rs | 2 +- 4 files changed, 29 insertions(+), 24 deletions(-) create mode 100644 changelog.d/1022.breaking.md diff --git a/changelog.d/1022.breaking.md b/changelog.d/1022.breaking.md new file mode 100644 index 0000000000..c696601ace --- /dev/null +++ b/changelog.d/1022.breaking.md @@ -0,0 +1,2 @@ +The multi-line mode of the `parse_groks` VRL function is now enabled by default. +Use the `(?-m)` modifier to disable this behaviour. \ No newline at end of file diff --git a/src/datadog/grok/parse_grok.rs b/src/datadog/grok/parse_grok.rs index 1b427e1f4a..a9a045296f 100644 --- a/src/datadog/grok/parse_grok.rs +++ b/src/datadog/grok/parse_grok.rs @@ -52,7 +52,7 @@ fn apply_grok_rule(source: &str, grok_rule: &GrokRule) -> Result { if let Some(ref mut v) = value { value = match apply_filter(v, filter) { Ok(Value::Null) => None, - Ok(v ) if v.is_object() => Some(parse_keys_as_path(v)), + Ok(v) if v.is_object() => Some(parse_keys_as_path(v)), Ok(v) => Some(v), Err(error) => { warn!(message = "Error applying filter", field = %field, filter = %filter, %error); @@ -290,7 +290,7 @@ mod tests { parse_grok_rules(&["%{unknown}".to_string()], BTreeMap::new()) .unwrap_err() .to_string(), - r#"failed to parse grok expression '\A%{unknown}\z': The given pattern definition name "unknown" could not be found in the definition map"# + r#"failed to parse grok expression '(?m)\A%{unknown}\z': The given pattern definition name "unknown" could not be found in the definition map"# ); } @@ -660,7 +660,7 @@ mod tests { Ok(Value::Array(vec!["1".into(), "2".into()])), ), ( - r#"(?m)%{data:field:array("[]","\\n")}"#, + r#"%{data:field:array("[]","\\n")}"#, "[1\n2]", Ok(Value::Array(vec!["1".into(), "2".into()])), ), @@ -1100,34 +1100,33 @@ mod tests { #[test] fn parses_with_new_lines() { test_full_grok(vec![ + // the DOTALL mode is enabled by default ( - "(?m)%{data:field}", + "%{data:field}", "a\nb", Ok(Value::from(btreemap! { "field" => "a\nb" })), ), + // (?s) enables the DOTALL mode ( - "(?m)%{data:line1}\n%{data:line2}", + "(?s)%{data:field}", "a\nb", Ok(Value::from(btreemap! { - "line1" => "a", - "line2" => "b" + "field" => "a\nb" })), ), - // no DOTALL mode by default - ("%{data:field}", "a\nb", Err(Error::NoMatch)), - // (?s) is not supported by the underlying regex engine(onig) - it uses (?m) instead, so we convert it silently ( - "(?s)%{data:field}", + "%{data:line1}\n%{data:line2}", "a\nb", Ok(Value::from(btreemap! { - "field" => "a\nb" + "line1" => "a", + "line2" => "b" })), ), - // disable DOTALL mode with (?-s) + // disable the DOTALL mode with (?-s) ("(?s)(?-s)%{data:field}", "a\nb", Err(Error::NoMatch)), - // disable and then enable DOTALL mode + // disable and then enable the DOTALL mode ( "(?-s)%{data:field} (?s)%{data:field}", "abc d\ne", @@ -1185,7 +1184,7 @@ mod tests { #[test] fn supports_xml_filter() { test_grok_pattern(vec![( - "(?s)%{data:field:xml}", // (?s) enables DOTALL mode to include newlines + "%{data:field:xml}", r#" Harry Potter J K. Rowling diff --git a/src/datadog/grok/parse_grok_rules.rs b/src/datadog/grok/parse_grok_rules.rs index 25f95bebee..b4bea5d3b4 100644 --- a/src/datadog/grok/parse_grok_rules.rs +++ b/src/datadog/grok/parse_grok_rules.rs @@ -175,14 +175,18 @@ fn parse_pattern( grok: &mut Grok, ) -> Result { parse_grok_rule(pattern, context)?; - let mut pattern = String::new(); - // \A, \z - parses from the beginning to the end of string, not line(until \n) - pattern.push_str(r"\A"); - pattern.push_str(&context.regex); - pattern.push_str(r"\z"); - - // our regex engine(onig) uses (?m) mode modifier instead of (?s) to make the dot match all characters - pattern = pattern.replace("(?s)", "(?m)").replace("(?-s)", "(?-m)"); + let pattern = [ + // In Oniguruma the (?m) modifier is used to enable the DOTALL mode(dot includes newlines), + // as opposed to the (?s) modifier in other regex flavors. + // \A, \z - parses from the beginning to the end of string, not line(until \n) + r"(?m)\A", // (?m) enables the DOTALL mode by default + &context + .regex + .replace("(?s)", "(?m)") + .replace("(?-s)", "(?-m)"), + r"\z", + ] + .concat(); // compile pattern let pattern = grok diff --git a/src/stdlib/parse_groks.rs b/src/stdlib/parse_groks.rs index b7578aff40..f0b2f8f952 100644 --- a/src/stdlib/parse_groks.rs +++ b/src/stdlib/parse_groks.rs @@ -264,7 +264,7 @@ mod test { invalid_grok { args: func_args![ value: "foo", patterns: vec!["%{NOG}"]], - want: Err("failed to parse grok expression '\\A%{NOG}\\z': The given pattern definition name \"NOG\" could not be found in the definition map"), + want: Err("failed to parse grok expression '(?m)\\A%{NOG}\\z': The given pattern definition name \"NOG\" could not be found in the definition map"), tdef: TypeDef::object(Collection::any()).fallible(), }