Skip to content

Commit

Permalink
fix(datadog grok): enable the DOTALL mode by default (#1022)
Browse files Browse the repository at this point in the history
* fix(datadog grok): support multiline logs

* fix test

* add changelog

* correct comments

* Improve the changelog comment.

Co-authored-by: Bruce Guenter <[email protected]>

* simplify pattern string concatenation

* cargo fmt

---------

Co-authored-by: Bruce Guenter <[email protected]>
Co-authored-by: Tess Neau <[email protected]>
  • Loading branch information
3 people authored Sep 26, 2024
1 parent 30296eb commit e1be30e
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 24 deletions.
2 changes: 2 additions & 0 deletions changelog.d/1022.breaking.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The DOTALL mode (called multi-line mode in Oniguruma, where `.` also matches newlines) of the `parse_groks` VRL function is now enabled by default.
Use the `(?-m)` or `(?-s)` modifier to disable this behaviour.
29 changes: 14 additions & 15 deletions src/datadog/grok/parse_grok.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ fn apply_grok_rule(source: &str, grok_rule: &GrokRule) -> Result<Value, Error> {
if let Some(ref mut v) = value {
value = match apply_filter(v, filter) {
Ok(Value::Null) => None,
Ok(v ) if v.is_object() => Some(parse_keys_as_path(v)),
Ok(v) if v.is_object() => Some(parse_keys_as_path(v)),
Ok(v) => Some(v),
Err(error) => {
warn!(message = "Error applying filter", field = %field, filter = %filter, %error);
Expand Down Expand Up @@ -290,7 +290,7 @@ mod tests {
parse_grok_rules(&["%{unknown}".to_string()], BTreeMap::new())
.unwrap_err()
.to_string(),
r#"failed to parse grok expression '\A%{unknown}\z': The given pattern definition name "unknown" could not be found in the definition map"#
r#"failed to parse grok expression '(?m)\A%{unknown}\z': The given pattern definition name "unknown" could not be found in the definition map"#
);
}

Expand Down Expand Up @@ -660,7 +660,7 @@ mod tests {
Ok(Value::Array(vec!["1".into(), "2".into()])),
),
(
r#"(?m)%{data:field:array("[]","\\n")}"#,
r#"%{data:field:array("[]","\\n")}"#,
"[1\n2]",
Ok(Value::Array(vec!["1".into(), "2".into()])),
),
Expand Down Expand Up @@ -1100,34 +1100,33 @@ mod tests {
#[test]
fn parses_with_new_lines() {
test_full_grok(vec![
// the DOTALL mode is enabled by default
(
"(?m)%{data:field}",
"%{data:field}",
"a\nb",
Ok(Value::from(btreemap! {
"field" => "a\nb"
})),
),
// (?s) enables the DOTALL mode
(
"(?m)%{data:line1}\n%{data:line2}",
"(?s)%{data:field}",
"a\nb",
Ok(Value::from(btreemap! {
"line1" => "a",
"line2" => "b"
"field" => "a\nb"
})),
),
// no DOTALL mode by default
("%{data:field}", "a\nb", Err(Error::NoMatch)),
// (?s) is not supported by the underlying regex engine(onig) - it uses (?m) instead, so we convert it silently
(
"(?s)%{data:field}",
"%{data:line1}\n%{data:line2}",
"a\nb",
Ok(Value::from(btreemap! {
"field" => "a\nb"
"line1" => "a",
"line2" => "b"
})),
),
// disable DOTALL mode with (?-s)
// disable the DOTALL mode with (?-s)
("(?s)(?-s)%{data:field}", "a\nb", Err(Error::NoMatch)),
// disable and then enable DOTALL mode
// disable and then enable the DOTALL mode
(
"(?-s)%{data:field} (?s)%{data:field}",
"abc d\ne",
Expand Down Expand Up @@ -1185,7 +1184,7 @@ mod tests {
#[test]
fn supports_xml_filter() {
test_grok_pattern(vec![(
"(?s)%{data:field:xml}", // (?s) enables DOTALL mode to include newlines
"%{data:field:xml}",
r#"<book category="CHILDREN">
<title lang="en">Harry Potter</title>
<author>J K. Rowling</author>
Expand Down
20 changes: 12 additions & 8 deletions src/datadog/grok/parse_grok_rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,18 @@ fn parse_pattern(
grok: &mut Grok,
) -> Result<GrokRule, Error> {
parse_grok_rule(pattern, context)?;
let mut pattern = String::new();
// \A, \z - parses from the beginning to the end of string, not line(until \n)
pattern.push_str(r"\A");
pattern.push_str(&context.regex);
pattern.push_str(r"\z");

// our regex engine(onig) uses (?m) mode modifier instead of (?s) to make the dot match all characters
pattern = pattern.replace("(?s)", "(?m)").replace("(?-s)", "(?-m)");
let pattern = [
// In Oniguruma the (?m) modifier is used to enable the DOTALL mode(dot includes newlines),
// as opposed to the (?s) modifier in other regex flavors.
// \A, \z - parses from the beginning to the end of string, not line(until \n)
r"(?m)\A", // (?m) enables the DOTALL mode by default
&context
.regex
.replace("(?s)", "(?m)")
.replace("(?-s)", "(?-m)"),
r"\z",
]
.concat();

// compile pattern
let pattern = grok
Expand Down
2 changes: 1 addition & 1 deletion src/stdlib/parse_groks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ mod test {
invalid_grok {
args: func_args![ value: "foo",
patterns: vec!["%{NOG}"]],
want: Err("failed to parse grok expression '\\A%{NOG}\\z': The given pattern definition name \"NOG\" could not be found in the definition map"),
want: Err("failed to parse grok expression '(?m)\\A%{NOG}\\z': The given pattern definition name \"NOG\" could not be found in the definition map"),
tdef: TypeDef::object(Collection::any()).fallible(),
}

Expand Down

0 comments on commit e1be30e

Please sign in to comment.