Skip to content

Commit

Permalink
Add log classification rule to evaluate by line number
Browse files Browse the repository at this point in the history
Adds a log classification rule, with tests, that classifies log lines based on their position. A follow-up PR will link this to Rockset.
  • Loading branch information
PaliC committed Oct 19, 2023
1 parent d7b809c commit 883e4a7
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 26 deletions.
12 changes: 11 additions & 1 deletion aws/lambda/log-classifier/src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,20 @@ pub fn evaluate_rule(rule: &Rule, log: &Log) -> Option<Match> {

/// Evaluate every rule in `ruleset` against `log` and select a winner by rule priority.
///
/// Each rule is passed to `evaluate_rule` through `par_iter`; rules that yield
/// `None` contribute nothing. Returns the match whose rule has the greatest
/// `priority`, or `None` if no rule matched.
pub fn evaluate_ruleset(ruleset: &RuleSet, log: &Log) -> Option<Match> {
pub fn evaluate_ruleset_by_priority(ruleset: &RuleSet, log: &Log) -> Option<Match> {
ruleset
.rules
.par_iter()
.flat_map(|rule| evaluate_rule(rule, log))
.max_by(|a, b| a.rule.priority.cmp(&b.rule.priority))
}

/// Run every rule in `ruleset` against `log` and keep the match located
/// furthest down the log.
///
/// Returns the match with the greatest `line_number`, or `None` when no rule
/// produced a match.
pub fn evaluate_ruleset_by_position(ruleset: &RuleSet, log: &Log) -> Option<Match> {
    ruleset
        .rules
        .par_iter()
        // Rules that do not match yield `None` and are dropped here.
        .filter_map(|candidate| evaluate_rule(candidate, log))
        .max_by_key(|found| found.line_number)
}
96 changes: 71 additions & 25 deletions aws/lambda/log-classifier/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use anyhow::Result;
use std::time::Instant;
use tracing::info;

use log_classifier::engine::evaluate_ruleset;
use log_classifier::engine::evaluate_ruleset_by_position;
use log_classifier::engine::evaluate_ruleset_by_priority;
use log_classifier::log::Log;
use log_classifier::network::{
download_log, get_dynamo_client, get_s3_client, upload_classification_dynamo,
Expand Down Expand Up @@ -37,7 +38,7 @@ async fn handle(
// Run the matching
let start = Instant::now();
let ruleset = RuleSet::new_from_config();
let maybe_match = evaluate_ruleset(&ruleset, &log);
let maybe_match = evaluate_ruleset_by_priority(&ruleset, &log);
info!("evaluate: {:?}", start.elapsed());

match maybe_match {
Expand Down Expand Up @@ -126,9 +127,12 @@ mod test {
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 1);
assert_eq!(match_.rule.name, "foo");
let match_position_ = evaluate_ruleset_by_position(&ruleset, &log).unwrap();
assert_eq!(match_position_.line_number, 1);
assert_eq!(match_position_.rule.name, "foo");
}

#[test]
Expand All @@ -142,9 +146,12 @@ mod test {
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 1);
assert_eq!(match_.rule.name, "foo");
let match_position_ = evaluate_ruleset_by_position(&ruleset, &log).unwrap();
assert_eq!(match_position_.line_number, 1);
assert_eq!(match_position_.rule.name, "foo");
}

#[test]
Expand All @@ -159,7 +166,7 @@ mod test {
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 2);
assert_eq!(match_.rule.name, "higher priority");
}
Expand All @@ -176,24 +183,10 @@ mod test {
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log);
let match_ = evaluate_ruleset_by_priority(&ruleset, &log);
assert!(match_.is_none());
}

#[test]
fn match_before_ignore() {
let mut ruleset = RuleSet::new();
ruleset.add("test", r"^test");
let log = Log::new(
"\
testt\n\
=================== sccache compilation log ===================\n\
=========== If your build fails, please take a look at the log above for possible reasons ===========\n\
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 1);
let match_position_ = evaluate_ruleset_by_position(&ruleset, &log);
assert!(match_position_.is_none());
}

#[test]
Expand All @@ -208,8 +201,10 @@ mod test {
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 3);
let match_position_ = evaluate_ruleset_by_position(&ruleset, &log).unwrap();
assert_eq!(match_position_.line_number, 3);
}

#[test]
Expand All @@ -223,7 +218,7 @@ mod test {
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 2);
}

Expand All @@ -249,7 +244,7 @@ mod test {
"
.into(),
);
let match_ = evaluate_ruleset(&ruleset, &log).unwrap();
let match_ = evaluate_ruleset_by_priority(&ruleset, &log).unwrap();
assert_eq!(match_.line_number, 4);

let match_json = SerializedMatch::new(&match_, &log, 12);
Expand All @@ -265,4 +260,55 @@ mod test {
// let foo = handle(12421522599, "pytorch/vision", ShouldWriteDynamo(false)).await;
// panic!("{:#?}", foo);
// }
/// Smoke test for `evaluate_ruleset_by_position` with a single rule.
///
/// Both log lines begin with `test`; the assertion expects the reported match
/// to be on line 2.
#[test]
fn test_evaluate_ruleset_by_position_smoke_test() {
    let only_rule = Rule {
        name: "test".into(),
        pattern: r"^test".parse().unwrap(),
        priority: 100,
    };
    let ruleset = RuleSet {
        rules: vec![only_rule],
    };
    let log = Log::new("testt\ntest foo\n".into());

    let found = evaluate_ruleset_by_position(&ruleset, &log).unwrap();

    assert_eq!(found.line_number, 2);
}

/// Checks that position, not priority, decides the result of
/// `evaluate_ruleset_by_position`.
///
/// The `foo` rule carries the larger priority value, yet the assertions expect
/// the `test` rule's match on line 2 to be returned.
#[test]
fn test_evaluate_ruleset_by_position_later_line_wins() {
    let test_rule = Rule {
        name: "test".into(),
        pattern: r"^test".parse().unwrap(),
        priority: 100,
    };
    let foo_rule = Rule {
        name: "foo".into(),
        pattern: r"^foo".parse().unwrap(),
        priority: 1000,
    };
    let ruleset = RuleSet {
        rules: vec![test_rule, foo_rule],
    };
    let log = Log::new("test foo\ntestt\n".into());

    let found = evaluate_ruleset_by_position(&ruleset, &log).unwrap();

    assert_eq!(found.line_number, 2);
    assert_eq!(found.rule.name, "test");
}
}

0 comments on commit 883e4a7

Please sign in to comment.