diff --git a/gene/benches/engine_benchmark.rs b/gene/benches/engine_benchmark.rs index 47eb9b6..6622001 100644 --- a/gene/benches/engine_benchmark.rs +++ b/gene/benches/engine_benchmark.rs @@ -5,7 +5,7 @@ use std::{ }; use criterion::{criterion_group, criterion_main, Criterion, Throughput}; -use gene::{Engine, Event, FieldGetter, FieldValue, Rule}; +use gene::{Compiler, Engine, Event, FieldGetter, FieldValue, Rule}; use gene_derive::{Event, FieldGetter}; use libflate::gzip; use serde::{Deserialize, Deserializer}; @@ -62,18 +62,21 @@ fn bench_rust_events(c: &mut Criterion) { .map(|e| e.unwrap()) .collect::>(); - let mut engine = Engine::new(); + let mut compiler = Compiler::new(); let mut group = c.benchmark_group("scan-throughput"); group.sample_size(20); group.throughput(Throughput::Bytes(all.len() as u64)); + for i in 0..1 { for r in it.iter() { let mut r = r.clone(); r.name = format!("{}.{}", r.name, i); - engine.insert_rule(r).unwrap(); + compiler.load(r).unwrap(); } + let mut engine = Engine::try_from(compiler.clone()).unwrap(); + group.bench_function(&format!("scan-with-{}-rules", engine.rules_count()), |b| { b.iter(|| { for e in events.iter() { diff --git a/gene/src/compiler.rs b/gene/src/compiler.rs new file mode 100644 index 0000000..f43a592 --- /dev/null +++ b/gene/src/compiler.rs @@ -0,0 +1,316 @@ +use std::{ + collections::{HashMap, HashSet}, + io, +}; + +use serde::Deserialize; +use thiserror::Error; + +use crate::{ + rules::{self, CompiledRule}, + template, Rule, Templates, +}; + +#[derive(Error, Debug)] +pub enum Error { + #[error("duplicate rule={0}")] + DuplicateRule(String), + #[error("unknown rule dependency in rule={0}")] + UnknownRuleDependency(String), + #[error("rule error: {0}")] + Rule(#[from] rules::Error), + #[error("template: error {0}")] + Template(#[from] template::Error), + #[error("yaml error: {0}")] + Serde(#[from] serde_yaml::Error), +} + +/// Rule compiler +#[derive(Default, Clone)] +pub struct Compiler { + templates: Templates, + names: HashMap, + loaded: HashSet, + rules: Vec, + pub(crate) compiled: Vec, +} + +impl Compiler { + /// Creates a new `Compiler` + pub fn new() -> Self { + Self::default() + } + + /// Loads templates from a reader implementing [io::Read] trait. The data within the + /// reader must a `HashMap` YAML formatted. + #[inline] + pub fn load_templates_from_reader(&mut self, r: R) -> Result<(), Error> { + for document in serde_yaml::Deserializer::from_reader(r) { + self.load_templates(Templates::deserialize(document)?)?; + } + Ok(()) + } + + /// Wrapper around [Compiler::load_templates_from_reader] loading a rules + /// from a struct implementing [AsRef] + pub fn load_templates_from_str>(&mut self, s: S) -> Result<(), Error> { + let c = io::Cursor::new(s.as_ref()); + self.load_templates_from_reader(c) + } + + /// Loads a set of string [Templates] into the compiler so that it + /// can replace the appropriate strings into the rules before compiling them + pub fn load_templates(&mut self, t: Templates) -> Result<(), Error> { + self.templates.extend(&t)?; + Ok(()) + } + + /// Load a rule from a reader implementing [io::Read] trait. The data must be formatted + /// in YAML following the YAML documents format otherwise this function will fail. + #[inline] + pub fn load_rules_from_reader(&mut self, r: R) -> Result<(), Error> { + for document in serde_yaml::Deserializer::from_reader(r) { + self.load(Rule::deserialize(document)?)?; + } + Ok(()) + } + + /// Wrapper around [Compiler::load_rules_from_reader] loading a rules + /// from a struct implementing [AsRef] + pub fn load_rules_from_str>(&mut self, s: S) -> Result<(), Error> { + let c = io::Cursor::new(s.as_ref()); + self.load_rules_from_reader(c) + } + + /// Load a rule into the `Compiler`. + #[inline] + pub fn load(&mut self, mut r: Rule) -> Result<(), Error> { + if r.is_disabled() { + return Ok(()); + } + + if self.loaded.contains(&r.name) { + return Err(Error::DuplicateRule(r.name)); + } + + // we replace template strings used in rule + self.templates.replace(&mut r); + + self.loaded.insert(r.name.clone()); + + self.rules.push(r); + + Ok(()) + } + + /// Compile all the [Rule] loaded via [Compiler::load] which + /// have not been compiled yet. + #[inline] + pub fn compile(&mut self) -> Result<(), Error> { + // no need to do the job again + if self.is_ready() { + return Ok(()); + } + + // we must compile in the order of insertion to check + // for dependencies + for (i, r) in self.rules.iter().enumerate() { + // we do not re-compile rules already compiled + if self.names.contains_key(&r.name) { + continue; + } + + let compiled: CompiledRule = r.clone().try_into()?; + + // We verify that all rules we depend on are known. + // The fact that rule dependencies must be known makes + // circular references impossible + for dep in compiled.depends.iter() { + self.names + .get(dep) + .ok_or(Error::UnknownRuleDependency(dep.clone()))?; + } + + // we need to be sure nothing can fail beyond this point not + // to create inconsistencies in compiled and sources members + + // this is the index the rule is going to be inserted at + self.names.insert(compiled.name.clone(), i); + self.compiled.push(compiled); + } + + Ok(()) + } + + /// Returns whether compiler is ready (i.e. all the rules have been compiled) + #[inline(always)] + fn is_ready(&self) -> bool { + self.rules.len() == self.compiled.len() + } + + /// Retrieves the rules loaded in the compiler after all of them have been checked + /// against potential compilation errors. + pub fn rules(&mut self) -> Result<&Vec, Error> { + // we need to re-compile rules as some are missing + if !self.is_ready() { + self.compile()?; + } + Ok(&self.rules) + } + + /// Retrieves all compiled rules + pub fn compiled(&mut self) -> Result<&Vec, Error> { + // we need to re-compile rules as some are missing + if !self.is_ready() { + self.compile()?; + } + Ok(&self.compiled) + } +} + +#[cfg(test)] +mod test { + + use super::*; + + #[test] + fn test_load_from_str() { + let mut c = Compiler::new(); + + c.load_rules_from_str( + r#" +name: test +"#, + ) + .unwrap(); + + assert_eq!(c.rules.len(), 1); + } + + #[test] + fn test_load_duplicate_rule() { + let mut c = Compiler::new(); + + let res = c.load_rules_from_str( + r#" +--- +name: test + +--- +name: test +"#, + ); + + assert!(matches!(res, Err(Error::DuplicateRule(_)))); + } + + #[test] + fn test_load_rule_unk_dep() { + let mut c = Compiler::new(); + + c.load_rules_from_str( + r#" +name: test +matches: + $d: rule(unknown.dep) +condition: any of them +"#, + ) + .unwrap(); + + // Unknown RuleDependency is checked at compile time + assert!(matches!(c.compile(), Err(Error::UnknownRuleDependency(_)))); + } + + #[test] + fn test_load_templates() { + let mut c = Compiler::new(); + + c.load_templates_from_str( + r#" +crazy_re: '(this|is|some|re|template)' +str_template: hello world template +"#, + ) + .unwrap(); + + assert_eq!(c.templates.len(), 2) + } + + #[test] + fn test_load_dup_templates() { + let mut c = Compiler::new(); + + let res = c.load_templates_from_str( + r#" +str_template: hello world template +str_template: duplicate +"#, + ); + + // when the duplicate is within the same file this is going + // to be an error raised by the deserialize that doesn't allow it + assert!(matches!(res, Err(Error::Serde(_)))); + + let res = c.load_templates_from_str( + r#" +str_template: hello world template +--- +str_template: duplicate +"#, + ); + + // when the duplicate is in different yaml documents it + // should raise a template error + assert!(matches!(res, Err(Error::Template(_)))); + } + + #[test] + fn test_templated_rule() { + let mut c = Compiler::new(); + + c.load_templates_from_str( + r#" +tpl_string: hello world template +"#, + ) + .unwrap(); + + c.load_rules_from_str( + r#" +name: test +matches: + $m: .data.path == '{{tpl_string}}' +"#, + ) + .unwrap(); + + assert_eq!( + c.rules() + .unwrap() + .first() + .unwrap() + .matches + .as_ref() + .unwrap() + .get("$m") + .unwrap(), + &String::from(".data.path == 'hello world template'") + ); + } + + #[test] + fn test_rules_order() { + let mut c = Compiler::new(); + + for i in 0..1000 { + c.load_rules_from_str(format!("name: rule.{i}")).unwrap() + } + + c.compile().unwrap(); + + for i in 0..1000 { + assert_eq!(c.rules[i].name, c.compiled[i].name); + } + } +} diff --git a/gene/src/engine.rs b/gene/src/engine.rs index 18d7345..80604fd 100644 --- a/gene/src/engine.rs +++ b/gene/src/engine.rs @@ -1,20 +1,18 @@ -use std::{ - collections::{BTreeMap, HashMap, HashSet}, - io, -}; +use std::collections::{BTreeMap, HashMap, HashSet}; use serde::{Deserialize, Serialize}; use thiserror::Error; use crate::{ + compiler, rules::{self, bound_severity, CompiledRule}, - Event, FieldValue, Rule, + Compiler, Event, FieldValue, }; use crate::FieldGetter; use gene_derive::FieldGetter; -/// structure representing the result of an [Event] scanned by the +/// Structure representing the result of an [Event] scanned by the /// [Engine]. It aggregates information about the rules matching a /// given event as well as some meta data about it (tags, attack ids ...). /// A severity score (sum of all matching rules severity bounded to [MAX_SEVERITY](rules::MAX_SEVERITY)) is also part of a `ScanResult`. @@ -127,12 +125,6 @@ impl ScanResult { #[derive(Debug, Error)] pub enum Error { - #[error("duplicate rule={0}")] - DuplicateRule(String), - #[error("unknown rule dependency in rule={0}")] - UnknownRuleDependency(String), - #[error("{0}")] - Deserialize(#[from] serde_yaml::Error), #[error("{0}")] Rule(#[from] rules::Error), } @@ -145,7 +137,7 @@ pub enum Error { /// /// ``` /// use gene_derive::{Event, FieldGetter}; -/// use gene::{Engine, Event,FieldGetter,FieldValue}; +/// use gene::{Compiler, Engine, Event,FieldGetter,FieldValue}; /// use std::borrow::Cow; /// /// #[derive(FieldGetter)] @@ -177,8 +169,8 @@ pub enum Error { /// some_gen: 3.14, /// }; /// -/// let mut e = Engine::new(); -/// e.load_rules_yaml_str(r#" +/// let mut c = Compiler::new(); +/// c.load_rules_from_str(r#" /// --- /// name: toast.it /// match-on: @@ -197,7 +189,7 @@ pub enum Error { /// $b: .data.b <= '42' /// condition: $n and $pi and $a and $b /// ..."#).unwrap(); -/// +/// let mut e = Engine::try_from(c).unwrap(); /// let scan_res = e.scan(&event).unwrap().unwrap(); /// println!("{:#?}", scan_res); /// @@ -221,6 +213,19 @@ pub struct Engine { deps_cache: HashMap>, } +impl TryFrom for Engine { + type Error = compiler::Error; + fn try_from(mut c: Compiler) -> Result { + let mut e = Self::default(); + // we must be sure rules have been compiled + c.compile()?; + for r in c.compiled { + e.insert_compiled(r); + } + Ok(e) + } +} + impl Engine { /// creates a new event scanning engine pub fn new() -> Self { @@ -229,33 +234,14 @@ impl Engine { } } - /// insert a rule into the engine - #[inline] - pub fn insert_rule(&mut self, r: Rule) -> Result<(), Error> { - if r.is_disabled() { - return Ok(()); - } - - if self.names.contains_key(&r.name) { - return Err(Error::DuplicateRule(r.name)); - } - // we need to be sure nothing can fail beyound this point - let compiled: CompiledRule = r.try_into()?; - let has_deps = !compiled.depends.is_empty(); - - // We verify that all rules we depend on are known. - // The fact that rule dependencies must be known makes - // circular references impossible - for dep in compiled.depends.iter() { - self.names - .get(dep) - .ok_or(Error::UnknownRuleDependency(dep.clone()))?; - } + #[inline(always)] + pub(crate) fn insert_compiled(&mut self, r: CompiledRule) { + let has_deps = !r.depends.is_empty(); // this is the index the rule is going to be inserted at let rule_idx = self.rules.len(); - self.names.insert(compiled.name.clone(), rule_idx); - self.rules.push(compiled); + self.names.insert(r.name.clone(), rule_idx); + self.rules.push(r); // since we know all the dependent rules are there, we can cache // the list of dependencies and we never need to compute it again @@ -266,24 +252,6 @@ impl Engine { // cache becomes outdated self.rules_cache.clear(); - - Ok(()) - } - - /// load rule(s) defined in a string YAML documents, into the engine - pub fn load_rules_yaml_str>(&mut self, s: S) -> Result<(), Error> { - for document in serde_yaml::Deserializer::from_str(s.as_ref()) { - self.insert_rule(Rule::deserialize(document)?)?; - } - Ok(()) - } - - /// loads rules from a [io::Read] containing YAML serialized data - pub fn load_rules_yaml_reader(&mut self, r: R) -> Result<(), Error> { - for document in serde_yaml::Deserializer::from_reader(r) { - self.insert_rule(Rule::deserialize(document)?)?; - } - Ok(()) } #[inline(always)] @@ -434,13 +402,6 @@ impl Engine { mod test { use super::*; - macro_rules! rule { - ($rule: literal) => {{ - let d: Rule = serde_yaml::from_str($rule).unwrap(); - d - }}; - } - macro_rules! fake_event { ($name:tt, id=$id:literal, source=$source:literal, $(($path:literal, $value:expr)),*) => { struct $name {} @@ -474,19 +435,20 @@ mod test { #[test] fn test_basic_match_scan() { - let mut e = Engine::new(); - let r = rule!( + let mut c = Compiler::new(); + + c.load_rules_from_str( r#" ---- name: test matches: $a: .ip ~= "^8\.8\." condition: $a actions: ["do_something"] -..."# - ); +"#, + ) + .unwrap(); - e.insert_rule(r).unwrap(); + let mut e = Engine::try_from(c).unwrap(); fake_event!(Dummy, id = 1, source = "test", (".ip", "8.8.4.4")); let sr = e.scan(&Dummy {}).unwrap().unwrap(); assert!(sr.rules.contains("test")); @@ -498,20 +460,19 @@ actions: ["do_something"] #[test] fn test_basic_filter_scan() { - let mut e = Engine::new(); - let r = rule!( + let mut c = Compiler::new(); + c.load_rules_from_str( r#" ---- name: test type: filter matches: $a: .ip ~= "^8\.8\." condition: $a -actions: ["do_something"] -..."# - ); +actions: ["do_something"]"#, + ) + .unwrap(); - e.insert_rule(r).unwrap(); + let mut e = Engine::try_from(c).unwrap(); fake_event!(Dummy, id = 1, source = "test", (".ip", "8.8.4.4")); let sr = e.scan(&Dummy {}).unwrap().unwrap(); // filter matches should not be put in matches @@ -527,19 +488,19 @@ actions: ["do_something"] fn test_include_all_empty_filter() { // test that we must take all events when nothing is // included / excluded - let mut e = Engine::new(); - let r = rule!( + let mut c = Compiler::new(); + c.load_rules_from_str( r#" ---- name: test type: filter match-on: events: test: [] -..."# - ); +"#, + ) + .unwrap(); - e.insert_rule(r).unwrap(); + let mut e = Engine::try_from(c).unwrap(); fake_event!(IpEvt, id = 1, source = "test", (".ip", "8.8.4.4")); e.scan(&IpEvt {}).unwrap().unwrap(); @@ -550,19 +511,20 @@ match-on: #[test] fn test_include_filter() { // test that only events included must be included - let mut e = Engine::new(); - let r = rule!( + let mut c = Compiler::new(); + c.load_rules_from_str( r#" ---- name: test type: filter match-on: events: test: [ 2 ] -..."# - ); +"#, + ) + .unwrap(); + + let mut e = Engine::try_from(c).unwrap(); - e.insert_rule(r).unwrap(); fake_event!(IpEvt, id = 1, source = "test", (".ip", "8.8.4.4")); // not explicitly included so it should not be assert_eq!(e.scan(&IpEvt {}).unwrap(), None); @@ -574,19 +536,19 @@ match-on: #[test] fn test_exclude_filter() { // test that only stuff excluded must be excluded - let mut e = Engine::new(); - let r = rule!( + let mut c = Compiler::new(); + c.load_rules_from_str( r#" ---- name: test type: filter match-on: events: test: [ -1 ] -..."# - ); +"#, + ) + .unwrap(); - e.insert_rule(r).unwrap(); + let mut e = Engine::try_from(c).unwrap(); fake_event!(IpEvt, id = 1, source = "test", (".ip", "8.8.4.4")); assert_eq!(e.scan(&IpEvt {}).unwrap(), None); @@ -602,19 +564,20 @@ match-on: fn test_mix_include_exclude_filter() { // test that when include and exclude filters are // specified we take only events in those - let mut e = Engine::new(); - let r = rule!( + let mut c = Compiler::new(); + c.load_rules_from_str( r#" ---- name: test type: filter match-on: events: test: [ -1, 2 ] -..."# - ); +"#, + ) + .unwrap(); + + let mut e = Engine::try_from(c).unwrap(); - e.insert_rule(r).unwrap(); fake_event!(IpEvt, id = 1, source = "test", (".ip", "8.8.4.4")); assert_eq!(e.scan(&IpEvt {}).unwrap(), None); @@ -629,8 +592,8 @@ match-on: #[test] fn test_match_and_filter() { - let mut e = Engine::new(); - let _match = rule!( + let mut c = Compiler::new(); + c.load_rules_from_str( r#" --- name: match @@ -638,22 +601,20 @@ matches: $a: .ip ~= "^8\.8\." condition: $a actions: ["do_something"] -..."# - ); - let filter = rule!( - r#" --- + name: filter type: filter match-on: events: test: [1] -..."# - ); +"#, + ) + .unwrap(); + + let mut e = Engine::try_from(c).unwrap(); - e.insert_rule(_match).unwrap(); - e.insert_rule(filter).unwrap(); fake_event!(Dummy, id = 1, source = "test", (".ip", "8.8.4.4")); let sr = e.scan(&Dummy {}).unwrap().unwrap(); assert!(sr.rules.contains("match")); @@ -664,8 +625,8 @@ match-on: #[test] fn test_match_with_tags() { - let mut e = Engine::new(); - let t4343 = rule!( + let mut c = Compiler::new(); + c.load_rules_from_str( r#" --- name: test.1 @@ -674,23 +635,22 @@ meta: match-on: events: test: [] -..."# - ); - let t4242 = rule!( - r#" --- + name: test.2 meta: tags: ['another:tag', 'some:random:tag'] match-on: events: test: [] -..."# - ); - e.insert_rule(t4242).unwrap(); - e.insert_rule(t4343).unwrap(); +"#, + ) + .unwrap(); + + let mut e = Engine::try_from(c).unwrap(); + fake_event!(Dummy, id = 1, source = "test", (".ip", "8.8.4.4")); let sr = e.scan(&Dummy {}).unwrap().unwrap(); assert!(sr.rules.contains("test.1")); @@ -701,8 +661,8 @@ match-on: #[test] fn test_match_with_attack() { - let mut e = Engine::new(); - let t4343 = rule!( + let mut c = Compiler::new(); + c.load_rules_from_str( r#" --- name: detect.t4343 @@ -712,12 +672,9 @@ meta: match-on: events: test: [] -..."# - ); - let t4242 = rule!( - r#" --- + name: detect.t4242 meta: attack: @@ -725,11 +682,12 @@ meta: match-on: events: test: [] -..."# - ); +"#, + ) + .unwrap(); + + let mut e = Engine::try_from(c).unwrap(); - e.insert_rule(t4242).map_err(|e| println!("{e}")).unwrap(); - e.insert_rule(t4343).unwrap(); fake_event!(Dummy, id = 1, source = "test", (".ip", "8.8.4.4")); let sr = e.scan(&Dummy {}).unwrap().unwrap(); assert!(sr.rules.contains("detect.t4242")); @@ -740,36 +698,32 @@ match-on: #[test] fn test_rule_dependency() { - let mut e = Engine::new(); - - e.insert_rule(rule!( + let mut c = Compiler::new(); + c.load_rules_from_str( r#" name: dep.rule type: dependency matches: $ip: .ip == '8.8.4.4' condition: any of them -"# - )) - .unwrap(); - e.insert_rule(rule!( - r#" +--- + name: main matches: $dep1: rule(dep.rule) condition: all of them -"# - )) - .unwrap(); - e.insert_rule(rule!( - r#" +--- + name: match.all -"# - )) + +"#, + ) .unwrap(); + let mut e = Engine::try_from(c).unwrap(); + fake_event!(Dummy, id = 1, source = "test", (".ip", "8.8.4.4")); let sr = e.scan(&Dummy {}).unwrap().unwrap(); assert!(sr.rules.contains("main")); @@ -783,67 +737,26 @@ name: match.all assert!(sr.rules.contains("match.all")); } - #[test] - fn test_load_rule_unk_dep() { - let mut e = Engine::new(); - - let res = e.insert_rule(rule!( - r#" -name: test -matches: - $dep: rule(unknown.rule) -condition: any of them -"# - )); - - assert!(matches!(res, Err(Error::UnknownRuleDependency(_)))); - } - - #[test] - fn test_load_circular_rule() { - let mut e = Engine::new(); - - let res = e.insert_rule(rule!( - r#" -name: test -matches: - $dep: rule(test) -condition: any of them -"# - )); - - // when dependencies are checked, rule is not yet inserted in the engine - // so it should result in an UnknownRuleDependency - assert!(matches!(res, Err(Error::UnknownRuleDependency(_)))); - } - #[test] fn test_dep_cache() { - let mut e = Engine::new(); - - e.insert_rule(rule!( + let mut c = Compiler::new(); + c.load_rules_from_str( r#" name: dep.rule type: dependency matches: $ip: .ip == '8.8.4.4' condition: any of them -"# - )) - .unwrap(); - e.insert_rule(rule!( - r#" +--- + name: main matches: $dep1: rule(dep.rule) condition: all of them -"# - )) - .unwrap(); - e.insert_rule(rule!( - r#" +--- + name: multi.deps matches: $dep1: rule(dep.rule) @@ -851,10 +764,12 @@ matches: $dep3: rule(dep.rule) $dep4: rule(dep.rule) condition: all of them -"# - )) +"#, + ) .unwrap(); + let e = Engine::try_from(c).unwrap(); + // we check the dep cache is correct assert_eq!( e.deps_cache diff --git a/gene/src/lib.rs b/gene/src/lib.rs index 7fcf8f9..13c6581 100644 --- a/gene/src/lib.rs +++ b/gene/src/lib.rs @@ -14,3 +14,11 @@ pub use values::FieldValue; mod paths; pub use paths::XPath; + +mod template; +pub use template::Templates; + +mod compiler; +pub use compiler::Compiler; + +mod map; diff --git a/gene/src/rules/map.rs b/gene/src/map.rs similarity index 60% rename from gene/src/rules/map.rs rename to gene/src/map.rs index 111a14f..f2c9e31 100644 --- a/gene/src/rules/map.rs +++ b/gene/src/map.rs @@ -5,23 +5,50 @@ use std::{ ops::{Deref, DerefMut}, }; -#[derive(Debug, Clone, Serialize)] -pub struct MatchHashMap(HashMap); +/// HashMap implementation providing key uniqueness at Deserialization +#[derive(Default, Debug, Clone, Serialize)] +pub(crate) struct UKHashMap(HashMap); -impl Deref for MatchHashMap { +impl From> for HashMap { + fn from(value: UKHashMap) -> Self { + value.0 + } +} + +impl From> for UKHashMap { + fn from(value: HashMap) -> Self { + Self(value) + } +} + +impl Deref for UKHashMap { type Target = HashMap; fn deref(&self) -> &Self::Target { &self.0 } } -impl DerefMut for MatchHashMap { +impl DerefMut for UKHashMap { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } -impl<'de, K, V> Deserialize<'de> for MatchHashMap +/// helper to deserialize a UKHashMap into a HashMap +#[inline(always)] +pub(crate) fn deserialize_uk_hashmap<'de, D, K, V>( + deserializer: D, +) -> Result>, D::Error> +where + D: Deserializer<'de>, + K: Deserialize<'de> + Eq + std::hash::Hash + fmt::Debug, + V: Deserialize<'de>, +{ + let s = Option::>::deserialize(deserializer)?; + Ok(s.map(|m| m.into())) +} + +impl<'de, K, V> Deserialize<'de> for UKHashMap where K: Deserialize<'de> + Eq + std::hash::Hash + fmt::Debug, V: Deserialize<'de>, @@ -37,7 +64,7 @@ where K: Deserialize<'de> + Eq + std::hash::Hash + fmt::Debug, V: Deserialize<'de>, { - type Value = MatchHashMap; + type Value = UKHashMap; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("a map with unique keys") @@ -56,7 +83,7 @@ where values.insert(key, value); } - Ok(MatchHashMap(values)) + Ok(UKHashMap(values)) } } diff --git a/gene/src/rules.rs b/gene/src/rules.rs index dac614b..1165425 100644 --- a/gene/src/rules.rs +++ b/gene/src/rules.rs @@ -1,5 +1,5 @@ use self::{attack::AttackId, condition::Condition, matcher::Match}; -use crate::Event; +use crate::{map::deserialize_uk_hashmap, template::Templates, Event}; use lazy_static::lazy_static; use regex::Regex; @@ -16,9 +16,6 @@ mod condition; // used to parse path pub(crate) mod matcher; -mod map; -pub use map::MatchHashMap; - pub const MAX_SEVERITY: u8 = 10; pub(crate) fn bound_severity(sev: u8) -> u8 { @@ -135,14 +132,18 @@ impl<'de> Deserialize<'de> for Type { #[serde(deny_unknown_fields)] pub struct Meta { /// free text tags associated to the rule + #[serde(skip_serializing_if = "Option::is_none")] pub tags: Option>, /// [MITRE ATT&CK](https://attack.mitre.org/) ids concerned by this rule /// This is not a free-text field, when the rule compiles a format checking /// made on the ids. + #[serde(skip_serializing_if = "Option::is_none")] pub attack: Option>, /// authors of the rule + #[serde(skip_serializing_if = "Option::is_none")] pub authors: Option>, /// any comment + #[serde(skip_serializing_if = "Option::is_none")] pub comments: Option>, } @@ -151,6 +152,7 @@ pub struct Meta { #[serde(deny_unknown_fields)] pub struct Params { /// whether to disable the rule or not + #[serde(skip_serializing_if = "Option::is_none")] pub disable: Option, } @@ -164,6 +166,7 @@ pub struct MatchOn { /// **one** of the source and **one** of its associated event id. /// To match all events from a source just leave an empty set of /// event ids. + #[serde(skip_serializing_if = "Option::is_none")] pub events: Option>>, } @@ -174,21 +177,31 @@ pub struct Rule { /// name fo the rule pub name: String, #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] pub ty: Option, /// rule's metadata + #[serde(skip_serializing_if = "Option::is_none")] pub meta: Option, /// miscellaneous parameters + #[serde(skip_serializing_if = "Option::is_none")] pub params: Option, /// match-on directives #[serde(rename = "match-on")] + #[serde(skip_serializing_if = "Option::is_none")] pub match_on: Option, /// matches - pub matches: Option>, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(deserialize_with = "deserialize_uk_hashmap")] + pub matches: Option>, /// rule triggering condition + #[serde(skip_serializing_if = "Option::is_none")] pub condition: Option, /// severity given to the events matching the rule + #[serde(skip_serializing_if = "Option::is_none")] pub severity: Option, /// actions to take when rule triggers + #[serde(skip_serializing_if = "Option::is_none")] pub actions: Option>, } @@ -209,18 +222,8 @@ impl Rule { } #[inline] - pub fn apply_templates(mut self, templates: &HashMap) -> Self { - if let Some(matches) = self.matches.as_mut() { - for op in matches.keys().cloned().collect::>() { - matches.entry(op.clone()).and_modify(|s| { - let mut new = s.clone(); - templates - .iter() - .for_each(|(name, tpl)| new = new.replace(&format!("{{{{{name}}}}}"), tpl)); - *s = new; - }); - } - } + pub fn apply_templates(mut self, templates: &Templates) -> Self { + templates.replace(&mut self); self } @@ -723,7 +726,7 @@ condition: $a and $b let d = serde_yaml::from_str::<'_, Rule>(test) .unwrap() - .apply_templates(&templates); + .apply_templates(&templates.into()); let matches = d.matches.unwrap(); let m = matches.get("$a").unwrap(); diff --git a/gene/src/template.rs b/gene/src/template.rs new file mode 100644 index 0000000..2872da8 --- /dev/null +++ b/gene/src/template.rs @@ -0,0 +1,114 @@ +use std::collections::HashMap; + +use serde::Deserialize; +use thiserror::Error; + +use crate::{map::UKHashMap, Rule}; + +#[derive(Debug, Error)] +pub enum Error { + #[error("duplicate template name: {0}")] + Duplicate(String), +} + +/// Structure holding string templates to replace in rules. Templating +/// mechanism allow to define once complex regex and use them at multiple +/// places in rules, making rule maintenance easier. +/// +/// # Example +/// +/// ``` +/// use gene::Compiler; +/// +/// let mut c = Compiler::new(); +/// +/// /// loading template from string +/// c.load_templates_from_str( +/// r#" +/// some_template: hello world +/// "#, +/// ) +/// .unwrap(); +/// +/// c.load_rules_from_str( +/// r#" +/// name: test +/// matches: +/// $m: .data.path == '{{some_template}}' +/// "#, +/// ).unwrap(); +/// +/// /// we verify our template has been replaced +/// assert_eq!( +/// c.rules() +/// .unwrap() +/// .first() +/// .unwrap() +/// .matches +/// .as_ref() +/// .unwrap() +/// .get("$m") +/// .unwrap(), +/// &String::from(".data.path == 'hello world'") +/// ); +/// ``` +#[derive(Default, Debug, Deserialize, Clone)] +pub struct Templates(UKHashMap); + +impl From> for Templates { + fn from(value: HashMap) -> Self { + Self(value.into()) + } +} + +impl Templates { + pub fn new() -> Self { + Self::default() + } + + /// Inserts a new string template. Under the `matches` section of a [Rule] + /// any occurrence of `{{name}}` (`name` being the template name) will be + /// replaced by the `template`. + #[inline] + pub fn insert(&mut self, name: String, template: String) -> Result<(), Error> { + if self.0.contains_key(&name) { + return Err(Error::Duplicate(name)); + } + self.0.insert(name, template); + Ok(()) + } + + /// Extends templates from another + #[inline] + pub fn extend(&mut self, o: &Self) -> Result<(), Error> { + for (name, template) in o.0.iter() { + self.insert(name.clone(), template.clone())?; + } + Ok(()) + } + + /// Replaces templates in the given [Rule] + pub fn replace(&self, r: &mut Rule) { + if let Some(matches) = r.matches.as_mut() { + for op in matches.keys().cloned().collect::>() { + matches.entry(op.clone()).and_modify(|s| { + let mut new = s.clone(); + self.0 + .iter() + .for_each(|(name, tpl)| new = new.replace(&format!("{{{{{name}}}}}"), tpl)); + *s = new; + }); + } + } + } + + #[inline(always)] + pub fn len(&self) -> usize { + self.0.len() + } + + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } +}