diff --git a/Cargo.toml b/Cargo.toml
index de9bd65..3d72de0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,7 +20,7 @@ lazy_static = { version = "1.4.0", default-features = false }
 log = { version = "0.4.19", default-features = false, features = ["max_level_trace", "release_max_level_info"] }
 nix = { version = "0.26.2", default-features = false, features = ["fs"] }
 pest = { version = "2.7.10", default-features = false, features = ["std", "memchr"], optional = true }
-pest_derive = { version = "2.7.10", default-features = false, features = ["std"], optional = true}
+pest_derive = { version = "2.7.10", default-features = false, features = ["std", "grammar-extras"], optional = true}
 rand = { version = "0.8.5", default-features = false, features = ["std", "std_rng"] }
 regex = { version = "1.9.1", default-features = false, features = ["std", "perf"] }
 serde = { version = "1.0.193", default-features = false, features = ["std", "derive"] }
diff --git a/src/strace/parser/peg.pest b/src/strace/parser/peg.pest
index 8c70af4..9f78c45 100644
--- a/src/strace/parser/peg.pest
+++ b/src/strace/parser/peg.pest
@@ -1,13 +1,67 @@
-// "382944 0.000054 mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f52a332e000"
+// Pest grammar for strace output (some stuff only works with our strace output arguments)
 
-syscall_line = { SOI ~ pid ~ " "+ ~ timestamp ~ " " ~ name ~ arguments ~ " = " ~ ret ~ EOI }
+// Main line tokens
+
+syscall_line = { SOI ~ pid ~ " "+ ~ rel_ts ~ " " ~ name ~ arguments ~ " = " ~ ret_val ~ EOI }
 
 pid = { ASCII_DIGIT+ }
 
-timestamp = { ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
+rel_ts = { ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
+
+name = { (ASCII_ALPHA_LOWER | ASCII_DIGIT | "_")+ }
+
+arguments = {
+    "(" ~
+    (
+        (expression ~ (", " ~ expression)+)
+        |
+        expression?
+    ) ~
+    ")"
+}
+
+ret_val = { expression_int_literal ~ (" " ~ ANY*)? }
+
+
+// Subrules
+
+expression = { #expr_int = expression_int | #struct = struct | #buf = buffer }
+
+expression_int = {
+    #or = expression_int_or |
+    #lit = expression_int_literal |
+    #named = expression_named_constant |
+    #aliases = expression_named_constant_aliases |
+    #bit_neg = expression_bit_negation
+}
+expression_int_literal = { #hex = literal_int_hex | #int = literal_int }
+expression_int_or = { expression_named_constant ~ ("|" ~ expression_int)+ }
+expression_named_constant = { (ASCII_ALPHA_UPPER | ASCII_DIGIT | "_")+ }
+expression_named_constant_aliases = {
+    "[" ~
+    expression_named_constant ~
+    (
+        " " ~
+        expression_named_constant
+    )+ ~
+    "]"
+}
+expression_bit_negation = { "~" ~ expression_int }
 
-name = { (ASCII_ALPHA_LOWER ~ ASCII_DIGIT ~ "_")+ }
+literal_int_hex = { "0x" ~ ASCII_HEX_DIGIT+ }
+literal_int = { "-"? ~ ASCII_DIGIT+ }
 
-arguments = { "(" ~ ANY* ~ ")" }
+struct = {
+    "{" ~
+    (
+        (struct_member ~ (", " ~ struct_member)+)
+        |
+        struct_member?
+    ) ~
+    "}"
+}
+struct_member = { struct_member_name ~ "=" ~ expression }
+struct_member_name = { (ASCII_ALPHA_LOWER | ASCII_DIGIT | "_")+ }
 
-ret = { ANY+ }
+buffer = { "\"" ~ buffer_byte* ~ "\"" }
+buffer_byte = { !"\"" ~ ANY }
diff --git a/src/strace/parser/peg.rs b/src/strace/parser/peg.rs
index 194f3eb..c76bf5b 100644
--- a/src/strace/parser/peg.rs
+++ b/src/strace/parser/peg.rs
@@ -1,8 +1,10 @@
 //! PEG based strace output parser
 
+use itertools::Itertools;
+use pest::iterators::Pair;
 use pest::Parser as _;
 
-use crate::strace::Syscall;
+use crate::strace::{BufferType, IntegerExpression, Syscall, SyscallArg};
 
 use super::ParseResult;
 
@@ -11,7 +13,145 @@ use super::ParseResult;
 struct PegParser;
 
 pub fn parse_line(line: &str, unfinished_syscalls: &[Syscall]) -> anyhow::Result<ParseResult> {
-    let res = PegParser::parse(Rule::syscall_line, line);
-    dbg!(&res);
-    todo!();
+    let pair = match PegParser::parse(Rule::syscall_line, line) {
+        Err(_) => return Ok(ParseResult::IgnoredLine),
+        Ok(mut p) => p.next().unwrap(),
+    };
+    Ok(ParseResult::Syscall(pair.into()))
+}
+
+/// Follows the first inner pair `levels` times, starting from `pair`.
+///
+/// Returns `None` as soon as a pair on the way down has no inner pair.
+fn pair_descend(pair: Pair<'_, Rule>, levels: usize) -> Option<Pair<'_, Rule>> {
+    let mut pair = pair;
+    for _ in 0..levels {
+        pair = pair.into_inner().next()?;
+    }
+    Some(pair)
+}
+
+/// Parses an integer literal pair tagged `hex` or `int` and converts it to `T`.
+///
+/// Returns `None` if the text cannot be parsed as an `i128`; any other node tag is
+/// treated as unreachable.
+fn int_literal_pair_val<T>(pair: Pair<Rule>) -> Option<T>
+where
+    T: From<i128>,
+{
+    let val: Option<i128> = match pair.as_node_tag() {
+        Some("hex") => pair
+            .as_str()
+            .strip_prefix("0x")
+            .and_then(|s| i128::from_str_radix(s, 16).ok()),
+        Some("int") => pair.as_str().parse().ok(),
+        _ => unreachable!("{pair:?}"),
+    };
+    val.map(|v| v.into())
+}
+
+/// Collects the raw text bytes of every inner pair of `pair`, in order.
+fn buf_val(pair: Pair<Rule>) -> Option<Vec<u8>> {
+    Some(
+        pair.into_inner()
+            .flat_map(|b| b.as_str().as_bytes())
+            .copied()
+            .collect(),
+    )
+}
+
+/// Builds an argument from a pair tagged `expr_int`, `buf` or `struct`; other tags panic.
+impl From<Pair<'_, Rule>> for SyscallArg {
+    fn from(pair: Pair<Rule>) -> Self {
+        match pair.as_node_tag() {
+            Some("expr_int") => SyscallArg::Integer {
+                value: pair_descend(pair, 1).unwrap().into(),
+                metadata: None,
+            },
+            Some("buf") => SyscallArg::Buffer {
+                value: buf_val(pair).unwrap(),
+                type_: BufferType::Unknown,
+            },
+            Some("struct") => SyscallArg::Struct(
+                pair.into_inner()
+                    .map(|m| {
+                        let (name, val) = m.into_inner().next_tuple().unwrap();
+                        (
+                            name.as_str().to_owned(),
+                            pair_descend(val, 1).unwrap().into(),
+                        )
+                    })
+                    .collect(),
+            ),
+            _ => unreachable!("{pair:?}"),
+        }
+    }
+}
+
+/// Builds an expression from a pair tagged `lit`, `bit_neg`, `named`, `aliases` or `or`.
+impl From<Pair<'_, Rule>> for IntegerExpression {
+    fn from(pair: Pair<Rule>) -> Self {
+        match pair.as_node_tag() {
+            Some("lit") => {
+                let lit_pair = pair_descend(pair, 1).unwrap();
+                IntegerExpression::Literal(int_literal_pair_val(lit_pair).unwrap())
+            }
+            Some("bit_neg") => {
+                IntegerExpression::BinaryNot(Box::new(pair_descend(pair, 2).unwrap().into()))
+            }
+            Some("named") => IntegerExpression::NamedConst(pair.as_str().to_owned()),
+            Some("aliases") => {
+                // Only keep the last alias
+                let alias = pair.into_inner().last().unwrap();
+                IntegerExpression::NamedConst(alias.as_str().to_owned())
+            }
+            Some("or") => {
+                let mut children = pair.into_inner();
+                let mut or_elems = Vec::with_capacity(children.len());
+                or_elems.push(IntegerExpression::NamedConst(
+                    children.next().unwrap().as_str().to_owned(),
+                ));
+                or_elems.extend(
+                    children
+                        .map(|c| pair_descend(c, 1).unwrap().into())
+                        .flat_map(|e| {
+                            // Splice the operands of a nested `BinaryOr` into this level
+                            if let IntegerExpression::BinaryOr(es) = e {
+                                es.into_iter()
+                            } else {
+                                vec![e].into_iter()
+                            }
+                        }),
+                );
+                IntegerExpression::BinaryOr(or_elems)
+            }
+            _ => unreachable!("{pair:?}"),
+        }
+    }
+}
+
+/// Builds a syscall from a `syscall_line` pair: pid, rel_ts, name, arguments, ret_val.
+impl From<Pair<'_, Rule>> for Syscall {
+    fn from(pair: Pair<Rule>) -> Self {
+        let mut subpairs = pair.into_inner();
+        // Note if the grammar is correct, we should *never* panic below
+        let pid = subpairs.next().unwrap().as_str().parse().unwrap();
+        let rel_ts = subpairs.next().unwrap().as_str().parse().unwrap();
+        let name = subpairs.next().unwrap().as_str().to_owned();
+        let args = subpairs
+            .next()
+            .unwrap()
+            .into_inner()
+            .map(|p| pair_descend(p, 1).unwrap().into())
+            .collect();
+        let ret_val_pair = pair_descend(subpairs.next().unwrap(), 2).unwrap();
+        let ret_val = int_literal_pair_val(ret_val_pair).unwrap();
+        Syscall {
+            pid,
+            rel_ts,
+            name,
+            args,
+            ret_val,
+        }
+    }
 }