diff --git a/capstone-rs/Cargo.toml b/capstone-rs/Cargo.toml
index 37d4bc23..7cdd58ae 100644
--- a/capstone-rs/Cargo.toml
+++ b/capstone-rs/Cargo.toml
@@ -22,6 +22,7 @@ libc = { version = "0.2", default-features = false }
 macho = "0.*"
 criterion = "0.3"
 rayon = "1.1"
+object = "0.26.2"
 
 [[bench]]
 name = "my_benchmark"
diff --git a/capstone-rs/examples/recursive.rs b/capstone-rs/examples/recursive.rs
new file mode 100644
index 00000000..4f61d813
--- /dev/null
+++ b/capstone-rs/examples/recursive.rs
@@ -0,0 +1,106 @@
+//! This example shows how to do recursive disassembly.
+//! The example is written specifically for the x86-64 ELF binary format with PIE enabled.
+//! If PIE is disabled (`gcc -no-pie ...`) the offset needs to be properly calculated.
+//!
+use std::collections::{HashSet, VecDeque};
+use std::env;
+use std::fs;
+use std::process;
+
+use object::{Object, ObjectSection, SectionKind};
+
+use capstone;
+use capstone::prelude::*;
+use capstone::InsnGroupType;
+
+fn main() {
+    let args: Vec<_> = env::args().collect();
+    if args.len() != 2 {
+        eprintln!("Usage: {} <elf-binary>", args[0]);
+        process::exit(-1);
+    }
+
+    let buf = fs::read(&args[1]).expect("cannot read file");
+
+    let obj = object::File::parse(&*buf).expect("cannot parse file");
+
+    // Work list of entry points still to visit, and the set already visited.
+    let mut addr_queue: VecDeque<u64> = VecDeque::new();
+    let mut addr_seen: HashSet<u64> = HashSet::new();
+
+    // Seed the queue with the start address of every executable section.
+    for section in obj.sections() {
+        if section.kind() == SectionKind::Text {
+            println!("{:x?} ", section);
+            addr_queue.push_back(section.address());
+        }
+    }
+
+    let cs = Capstone::new()
+        .x86()
+        .mode(arch::x86::ArchMode::Mode64)
+        .detail(true)
+        .build()
+        .expect("failed to create capstone handle");
+
+    let mut disasm = cs.get_disasm_iter();
+
+    while let Some(addr) = addr_queue.pop_front() {
+        // `insert` returns false when `addr` was already visited.
+        if !addr_seen.insert(addr) {
+            continue;
+        }
+
+        println!("---> addr: {:#02x?}", addr);
+
+        // With PIE, section virtual addresses equal file offsets — TODO confirm for other layouts.
+        let offset = addr as usize;
+        let mut cur_insn = disasm.disasm_iter(&buf, offset, addr);
+        while let Ok(insn) = cur_insn {
+            let insn_detail: InsnDetail = cs.insn_detail(&insn).unwrap();
+            if is_invalid_insn(&insn_detail) {
+                break;
+            }
+            println!("{}", insn);
+            if is_cflow_insn(&insn_detail) {
+                break;
+            }
+
+            // add logic here to add more targets to the addr_queue
+            // ...
+
+            cur_insn = disasm.disasm_iter_continue(&buf);
+        }
+    }
+}
+
+/// Returns true if the instruction belongs to capstone's CS_GRP_INVALID group.
+fn is_invalid_insn(insn_detail: &InsnDetail) -> bool {
+    for insn_grp in insn_detail.groups() {
+        if insn_grp.0 as u32 == InsnGroupType::CS_GRP_INVALID {
+            return true;
+        }
+    }
+    false
+}
+
+/// Returns true if the instruction changes control flow (jump/call/ret/iret).
+fn is_cflow_insn(insn_detail: &InsnDetail) -> bool {
+    for insn_grp in insn_detail.groups() {
+        if is_cflow_group(insn_grp) {
+            return true;
+        }
+    }
+    false
+}
+
+/// Returns true if the group id denotes a control-flow group.
+fn is_cflow_group(insn_group: &InsnGroupId) -> bool {
+    matches!(
+        insn_group.0 as u32,
+        InsnGroupType::CS_GRP_JUMP
+            | InsnGroupType::CS_GRP_CALL
+            | InsnGroupType::CS_GRP_RET
+            | InsnGroupType::CS_GRP_IRET
+    )
+}
diff --git a/capstone-rs/src/capstone.rs b/capstone-rs/src/capstone.rs
index 19af21ee..a7b852a5 100644
--- a/capstone-rs/src/capstone.rs
+++ b/capstone-rs/src/capstone.rs
@@ -165,6 +165,28 @@ impl Capstone {
         }
     }
 
+    /// Creates an instance of the iterative disassembler [`DisasmIter`].
+    ///
+    /// The returned value borrows this `Capstone` and must not outlive it.
+    ///
+    /// # Panics
+    ///
+    /// Panics if capstone cannot allocate its internal instruction buffer.
+    pub fn get_disasm_iter<'a>(&'a self) -> DisasmIter<'a> {
+        // SAFETY: `self.csh()` is a valid, open capstone handle.
+        let insn = unsafe { cs_malloc(self.csh()) };
+        if insn.is_null() {
+            panic!("cs_malloc() failed");
+        }
+        DisasmIter {
+            insn,
+            csh: self.csh,
+            _covariant: PhantomData,
+            offset: 0,
+            addr: 0,
+        }
+    }
+
     /// Disassemble all instructions in buffer
     ///
     /// ```
@@ -423,3 +445,77 @@ impl Drop for Capstone {
         unsafe { cs_close(&mut self.csh()) };
     }
 }
+
+/// Structure to handle iterative disassembly.
+///
+/// Create one with [`Capstone::get_disasm_iter()`].
+pub struct DisasmIter<'a> {
+    insn: *mut cs_insn, // buffer for the instruction currently being decoded
+    csh: *mut c_void,   // capstone handle required by cs_disasm_iter
+    offset: usize,      // byte offset into the code buffer of the next instruction
+    addr: u64,          // address of the next instruction
+    _covariant: PhantomData<&'a ()>, // ensures DisasmIter cannot outlive its Capstone
+}
+
+impl<'a> Drop for DisasmIter<'a> {
+    fn drop(&mut self) {
+        // SAFETY: `insn` was allocated by cs_malloc() and is freed exactly once.
+        unsafe { cs_free(self.insn, 1) };
+    }
+}
+
+impl<'a> DisasmIter<'a> {
+    /// Continues disassembly at the instruction following the last one decoded.
+    ///
+    /// Usage shown in examples/recursive.rs.
+    pub fn disasm_iter_continue(&mut self, code: &[u8]) -> CsResult<Insn> {
+        self.disasm_iter(code, self.offset, self.addr)
+    }
+
+    /// Disassembles a single instruction starting at `offset` within `code`,
+    /// reporting `addr` as its address.
+    ///
+    /// Usage shown in examples/recursive.rs.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `offset` is out of bounds or if the bytes at
+    /// `offset` do not decode to a valid instruction.
+    pub fn disasm_iter(&mut self, code: &[u8], offset: usize, addr: u64) -> CsResult<Insn> {
+        if offset > code.len() {
+            return Err(Error::CustomError("offset out of bounds"));
+        }
+        let mut code_ptr = code[offset..].as_ptr();
+        let mut count = code.len() - offset;
+        let mut local_addr = addr;
+        // SAFETY: `code_ptr`/`count` describe a live buffer and `insn` was
+        // allocated by cs_malloc() for this handle. cs_disasm_iter advances
+        // code_ptr, decrements count, and increments local_addr.
+        let ret = unsafe {
+            cs_disasm_iter(
+                self.csh as csh,       // capstone handle
+                &mut code_ptr,         // double pointer to code; automatically incremented
+                &mut count,            // bytes left to disassemble; automatically decremented
+                &mut local_addr,       // automatically incremented address
+                self.insn,             // pointer to the preallocated cs_insn
+            )
+        };
+        if ret {
+            self.offset = code.len() - count;
+            self.addr = local_addr;
+            Ok(unsafe { Insn::from_raw(self.insn) })
+        } else {
+            Err(Error::CustomError("not disasm"))
+        }
+    }
+
+    /// Byte offset into the code buffer where the next instruction starts.
+    pub fn offset(&self) -> usize {
+        self.offset
+    }
+
+    /// Address of the next instruction to be decoded.
+    pub fn addr(&self) -> u64 {
+        self.addr
+    }
+}