From a59909c27d6314988cfca7a6556097086bf264eb Mon Sep 17 00:00:00 2001 From: Sirui Mu Date: Thu, 31 Aug 2023 20:19:34 +0800 Subject: [PATCH] Make unit tests compile and pass all tests This commit fixes a minor problem that prevents tests from compiling and running to success. Arch-specific register IDs (represented by ArchTag::RegId) and instruction group IDs (represented by ArchTag::InsnGroupId) are generated as newtype structs whose inner type is c_uint because their C definition are just C enums. However, in a cs_detail struct, the regs_read field is an array of u16, not an array of c_uint. So we cannot just type pun on the underlying array to get &[A::RegId] because the layout is totally different. The similar problem exists for InsnDetail::regs_write and InsnDetail::groups. This commit fixes the problem by making these function return an Iterator rather than a slice. The iterator will map the underlying array elements to actual arch-specific types when iterated. --- README.md | 24 ++- capstone-rs/examples/demo.rs | 16 +- .../fuzz_targets/fuzz_target_disasm_x86_64.rs | 8 +- capstone-rs/src/arch/evm.rs | 4 +- capstone-rs/src/arch/mod.rs | 4 +- capstone-rs/src/capstone.rs | 9 +- capstone-rs/src/instruction.rs | 176 ++++++++++-------- capstone-rs/src/lib.rs | 4 +- cstool/src/bin/cstool.rs | 16 +- 9 files changed, 150 insertions(+), 111 deletions(-) diff --git a/README.md b/README.md index bd21fb5e..d6fa129b 100644 --- a/README.md +++ b/README.md @@ -31,25 +31,33 @@ See the [`capstone-sys`](capstone-sys) page for the requirements and supported p ```rust extern crate capstone; +use capstone::arch::x86::X86ArchTag; use capstone::prelude::*; const X86_CODE: &'static [u8] = b"\x55\x48\x8b\x05\xb8\x13\x00\x00\xe9\x14\x9e\x08\x00\x45\x31\xe4"; /// Print register names -fn reg_names(cs: &Capstone, regs: &[RegId]) -> String { - let names: Vec = regs.iter().map(|&x| cs.reg_name(x).unwrap()).collect(); +fn reg_names(cs: &Capstone, regs: I) -> String +where + A: ArchTag, + I: Iterator, +{ + let names: Vec = regs.map(|x| cs.reg_name(x).unwrap()).collect(); names.join(", ") } /// Print instruction group names -fn group_names(cs: &Capstone, regs: &[InsnGroupId]) -> String { - let names: Vec = regs.iter().map(|&x| cs.group_name(x).unwrap()).collect(); +fn group_names(cs: &Capstone, regs: I) -> String +where + A: ArchTag, + I: Iterator, +{ + let names: Vec = regs.map(|x| cs.group_name(x).unwrap()).collect(); names.join(", ") } fn main() { - let cs = Capstone::new() - .x86() + let cs = Capstone::::new() .mode(arch::x86::ArchMode::Mode64) .syntax(arch::x86::ArchSyntax::Att) .detail(true) @@ -63,8 +71,8 @@ fn main() { println!(); println!("{}", i); - let detail: InsnDetail = cs.insn_detail(&i).expect("Failed to get insn detail"); - let arch_detail: ArchDetail = detail.arch_detail(); + let detail = cs.insn_detail(&i).expect("Failed to get insn detail"); + let arch_detail = detail.arch_detail(); let ops = arch_detail.operands(); let output: &[(&str, String)] = &[ diff --git a/capstone-rs/examples/demo.rs b/capstone-rs/examples/demo.rs index c06bfb1e..e1045329 100644 --- a/capstone-rs/examples/demo.rs +++ b/capstone-rs/examples/demo.rs @@ -11,15 +11,23 @@ const X86_CODE: &[u8] = b"\x55\x48\x8b\x05\xb8\x13\x00\x00\xe9\x14\x9e\x08\x00\x #[cfg(feature = "full")] /// Print register names -fn reg_names(cs: &Capstone, regs: &[A::RegId]) -> String { - let names: Vec = regs.iter().map(|&x| cs.reg_name(x).unwrap()).collect(); +fn reg_names(cs: &Capstone, regs: I) -> String +where + A: ArchTag, + I: Iterator, +{ + let names: Vec = regs.map(|x| cs.reg_name(x).unwrap()).collect(); names.join(", ") } #[cfg(feature = "full")] /// Print instruction group names -fn group_names(cs: &Capstone, regs: &[A::InsnGroupId]) -> String { - let names: Vec = regs.iter().map(|&x| cs.group_name(x).unwrap()).collect(); +fn group_names(cs: &Capstone, regs: I) -> String +where + A: ArchTag, + I: Iterator, +{ + let names: Vec = regs.map(|x| cs.group_name(x).unwrap()).collect(); names.join(", ") } diff --git a/capstone-rs/fuzz/fuzz_targets/fuzz_target_disasm_x86_64.rs b/capstone-rs/fuzz/fuzz_targets/fuzz_target_disasm_x86_64.rs index 437cf411..5eae85c2 100644 --- a/capstone-rs/fuzz/fuzz_targets/fuzz_target_disasm_x86_64.rs +++ b/capstone-rs/fuzz/fuzz_targets/fuzz_target_disasm_x86_64.rs @@ -3,18 +3,18 @@ extern crate libfuzzer_sys; extern crate capstone; +use capstone::arch::x86::X86ArchTag; use capstone::prelude::*; fuzz_target!(|data: &[u8]| { - let mut cs = Capstone::new() - .x86() + let mut cs = Capstone::::new() .mode(arch::x86::ArchMode::Mode64) .detail(true) .build() .unwrap(); for i in cs.disasm_all(data, 0x1000).unwrap().iter() { - let detail: InsnDetail = cs.insn_detail(&i).unwrap(); - let arch_detail: ArchDetail = detail.arch_detail(); + let detail = cs.insn_detail(&i).unwrap(); + let arch_detail = detail.arch_detail(); arch_detail.operands().iter().for_each(drop); detail.regs_read().iter().for_each(drop); detail.regs_write().iter().for_each(drop); diff --git a/capstone-rs/src/arch/evm.rs b/capstone-rs/src/arch/evm.rs index 9a5b8f56..7b1d3a9d 100644 --- a/capstone-rs/src/arch/evm.rs +++ b/capstone-rs/src/arch/evm.rs @@ -12,7 +12,7 @@ pub use capstone_sys::evm_insn as EvmInsn; pub use crate::arch::arch_builder::evm::*; use crate::arch::{ArchTag, DetailsArchInsn}; use crate::arch::internal::ArchTagSealed; -use crate::{Arch, InsnDetail}; +use crate::{Arch, InsnDetail, RegIdInt}; pub struct EvmArchTag; @@ -25,7 +25,7 @@ impl ArchTag for EvmArchTag { type ExtraMode = ArchExtraMode; type Syntax = ArchSyntax; - type RegId = u32; + type RegId = RegIdInt; type InsnId = EvmInsn; type InsnGroupId = EvmInsnGroup; diff --git a/capstone-rs/src/arch/mod.rs b/capstone-rs/src/arch/mod.rs index 67dd03a3..c492ca9a 100644 --- a/capstone-rs/src/arch/mod.rs +++ b/capstone-rs/src/arch/mod.rs @@ -361,9 +361,9 @@ pub trait ArchTag: internal::ArchTagSealed + 'static + Sized { type ExtraMode: Copy + Into; type Syntax: Copy + Into; - type RegId: Copy + Into; + type RegId: Copy + From + Into; type InsnId: Copy + Into; - type InsnGroupId: Copy + Into; + type InsnGroupId: Copy + From + Into; type InsnDetail<'a>: DetailsArchInsn + for<'i> From<&'i InsnDetail<'a, Self>>; diff --git a/capstone-rs/src/capstone.rs b/capstone-rs/src/capstone.rs index 4f741ad6..21beb738 100644 --- a/capstone-rs/src/capstone.rs +++ b/capstone-rs/src/capstone.rs @@ -121,8 +121,9 @@ impl Capstone { /// This is the recommended interface to `Capstone`. /// /// ``` + /// use capstone::arch::x86::X86ArchTag; /// use capstone::prelude::*; - /// let cs = Capstone::new().x86().mode(arch::x86::ArchMode::Mode32).build(); + /// let cs = Capstone::::new().mode(arch::x86::ArchMode::Mode32).build(); /// ``` #[allow(clippy::new_ret_no_self)] pub fn new() -> A::Builder { @@ -134,7 +135,8 @@ impl Capstone { /// /// ``` /// use capstone::{Arch, Capstone, NO_EXTRA_MODE, Mode}; - /// let cs = Capstone::new_raw(Arch::X86, Mode::Mode64, NO_EXTRA_MODE, None); + /// use capstone::arch::DynamicArchTag; + /// let cs = Capstone::::new_raw(Arch::X86, Mode::Mode64, NO_EXTRA_MODE, None); /// assert!(cs.is_ok()); /// ``` pub fn new_raw>( @@ -191,7 +193,8 @@ impl Capstone { /// /// ``` /// # use capstone::prelude::*; - /// # let cs = Capstone::new().x86().mode(arch::x86::ArchMode::Mode32).build().unwrap(); + /// # use capstone::arch::x86::X86ArchTag; + /// # let cs = Capstone::::new().mode(arch::x86::ArchMode::Mode32).build().unwrap(); /// cs.disasm_all(b"\x90", 0x1000).unwrap(); /// ``` pub fn disasm_all<'a>(&'a self, code: &[u8], addr: u64) -> CsResult> { diff --git a/capstone-rs/src/instruction.rs b/capstone-rs/src/instruction.rs index fbd7ed14..689cd66f 100644 --- a/capstone-rs/src/instruction.rs +++ b/capstone-rs/src/instruction.rs @@ -8,7 +8,7 @@ use core::str; use capstone_sys::*; -use crate::arch::{ArchTag, DetailsArchInsn}; +use crate::arch::ArchTag; use crate::constants::Arch; use crate::ffi::str_from_cstr_ptr; @@ -17,10 +17,10 @@ use crate::ffi::str_from_cstr_ptr; /// /// To access inner [`&[Insn]`](Insn), use [`.as_ref()`](AsRef::as_ref). /// ``` -/// # use capstone::Instructions; +/// # use capstone::arch::x86::X86ArchTag; /// # use capstone::prelude::*; -/// # let cs = Capstone::new().x86().mode(arch::x86::ArchMode::Mode32).build().unwrap(); -/// let insns: Instructions = cs.disasm_all(b"\x55\x48\x8b\x05", 0x1000).unwrap(); +/// # let cs = Capstone::::new().mode(arch::x86::ArchMode::Mode32).build().unwrap(); +/// let insns = cs.disasm_all(b"\x55\x48\x8b\x05", 0x1000).unwrap(); /// for insn in insns.as_ref() { /// println!("{}", insn); /// } @@ -48,8 +48,8 @@ pub type InsnIdInt = u32; #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct InsnId(pub InsnIdInt); -macro_rules! define_insn_id_from_arch_insn { - ( $( $arch:ident => $arch_insn:ident ),+ $(,)? ) => { +macro_rules! define_arch_insn_conversions { + ( $( [ $arch:ident, $arch_insn:ident ] ),+ $(,)? ) => { $( impl From<$crate::arch::$arch::$arch_insn> for InsnId { fn from(arch_insn: $crate::arch::$arch::$arch_insn) -> Self { @@ -60,20 +60,20 @@ macro_rules! define_insn_id_from_arch_insn { }; } -define_insn_id_from_arch_insn![ - arm => ArmInsn, - arm64 => Arm64Insn, - evm => EvmInsn, - m68k => M68kInsn, - m680x => M680xInsn, - mips => MipsInsn, - ppc => PpcInsn, - riscv => RiscVInsn, - sparc => SparcInsn, - sysz => SyszInsn, - tms320c64x => Tms320c64xInsn, - x86 => X86Insn, - xcore => XcoreInsn, +define_arch_insn_conversions![ + [arm, ArmInsn], + [arm64, Arm64Insn], + [evm, EvmInsn], + [m68k, M68kInsn], + [m680x, M680xInsn], + [mips, MipsInsn], + [ppc, PpcInsn], + [riscv, RiscVInsn], + [sparc, SparcInsn], + [sysz, SyszInsn], + [tms320c64x, Tms320c64xInsn], + [x86, X86Insn], + [xcore, XcoreInsn], ]; /// Integer type used in `InsnGroupId` @@ -96,32 +96,38 @@ impl From for InsnGroupId { } } -macro_rules! define_insn_grp_id_from_arch_grp_id { - ( $( $arch:ident => $arch_insn_grp:ident ),+ $(,)? ) => { +macro_rules! define_arch_grp_id_conversions { + ( $( [ $arch:ident, $arch_insn_grp:ident ] ),+ $(,)? ) => { $( impl From<$crate::arch::$arch::$arch_insn_grp> for InsnGroupId { fn from(arch_insn_grp: $crate::arch::$arch::$arch_insn_grp) -> Self { Self(arch_insn_grp.0 as InsnGroupIdInt) } } + + impl From for $crate::arch::$arch::$arch_insn_grp { + fn from(insn_grp: InsnGroupId) -> Self { + Self(insn_grp.0 as _) + } + } )+ }; } -define_insn_grp_id_from_arch_grp_id![ - arm => ArmInsnGroup, - arm64 => Arm64InsnGroup, - evm => EvmInsnGroup, - m68k => M68kInsnGroup, - m680x => M680xInsnGroup, - mips => MipsInsnGroup, - ppc => PpcInsnGroup, - riscv => RiscVInsnGroup, - sparc => SparcInsnGroup, - sysz => SyszInsnGroup, - tms320c64x => Tms320c64xInsnGroup, - x86 => X86InsnGroup, - xcore => XcoreInsnGroup, +define_arch_grp_id_conversions![ + [arm, ArmInsnGroup], + [arm64, Arm64InsnGroup], + [evm, EvmInsnGroup], + [m68k, M68kInsnGroup], + [m680x, M680xInsnGroup], + [mips, MipsInsnGroup], + [ppc, PpcInsnGroup], + [riscv, RiscVInsnGroup], + [sparc, SparcInsnGroup], + [sysz, SyszInsnGroup], + [tms320c64x, Tms320c64xInsnGroup], + [x86, X86InsnGroup], + [xcore, XcoreInsnGroup], ]; pub use capstone_sys::cs_group_type as InsnGroupType; @@ -141,37 +147,49 @@ impl RegId { pub const INVALID_REG: Self = Self(0); } -impl From for RegId { - fn from(v: u32) -> RegId { - RegId(v.try_into().ok().unwrap_or(Self::INVALID_REG.0)) +impl From for RegId { + fn from(value: RegIdInt) -> Self { + Self(value) } } -macro_rules! define_reg_id_from_arch_reg { - ( $( $arch:ident => $arch_reg:ident ),+ $(,)? ) => { +impl From for RegIdInt { + fn from(value: RegId) -> Self { + value.0 + } +} + +macro_rules! define_arch_reg_conversions { + ( $( [ $arch:ident, $arch_reg:ident ] ),+ $(,)? ) => { $( impl From<$crate::arch::$arch::$arch_reg> for RegId { fn from(arch_reg: $crate::arch::$arch::$arch_reg) -> Self { - Self(arch_reg.0 as RegIdInt) + Self(arch_reg.0 as _) + } + } + + impl From for $crate::arch::$arch::$arch_reg { + fn from(reg_id: RegId) -> Self { + Self(reg_id.0 as _) } } )+ }; } -define_reg_id_from_arch_reg![ - arm => ArmReg, - arm64 => Arm64Reg, - m68k => M68kReg, - m680x => M680xReg, - mips => MipsReg, - ppc => PpcReg, - riscv => RiscVReg, - sparc => SparcReg, - sysz => SyszReg, - tms320c64x => Tms320c64xReg, - x86 => X86Reg, - xcore => XcoreReg, +define_arch_reg_conversions![ + [arm, ArmReg], + [arm64, Arm64Reg], + [m68k, M68kReg], + [m680x, M680xReg], + [mips, MipsReg], + [ppc, PpcReg], + [riscv, RiscVReg], + [sparc, SparcReg], + [sysz, SyszReg], + [tms320c64x, Tms320c64xReg], + [x86, X86Reg], + [xcore, XcoreReg], ]; /// Represents how the register is accessed. @@ -285,10 +303,9 @@ pub struct Insn<'a, A: ArchTag> { /// [`Capstone::insn_detail()`](crate::Capstone::insn_detail). /// /// ``` -/// # use capstone::Instructions; +/// # use capstone::arch::x86::X86ArchTag; /// # use capstone::prelude::*; -/// let cs = Capstone::new() -/// .x86() +/// let cs = Capstone::::new() /// .mode(arch::x86::ArchMode::Mode32) /// .detail(true) // needed to enable detail /// .build() @@ -296,8 +313,8 @@ pub struct Insn<'a, A: ArchTag> { /// let insns = cs.disasm_all(b"\x90", 0x1000).unwrap(); /// for insn in insns.as_ref() { /// println!("{}", insn); -/// let insn_detail: InsnDetail = cs.insn_detail(insn).unwrap(); -/// println!(" {:?}", insn_detail.groups()); +/// let insn_detail = cs.insn_detail(insn).unwrap(); +/// println!(" {:?}", insn_detail.groups().collect::>()); /// } /// ``` /// @@ -492,33 +509,28 @@ pub struct InsnGroupIter<'a>(slice::Iter<'a, InsnGroupIdInt>); impl<'a, A: ArchTag> InsnDetail<'a, A> { #[cfg(feature = "full")] /// Returns the implicit read registers - pub fn regs_read(&self) -> &[A::RegId] { - unsafe { - &*(&self.0.regs_read[..self.0.regs_read_count as usize] as *const [RegIdInt] - as *const [A::RegId]) - } + pub fn regs_read(&self) -> impl Iterator + '_ { + self.0.regs_read[..self.0.regs_read_count as usize] + .iter() + .map(|raw_reg| A::RegId::from(RegId(*raw_reg))) } #[cfg(feature = "full")] /// Returns the implicit write registers - pub fn regs_write(&self) -> &[A::RegId] { - unsafe { - &*(&self.0.regs_write[..self.0.regs_write_count as usize] as *const [RegIdInt] - as *const [A::RegId]) - } + pub fn regs_write(&self) -> impl Iterator + '_ { + // TODO: change the return type. + self.0.regs_write[..self.0.regs_write_count as usize] + .iter() + .map(|raw_reg| A::RegId::from(RegId(*raw_reg))) } #[cfg(feature = "full")] /// Returns the groups to which this instruction belongs - pub fn groups(&self) -> &[A::InsnGroupId] { - unsafe { - &*(&self.0.groups[..self.0.groups_count as usize] as *const [InsnGroupIdInt] - as *const [A::InsnGroupId]) - } - } - - pub fn operands(&self) -> Vec< as DetailsArchInsn>::Operand> { - todo!() + pub fn groups(&self) -> impl Iterator + '_ { + // TODO: change the return type. + self.0.groups[..self.0.groups_count as usize] + .iter() + .map(|raw_grp| A::InsnGroupId::from(InsnGroupId(*raw_grp))) } /// Architecture-specific detail @@ -535,9 +547,9 @@ where { fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { fmt.debug_struct("Detail") - .field("regs_read", &self.regs_read()) - .field("regs_write", &self.regs_write()) - .field("groups", &self.groups()) + .field("regs_read", &self.regs_read().collect::>()) + .field("regs_write", &self.regs_write().collect::>()) + .field("groups", &self.groups().collect::>()) .finish() } } diff --git a/capstone-rs/src/lib.rs b/capstone-rs/src/lib.rs index 556279de..07a7cb41 100644 --- a/capstone-rs/src/lib.rs +++ b/capstone-rs/src/lib.rs @@ -43,8 +43,8 @@ pub use crate::instruction::*; /// ``` pub mod prelude { pub use crate::arch::{ - self, ArchInsnDetail, BuildsCapstone, BuildsCapstoneEndian, BuildsCapstoneExtraMode, - BuildsCapstoneSyntax, DetailsArchInsn, + self, ArchInsnDetail, ArchTag, BuildsCapstone, BuildsCapstoneEndian, + BuildsCapstoneExtraMode, BuildsCapstoneSyntax, DetailsArchInsn, }; pub use crate::{ Capstone, CsResult, InsnDetail, InsnGroupId, InsnGroupIdInt, InsnId, InsnIdInt, RegId, diff --git a/cstool/src/bin/cstool.rs b/cstool/src/bin/cstool.rs index f7130e6b..eb0cd176 100644 --- a/cstool/src/bin/cstool.rs +++ b/cstool/src/bin/cstool.rs @@ -35,14 +35,22 @@ where } /// Print register names -fn reg_names(cs: &Capstone, regs: &[A::RegId]) -> String { - let names: Vec = regs.iter().map(|&x| cs.reg_name(x).unwrap()).collect(); +fn reg_names(cs: &Capstone, regs: I) -> String +where + A: ArchTag, + I: Iterator, +{ + let names: Vec = regs.map(|x| cs.reg_name(x).unwrap()).collect(); names.join(", ") } /// Print instruction group names -fn group_names(cs: &Capstone, regs: &[A::InsnGroupId]) -> String { - let names: Vec = regs.iter().map(|&x| cs.group_name(x).unwrap()).collect(); +fn group_names(cs: &Capstone, regs: I) -> String +where + A: ArchTag, + I: Iterator, +{ + let names: Vec = regs.map(|x| cs.group_name(x).unwrap()).collect(); names.join(", ") }