diff --git a/.gitignore b/.gitignore index 185ff4f756..924a93e807 100644 --- a/.gitignore +++ b/.gitignore @@ -2,10 +2,12 @@ target /doc tex/*/out *.dot +*.out *.rs.bk .vscode *.mm_profdata perf.data perf.data.old flamegraph.svg +tests/extern-so/libtestlib.so .auto-* diff --git a/Cargo.lock b/Cargo.lock index f660ed7762..8f03cdbdf0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "abort_on_panic" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955f37ac58af2416bac687c8ab66a4ccba282229bd7422a28d2281a5e66a6116" + [[package]] name = "addr2line" version = "0.17.0" @@ -327,6 +333,36 @@ version = "0.2.112" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125" +[[package]] +name = "libffi" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e08093a2ddeee94bd0c830a53d895ff91f1f3bb0f9b3c8c6b00739cdf76bc1d" +dependencies = [ + "abort_on_panic", + "libc", + "libffi-sys", +] + +[[package]] +name = "libffi-sys" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab4106b7f09d7b87d021334d5618fac1dfcfb824d4c5fe111ff0074dfd242e15" +dependencies = [ + "cc", +] + +[[package]] +name = "libloading" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efbc0f03f9a775e9f6aed295c6a1ba2253c5757a9e03d55c6caa46a681abcddd" +dependencies = [ + "cfg-if", + "winapi", +] + [[package]] name = "lock_api" version = "0.4.5" @@ -401,6 +437,8 @@ dependencies = [ "getrandom", "lazy_static", "libc", + "libffi", + "libloading", "log", "measureme", "rand", diff --git a/Cargo.toml b/Cargo.toml index 208b3a7644..47237a2bd7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,8 @@ doctest = false # and no doc tests [dependencies] getrandom = { version = 
"0.2", features = ["std"] } env_logger = "0.9" +libffi = "3.0.0" +libloading = "0.7" log = "0.4" shell-escape = "0.1.4" rand = "0.8" diff --git a/README.md b/README.md index 5fbf89c86b..387b7a87e9 100644 --- a/README.md +++ b/README.md @@ -285,6 +285,14 @@ environment variable. We first document the most relevant and most commonly used `TERM` environment variable is excluded by default to [speed up the test harness](https://github.com/rust-lang/miri/issues/1702). This has no effect unless `-Zmiri-disable-isolation` is also set. +* `-Zmiri-extern-so-file=<path to a shared object file>` is an experimental flag for providing support + for FFI calls. + **WARNING**: If an invalid/incorrect `.so` file is specified, this can cause undefined behaviour in Miri itself! + And of course, Miri cannot do any checks on the actions taken by the external code. + This is **work in progress**; currently, only integer arguments and return values are + supported (and no, pointer/integer casts to work around this limitation will not work; + they will fail horribly). + Follow [the discussion on supporting other types](https://github.com/rust-lang/miri/issues/2365). * `-Zmiri-env-forward=<var>` forwards the `var` environment variable to the interpreted program. Can be used multiple times to forward several variables. This takes precedence over `-Zmiri-env-exclude`: if a variable is both forwarded and exluced, it *will* get forwarded. 
This
diff --git a/src/bin/miri.rs b/src/bin/miri.rs
index 489eb95990..eab884e38c 100644
--- a/src/bin/miri.rs
+++ b/src/bin/miri.rs
@@ -510,6 +510,19 @@ fn main() {
                 "full" => BacktraceStyle::Full,
                 _ => panic!("-Zmiri-backtrace may only be 0, 1, or full"),
             };
+        } else if let Some(param) = arg.strip_prefix("-Zmiri-extern-so-file=") {
+            let filename = param.to_string();
+            if std::path::Path::new(&filename).exists() {
+                if let Some(other_filename) = &miri_config.external_so_file {
+                    panic!(
+                        "-Zmiri-extern-so-file external SO file is already set to {}",
+                        other_filename.display()
+                    );
+                }
+                miri_config.external_so_file = Some(filename.into());
+            } else {
+                panic!("-Zmiri-extern-so-file path {} does not exist", filename);
+            }
         } else {
             // Forward to rustc.
             rustc_args.push(arg);
diff --git a/src/eval.rs b/src/eval.rs
index 53264bd465..ff56f2d20f 100644
--- a/src/eval.rs
+++ b/src/eval.rs
@@ -4,6 +4,7 @@ use std::collections::HashSet;
 use std::ffi::OsStr;
 use std::iter;
 use std::panic::{self, AssertUnwindSafe};
+use std::path::PathBuf;
 use std::thread;
 
 use log::info;
@@ -125,6 +126,9 @@ pub struct MiriConfig {
     pub report_progress: Option<u32>,
     /// Whether Stacked Borrows retagging should recurse into fields of datatypes.
     pub retag_fields: bool,
+    /// The location of a shared object file to load when calling external functions
+    /// TODO! consider allowing users to specify paths to multiple SO files, or to a directory
+    pub external_so_file: Option<PathBuf>,
 }
 
 impl Default for MiriConfig {
@@ -155,6 +159,7 @@ impl Default for MiriConfig {
             preemption_rate: 0.01, // 1%
             report_progress: None,
             retag_fields: false,
+            external_so_file: None,
         }
     }
 }
diff --git a/src/intptrcast.rs b/src/intptrcast.rs
index 99fc086a22..1c2b021809 100644
--- a/src/intptrcast.rs
+++ b/src/intptrcast.rs
@@ -1,13 +1,13 @@
 use std::cell::RefCell;
 use std::cmp::max;
-use std::collections::hash_map::Entry;
+use std::collections::{hash_map::Entry, BTreeMap};
 
 use log::trace;
 use rand::Rng;
 
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_span::Span;
-use rustc_target::abi::{HasDataLayout, Size};
+use rustc_target::abi::{Align, HasDataLayout, Size};
 
 use crate::*;
 
@@ -26,9 +26,9 @@ pub type GlobalState = RefCell<GlobalStateInner>;
 
 #[derive(Clone, Debug)]
 pub struct GlobalStateInner {
-    /// This is used as a map between the address of each allocation and its `AllocId`.
-    /// It is always sorted
-    int_to_ptr_map: Vec<(u64, AllocId)>,
+    /// This is a map between the address of each allocation and its `AllocId`.
+    /// Since it's a `BTreeMap`, it is always sorted, and provides efficient insertion.
+    int_to_ptr_map: BTreeMap<u64, AllocId>,
     /// The base address for each allocation. We cannot put that into
     /// `AllocExtra` because function pointers also have a base address, and
     /// they do not have an `AllocExtra`.
@@ -46,11 +46,17 @@ pub struct GlobalStateInner {
 
 impl GlobalStateInner {
     pub fn new(config: &MiriConfig) -> Self {
+        // If we're in FFI mode, then the `next_base_addr` is only used to assign fake addresses
+        // to allocations that don't have associated arrays of bytes.
+        // CURRENT HACK:
+        // We start at 1 to avoid overlap with existing/future real memory the program has
+        // pointers to.
+        let next_base_addr = if config.external_so_file.is_some() { 1 } else { STACK_ADDR };
         GlobalStateInner {
-            int_to_ptr_map: Vec::default(),
+            int_to_ptr_map: BTreeMap::default(),
             base_addr: FxHashMap::default(),
             exposed: FxHashSet::default(),
-            next_base_addr: STACK_ADDR,
+            next_base_addr,
             provenance_mode: config.provenance_mode,
         }
     }
@@ -63,22 +69,26 @@ impl<'mir, 'tcx> GlobalStateInner {
         let global_state = ecx.machine.intptrcast.borrow();
         assert!(global_state.provenance_mode != ProvenanceMode::Strict);
 
-        let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);
-
         // Determine the in-bounds provenance for this pointer.
         // (This is only called on an actual access, so in-bounds is the only possible kind of provenance.)
-        let alloc_id = match pos {
-            Ok(pos) => Some(global_state.int_to_ptr_map[pos].1),
-            Err(0) => None,
-            Err(pos) => {
-                // This is the largest of the adresses smaller than `int`,
-                // i.e. the greatest lower bound (glb)
-                let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
-                // This never overflows because `addr >= glb`
-                let offset = addr - glb;
-                // If the offset exceeds the size of the allocation, don't use this `alloc_id`.
-                let size = ecx.get_alloc_info(alloc_id).0;
-                if offset <= size.bytes() { Some(alloc_id) } else { None }
+        let alloc_id = match global_state.int_to_ptr_map.get(&addr) {
+            Some(&id) => Some(id),
+            None => {
+                // If the address is not in the map, we check the position it should be inserted at.
+                // This returns the max key in the map less than `addr`.
+                match global_state.int_to_ptr_map.range(..addr).next_back() {
+                    // Should be inserted at the beginning.
+                    None => None,
+                    // This is the largest of the addresses smaller than `int`,
+                    // i.e. the greatest lower bound (glb).
+                    Some((glb, &alloc_id)) => {
+                        // This never overflows because `addr >= glb`
+                        let offset = addr - glb;
+                        // If the offset exceeds the size of the allocation, don't use this `alloc_id`.
+                        let size = ecx.get_alloc_info(alloc_id).0;
+                        if offset <= size.bytes() { Some(alloc_id) } else { None }
+                    }
+                }
             }
         }?;
 
@@ -156,8 +166,72 @@ impl<'mir, 'tcx> GlobalStateInner {
         Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
     }
 
+    /// Create a fake address for a new allocation, of a particular size and alignment.
+    /// Returns `(base_addr, next_base_addr)`; the caller must store the second value back.
+    fn get_next_fake_addr(
+        ecx: &MiriEvalContext<'mir, 'tcx>,
+        align: Align,
+        size: Size,
+        next_base_addr: u64,
+    ) -> (u64, u64) {
+        // This allocation does not have a base address yet, pick one.
+        // Leave some space to the previous allocation, to give it some chance to be less aligned.
+        // It also doesn't correspond to a real array of bytes.
+        // HACK: we're not going to actually have pointers in the program that correspond to
+        // the really low addresses, so let's use these as placeholders for these allocations.
+        // This makes sure we won't overlap with any existing (real) addresses.
+        // An alternate hack, which we had before, was to create and leak a Box:
+        // `let new_addr = Box::leak(Box::new(0u128)) as *const u128 as u64;`
+        let slack = {
+            let mut rng = ecx.machine.rng.borrow_mut();
+            // This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
+            rng.gen_range(0..16)
+        };
+        // From next_base_addr + slack, round up to adjust for alignment.
+        let base_addr = next_base_addr.checked_add(slack).unwrap();
+        let base_addr = Self::align_addr(base_addr, align.bytes());
+
+        // Remember next base address. If this allocation is zero-sized, leave a gap
+        // of at least 1 to avoid two allocations having the same base address.
+        // (The logic in `alloc_id_from_addr` assumes unique addresses, and different
+        // function/vtable pointers need to be distinguishable!)
+        let next_base_addr = base_addr.checked_add(max(size.bytes(), 1)).unwrap();
+        (base_addr, next_base_addr)
+    }
+
     fn alloc_base_addr(ecx: &MiriEvalContext<'mir, 'tcx>, alloc_id: AllocId) -> u64 {
-        let mut global_state = ecx.machine.intptrcast.borrow_mut();
+        let in_ffi_mode = ecx.machine.external_so_lib.is_some();
+        // With our hack, base_addr should always be fully aligned
+        let mut global_state = match ecx.machine.intptrcast.try_borrow_mut() {
+            Ok(gstate) => gstate,
+            Err(_) => {
+                if in_ffi_mode {
+                    // We're recursing!
+                    let (size, align, _kind) = ecx.get_alloc_info(alloc_id);
+                    let new_addr = unsafe {
+                        // FIXME(unsound): `try_borrow_mut` just failed, so a borrow of the global
+                        // state is still live while we write to it through `as_ptr()`. This
+                        // aliasing hack has to be replaced before FFI mode can be relied upon.
+                        let next_base_addr = (*ecx.machine.intptrcast.as_ptr()).next_base_addr;
+                        let (new_addr, next_base_addr) =
+                            Self::get_next_fake_addr(ecx, align, size, next_base_addr);
+                        (*ecx.machine.intptrcast.as_ptr()).base_addr.insert(alloc_id, new_addr);
+                        (*ecx.machine.intptrcast.as_ptr())
+                            .int_to_ptr_map
+                            .insert(new_addr, alloc_id);
+                        (*ecx.machine.intptrcast.as_ptr()).next_base_addr = next_base_addr;
+                        new_addr
+                    };
+                    trace!(
+                        "Recursive case: Assigning base address {:#x} to allocation {:?}",
+                        new_addr,
+                        alloc_id,
+                    );
+                    return new_addr;
+                }
+                panic!("Can't mutably borrow the `intptrcast` global state!");
+            }
+        };
         let global_state = &mut *global_state;
 
         match global_state.base_addr.entry(alloc_id) {
@@ -167,34 +241,29 @@ impl<'mir, 'tcx> GlobalStateInner {
                 // it became dangling. Hence we allow dead allocations.
                 let (size, align, _kind) = ecx.get_alloc_info(alloc_id);
 
-                // This allocation does not have a base address yet, pick one.
-                // Leave some space to the previous allocation, to give it some chance to be less aligned.
-                let slack = {
-                    let mut rng = ecx.machine.rng.borrow_mut();
-                    // This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
-                    rng.gen_range(0..16)
+                // Short circuit -- only call `ecx.get_alloc_base_addr` if we're `in_ffi_mode`.
+                let base_addr = if in_ffi_mode && let Ok(addr) = ecx.get_alloc_base_addr(alloc_id) {
+                    assert!(addr.bytes() % 16 == 0);
+                    addr.bytes()
+                } else {
+                    let (new_addr, next_base_addr) = Self::get_next_fake_addr(ecx, align, size, global_state.next_base_addr);
+                    global_state.next_base_addr = next_base_addr;
+                    new_addr
                 };
-                // From next_base_addr + slack, round up to adjust for alignment.
-                let base_addr = global_state.next_base_addr.checked_add(slack).unwrap();
-                let base_addr = Self::align_addr(base_addr, align.bytes());
+
+                // This allocation does not have a base address yet, assign its bytes base.
                 entry.insert(base_addr);
                 trace!(
-                    "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
+                    "Assigning base address {:#x} to allocation {:?} (size: {}, align: {})",
                     base_addr,
                     alloc_id,
                     size.bytes(),
                     align.bytes(),
-                    slack,
                 );
 
-                // Remember next base address. If this allocation is zero-sized, leave a gap
-                // of at least 1 to avoid two allocations having the same base address.
-                // (The logic in `alloc_id_from_addr` assumes unique addresses, and different
-                // function/vtable pointers need to be distinguishable!)
-                global_state.next_base_addr = base_addr.checked_add(max(size.bytes(), 1)).unwrap();
-                // Given that `next_base_addr` increases in each allocation, pushing the
-                // corresponding tuple keeps `int_to_ptr_map` sorted
-                global_state.int_to_ptr_map.push((base_addr, alloc_id));
+                // Map has no duplicates so no need to remove copies.
+                // Map is always sorted.
+                global_state.int_to_ptr_map.insert(base_addr, alloc_id);
 
                 base_addr
             }
diff --git a/src/machine.rs b/src/machine.rs
index 2c9bfe803a..d2e2930c78 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -356,6 +356,9 @@ pub struct Evaluator<'mir, 'tcx> {
     pub(crate) report_progress: Option<u32>,
     /// The number of blocks that passed since the last progress report.
     pub(crate) since_progress_report: u32,
+
+    /// Handle of the optional C shared object file
+    pub external_so_lib: Option<(libloading::Library, std::path::PathBuf)>,
 }
 
 impl<'mir, 'tcx> Evaluator<'mir, 'tcx> {
@@ -410,6 +413,18 @@ impl<'mir, 'tcx> Evaluator<'mir, 'tcx> {
             preemption_rate: config.preemption_rate,
             report_progress: config.report_progress,
             since_progress_report: 0,
+            external_so_lib: config.external_so_file.as_ref().map(|lib_file_path| {
+                // Note: it is the user's responsibility to provide a correct SO file.
+                // WATCH OUT: If an invalid/incorrect SO file is specified, this can cause
+                // undefined behaviour in Miri itself!
+                (
+                    unsafe {
+                        libloading::Library::new(lib_file_path)
+                            .expect("Failed to read specified shared object file")
+                    },
+                    lib_file_path.clone(),
+                )
+            }),
         }
     }
 
diff --git a/src/shims/ffi_support.rs b/src/shims/ffi_support.rs
new file mode 100644
index 0000000000..c486f913fc
--- /dev/null
+++ b/src/shims/ffi_support.rs
@@ -0,0 +1,390 @@
+use libffi::{high::call::*, low::CodePtr};
+use std::ops::Deref;
+
+use rustc_middle::ty::{IntTy, Ty, TyKind, TypeAndMut, UintTy};
+use rustc_span::Symbol;
+
+use crate::*;
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
+
+pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> {
+    /// Extract the scalar value from the result of reading a scalar from the machine,
+    /// and convert it to a `CArg`.
+    fn scalar_to_carg(
+        k: ScalarMaybeUninit<Provenance>,
+        arg_type: &Ty<'tcx>,
+        cx: &MiriEvalContext<'mir, 'tcx>,
+    ) -> InterpResult<'tcx, CArg> {
+        match arg_type.kind() {
+            // If the primitive provided can be converted to a type matching the type pattern
+            // then create a `CArg` of this primitive value with the corresponding `CArg` constructor.
+            // the ints
+            TyKind::Int(IntTy::I8) => {
+                return Ok(CArg::Int8(k.to_i8()?));
+            }
+            TyKind::Int(IntTy::I16) => {
+                return Ok(CArg::Int16(k.to_i16()?));
+            }
+            TyKind::Int(IntTy::I32) => {
+                return Ok(CArg::Int32(k.to_i32()?));
+            }
+            TyKind::Int(IntTy::I64) => {
+                return Ok(CArg::Int64(k.to_i64()?));
+            }
+            TyKind::Int(IntTy::Isize) => {
+                return Ok(CArg::ISize(k.to_machine_isize(cx)?.try_into().unwrap()));
+            }
+            // the uints
+            TyKind::Uint(UintTy::U8) => {
+                return Ok(CArg::UInt8(k.to_u8()?));
+            }
+            TyKind::Uint(UintTy::U16) => {
+                return Ok(CArg::UInt16(k.to_u16()?));
+            }
+            TyKind::Uint(UintTy::U32) => {
+                return Ok(CArg::UInt32(k.to_u32()?));
+            }
+            TyKind::Uint(UintTy::U64) => {
+                return Ok(CArg::UInt64(k.to_u64()?));
+            }
+            TyKind::Uint(UintTy::Usize) => {
+                return Ok(CArg::USize(k.to_machine_usize(cx)?.try_into().unwrap()));
+            }
+            // pointers
+            TyKind::RawPtr(TypeAndMut { ty: some_ty, mutbl: some_mut }) => {
+                match k {
+                    ScalarMaybeUninit::Scalar(Scalar::Ptr(ptr, _)) => {
+                        let qq = ptr.into_parts().1.bytes_usize();
+                        match (some_ty.kind(), some_mut) {
+                            // int
+                            (TyKind::Int(IntTy::I8), rustc_hir::Mutability::Mut) => {
+                                return Ok(CArg::MutPtrInt8(qq as *mut i8));
+                            }
+                            (TyKind::Int(IntTy::I8), rustc_hir::Mutability::Not) => {
+                                return Ok(CArg::ConstPtrInt8(qq as *const i8));
+                            }
+                            (TyKind::Int(IntTy::I16), rustc_hir::Mutability::Mut) => {
+                                return Ok(CArg::MutPtrInt16(qq as *mut i16));
+                            }
+                            (TyKind::Int(IntTy::I16), rustc_hir::Mutability::Not) => {
+                                return Ok(CArg::ConstPtrInt16(qq as *const i16));
+                            }
+                            (TyKind::Int(IntTy::I32), rustc_hir::Mutability::Mut) => {
+                                return Ok(CArg::MutPtrInt32(qq as *mut i32));
+                            }
+                            (TyKind::Int(IntTy::I32), rustc_hir::Mutability::Not) => {
+                                return Ok(CArg::ConstPtrInt32(qq as *const i32));
+                            }
+                            (TyKind::Int(IntTy::I64), rustc_hir::Mutability::Mut) => {
+                                return Ok(CArg::MutPtrInt64(qq as *mut i64));
+                            }
+                            (TyKind::Int(IntTy::I64), rustc_hir::Mutability::Not) => {
+                                return Ok(CArg::ConstPtrInt64(qq as *const i64));
+                            }
+                            // uints
+                            (TyKind::Uint(UintTy::U8), rustc_hir::Mutability::Mut) => {
+                                return Ok(CArg::MutPtrUInt8(qq as *mut u8));
+                            }
+                            (TyKind::Uint(UintTy::U8), rustc_hir::Mutability::Not) => {
+                                return Ok(CArg::ConstPtrUInt8(qq as *const u8));
+                            }
+                            (TyKind::Uint(UintTy::U16), rustc_hir::Mutability::Mut) => {
+                                return Ok(CArg::MutPtrUInt16(qq as *mut u16));
+                            }
+                            (TyKind::Uint(UintTy::U16), rustc_hir::Mutability::Not) => {
+                                return Ok(CArg::ConstPtrUInt16(qq as *const u16));
+                            }
+                            (TyKind::Uint(UintTy::U32), rustc_hir::Mutability::Mut) => {
+                                return Ok(CArg::MutPtrUInt32(qq as *mut u32));
+                            }
+                            (TyKind::Uint(UintTy::U32), rustc_hir::Mutability::Not) => {
+                                return Ok(CArg::ConstPtrUInt32(qq as *const u32));
+                            }
+                            (TyKind::Uint(UintTy::U64), rustc_hir::Mutability::Mut) => {
+                                return Ok(CArg::MutPtrUInt64(qq as *mut u64));
+                            }
+                            (TyKind::Uint(UintTy::U64), rustc_hir::Mutability::Not) => {
+                                return Ok(CArg::ConstPtrUInt64(qq as *const u64));
+                            }
+                            // recursive case
+                            (TyKind::RawPtr(..), _) => {
+                                return Ok(CArg::RecCarg(Box::new(Self::scalar_to_carg(
+                                    k, some_ty, cx,
+                                )?)));
+                            }
+                            _ => {}
+                        }
+                    }
+                    _ => {}
+                }
+            }
+            _ => {}
+        }
+        // If no primitives were returned then we have an unsupported type.
+        throw_unsup_format!(
+            "unsupported scalar argument type to external C function: {:?}",
+            arg_type
+        );
+    }
+
+    /// Call external C function and
+    /// store output, depending on return type in the function signature.
+    fn call_external_c_and_store_return<'a>(
+        &mut self,
+        link_name: Symbol,
+        dest: &PlaceTy<'tcx, Provenance>,
+        ptr: CodePtr,
+        libffi_args: Vec<libffi::high::Arg<'a>>,
+    ) -> InterpResult<'tcx, ()> {
+        let this = self.eval_context_mut();
+
+        // Unsafe because of the call to external C code.
+        // Because this is calling a C function it is not necessarily sound,
+        // but there is no way around this and we've checked as much as we can.
+        unsafe {
+            // If the return type of a function is a primitive integer type,
+            // then call the function (`ptr`) with arguments `libffi_args`, store the return value as the specified
+            // primitive integer type, and then write this value out to the miri memory as an integer.
+            match dest.layout.ty.kind() {
+                // ints
+                TyKind::Int(IntTy::I8) => {
+                    let x = call::<i8>(ptr, libffi_args.as_slice());
+                    this.write_int(x, dest)?;
+                    return Ok(());
+                }
+                TyKind::Int(IntTy::I16) => {
+                    let x = call::<i16>(ptr, libffi_args.as_slice());
+                    this.write_int(x, dest)?;
+                    return Ok(());
+                }
+                TyKind::Int(IntTy::I32) => {
+                    let x = call::<i32>(ptr, libffi_args.as_slice());
+                    this.write_int(x, dest)?;
+                    return Ok(());
+                }
+                TyKind::Int(IntTy::I64) => {
+                    let x = call::<i64>(ptr, libffi_args.as_slice());
+                    this.write_int(x, dest)?;
+                    return Ok(());
+                }
+                TyKind::Int(IntTy::Isize) => {
+                    let x = call::<isize>(ptr, libffi_args.as_slice());
+                    // `isize` doesn't `impl Into<i128>`, so convert manually.
+                    // Convert to `i64` since this covers both 32- and 64-bit machines.
+                    this.write_int(i64::try_from(x).unwrap(), dest)?;
+                    return Ok(());
+                }
+                // uints
+                TyKind::Uint(UintTy::U8) => {
+                    let x = call::<u8>(ptr, libffi_args.as_slice());
+                    this.write_int(x, dest)?;
+                    return Ok(());
+                }
+                TyKind::Uint(UintTy::U16) => {
+                    let x = call::<u16>(ptr, libffi_args.as_slice());
+                    this.write_int(x, dest)?;
+                    return Ok(());
+                }
+                TyKind::Uint(UintTy::U32) => {
+                    let x = call::<u32>(ptr, libffi_args.as_slice());
+                    this.write_int(x, dest)?;
+                    return Ok(());
+                }
+                TyKind::Uint(UintTy::U64) => {
+                    let x = call::<u64>(ptr, libffi_args.as_slice());
+                    this.write_int(x, dest)?;
+                    return Ok(());
+                }
+                TyKind::Uint(UintTy::Usize) => {
+                    let x = call::<usize>(ptr, libffi_args.as_slice());
+                    // `usize` doesn't `impl Into<i128>`, so convert manually.
+                    // Convert to `u64` since this covers both 32- and 64-bit machines.
+                    this.write_int(u64::try_from(x).unwrap(), dest)?;
+                    return Ok(());
+                }
+                // Functions with no declared return type (i.e., the default return)
+                // have the output_type `Tuple([])`.
+                TyKind::Tuple(t_list) =>
+                    if t_list.len() == 0 {
+                        call::<()>(ptr, libffi_args.as_slice());
+                        return Ok(());
+                    },
+                // Pointer return values are not supported yet. They deliberately have no
+                // arm here, so they reach the `throw_unsup_format!` below instead of
+                // returning `Ok` without ever calling the function or writing `dest`.
+                _ => {}
+            }
+            // FIXME: deal with all the other return types
+            throw_unsup_format!("unsupported return type to external C function: {:?}", link_name);
+        }
+    }
+
+    /// Get the pointer to the function of the specified name in the shared object file,
+    /// if it exists. The function must be in the shared object file specified: we do *not*
+    /// return pointers to functions in dependencies of the library.
+    fn get_func_ptr_explicitly_from_lib(&mut self, link_name: Symbol) -> Option<CodePtr> {
+        let this = self.eval_context_mut();
+        // Try getting the function from the shared library.
+        // On windows `_lib_path` will be unused, hence the name starting with `_`.
+        let (lib, _lib_path) = this.machine.external_so_lib.as_ref().unwrap();
+        let func: libloading::Symbol<'_, unsafe extern "C" fn()> = unsafe {
+            match lib.get(link_name.as_str().as_bytes()) {
+                Ok(x) => x,
+                Err(_) => {
+                    return None;
+                }
+            }
+        };
+
+        // FIXME: this is a hack!
+        // The `libloading` crate will automatically load system libraries like `libc`.
+        // On linux `libloading` is based on `dlsym`: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#202
+        // and `dlsym`(https://linux.die.net/man/3/dlsym) looks through the dependency tree of the
+        // library if it can't find the symbol in the library itself.
+        // So, in order to check if the function was actually found in the specified
+        // `machine.external_so_lib` we need to check its `dli_fname` and compare it to
+        // the specified SO file path.
+        // This code is a reimplementation of the mechanism for getting `dli_fname` in `libloading`,
+        // from: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#411
+        // using the `libc` crate where this interface is public.
+        // No `libc::dladdr` on windows.
+        #[cfg(unix)]
+        let mut info = std::mem::MaybeUninit::<libc::Dl_info>::uninit();
+        #[cfg(unix)]
+        unsafe {
+            if libc::dladdr(*func.deref() as *const _, info.as_mut_ptr()) != 0 {
+                if std::ffi::CStr::from_ptr(info.assume_init().dli_fname).to_str().unwrap()
+                    != _lib_path.to_str().unwrap()
+                {
+                    return None;
+                }
+            }
+        }
+        // Return a pointer to the function.
+        Some(CodePtr(*func.deref() as *mut _))
+    }
+
+    /// Call specified external C function, with supplied arguments.
+    /// Need to convert all the arguments from their hir representations to
+    /// a form compatible with C (through `libffi` call).
+    /// Then, convert return from the C call into a corresponding form that
+    /// can be stored in Miri internal memory.
+    fn call_and_add_external_c_fct_to_context(
+        &mut self,
+        link_name: Symbol,
+        dest: &PlaceTy<'tcx, Provenance>,
+        args: &[OpTy<'tcx, Provenance>],
+    ) -> InterpResult<'tcx, bool> {
+        // Get the pointer to the function in the shared object file if it exists.
+        let code_ptr = match self.get_func_ptr_explicitly_from_lib(link_name) {
+            Some(ptr) => ptr,
+            None => {
+                // Shared object file does not export this function -- try the shims next.
+                return Ok(false);
+            }
+        };
+
+        let this = self.eval_context_mut();
+
+        // Get the function arguments, and convert them to `libffi`-compatible form.
+        let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
+        for cur_arg in args.iter() {
+            libffi_args.push(Self::scalar_to_carg(
+                this.read_scalar(cur_arg)?,
+                &cur_arg.layout.ty,
+                this,
+            )?);
+        }
+
+        // Convert them to `libffi::high::Arg` type.
+        let libffi_args = libffi_args
+            .iter()
+            .map(|cur_arg| cur_arg.arg_downcast())
+            .collect::<Vec<libffi::high::Arg<'_>>>();
+
+        // Call the function and store output, depending on return type in the function signature.
+        self.call_external_c_and_store_return(link_name, dest, code_ptr, libffi_args)?;
+        Ok(true)
+    }
+}
+
+#[derive(Debug, Clone)]
+/// Enum of supported arguments to external C functions.
+pub enum CArg {
+    /// 8-bit signed integer.
+    Int8(i8),
+    /// 16-bit signed integer.
+    Int16(i16),
+    /// 32-bit signed integer.
+    Int32(i32),
+    /// 64-bit signed integer.
+    Int64(i64),
+    /// isize.
+    ISize(isize),
+    /// 8-bit unsigned integer.
+    UInt8(u8),
+    /// 16-bit unsigned integer.
+    UInt16(u16),
+    /// 32-bit unsigned integer.
+    UInt32(u32),
+    /// 64-bit unsigned integer.
+    UInt64(u64),
+    /// usize.
+    USize(usize),
+    // mutable pointers
+    MutPtrInt8(*mut i8),
+    MutPtrInt16(*mut i16),
+    MutPtrInt32(*mut i32),
+    MutPtrInt64(*mut i64),
+    MutPtrUInt8(*mut u8),
+    MutPtrUInt16(*mut u16),
+    MutPtrUInt32(*mut u32),
+    MutPtrUInt64(*mut u64),
+    // const pointers
+    ConstPtrInt8(*const i8),
+    ConstPtrInt16(*const i16),
+    ConstPtrInt32(*const i32),
+    ConstPtrInt64(*const i64),
+    ConstPtrUInt8(*const u8),
+    ConstPtrUInt16(*const u16),
+    ConstPtrUInt32(*const u32),
+    ConstPtrUInt64(*const u64),
+    /// Recursive `CArg` (for nested pointers).
+    RecCarg(Box<CArg>),
+}
+
+impl<'a> CArg {
+    /// Convert a `CArg` to a `libffi` argument type.
+    pub fn arg_downcast(&'a self) -> libffi::high::Arg<'a> {
+        match self {
+            CArg::Int8(i) => arg(i),
+            CArg::Int16(i) => arg(i),
+            CArg::Int32(i) => arg(i),
+            CArg::Int64(i) => arg(i),
+            CArg::ISize(i) => arg(i),
+            CArg::UInt8(i) => arg(i),
+            CArg::UInt16(i) => arg(i),
+            CArg::UInt32(i) => arg(i),
+            CArg::UInt64(i) => arg(i),
+            CArg::USize(i) => arg(i),
+            CArg::MutPtrInt8(i) => arg(i),
+            CArg::MutPtrInt16(i) => arg(i),
+            CArg::MutPtrInt32(i) => arg(i),
+            CArg::MutPtrInt64(i) => arg(i),
+            CArg::MutPtrUInt8(i) => arg(i),
+            CArg::MutPtrUInt16(i) => arg(i),
+            CArg::MutPtrUInt32(i) => arg(i),
+            CArg::MutPtrUInt64(i) => arg(i),
+            CArg::ConstPtrInt8(i) => arg(i),
+            CArg::ConstPtrInt16(i) => arg(i),
+            CArg::ConstPtrInt32(i) => arg(i),
+            CArg::ConstPtrInt64(i) => arg(i),
+            CArg::ConstPtrUInt8(i) => arg(i),
+            CArg::ConstPtrUInt16(i) => arg(i),
+            CArg::ConstPtrUInt32(i) => arg(i),
+            CArg::ConstPtrUInt64(i) => arg(i),
+            CArg::RecCarg(box_carg) => (*box_carg).arg_downcast(),
+        }
+    }
+}
diff --git a/src/shims/foreign_items.rs b/src/shims/foreign_items.rs
index 208e7ea788..0af547e10c 100644
--- a/src/shims/foreign_items.rs
+++ b/src/shims/foreign_items.rs
@@ -23,6 +23,7 @@ use rustc_target::{
 
 use super::backtrace::EvalContextExt as _;
 use crate::helpers::{convert::Truncate, target_os_is_unix};
+use crate::shims::ffi_support::EvalContextExt as _;
 use crate::*;
 
 ///
Returned by `emulate_foreign_item_by_name`.
@@ -31,7 +32,7 @@ pub enum EmulateByNameResult<'mir, 'tcx> {
     NeedsJumping,
     /// Jumping has already been taken care of.
     AlreadyJumped,
-    /// A MIR body has been found for the function
+    /// A MIR body has been found for the function.
     MirBody(&'mir mir::Body<'tcx>, ty::Instance<'tcx>),
     /// The item is not supported.
     NotSupported,
@@ -366,6 +367,17 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
     ) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
         let this = self.eval_context_mut();
 
+        // First deal with any external C functions in linked .so file
+        // (if any SO file is specified).
+        if this.machine.external_so_lib.is_some() {
+            // An Ok(false) here means that the function being called was not exported
+            // by the specified SO file; we should continue and check if it corresponds to
+            // a provided shim.
+            if this.call_and_add_external_c_fct_to_context(link_name, dest, args)? {
+                return Ok(EmulateByNameResult::NeedsJumping);
+            }
+        }
+
         // Here we dispatch all the shims for foreign functions. If you have a platform specific
         // shim, add it to the corresponding submodule.
         match link_name.as_str() {
@@ -722,9 +734,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
                 target => throw_unsup_format!("the target `{}` is not supported", target),
             }
         };
-
         // We only fall through to here if we did *not* hit the `_` arm above,
-        // i.e., if we actually emulated the function.
+        // i.e., if we actually emulated the function with one of the shims.
         Ok(EmulateByNameResult::NeedsJumping)
     }
 
diff --git a/src/shims/mod.rs b/src/shims/mod.rs
index f5b4de30a5..e223cc848c 100644
--- a/src/shims/mod.rs
+++ b/src/shims/mod.rs
@@ -1,4 +1,5 @@
 mod backtrace;
+pub mod ffi_support;
 pub mod foreign_items;
 pub mod intrinsics;
 pub mod unix;
diff --git a/tests/compiletest.rs b/tests/compiletest.rs
index 48e0ae855b..d187f812b4 100644
--- a/tests/compiletest.rs
+++ b/tests/compiletest.rs
@@ -1,7 +1,7 @@
 use colored::*;
 use regex::Regex;
 use std::path::{Path, PathBuf};
-use std::{env, ffi::OsString};
+use std::{env, ffi::OsString, process::Command};
 use ui_test::{color_eyre::Result, Config, DependencyBuilder, Mode, OutputConflictHandling};
 
 fn miri_path() -> PathBuf {
@@ -35,6 +35,33 @@ fn run_tests(mode: Mode, path: &str, target: Option<String>) -> Result<()> {
         flags.push(target.into());
     }
 
+    // If we're on linux, then build the shared object file for testing external C function calls.
+    if cfg!(target_os = "linux") {
+        let cc = option_env!("CC").unwrap_or("cc");
+        let cc_output = Command::new(cc)
+            .args([
+                "-shared",
+                "-o",
+                "tests/extern-so/libtestlib.so",
+                "tests/extern-so/test.c",
+                // Only add the functions specified in libcode.version to the shared object file.
+                // This is to avoid automatically adding `malloc`, etc.
+                // Source: https://anadoxin.org/blog/control-over-symbol-exports-in-gcc.html/
+                "-fPIC",
+                "-Wl,--version-script=tests/extern-so/libcode.version",
+            ])
+            .output()
+            .expect("failed to generate shared object file for testing external C function calls");
+        // `output()` only errors when `cc` could not be run at all; a failed compilation
+        // is reported through the exit status, so check that as well.
+        if !cc_output.status.success() {
+            panic!(
+                "error in generating shared object file for testing external C function calls:\n{}",
+                String::from_utf8_lossy(&cc_output.stderr)
+            );
+        }
+    }
+
     let skip_ui_checks = env::var_os("MIRI_SKIP_UI_CHECKS").is_some();
 
     let output_conflict_handling = match (env::var_os("MIRI_BLESS").is_some(), skip_ui_checks) {
diff --git a/tests/extern-so/libcode.version b/tests/extern-so/libcode.version
new file mode 100644
index 0000000000..d52bb11c51
--- /dev/null
+++ b/tests/extern-so/libcode.version
@@ -0,0 +1,10 @@
+CODEABI_1.0 {
+    global: *double_deref*;
+    *add_one_int*;
+    *printer*;
+    *test_stack_spill*;
+    *get_unsigned_int*;
+    *add_int16*;
+    *add_short_to_long*;
+    local: *;
+};
diff --git a/tests/extern-so/test.c b/tests/extern-so/test.c
new file mode 100644
index 0000000000..36c7ce1e91
--- /dev/null
+++ b/tests/extern-so/test.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+
+int double_deref(const int **p) {
+  return **p;
+}
+
+int add_one_int(int x) {
+  return 2 + x;
+}
+
+void printer() {
+  printf("printing from C\n");
+}
+
+// function with many arguments, to test functionality when some args are stored
+// on the stack
+int test_stack_spill(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k, int l) {
+  return a+b+c+d+e+f+g+h+i+j+k+l;
+}
+
+unsigned int get_unsigned_int() {
+  return -10;
+}
+
+short add_int16(short x) {
+  return x + 3;
+}
+
+long add_short_to_long(short x, long y) {
+  return x + y;
+}
diff --git a/tests/fail/extern-so/function_not_in_SO.rs b/tests/fail/extern-so/function_not_in_SO.rs
new file mode 100644
index 0000000000..c84f0d6c43
--- /dev/null
+++ b/tests/fail/extern-so/function_not_in_SO.rs
@@ -0,0 +1,13 @@
+//@only-target-linux
+//@only-on-host
+//@compile-flags: -Zmiri-extern-so-file=tests/extern-so/libtestlib.so
+
+extern "C" {
+    fn foo();
+}
+
+fn main() {
+    unsafe {
+        foo(); //~ ERROR: unsupported operation: can't call foreign
function: foo + } +} diff --git a/tests/fail/extern-so/function_not_in_SO.stderr b/tests/fail/extern-so/function_not_in_SO.stderr new file mode 100644 index 0000000000..0aca1700b5 --- /dev/null +++ b/tests/fail/extern-so/function_not_in_SO.stderr @@ -0,0 +1,14 @@ +error: unsupported operation: can't call foreign function: foo + --> $DIR/function_not_in_SO.rs:LL:CC + | +LL | foo(); + | ^^^^^ can't call foreign function: foo + | + = help: this is likely not a bug in the program; it indicates that the program performed an operation that the interpreter does not support + = note: backtrace: + = note: inside `main` at $DIR/function_not_in_SO.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to previous error + diff --git a/tests/pass/extern-so/call_extern_c_fcts.rs b/tests/pass/extern-so/call_extern_c_fcts.rs new file mode 100644 index 0000000000..eaea261dd8 --- /dev/null +++ b/tests/pass/extern-so/call_extern_c_fcts.rs @@ -0,0 +1,54 @@ +//@only-target-linux +//@only-on-host +//@compile-flags: -Zmiri-extern-so-file=tests/extern-so/libtestlib.so + +extern "C" { + fn double_deref(x: *const *const i32) -> i32; + fn add_one_int(x: i32) -> i32; + fn add_int16(x: i16) -> i16; + fn test_stack_spill( + a: i32, + b: i32, + c: i32, + d: i32, + e: i32, + f: i32, + g: i32, + h: i32, + i: i32, + j: i32, + k: i32, + l: i32, + ) -> i32; + fn add_short_to_long(x: i16, y: i64) -> i64; + fn get_unsigned_int() -> u32; + fn printer(); +} + +fn main() { + unsafe { + // test function that adds 2 to a provided int + assert_eq!(add_one_int(1), 3); + + // test function that takes the sum of its 12 arguments + assert_eq!(test_stack_spill(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), 78); + + // test function that adds 3 to a 16 bit int + assert_eq!(add_int16(-1i16), 2i16); + + // test function that adds an i16 to an i64 + assert_eq!(add_short_to_long(-1i16, 123456789123i64), 123456789122i64); + + // test function that 
returns -10 as an unsigned int + assert_eq!(get_unsigned_int(), (-10i32) as u32); + + // test void function that prints from C -- call it twice + printer(); + printer(); + + let base: i32 = 42; + let base_p: *const i32 = &base as *const i32; + let base_pp: *const *const i32 = &base_p as *const *const i32; + assert_eq!(double_deref(base_pp), 42); + } +} diff --git a/tests/pass/extern-so/call_extern_c_fcts.stdout b/tests/pass/extern-so/call_extern_c_fcts.stdout new file mode 100644 index 0000000000..df3475a9ea --- /dev/null +++ b/tests/pass/extern-so/call_extern_c_fcts.stdout @@ -0,0 +1,2 @@ +printing from C +printing from C diff --git a/ui_test/src/lib.rs b/ui_test/src/lib.rs index bcc48b4b63..bfc3b7b358 100644 --- a/ui_test/src/lib.rs +++ b/ui_test/src/lib.rs @@ -624,8 +624,8 @@ pub enum Mode { impl Mode { fn ok(self, status: ExitStatus) -> Errors { - match (status.code().unwrap(), self) { - (1, Mode::Fail) | (101, Mode::Panic) | (0, Mode::Pass) => vec![], + match (status.code(), self) { + (Some(1), Mode::Fail) | (Some(101), Mode::Panic) | (Some(0), Mode::Pass) => vec![], _ => vec![Error::ExitStatus(self, status)], } }