From d5440bca573d3a74e2416ec1f8e7f880fce7b4f8 Mon Sep 17 00:00:00 2001 From: Neven Villani Date: Wed, 21 Jun 2023 12:33:46 +0200 Subject: [PATCH] Provenance forging for `miri_pointer_name` --- src/borrow_tracker/mod.rs | 36 ++++++++- .../tree_borrows/diagnostics.rs | 62 ++++++++++---- src/borrow_tracker/tree_borrows/mod.rs | 63 ++++++++++++++- src/borrow_tracker/tree_borrows/unimap.rs | 2 +- src/shims/foreign_items.rs | 19 ++++- .../reserved/cell-protected-write.rs | 4 +- .../reserved/int-protected-write.rs | 2 +- tests/pass/tree_borrows/end-of-protector.rs | 2 +- tests/pass/tree_borrows/formatting.rs | 2 +- tests/pass/tree_borrows/reserved.rs | 2 +- tests/utils/macros.rs | 39 ++++++--- tests/utils/miri_extern.rs | 80 ++++++++++++++++++- 12 files changed, 269 insertions(+), 44 deletions(-) diff --git a/src/borrow_tracker/mod.rs b/src/borrow_tracker/mod.rs index ffc49eedb5..3a5695f26f 100644 --- a/src/borrow_tracker/mod.rs +++ b/src/borrow_tracker/mod.rs @@ -318,7 +318,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { fn give_pointer_debug_name( &mut self, ptr: Pointer>, - nth_parent: u8, name: &str, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); @@ -328,8 +327,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { this.tcx.tcx.sess.warn("Stacked Borrows does not support named pointers; `miri_pointer_name` is a no-op"); Ok(()) } - BorrowTrackerMethod::TreeBorrows => - this.tb_give_pointer_debug_name(ptr, nth_parent, name), + BorrowTrackerMethod::TreeBorrows => this.tb_give_pointer_debug_name(ptr, name), } } @@ -341,6 +339,38 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { BorrowTrackerMethod::TreeBorrows => this.print_tree(alloc_id, show_unnamed), } } + + /// SB: returns `ptr`. + /// TB: forges a pointer with the provenance of the `nb`'th parent of `ptr` + /// in the same allocation. + fn forge_ptr_nth_parent( + &mut self, + ptr: Pointer>, + nb: u8, + ) -> InterpResult<'tcx, Pointer>> { + let this = self.eval_context_mut(); + let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method; + match method { + BorrowTrackerMethod::StackedBorrows => Ok(ptr), + BorrowTrackerMethod::TreeBorrows => this.tb_forge_ptr_nth_parent(ptr, nb), + } + } + + /// SB: returns `ptr1`. + /// TB: forges a pointer with the provenance of the nearest common ancestor of + /// `ptr1` and `ptr2` if they belong to the same allocation. + fn forge_ptr_common_ancestor( + &mut self, + ptr1: Pointer>, + ptr2: Pointer>, + ) -> InterpResult<'tcx, Pointer>> { + let this = self.eval_context_mut(); + let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method; + match method { + BorrowTrackerMethod::StackedBorrows => Ok(ptr1), + BorrowTrackerMethod::TreeBorrows => this.tb_forge_ptr_common_ancestor(ptr1, ptr2), + } + } } /// Extra per-allocation data for borrow tracking diff --git a/src/borrow_tracker/tree_borrows/diagnostics.rs b/src/borrow_tracker/tree_borrows/diagnostics.rs index a87a4bbdda..065dd60efa 100644 --- a/src/borrow_tracker/tree_borrows/diagnostics.rs +++ b/src/borrow_tracker/tree_borrows/diagnostics.rs @@ -1,7 +1,7 @@ use std::fmt; use std::ops::Range; -use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_span::{Span, SpanData}; use crate::borrow_tracker::tree_borrows::{ @@ -181,35 +181,67 @@ impl fmt::Display for NodeDebugInfo { } } -impl<'tcx> Tree { +impl Tree { /// Climb the tree to get the tag of a distant ancestor. /// Allows operations on tags that are unreachable by the program /// but still exist in the tree. Not guaranteed to perform consistently /// if `tag-gc=1`. - fn nth_parent(&self, tag: BorTag, nth_parent: u8) -> Option { - let mut idx = self.tag_mapping.get(&tag).unwrap(); - for _ in 0..nth_parent { + /// + /// NOTE: the `nb`'th parent is computed in a "saturating" way: `nth_parent(root, 1) == root` + pub fn nth_parent(&self, tag: BorTag, nth_parent: u8) -> BorTag { + let mut idx = self.tag_mapping.get(&tag).expect("Tag not found in the allocation"); + for dist in 0..nth_parent { let node = self.nodes.get(idx).unwrap(); - idx = node.parent?; + let Some(parent) = node.parent else { + eprintln!("{tag:?} has no {nth_parent}'th parent (reached the root) at distance {dist}, exiting early"); + break; + }; + idx = parent; } - Some(self.nodes.get(idx).unwrap().tag) + self.nodes.get(idx).unwrap().tag } /// Debug helper: assign name to tag. - pub fn give_pointer_debug_name( - &mut self, - tag: BorTag, - nth_parent: u8, - name: &str, - ) -> InterpResult<'tcx> { - let tag = self.nth_parent(tag, nth_parent).unwrap(); + pub fn give_pointer_debug_name(&mut self, tag: BorTag, name: &str) { let idx = self.tag_mapping.get(&tag).unwrap(); if let Some(node) = self.nodes.get_mut(idx) { node.debug_info.add_name(name); } else { eprintln!("Tag {tag:?} (to be named '{name}') not found!"); } - Ok(()) + } + + /// Find the nearest common ancestor of two tags. + /// This function will panic if the tags do not exist or are not part of the + /// correct allocation. + /// Unlike `nth_parent` this should be more resilient to garbage collection + /// and implementation details. + pub fn common_ancestor(&self, tag1: BorTag, tag2: BorTag) -> BorTag { + // To find the nearest common ancestor without paying too much + // on pathological cases: climb both paths in parallel. + let mut path1 = FxHashSet::default(); + let mut path2 = FxHashSet::default(); + let mut idx1 = self.tag_mapping.get(&tag1).expect("Tag not found in the allocation"); + let mut idx2 = self.tag_mapping.get(&tag2).expect("Tag not found in the allocation"); + loop { + // Record path + path1.insert(idx1); + path2.insert(idx2); + // Check membership of one index in the other path: if so we found the ancestor. + if path2.contains(&idx1) { + return self.nodes.get(idx1).unwrap().tag; + } + if path1.contains(&idx2) { + return self.nodes.get(idx2).unwrap().tag; + } + // Not found yet, climb up one step. + if let Some(parent1) = self.nodes.get(idx1).unwrap().parent { + idx1 = parent1; + } + if let Some(parent2) = self.nodes.get(idx2).unwrap().parent { + idx2 = parent2; + } + } } /// Debug helper: determines if the tree contains a tag. diff --git a/src/borrow_tracker/tree_borrows/mod.rs b/src/borrow_tracker/tree_borrows/mod.rs index 1c7a735779..2bf35b1071 100644 --- a/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/borrow_tracker/tree_borrows/mod.rs @@ -536,7 +536,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { fn tb_give_pointer_debug_name( &mut self, ptr: Pointer>, - nth_parent: u8, name: &str, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); @@ -549,6 +548,66 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { }; let alloc_extra = this.get_alloc_extra(alloc_id)?; let mut tree_borrows = alloc_extra.borrow_tracker_tb().borrow_mut(); - tree_borrows.give_pointer_debug_name(tag, nth_parent, name) + tree_borrows.give_pointer_debug_name(tag, name); + Ok(()) + } + + /// Forge a pointer with the provenance of the `nb`'th parent of `ptr`. + /// No-op if `ptr` is a wildcard pointer. + fn tb_forge_ptr_nth_parent( + &mut self, + ptr: Pointer>, + nb: u8, + ) -> InterpResult<'tcx, Pointer>> { + let this = self.eval_context_mut(); + let (tag, alloc_id) = match ptr.provenance { + Some(Provenance::Concrete { tag, alloc_id }) => (tag, alloc_id), + _ => { + eprintln!("{ptr:?} has no provenance, cannot forge ancestor"); + return Ok(ptr); + } + }; + let alloc_extra = this.get_alloc_extra(alloc_id)?; + let tree_borrows = alloc_extra.borrow_tracker_tb().borrow(); + let forged_tag = tree_borrows.nth_parent(tag, nb); + Ok(ptr.map_provenance(|_| Some(Provenance::Concrete { tag: forged_tag, alloc_id }))) + } + + /// Forge a pointer with the provenance of the nearest common ancestor of `ptr1` and `ptr2`. + /// No-op on `ptr1` if either pointer is a wildcard pointer or if they belong to different + /// allocations. + fn tb_forge_ptr_common_ancestor( + &mut self, + ptr1: Pointer>, + ptr2: Pointer>, + ) -> InterpResult<'tcx, Pointer>> { + let this = self.eval_context_mut(); + let (tag1, alloc_id1) = match ptr1.provenance { + Some(Provenance::Concrete { tag, alloc_id }) => (tag, alloc_id), + _ => { + eprintln!("{ptr1:?} has no provenance, cannot forge ancestor"); + return Ok(ptr1); + } + }; + let (tag2, alloc_id2) = match ptr2.provenance { + Some(Provenance::Concrete { tag, alloc_id }) => (tag, alloc_id), + _ => { + eprintln!("{ptr2:?} has no provenance, cannot forge ancestor"); + return Ok(ptr1); + } + }; + if alloc_id1 != alloc_id2 { + eprintln!( + "{tag1:?} and {tag2:?} are not part of the same allocation, cannot forge ancestor" + ); + return Ok(ptr1); + } + + let alloc_extra = this.get_alloc_extra(alloc_id1)?; + let tree_borrows = alloc_extra.borrow_tracker_tb().borrow(); + let forged_tag = tree_borrows.common_ancestor(tag1, tag2); + Ok(ptr1.map_provenance(|_| { + Some(Provenance::Concrete { tag: forged_tag, alloc_id: alloc_id1 }) + })) } } diff --git a/src/borrow_tracker/tree_borrows/unimap.rs b/src/borrow_tracker/tree_borrows/unimap.rs index 58af32385c..abea08b0b8 100644 --- a/src/borrow_tracker/tree_borrows/unimap.rs +++ b/src/borrow_tracker/tree_borrows/unimap.rs @@ -17,7 +17,7 @@ use std::hash::Hash; use rustc_data_structures::fx::FxHashMap; /// Intermediate key between a UniKeyMap and a UniValMap. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct UniIndex { idx: u32, } diff --git a/src/shims/foreign_items.rs b/src/shims/foreign_items.rs index f4e91c30d9..40d733820d 100644 --- a/src/shims/foreign_items.rs +++ b/src/shims/foreign_items.rs @@ -455,15 +455,28 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { "miri_pointer_name" => { // This associates a name to a tag. Very useful for debugging, and also makes // tests more strict. - let [ptr, nth_parent, name] = this.check_shim(abi, Abi::Rust, link_name, args)?; + let [ptr, name] = this.check_shim(abi, Abi::Rust, link_name, args)?; let ptr = this.read_pointer(ptr)?; - let nth_parent = this.read_scalar(nth_parent)?.to_u8()?; let name = this.read_byte_slice(name)?; // We must make `name` owned because we need to // end the shared borrow from `read_byte_slice` before we can // start the mutable borrow for `give_pointer_debug_name`. let name = String::from_utf8_lossy(name).into_owned(); - this.give_pointer_debug_name(ptr, nth_parent, &name)?; + this.give_pointer_debug_name(ptr, &name)?; + } + "miri_tree_nth_parent" => { + let [ptr, nb] = this.check_shim(abi, Abi::Rust, link_name, args)?; + let ptr = this.read_pointer(ptr)?; + let nb = this.read_scalar(nb)?.to_u8()?; + let res = this.forge_ptr_nth_parent(ptr, nb)?; + this.write_pointer(res, dest)?; + } + "miri_tree_common_ancestor" => { + let [ptr1, ptr2] = this.check_shim(abi, Abi::Rust, link_name, args)?; + let ptr1 = this.read_pointer(ptr1)?; + let ptr2 = this.read_pointer(ptr2)?; + let res = this.forge_ptr_common_ancestor(ptr1, ptr2)?; + this.write_pointer(res, dest)?; } "miri_static_root" => { let [ptr] = this.check_shim(abi, Abi::Rust, link_name, args)?; diff --git a/tests/fail/tree_borrows/reserved/cell-protected-write.rs b/tests/fail/tree_borrows/reserved/cell-protected-write.rs index 872efe3ad5..4e6a548353 100644 --- a/tests/fail/tree_borrows/reserved/cell-protected-write.rs +++ b/tests/fail/tree_borrows/reserved/cell-protected-write.rs @@ -19,8 +19,8 @@ fn main() { write_second(x, y); unsafe fn write_second(x: &mut UnsafeCell, y: *mut u8) { let alloc_id = alloc_id!(x.get()); - name!(x.get(), "callee:x"); - name!(x.get()=>1, "caller:x"); + name!(ancestor!(x.get(), x.get()), "callee:x"); + name!(parent!(ancestor!(x.get(), x.get())), "caller:x"); name!(y, "callee:y"); name!(y, "caller:y"); print_state!(alloc_id); diff --git a/tests/fail/tree_borrows/reserved/int-protected-write.rs b/tests/fail/tree_borrows/reserved/int-protected-write.rs index 3a1205a84f..445c2439f6 100644 --- a/tests/fail/tree_borrows/reserved/int-protected-write.rs +++ b/tests/fail/tree_borrows/reserved/int-protected-write.rs @@ -18,7 +18,7 @@ fn main() { unsafe fn write_second(x: &mut u8, y: *mut u8) { let alloc_id = alloc_id!(x); name!(x, "callee:x"); - name!(x=>1, "caller:x"); + name!(parent!(x), "caller:x"); name!(y, "callee:y"); name!(y, "caller:y"); print_state!(alloc_id); diff --git a/tests/pass/tree_borrows/end-of-protector.rs b/tests/pass/tree_borrows/end-of-protector.rs index 76bbc73e66..bce326c66e 100644 --- a/tests/pass/tree_borrows/end-of-protector.rs +++ b/tests/pass/tree_borrows/end-of-protector.rs @@ -26,7 +26,7 @@ fn main() { unsafe fn do_nothing(x: &mut u8) { name!(x, "callee:x"); - name!(x=>1, "caller:x"); + name!(parent!(x), "caller:x"); let alloc_id = alloc_id!(x); print_state!(alloc_id); } diff --git a/tests/pass/tree_borrows/formatting.rs b/tests/pass/tree_borrows/formatting.rs index 9021c41763..2770f6b3e7 100644 --- a/tests/pass/tree_borrows/formatting.rs +++ b/tests/pass/tree_borrows/formatting.rs @@ -16,7 +16,7 @@ fn main() { // decimal digits to verify proper padding. unsafe fn alignment_check() { let data: &mut [u8] = &mut [0; 1024]; - name!(data.as_ptr()=>2, "data"); + name!(ancestor!(data.as_ptr(), data.as_ptr()), "data"); let alloc_id = alloc_id!(data.as_ptr()); let x = &mut data[1]; name!(x as *mut _, "data[1]"); diff --git a/tests/pass/tree_borrows/reserved.rs b/tests/pass/tree_borrows/reserved.rs index d8a8c27568..e2b1c5bc34 100644 --- a/tests/pass/tree_borrows/reserved.rs +++ b/tests/pass/tree_borrows/reserved.rs @@ -33,7 +33,7 @@ unsafe fn print(msg: &str) { } unsafe fn read_second(x: &mut T, y: *mut u8) { - name!(x as *mut T as *mut u8=>1, "caller:x"); + name!(parent!(x as *mut T as *mut u8), "caller:x"); name!(x as *mut T as *mut u8, "callee:x"); name!(y, "caller:y"); name!(y, "callee:y"); diff --git a/tests/utils/macros.rs b/tests/utils/macros.rs index de223410fb..4d9e79385a 100644 --- a/tests/utils/macros.rs +++ b/tests/utils/macros.rs @@ -29,7 +29,30 @@ macro_rules! print_state { }; } -/// `name!(ptr => nth_parent, name)`: associate `name` to the `nth_parent` of `ptr`. +/// `parent!(ptr)`: forges a pointer with the provenance of the direct parent of `ptr`. +/// +/// Use in conjunction with `name!` to access an otherwise unreachable tag. +macro_rules! parent { + ($ptr:expr) => { + crate::utils::miri_extern::miri_tree_nth_parent($ptr as *const u8 as *const (), 1) + }; +} + +/// `ancestor!(ptr1, ptr2)`: forges a pointer with the provenance of the +/// nearest common ancestor of `ptr1` and `ptr2`. +/// Caller should ensure that they are part of the same allocation. +/// +/// Use in conjunction with `name!` to access an otherwise unreachable tag. +macro_rules! ancestor { + ($ptr1:expr, $ptr2:expr) => { + crate::utils::miri_extern::miri_tree_common_ancestor( + $ptr1 as *const u8 as *const (), + $ptr2 as *const u8 as *const (), + ) + }; +} + +/// `name!(tg, name)`: associate `name` to the `nth_parent` of `ptr`. /// /// `ptr` should be any pointer or reference that can be converted with `_ as *const u8`. /// @@ -41,21 +64,17 @@ macro_rules! print_state { /// `name` is an optional string that will be used as the name. Defaults to /// `stringify!($ptr)` the name of `ptr` in the source code. macro_rules! name { - ($ptr:expr, $name:expr) => { - crate::utils::macros::name!($ptr => 0, $name); - }; ($ptr:expr) => { - crate::utils::macros::name!($ptr => 0, stringify!($ptr)); - }; - ($ptr:expr => $nb:expr) => { - crate::utils::macros::name!($ptr => $nb, stringify!($ptr)); + crate::utils::macros::name!($ptr, stringify!($ptr)); }; - ($ptr:expr => $nb:expr, $name:expr) => { + ($ptr:expr, $name:expr) => { let name = $name.as_bytes(); - crate::utils::miri_extern::miri_pointer_name($ptr as *const u8 as *const (), $nb, name); + crate::utils::miri_extern::miri_pointer_name($ptr as *const u8 as *const (), name); }; } pub(crate) use alloc_id; +pub(crate) use ancestor; pub(crate) use name; +pub(crate) use parent; pub(crate) use print_state; diff --git a/tests/utils/miri_extern.rs b/tests/utils/miri_extern.rs index 6c4298c613..e6666f3927 100644 --- a/tests/utils/miri_extern.rs +++ b/tests/utils/miri_extern.rs @@ -65,7 +65,7 @@ extern "Rust" { /// /// This is only useful as an input to `miri_print_borrow_stacks`, and it is a separate call because /// getting a pointer to an allocation at runtime can change the borrow stacks in the allocation. - /// This function should be considered unstable. It exists only to support `miri_print_borrow_stacks` and so + /// This function should be considered unstable. It exists only to support `miri_print_borrow_state` and so /// inherits all of its instability. pub fn miri_get_alloc_id(ptr: *const ()) -> u64; @@ -92,15 +92,87 @@ extern "Rust" { /// change, or it may be removed entirely. pub fn miri_print_borrow_state(alloc_id: u64, show_unnamed: bool); - /// Miri-provided extern function to associate a name to the nth parent of a tag. + /// Miri-provided extern function to associate a name to a tag. /// Typically the name given would be the name of the program variable that holds the pointer. - /// Unreachable tags can still be named by using nonzero `nth_parent` and a child tag. + /// Unreachable tags can still be accessed through a combination of `miri_tree_nth_parent` and + /// `miri_tree_common_ancestor`. /// /// This function does nothing under Stacked Borrows, since Stacked Borrows's implementation /// of `miri_print_borrow_state` does not show the names. /// /// Under Tree Borrows, the names also appear in error messages. - pub fn miri_pointer_name(ptr: *const (), nth_parent: u8, name: &[u8]); + pub fn miri_pointer_name(ptr: *const (), name: &[u8]); + + /// Miri-provided extern function to forge the provenance of a pointer. + /// Use only for debugging and diagnostics. + /// + /// Under Tree Borrows, this can be used in conjunction with `miri_pointer_name` + /// to access a tag that is not directly accessible in the program: the pointer + /// returned has the same address as `ptr` and is in the same allocation, but has + /// the `tag` of the `nb`'th parent above `ptr`. + /// + /// `ptr` must not be a wildcard pointer. + /// The output can change based on implementation details of `rustc` as well as + /// `miri` flags such as `-Zmiri-unique-is-unique`. + /// Behavior might be even more unpredictable without `-Zmiri-tag-gc=0`. + /// + /// Prefer the more stable `miri_tree_common_ancestor` when possible. + /// In particular when `nb > 1` it is a sign that this function might not be + /// the right one to use. + /// + /// Example usage: naming the caller-retagged parent of a function argument + /// ```rs + /// foo(&*x); + /// fn foo(x: &T) { + /// // `x` was reborrowed once by the caller and once by the callee + /// // we can give a name to the callee's `x` with + /// miri_pointer_name(x as *const T as *const (), "callee:x".as_bytes()); + /// // However the caller's `x` is not directly reachable. + /// miri_pointer_name( + /// miri_tree_nth_parent(x as *const T as *const (), 1), + /// "caller:x".as_bytes(), + /// ); + /// } + /// ``` + /// + /// This is a noop under Stacked Borrows. + pub fn miri_tree_nth_parent(ptr: *const (), nb: u8) -> *const (); + + /// Miri-provided extern function to forge the provenance of a pointer. + /// Use only for debugging and diagnostics. + /// + /// Under Tree Borrows, this can be used in conjunction with `miri_pointer_name` + /// to access a tag that is not directly accessible in the program: the pointer + /// returned has the same address as `ptr1` and is in the same allocation, but + /// has the `tag` of the nearest common ancestor of `ptr1` and `ptr2`. + /// + /// Both `ptr1` and `ptr2` must not be wildcard pointers, and + /// `ptr2` must be part of the same allocation as `ptr1`. + /// + /// Example usage: + /// ```rs + /// // If you have a function that returns a pointer and you use it as such: + /// miri_pointer_name( + /// something() as *const (), + /// "something()".as_bytes(), + /// ); + /// // you might not be naming the pointer you think you are: two invocations + /// // of `something()` might yield different tags, and the one you gave a name to + /// // might be later invalidated when the root pointer is still usable. + /// // This occurs in particular for some `as_ptr` functions. + /// + /// // To give a name to the actual underlying pointer, you can use + /// miri_pointer_name( + /// miri_tree_common_ancestor( + /// something() as *const (), + /// something() as *const (), + /// ), + /// "root of something()".as_bytes(), + /// ); + /// ``` + /// + /// This is a noop under Stacked Borrows. + pub fn miri_tree_common_ancestor(ptr1: *const (), ptr2: *const ()) -> *const (); /// Miri-provided extern function to print (from the interpreter, not the /// program) the contents of a section of program memory, as bytes. Bytes