diff --git a/crates/aquascope/Cargo.toml b/crates/aquascope/Cargo.toml index fdde4db7c..3e0674ba3 100644 --- a/crates/aquascope/Cargo.toml +++ b/crates/aquascope/Cargo.toml @@ -15,7 +15,7 @@ publish = false rustc_private = true [features] -testing = ["lazy_static"] +testing = [] [dependencies] anyhow = "1.0.0" @@ -32,7 +32,7 @@ miri = {git = "https://github.com/rust-lang/miri.git", rev = "35d6927663065d7fde aquascope_workspace_utils = { version = "0.2", path = "../aquascope_workspace_utils" } # testing utils -lazy_static = { version = "1.4", optional = true } +lazy_static = { version = "1.4" } [dev-dependencies] insta = { version = "1.22.0", features = ["json", "yaml", "redactions"] } diff --git a/crates/aquascope/src/analysis/ir_mapper/body_graph.rs b/crates/aquascope/src/analysis/ir_mapper/body_graph.rs index 7de9080ee..db9d14f25 100644 --- a/crates/aquascope/src/analysis/ir_mapper/body_graph.rs +++ b/crates/aquascope/src/analysis/ir_mapper/body_graph.rs @@ -1,7 +1,7 @@ -use itertools::Itertools; -use rustc_data_structures::{fx::FxHashMap as HashMap, graph::*}; +use rustc_data_structures::{captures::Captures, graph::*}; use rustc_middle::mir::{ - BasicBlock, BasicBlockData, BasicBlocks, Body, Location, + BasicBlock, BasicBlockData, BasicBlocks, Body, Location, Terminator, + TerminatorKind, }; use smallvec::SmallVec; @@ -19,15 +19,6 @@ impl<'a, 'tcx: 'a> CleanedBody<'a, 'tcx> { self.0 } - // TODO: cache the results - pub(crate) fn paths_from_to( - &self, - from: BasicBlock, - to: BasicBlock, - ) -> Vec> { - DFSFinder::find_paths_from_to(self, from, to) - } - /// Compute the locations successor. 
/// /// If the specified location lies in the middle of a `BasicBlock`, @@ -56,14 +47,49 @@ impl<'a, 'tcx: 'a> CleanedBody<'a, 'tcx> { statement_index: 0, }) } else { + log::debug!("No Location (or too many) successor(s) found: {nexts:?}"); None } } } + pub fn terminator_in_block(&self, block: BasicBlock) -> &Terminator<'tcx> { + self.body().basic_blocks[block].terminator() + } + + pub fn blocks( + &self, + ) -> impl Iterator + Captures<'a> + Captures<'tcx> + '_ { + self + .0 + .basic_blocks + .postorder() + .iter() + .filter(|bb| CleanedBody::keep_block(&self.0.basic_blocks[**bb])) + .copied() + } + + pub fn is_false_edge(&self, bb: BasicBlock) -> bool { + matches!( + self.0.basic_blocks[bb].terminator().kind, + TerminatorKind::FalseEdge { .. } + ) + } + fn keep_block(bb: &BasicBlockData) -> bool { !bb.is_cleanup && !bb.is_empty_unreachable() } + + fn is_imaginary_target( + from_data: &BasicBlockData, + target: BasicBlock, + ) -> bool { + let TerminatorKind::FalseEdge { imaginary_target, .. } = from_data.terminator().kind else { + return false; + }; + + imaginary_target == target + } } // ----------- @@ -96,7 +122,11 @@ impl<'tcx> WithSuccessors for CleanedBody<'_, 'tcx> { node: Self::Node, ) -> >::Iter { ::successors(&self.0.basic_blocks, node) - .filter(|bb| CleanedBody::keep_block(&self.0.basic_blocks[*bb])) + .filter(|bb| { + let from_data = &self.0.basic_blocks[*bb]; + CleanedBody::keep_block(from_data) + && !CleanedBody::is_imaginary_target(from_data, *bb) + }) .collect::>() .into_iter() } @@ -119,126 +149,103 @@ impl<'tcx> WithPredecessors for CleanedBody<'_, 'tcx> { } } -/// Finds all paths between two nodes. -/// -/// This DFS will find all unique paths between two nodes. This -/// includes allowing loops to be traversed (at most once). -/// This is quite a HACK to briefly satisfy the needs of the -/// [stepper](crate::analysis::stepper::compute_permission_steps). 
-struct DFSFinder<'graph, G> -where - G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, -{ - graph: &'graph G, - paths: Vec>, - stack: Vec, - visited: HashMap, -} - -impl<'graph, G> DFSFinder<'graph, G> -where - G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, -{ - pub fn new(graph: &'graph G) -> Self { - Self { - graph, - paths: vec![], - stack: vec![], - visited: HashMap::default(), - } - } - - pub fn find_paths_from_to( - graph: &'graph G, - from: G::Node, - to: G::Node, - ) -> Vec> { - let mut dfs = Self::new(graph); - dfs.search(from, to); - dfs.paths.into_iter().unique().collect::>() - } - - fn insert(&mut self, n: G::Node) -> bool { - let v = self.visited.entry(n).or_default(); - if *v >= 2 { - return false; - } - *v += 1; - true - } - - fn remove(&mut self, n: G::Node) { - let v = self.visited.entry(n).or_default(); - assert!(*v > 0); - *v -= 1; - } - - fn search(&mut self, from: G::Node, to: G::Node) { - if !self.insert(from) { - return; - } - - self.stack.push(from); - - if from == to { - self.paths.push(self.stack.clone()); - self.remove(to); - self.stack.pop().unwrap(); - return; - } - - for v in self.graph.successors(from) { - self.search(v, to); - } - - self.stack.pop().unwrap(); - self.remove(from); - } -} - #[cfg(test)] mod test { - use rustc_data_structures::graph::vec_graph::VecGraph; + use rustc_utils::BodyExt; - use super::*; + use super::{super::AllPostDominators, *}; + use crate::test_utils as tu; - #[test] - fn if_shape() { - // Diamond shaped IF. 
- let graph = VecGraph::new(6, vec![ - (0u32, 1u32), - (1u32, 2u32), - (1u32, 3u32), - (2u32, 4u32), - (3u32, 4u32), - (4u32, 5u32), - ]); - - let paths_0_5 = vec![vec![0, 1, 2, 4, 5], vec![0, 1, 3, 4, 5]]; - - assert_eq!(DFSFinder::find_paths_from_to(&graph, 0, 5), paths_0_5); - } + // CleanedBody tests #[test] - fn while_loop_shape() { - // While loop shape: - // 0 -> 1 -> 2 -> 3 -> 5 - // ^ | - // | v - // |-- 4 - let graph = VecGraph::new(6, vec![ - (0u32, 1u32), - (1u32, 2u32), - (2u32, 3u32), - (3u32, 5u32), - (3u32, 4u32), - (4u32, 2u32), - ]); - - let paths_0_5 = vec![vec![0, 1, 2, 3, 5], vec![0, 1, 2, 3, 4, 2, 3, 5]]; - let mut paths = DFSFinder::find_paths_from_to(&graph, 0, 5); - paths.sort_by_key(|l| l.len()); - - assert_eq!(paths, paths_0_5); + fn cleaned_body_simple_if() { + // EXPECTED MIR: + // ------------- + // bb0: { + // StorageLive(_2); + // _2 = const 0_i32; + // FakeRead(ForLet(None), _2); + // StorageLive(_3); + // StorageLive(_4); + // _4 = const true; + // switchInt(move _4) -> [0: bb3, otherwise: bb1]; + // } + // + // bb1: { + // _5 = CheckedAdd(_2, const 1_i32); + // assert(!move (_5.1: bool), ) -> [success: bb2, unwind: bb5]; + // } + // + // bb2: { + // _2 = move (_5.0: i32); + // _3 = const (); + // goto -> bb4; + // } + // + // bb3: { + // _3 = const (); + // goto -> bb4; + // } + // + // bb4: { + // StorageDead(_4); + // StorageDead(_3); + // _0 = _2; + // StorageDead(_2); + // return; + // } + // + // bb5 (cleanup): { + // resume; + // } + + tu::compile_normal( + r#" +fn foo() -> i32 { + let mut v1 = 0; + if true { + v1 += 1; + } + return v1; +} +"#, + |tcx| { + tu::for_each_body(tcx, |_, wfacts| { + let cleaned_graph = CleanedBody(&wfacts.body); + + let post_doms = AllPostDominators::::build( + &cleaned_graph, + wfacts.body.all_returns().map(|loc| loc.block), + ); + + let cleaned_blocks = cleaned_graph.blocks().collect::>(); + + let bb0 = BasicBlock::from_usize(0); + let bb1 = BasicBlock::from_usize(1); + let bb2 = 
BasicBlock::from_usize(2); + let bb3 = BasicBlock::from_usize(3); + let bb4 = BasicBlock::from_usize(4); + let bb5 = BasicBlock::from_usize(5); + + assert!(cleaned_blocks.contains(&bb0)); + assert!(cleaned_blocks.contains(&bb1)); + assert!(cleaned_blocks.contains(&bb2)); + assert!(cleaned_blocks.contains(&bb3)); + assert!(cleaned_blocks.contains(&bb4)); + // Cleanup blocks + assert!(!cleaned_blocks.contains(&bb5)); + + for &bb in vec![bb0, bb1, bb2, bb3, bb4].iter() { + assert!(post_doms.is_postdominated_by(bb, bb4)); + } + + assert!(!post_doms.is_postdominated_by(bb0, bb2)); + assert!(!post_doms.is_postdominated_by(bb0, bb3)); + assert!(post_doms.is_postdominated_by(bb1, bb2)); + assert!(!post_doms.is_postdominated_by(bb1, bb3)); + }) + }, + ); } } diff --git a/crates/aquascope/src/analysis/ir_mapper/mir_locations.rs b/crates/aquascope/src/analysis/ir_mapper/mir_locations.rs index 170b1b6e5..4337460a7 100644 --- a/crates/aquascope/src/analysis/ir_mapper/mir_locations.rs +++ b/crates/aquascope/src/analysis/ir_mapper/mir_locations.rs @@ -26,24 +26,24 @@ impl MirOrderedLocations { } pub fn exit_location(&self) -> Option { - self.exit_block.map(|block| { - let statement_index = *self - .locations - .get(&block) - .expect("Block with no associated locations") - .last() - .unwrap(); - Location { - block, - statement_index, - } - }) + let block = self.exit_block?; + self.locations.get(&block).map_or_else( + // Block has no associated index then default to the start + || Some(block.start_location()), + // Get the last associated index of the block + |vs| { + vs.last().map(|&statement_index| Location { + block, + statement_index, + }) + }, + ) } pub fn get_entry_exit_locations(&self) -> Option<(Location, Location)> { - self - .entry_location() - .and_then(|mn| self.exit_location().map(|mx| (mn, mx))) + let entry = self.entry_location()?; + let exit = self.exit_location()?; + Some((entry, exit)) } pub fn values(&self) -> impl Iterator + Captures<'_> { diff --git 
a/crates/aquascope/src/analysis/ir_mapper/mod.rs b/crates/aquascope/src/analysis/ir_mapper/mod.rs index 20e933071..a4c222a02 100644 --- a/crates/aquascope/src/analysis/ir_mapper/mod.rs +++ b/crates/aquascope/src/analysis/ir_mapper/mod.rs @@ -27,8 +27,8 @@ pub struct IRMapper<'a, 'tcx> { body: &'a Body<'tcx>, hir_to_mir: HashMap>, gather_mode: GatherMode, - dominators: Dominators, - post_dominators: AllPostDominators, + pub(crate) dominators: Dominators, + pub(crate) post_dominators: AllPostDominators, } // TODO: I want to decompose this into more specific regions. @@ -107,6 +107,36 @@ where ir_map } + pub fn ldominates(&self, dom: Location, node: Location) -> bool { + if dom.block == node.block { + return dom.statement_index <= node.statement_index; + } + self.dominates(dom.block, node.block) + } + + pub fn lpost_dominates(&self, pdom: Location, node: Location) -> bool { + if pdom.block == node.block { + return pdom.statement_index >= node.statement_index; + } + self.post_dominates(pdom.block, node.block) + } + + pub fn dominates(&self, dom: BasicBlock, node: BasicBlock) -> bool { + self.dominators.is_reachable(node) && self.dominators.dominates(dom, node) + } + + pub fn post_dominates(&self, pdom: BasicBlock, node: BasicBlock) -> bool { + self.post_dominators.is_postdominated_by(node, pdom) + } + + /// Returns true if the terminator in the location's block is a `switchInt`. + pub fn is_terminator_switchint(&self, location: Location) -> bool { + matches!( + self.cleaned_graph.terminator_in_block(location.block).kind, + mir::TerminatorKind::SwitchInt { .. 
} + ) + } + pub fn local_assigned_place(&self, local: &hir::Local) -> Vec> { use either::Either; use mir::{FakeReadCause as FRC, StatementKind as SK}; @@ -192,20 +222,34 @@ where idxs.sort_unstable(); } - let basic_blocks = total_location_map.keys().collect::>(); + let basic_blocks = total_location_map.keys().copied().collect::>(); - let entry_block = basic_blocks.iter().find(|&&&b1| { - basic_blocks.iter().all(|&&b2| { - self.dominators.is_reachable(b2) - && (b1 == b2 || self.dominators.dominates(b1, b2)) + let entry_block = basic_blocks + .iter() + .find(|&&candidate_dom| { + basic_blocks.iter().all(|&block| { + self.dominators.is_reachable(block) + && self.dominators.dominates(candidate_dom, block) + }) }) - }); + .copied(); - let exit_block = basic_blocks.iter().find(|&&&b1| { - basic_blocks.iter().all(|&&b2| { - b1 == b2 || self.post_dominators.is_postdominated_by(b2, b1) - }) - }); + let find_exit_from = |basic_blocks: &[BasicBlock]| -> Option { + basic_blocks + .iter() + .find(|&&candidate_postdom| { + basic_blocks.iter().all(|&block| { + self + .post_dominators + .is_postdominated_by(block, candidate_postdom) + }) + }) + .copied() + }; + + let exit_block = find_exit_from(&basic_blocks); + + log::debug!("Gathering MIR location entry / exit blocks: {entry_block:?}{exit_block:?}"); if exit_block.is_none() { log::debug!("Found locations: {total_location_map:#?}"); @@ -215,8 +259,8 @@ where } Some(MirOrderedLocations { - entry_block: entry_block.map(|b| **b), - exit_block: exit_block.map(|b| **b), + entry_block, + exit_block, locations: total_location_map, }) } diff --git a/crates/aquascope/src/analysis/ir_mapper/post_dominators.rs b/crates/aquascope/src/analysis/ir_mapper/post_dominators.rs index 9f40ff48d..dae93bf67 100644 --- a/crates/aquascope/src/analysis/ir_mapper/post_dominators.rs +++ b/crates/aquascope/src/analysis/ir_mapper/post_dominators.rs @@ -40,3 +40,145 @@ impl AllPostDominators { } } } + +#[cfg(test)] +mod tests { + use 
rustc_data_structures::graph::{vec_graph::VecGraph, *}; + + use super::*; + + struct VG { + source: N, + forward: VecGraph, + backward: VecGraph, + } + + impl VG { + fn make(size: usize, source: N, edges: Vec<(N, N)>) -> Self { + let rev = edges.iter().map(|&(f, s)| (s, f)).collect::>(); + VG { + source, + forward: VecGraph::new(size, edges), + backward: VecGraph::new(size, rev), + } + } + } + + impl DirectedGraph for VG { + type Node = N; + } + + impl<'graph, N: Idx> GraphSuccessors<'graph> for VG { + type Item = N; + type Iter = smallvec::IntoIter<[N; 10]>; + } + + impl<'graph, N: Idx> GraphPredecessors<'graph> for VG { + type Item = N; + type Iter = smallvec::IntoIter<[N; 10]>; + } + + impl WithStartNode for VG { + fn start_node(&self) -> N { + self.source + } + } + + impl WithNumNodes for VG { + fn num_nodes(&self) -> usize { + self.forward.num_nodes() + } + } + + impl WithSuccessors for VG { + fn successors( + &self, + node: Self::Node, + ) -> >::Iter { + self + .forward + .successors(node) + .iter() + .copied() + .collect::>() + .into_iter() + } + } + + impl WithPredecessors for VG { + fn predecessors( + &self, + node: Self::Node, + ) -> >::Iter { + self + .backward + .successors(node) + .iter() + .copied() + .collect::>() + .into_iter() + } + } + + #[test] + fn pdom_diamond() { + let diamond = VG::::make(4, 0, vec![(0, 1), (0, 2), (1, 3), (2, 3)]); + let post_doms = AllPostDominators::build(&diamond, std::iter::once(3)); + for b in 0 ..= 2 { + assert!(post_doms.is_postdominated_by(b, 3)); + } + } + + #[test] + fn pdom_linear() { + let nodes = 100; + let edges = (0 .. nodes).zip(1 ..).collect::>(); + let line = VG::::make(nodes, 0, edges); + let post_doms = AllPostDominators::build(&line, std::iter::once(nodes - 1)); + for i in 0 .. nodes { + for j in i + 1 .. 
nodes { + assert!( + post_doms.is_postdominated_by(i, j), + "{j} should post-dominate {i}" + ); + } + } + } + + #[test] + fn pdom_double_diamond() { + // 2 5 + // 0 -> 1 4 7 -> + // 3 6 + let dd = VG::::make(8, 0, vec![ + (0, 1), + (1, 2), + (1, 3), + (2, 4), + (3, 4), + (4, 5), + (4, 6), + (5, 7), + (6, 7), + ]); + let post_doms = AllPostDominators::build(&dd, std::iter::once(7)); + + assert!(post_doms.is_postdominated_by(0, 1)); + assert!(post_doms.is_postdominated_by(0, 4)); + assert!(post_doms.is_postdominated_by(0, 7)); + assert!(post_doms.is_postdominated_by(1, 4)); + assert!(post_doms.is_postdominated_by(1, 7)); + assert!(post_doms.is_postdominated_by(4, 7)); + + for i in 0 .. 8 { + for &bad in &[2, 3, 5, 6] { + if i != bad { + assert!( + !post_doms.is_postdominated_by(i, bad), + "{bad} should NOT post-dominate {i}" + ); + } + } + } + } +} diff --git a/crates/aquascope/src/analysis/mod.rs b/crates/aquascope/src/analysis/mod.rs index 9e6ab4fee..a07ddbb41 100644 --- a/crates/aquascope/src/analysis/mod.rs +++ b/crates/aquascope/src/analysis/mod.rs @@ -540,6 +540,7 @@ impl<'a, 'tcx: 'a> AquascopeAnalysis<'a, 'tcx> { smooth_elements(spans) } + /// Convert a potentially non-contiguous collection of [`Point`]s into [`Span`]s. 
fn points_to_spans( &self, points: impl IntoIterator, diff --git a/crates/aquascope/src/analysis/permissions/context.rs b/crates/aquascope/src/analysis/permissions/context.rs index 42d7e570b..2df868c1d 100644 --- a/crates/aquascope/src/analysis/permissions/context.rs +++ b/crates/aquascope/src/analysis/permissions/context.rs @@ -429,6 +429,28 @@ impl<'a, 'tcx> PermissionsCtxt<'a, 'tcx> { body.all_places(tcx, def_id).collect::>() } + pub fn domain_bottom(&self) -> PermissionsDomain<'tcx> { + self + .domain_places() + .into_iter() + .map(|place| { + (place, PermissionsData { + is_live: false, + type_droppable: false, + type_writeable: false, + type_copyable: false, + path_moved: None, + path_uninitialized: false, + loan_read_refined: None, + loan_write_refined: None, + loan_drop_refined: None, + permissions: Permissions::bottom(), + }) + }) + .collect::>() + .into() + } + pub fn permissions_domain_at_point( &self, point: Point, diff --git a/crates/aquascope/src/analysis/stepper/find_steps.rs b/crates/aquascope/src/analysis/stepper/find_steps.rs deleted file mode 100644 index 242599f07..000000000 --- a/crates/aquascope/src/analysis/stepper/find_steps.rs +++ /dev/null @@ -1,1234 +0,0 @@ -//! Core analysis for creating permission steps. -//! -//! # Overview -//! -//! Defined on the MIR, a *permissions step* is the difference in permissions between -//! two adjacent MIR [`Point`]s. The difference represents the gains and losses that -//! occur between adjacent permissions states. Permission steps on the MIR are useless -//! to the average user, thus we cluster subsequences of MIR instructions and take the -//! difference between the first and last point to create a larger step. -//! -//! At a high-level, the strategy is to partition the MIR into subsequences (referred to as segments), -//! such that each segment represents a single permission step. I.E. each segment is a cluster -//! of instructions representing one source-level permissions step. -//! 
After clustering, the steps are easily computed in isolation to create the final permissions -//! steps. As we'll see later, the “isolation” is broken down a little to prevent some specific -//! visual effects. -//! -//! # Splitting Strategy -//! -//! The main goal of the permission stepper is to provide steps that map to logical “steps” in -//! the source code. First, the steps will be determined using HIR language constructs, which are -//! subsequently lowered to fit the more granular MIR language constructs. -//! Starting with the HIR, a so-called “logical step” is roughly defined to be a [`Stmt`](rustc_hir::Stmt). -//! Typically statements fall on their own line and they mark the beginning and end -//! of some potentially permissions-altering operation. This makes up the first loose for -//! finding permissions steps. -//! -//! Statements however, do not cover how permissions change in a control-flow sensitive construct. -//! For example, the statements at the beginning of the then and else branch might execute with -//! different permissions, this sudden change of permissions needs to be communicated to the user, -//! rather than happening on instructions of the CFG these permissions are changed on the *edges*. -//! This forms the second rule of creating a step, namely, a branch in control flow is also a -//! permissions-altering “operation”. The full rules for tracking permissions steps at each -//! respective level of granularity are outlined below. -//! -//! ## Source to HIR -//! -//! In the source code, we'd like a permissions step to be shown after each line and at the -//! opening brace (`{`) of a new block. This requires us to take a permissions step at the -//! following HIR locations. -//! -//! * From before to after each [`Stmt`](rustc_hir::Stmt). -//! * From the last [`Block`](rustc_hir::Block) statement to after the `expr` of a [`Block`](rustc_hir::Block). -//! * From before a branch to *before* the first expression of a branch target. 
For example, at the source-level, -//! this would map to a step from before an `if` to the directly after the opening `{` of the -//! then / else block. -//! -//! Each node in the HIR generates several MIR locations. For information on how to map between the -//! two see the [`IRMapper`]. Important for the stepper, is the ability to find the first, and -//! last, MIR [`Location`] which came from a HIR node. First and last are used in the sense of -//! a [dominator](https://en.wikipedia.org/wiki/Dominator_(graph_theory)) and [post-dominator](https://en.wikipedia.org/wiki/Dominator_(graph_theory)#Postdominance) -//! respectively. The main idea is that the HIR traversal allows us to find the proper *slice points* -//! for the MIR graph. -//! -//! ## HIR to MIR -//! -//! When forming permission steps in the MIR, the most crucial invariant is that the permissions steps -//! form a total cover of the control-flow graph. This invariant remains to ensure that no change in -//! permissions is *missed*. If a change in permissions is not shown (at the source-level), -//! this is due to segmenting steps at the wrong boundaries or at too coarse a granularity. -//! Because of this invariant, the stepper uses a strategy to "slice" the MIR into segments, such that -//! these segments always form a total cover. -//! -//! ### Data Structures Summary -//! -//! The key data structures involvled are the [`MirSegment`] and [`SegmentTree`]. -//! The [`MirSegment`] is a simple struct storing the two points, where a permimssion step -//! will step `from` and where steps `to`. This means that a `MirSegment` must lie on a -//! valid path within the MIR. -//! -//! The [`SegmentTree`] (not to be confused with a [segment tree](https://en.wikipedia.org/wiki/Segment_tree)) -//! is a tree which holds [`MirSegment`]s in its leaves. -//! -//! ### Slicing -//! -//! The core operation performed on the [`SegmentTree`] is taking a *slice*. There are two kinds of -//! slices: -//! -//! 1. 
linear slices, those that **do not** contain permissions-altering CFG edges. -//! 2. control-flow slices, those that **only** contain permissions-altering CFG edges. -//! -//! These two slices exist to maintain the invariants of the [`MirSegment`] and [`SegmentTree`]. -//! Fundamentally, these slices work on different *shapes* of the underlying graph. -//! -//! #### Linear Slices -//! -//! A *linear slice* slices a portion of the graph which forms a continuous subsequence. -//! -//! Example: -//! -//! ```text -//! before slice: -//! -//! slice point -//! | -//! [segment 1] | -//! ⬤ [l1] ----> ⬤ [l2] ----> ⬤ [l3] -//! | -//! | -//! -//! -//! after slice: -//! -//! [segment 1] -//! ⬤ [l1] ----> ⬤ [l2] -//! -//! [segment 2] -//! ⬤ [l2] ----> ⬤ [l3] -//! -//! ``` -//! -//! In the above example there exists a linear sequence of control-flow from `l1 ⟶ l2 ⟶ l3`. -//! Depicted, is a *linear slice* of this segment at location `l2`. Linear slices *always* -//! split a single segment, into two new segments which maintain the [`MirSegment`] invariant. -//! These slices are used after [`Stmt`s](rustc_hir::Stmt) and the end of a [`Block` expression](rustc_hir::Block). -//! -//! #### Control-flow slices -//! -//! A *control-flow* slice, then does not slice a continuous subsequence but multiple that -//! /span across/ branches of control flow. -//! -//! Example: -//! ```text -//! before slice: -//! -//! slice point -//! | -//! | -//! | -//! ------> ⬤ [l2] ------- -//! | | | -//! [segment 1] | | v -//! ----> ⬤ [l1] | ⬤ [l4] ----> -//! | | ^ -//! | | | -//! ------> ⬤ [l3] ------- -//! | -//! | -//! | -//! -//! -//! after slice: -//! -//! [segment 1] -//! ⬤ [l1] ----> ⬤ [l2] -//! -//! [segment 2] -//! ⬤ [l1] ----> ⬤ [l3] -//! -//! [segment 3] -//! ⬤ [l2] ----> ⬤ [l4] -//! -//! [segment 4] -//! ⬤ [l3] ----> ⬤ [l4] -//! -//! ``` -//! Before the slice in segment 1 there is a graph which roughly captures the shape -//! of an if expression. 
location `l1` would be the branch point (corresponding -//! to a `SwitchInt`), `l2` and `l3` would be the then and else branches. Here these -//! branches are abstracted to a single point, but in practice they can be any valid -//! [`MirSegment`]. Then location `l4` joins the branches and control flow continues -//! again linearly. -//! -//! In order to slice a control-flow segment properly, a set of locations is required -//! and the function mapping a location to a control-flow path must be bijective. -//! In the above example, the possible paths through this segment (the usliced segment 1) are: -//! 1. `l1`, `l2`, `l4` -//! 2. `l1`, `l3`, `l4` -//! -//! Therefore, in order to perform a proper slice, the set (`l2`, `l3`) is provided. -//! Luckily, these locations are easy to obtain from the structure of the HIR and correspond -//! to the opening block of each branch. -//! -//! After slicing, the result is four segments that form a total cover of the original -//! segment, and each has a clear entry / exit point for *it's specific control flow*. -//! -//! NOTE: one small semantic difference between the resulting segments. The segments -//! which form the so-called "split set" (segments 1 and 2 in the above example) *cannot* -//! be further split. They are treated as **atomic**. This is intuitive if you image that -//! they only contains edges in the CFG (there would be nothing left to spliti). -//! -//! # Finalizing Differences -//! -//! Slicing the MIR into segments is the core task for the stepper and results -//! in a proper [`SegmentTree`]. The last task of the stepper engine is to take the -//! permissions difference between the domain after the segment, and that before. -//! See the [`PermissionsCtxt`] for more information about computing a [`PermissionsDomain`]. -//! -//! When computing the differences however, there is an edge case when handling liveness. -//! As a result of the generated MIR, it's possible for the left-hand-side of an assignment -//! 
to gain permissions before it seems it should. This occurs when the initializer expression -//! is more complex (e.g. an [`If`](rustc_hir::Expr) or [`Block`](rustc_hir::Expr) expression). -//! To ensure initialized places don't gain permissions before the end of the let statement, -//! these places are marked as /attached/ to a specific MIR location, and they are filtered -//! from any nested segment step results. -//! -//! # Known Shortcomings -//! -//! There are a few major known limitations, they can be resolved we just need the time: -//! -//! - Function bodies that contain infinite loops `loop {}` cannot be analyzed. -//! More general, if there does not contain an exit point to the function the -//! current algorithm will report this limitation to the user. -//! -//! - The control-flow slicing is too strict, if there exists an `if` without -//! and `else`, or if there are multiple returns, the algorithm also fails. - -use anyhow::{bail, Result}; -use rustc_data_structures::{self, fx::FxHashMap as HashMap}; -use rustc_hir::{ - self as hir, - intravisit::{self, Visitor as HirVisitor}, - HirId, -}; -use rustc_middle::{ - hir::nested_filter, - mir::{self, Local, Location, Place}, -}; -use rustc_span::Span; -use rustc_utils::{ - source_map::range::CharRange, test_utils::DUMMY_CHAR_RANGE, PlaceExt, SpanExt, -}; - -use super::{ - segment_tree::{MirSegment, SegmentSearchResult, SegmentTree, SplitType}, - *, -}; -use crate::{ - analysis::{ - ir_mapper::{GatherDepth, IRMapper}, - permissions::{ - Permissions, PermissionsCtxt, PermissionsData, PermissionsDomain, - }, - }, - errors, -}; - -pub fn compute_permission_steps<'a, 'tcx>( - analysis: &AquascopeAnalysis<'a, 'tcx>, -) -> Result> -where - 'tcx: 'a, -{ - let mode = INCLUDE_MODE.copied().unwrap_or(PermIncludeMode::Changes); - let ctxt = &analysis.permissions; - let ir_mapper = &analysis.ir_mapper; - let body = &ctxt.body_with_facts.body; - let _basic_blocks = body.basic_blocks.indices(); - let mut hir_visitor = 
HirStepPoints::make(ctxt, ir_mapper)?; - hir_visitor.visit_nested_body(ctxt.body_id); - - log::debug!( - "Final tree for permission steps\n{:?}", - hir_visitor.mir_segments - ); - - if let Some((_, msg)) = hir_visitor.unsupported_encounter { - bail!(msg); - } - - if !hir_visitor.fatal_error.is_empty() { - bail!(hir_visitor.fatal_error); - } - - Ok(prettify_permission_steps( - analysis, - hir_visitor.finalize_diffs(), - mode, - )) -} - -// Prettify, means: -// - Remove all places that are not source visible -// - Remove all tables which are empty -// - Convert Spans to Ranges -fn prettify_permission_steps<'tcx>( - analysis: &AquascopeAnalysis<'_, 'tcx>, - perm_steps: HashMap< - Span, - (MirSegment, HashMap, PermissionsDataDiff>), - >, - mode: PermIncludeMode, -) -> Vec { - let ctxt = &analysis.permissions; - let tcx = ctxt.tcx; - let body = &ctxt.body_with_facts.body; - - let should_keep = |p: &PermissionsDataDiff| -> bool { - !(matches!(p.is_live, ValueStep::None { value: Some(false) }) - || (mode == PermIncludeMode::Changes && p.is_empty())) - }; - - macro_rules! place_to_string { - ($p:expr) => { - $p.to_string(tcx, body) - .unwrap_or_else(|| String::from("")) - }; - } - - let first_error_span_opt = - errors::get_span_of_first_error(ctxt.def_id.expect_local()) - .and_then(|s| s.as_local(ctxt.body_with_facts.body.span)); - let source_map = tcx.sess.source_map(); - - perm_steps - .into_iter() - .fold( - HashMap::< - CharRange, - Vec<(MirSegment, Vec<(Place<'tcx>, PermissionsDataDiff)>)>, - >::default(), - |mut acc, (span, (segment, place_to_diffs))| { - // Attach the span to the end of the line. Later, all permission - // steps appearing on the same line will be combined. - let span = source_map.span_extend_to_line(span).shrink_to_hi(); - let entries = place_to_diffs - .into_iter() - .filter(|(place, diff)| { - place.is_source_visible(tcx, body) && should_keep(diff) - }) - .collect::>(); - - // This could be a little more graceful. 
The idea is that - // we want to remove all permission steps which occur after - // the first error, but the steps involved with the first - // error could still be helpful. This is why we filter all - // spans with a LO BytePos greater than the error - // span HI BytePos. - if !(entries.is_empty() - || first_error_span_opt - .is_some_and(|err_span| err_span.hi() < span.lo())) - { - let range = analysis.span_to_range(span); - acc.entry(range).or_default().push((segment, entries)); - } - - acc - }, - ) - .into_iter() - // HACK FIXME: we're at odds with the multi-table setup. This quick - // hack combines table entries into a single table until the - // visual explanation gets up-to-speed. - // Another weird thing about this is that you can have a single - // table with two changes for one place. - // ```example - // # fn main() { - // let closure = |s: &str| s.len(); // s: +R+O - // // s: -R-O - // // closure: +R+O - // # } - // ``` - // imagine that the comments to the right of the Let represent - // a pseudo combined table. The path `s` gains and loses the same - // set of permissions in the same table. This is kind of weird, we'd - // rather just show *no change*. - .filter_map(|(range, mut entries)| { - for (_, v) in entries.iter_mut() { - v.sort_by_key(|(place, _)| (place.local.as_usize(), place.projection)) - } - - // let state = entries - // .into_iter() - // .map(|(MirSegment { from, to }, diffs)| { - // let state = diffs - // .into_iter() - // .map(|(place, diff)| { - // let s = place_to_string!(place); - // (s, diff) - // }) - // .collect::>(); - // let from = analysis.span_to_range(ctxt.location_to_span(from)); - // let to = analysis.span_to_range(ctxt.location_to_span(to)); - // PermissionsStepTable { from, to, state } - // }) - // .collect::>(); - - // Conforming to the above HACK this just takes any (from, to) pair. 
- let dummy_char_range = DUMMY_CHAR_RANGE.with(|range| *range); - let (from, to) = entries.first().map_or_else( - || (dummy_char_range, dummy_char_range), - |(MirSegment { from, to }, _)| { - let from = analysis.span_to_range(ctxt.location_to_span(*from)); - let to = analysis.span_to_range(ctxt.location_to_span(*to)); - (from, to) - }, - ); - - let mut master_table: Vec<(Place<'tcx>, PermissionsDataDiff)> = - Vec::default(); - - let is_symmetric_diff = - |diff1: &PermissionsDataDiff, diff2: &PermissionsDataDiff| -> bool { - macro_rules! is_symmetric { - ($v1:expr, $v2:expr) => { - matches!( - (&$v1, &$v2), - (ValueStep::High { .. }, ValueStep::Low { .. }) - | (ValueStep::Low { .. }, ValueStep::High { .. }) - | (ValueStep::None { .. }, ValueStep::None { .. }) - ) - }; - } - let p1 = &diff1.permissions; - let p2 = &diff2.permissions; - is_symmetric!(p1.read, p2.read) - && is_symmetric!(p1.write, p2.write) - && is_symmetric!(p1.drop, p2.drop) - }; - - // For all tables which fall on the same line, we combine them into a single table - // and remove all *SYMMETRIC* differences. That is, if you have permission changes such as: - // - path: +R+O - // - path: -R-O - // these are exactly symmetric, and will be removed. - for (_, diffs) in entries.into_iter() { - for (place, diff) in diffs.into_iter() { - let i_opt = master_table.iter().position(|(p, _)| *p == place); - if let Some(idx) = i_opt { - let (_, old_diff) = &master_table[idx]; - if is_symmetric_diff(&diff, old_diff) { - log::debug!( - "REMOVING place {place:?} with diff {diff:?} into the MT." - ); - master_table.remove(idx); - continue; - } - } - - log::debug!("ADDING place {place:?} with diff {diff:?} into the MT."); - master_table.push((place, diff)); - } - } - - // This means the tables were symmetric and all were removed. 
- if master_table.is_empty() { - return None; - } - - let master_table = PermissionsStepTable { - from, - to, - state: master_table - .into_iter() - .map(|(place, diff)| (place_to_string!(place), diff)) - .collect::>(), - }; - - Some(PermissionsLineDisplay { - location: range, - state: vec![master_table], - }) - }) - .collect::>() -} - -// ------------------------------------------------ - -macro_rules! fatal { - ($this:expr, $( $rest:tt ),*) => { - let f = format!( $($rest)*); - $this.report_fatal(&f); - bail!(f); - } -} - -/// Visitor for creating permission steps in the HIR. -/// -/// Visits the HIR in a Nested order, splitting the MIR and accumulating permission steps. -struct HirStepPoints<'a, 'tcx> -where - 'tcx: 'a, -{ - ctxt: &'a PermissionsCtxt<'a, 'tcx>, - ir_mapper: &'a IRMapper<'a, 'tcx>, - mir_segments: Box, - unsupported_encounter: Option<(Span, String)>, - fatal_error: String, -} - -impl<'a, 'tcx: 'a> HirStepPoints<'a, 'tcx> { - fn make( - ctxt: &'a PermissionsCtxt<'a, 'tcx>, - ir_mapper: &'a IRMapper<'a, 'tcx>, - ) -> Result { - let tcx = ctxt.tcx; - let hir = tcx.hir(); - let body = &hir.body(ctxt.body_id); - let body_hir_id = body.value.hir_id; - let body_span = body.value.span; - - let mol = ir_mapper - .get_mir_locations(body_hir_id, GatherDepth::Nested) - .unwrap(); - - // A body must have an entry location. - let from = mol.entry_location().unwrap(); - - // A body with an infinite loop will not generate MIR that - // contains an exit location. - let Some(to) = mol.exit_location() else { - bail!("The function body under analysis has zero (or many) exit points. 
This currently isn't supported by the permissions stepper; I suggest trying to rewrite the function to contain a single `return`."); - }; - - let body_segment = MirSegment::new(from, to); - let mir_segments = Box::new(SegmentTree::new(body_segment, body_span)); - - Ok(HirStepPoints { - ctxt, - ir_mapper, - mir_segments, - unsupported_encounter: None, - fatal_error: String::default(), - }) - } - - fn report_unsupported(&mut self, id: HirId, msg: &str) { - if self.unsupported_encounter.is_none() { - let span = self.span_of(id); - self.unsupported_encounter = Some((span, String::from(msg))); - } - } - - fn report_fatal(&mut self, msg: &str) { - self.fatal_error.push_str(&"-".repeat(5)); - self.fatal_error.push('\n'); - self.fatal_error.push_str(msg); - } - - /// Determine whether the traversal should visited nested HIR nodes. - /// - /// This method is a sort of HACK to avoid picking apart nodes expanded from - /// macros, while visiting nodes expanded from expected desugarings (e.g. for / while loops). - fn should_visit_nested(&self, _id: HirId, span: Span) -> bool { - use rustc_span::hygiene::DesugaringKind as DK; - !span.from_expansion() - || span.is_desugaring(DK::ForLoop) - || span.is_desugaring(DK::WhileLoop) - } - - /// Split an already linear segment into two segments. - /// - /// Example, a block of statements will produce a graph with the following shape: - /// - /// ```text - /// ⬤ l1 --> ⬤ l2 --> ⬤ l3 - /// ``` - /// - /// The above linear sequence could be split at any of the location `l1, l2, l3` and it - /// would produce two valid segments. For example, splitting the above at `l2` would produce: - /// - /// ```text - /// SegmentTree::Split { - /// segments: SplitType::Linear { - /// first: MirSegment(l1, l2), - /// second: MirSegment(l2, l3), - /// }, - /// reach: MirSegment(l1, l3), - /// ... 
- /// } - /// ``` - fn insert_linear_step_at( - &mut self, - span: Span, - location: Location, - attached_here: Vec, - ) -> Result<()> { - let enclosing_segment = self - .mir_segments - .as_ref() - .find_segment_for_end(location, &self.ir_mapper.cleaned_graph); - - match enclosing_segment { - SegmentSearchResult::NotFound => { - fatal!(self, "{location:?} should always be enclosed in the graph"); - } - SegmentSearchResult::StepExists(segment, ..) => { - log::warn!( - "linear step had slice conflict at {location:?} with {segment:?}" - ); - Ok(()) - } - - SegmentSearchResult::Enclosing(SegmentTree::Single { - segment, - span: old_span, - attached, - }) => { - let mut paths = - segment.paths_along_segment(&self.ir_mapper.cleaned_graph); - - let first_step = SegmentTree::Single { - segment: MirSegment::new(segment.from, location), - attached: attached_here, - span, - }; - - let second_step = SegmentTree::Single { - segment: MirSegment::new(location, segment.to), - attached: vec![], - span: *old_span, - }; - - let _ = paths - .drain_filter(|path| path.contains(&location.block)) - .collect::>(); - - if !paths.is_empty() { - fatal!(self, "Inserting a linear segment should not result in fragmentation.\nSplitting segment: {segment:?} at {location:?}. Remaining paths: {paths:#?}"); - } - - let subtree = SegmentTree::Split { - segments: SplitType::Linear { - first: Box::new(first_step), - second: Box::new(second_step), - }, - reach: *segment, - span: *old_span, - attached: attached.clone(), - }; - - let segment = *segment; - self.mir_segments.as_mut().replace_single(segment, subtree) - } - - _ => { - fatal!(self, "Enclosing segments can only be a `Single` variant, this is a stepper bug!"); - } - } - } - - /// Split a segment into a series of split / join segments for a piece of control flow. - /// - /// Example, a simple `if ... { ... } else { ... }` expression will produce a diamond shaped CFG. 
- /// - /// ```text - /// ⬤ l1 - /// / \ - /// ⬤ l2 ⬤ l3 - /// \ / - /// ⬤ l4 - /// ``` - /// - /// In this diagram, the initial `MirSegment` is `l1` -> `l4`. To produce a well-formed - /// `SegmentTree::Split` node, the locations `[l2, l3]` should be provided as arguments. - /// - /// The specified locations for splitting should satisfy the following properties. - /// 1. All locations are enclosed by the same MirSegment, (in the above example `(l1, l4)`). - /// 2. Each location should correspond to a single path through the control flow. In the above - /// example, the two possible paths are `[l1, l2, l4]` and `[l1, l3, l4]`. - /// 3. The locations should be bijective wrt the possible control-flow paths. - /// - /// The above example would produce a SegmentTree with the following shape: - /// - /// ```text - /// SegmentTree::Split { - /// segments: SegmentType::ControlFlow { - /// splits: vec![ - /// MirSegment::new(l1, l2), - /// MirSegment::new(l1, l3) - /// ], - /// joins: vec![ - /// MirSegment::new(l2, l4), - /// MirSegment::new(l3, l4) - /// ], - /// }, - /// reach: ..., - /// span: ..., - /// } - /// ``` - fn insert_cf_step_at(&mut self, steps: Vec<(Location, Span)>) -> Result<()> { - if steps.is_empty() { - return Ok(()); - } - - let graph = &self.ir_mapper.cleaned_graph; - - let enclosings = steps - .iter() - .filter_map(|(location, _)| { - let res = self.mir_segments.find_segment_for_end(*location, graph); - if let SegmentSearchResult::Enclosing(SegmentTree::Single { - segment, - span, - attached, - }) = res - { - Some((*segment, *span, attached.clone())) - } else { - log::error!( - "searching for {location:?} came up with no result {res:?}" - ); - None - } - }) - .collect::>(); - - if enclosings.len() < steps.len() { - fatal!(self, "not every locations step had an enclosing segment."); - } - - let (segment, old_span, attached) = enclosings.first().unwrap(); - - if !enclosings.iter().all(|(s, _, _)| s == segment) { - fatal!(self, "not all provided 
locations map to the same enclosing segment: {enclosings:#?}"); - } - - let mut paths = segment.paths_along_segment(&self.ir_mapper.cleaned_graph); - - let mut splits = Vec::default(); - let mut joins = Vec::default(); - - for (location, span) in steps.into_iter() { - let split_step = SegmentTree::Single { - segment: MirSegment::new(segment.from, location), - attached: vec![], - span, - }; - - let join_step = SegmentTree::Single { - segment: MirSegment::new(location, segment.to), - attached: vec![], - span: *old_span, - }; - - let _removed_paths = paths - .drain_filter(|path| path.contains(&location.block)) - .collect::>(); - - splits.push(split_step); - joins.push(join_step); - } - - let subtree = SegmentTree::Split { - segments: SplitType::ControlFlow { splits, joins }, - reach: *segment, - span: *old_span, - attached: attached.clone(), - }; - - self.mir_segments.replace_single(*segment, subtree) - } - - fn span_of(&self, id: HirId) -> Span { - let hir = self.ctxt.tcx.hir(); - let span = hir.span(id); - span - .as_local(self.ctxt.body_with_facts.body.span) - .unwrap_or(span) - } - - fn body_value_id(&self) -> HirId { - let hir = self.ctxt.tcx.hir(); - hir.body(self.ctxt.body_id).value.hir_id - } - - /// The [`PermissionsDomain`] ⊥. - /// - /// No permissions, anywhere. - fn domain_bottom(&self) -> PermissionsDomain<'tcx> { - self - .ctxt - .domain_places() - .into_iter() - .map(|place| { - (place, PermissionsData { - is_live: false, - type_droppable: false, - type_writeable: false, - type_copyable: false, - path_moved: None, - path_uninitialized: false, - loan_read_refined: None, - loan_write_refined: None, - loan_drop_refined: None, - permissions: Permissions::bottom(), - }) - }) - .collect::>() - .into() - } - - /// Convert the current [`SegmentTree`] into permission steps. 
- fn finalize_diffs( - self, - ) -> HashMap, PermissionsDataDiff>)> - { - let body_hir_id = self.body_value_id(); - let body_open_brace = self.span_of(body_hir_id).shrink_to_lo(); - let first_point = self.ctxt.location_to_point(self.body_segment().from); - let first_domain = &self.ctxt.permissions_domain_at_point(first_point); - let empty_domain = &self.domain_bottom(); - - // Upon entry, the function parameters are already "live". But we want to - // special case this, and show that they "come alive" at the opening brace. - let first_diff = empty_domain.diff(first_domain); - - fn diff_subtree<'tcx>( - ctxt: &PermissionsCtxt<'_, 'tcx>, - tree: &SegmentTree, - result: &mut HashMap< - Span, - (MirSegment, HashMap, PermissionsDataDiff>), - >, - attached_at: &mut HashMap, - ) { - log::trace!( - "\ndiff_subtree\n[FILTERS]:\n{attached_at:?}\n[TREE]:{tree:?}" - ); - - macro_rules! is_attached { - ($set:expr, $place:expr, $loc:expr) => { - $set.get(&$place.local).map(|l| *l == $loc).unwrap_or(false) - }; - } - - let mut insert_segment = |segment: MirSegment, span: Span| { - if segment.from != segment.to { - let p0 = ctxt.location_to_point(segment.from); - let p1 = ctxt.location_to_point(segment.to); - let before = &ctxt.permissions_domain_at_point(p0); - let after = &ctxt.permissions_domain_at_point(p1); - let mut diff = before.diff(after); - - let removed = diff - .drain_filter(|place, _| { - is_attached!(attached_at, place, segment.to) - }) - .collect::>(); - - log::debug!( - "removed domain places due to attached filter at {:?} {:?}", - segment.to, - removed - ); - - result.insert(span, (segment, diff)); - } - }; - - match tree { - SegmentTree::Single { segment, span, .. 
} => { - insert_segment(*segment, *span) - } - SegmentTree::Split { - segments, - attached, - reach, - span, - } => { - // Add the attached places filter - for local in attached.iter() { - log::debug!( - "filtering Local {local:?} not attached to {:?}", - reach.to - ); - - let old = attached_at.insert(*local, reach.to); - assert!(old.is_none()); - } - - match segments { - SplitType::Linear { first, second } => { - diff_subtree(ctxt, first, result, attached_at); - diff_subtree(ctxt, second, result, attached_at); - } - - // CF Splits with exactly one branch / join are considered linear - // This happens frequently when there is ForLoop desugaring. - SplitType::ControlFlow { splits, joins } - if splits.len() == 1 && joins.len() == 1 => - { - diff_subtree(ctxt, &splits[0], result, attached_at); - diff_subtree(ctxt, &joins[0], result, attached_at); - } - - SplitType::ControlFlow { splits, joins } => { - for subtree in splits.iter() { - diff_subtree(ctxt, subtree, result, attached_at); - } - - let mut joined_diff = HashMap::default(); - let mut entire_diff = reach.into_diff(ctxt); - - // Rules for joining two domain differences. - // 1. We always insert the attached locals. - let attached_here = entire_diff - .drain_filter(|place, _| { - is_attached!(attached_at, place, reach.to) - }) - .collect::>(); - - // 2. Differences not found in *any* of the join segments are ignored - for subtree in joins.iter() { - let mut temp = HashMap::default(); - diff_subtree(ctxt, subtree, &mut temp, attached_at); - - // HACK: remove any differences that were attached to this span. - temp.remove(span); - - // HACK: manually remove any attached places which got added. - for (_, (_, diffs)) in temp.iter_mut() { - diffs - .drain_filter(|place, _| attached_here.contains_key(place)); - } - - joined_diff.extend(temp); - } - - assert!(!result.contains_key(span)); - assert!(joined_diff.get(span).is_none()); - - // FIXME: the reach is not the correct set of points here. 
- // But we don't currently have a good semantic model for - // what it should be. They aren't currently being - // displayed by the frontend so this isn't a problem (yet). - result.insert(*span, (*reach, attached_here)); - result.extend(joined_diff); - } - } - - // Remove the attached places filter. - for local in attached.iter() { - attached_at.remove(local); - } - } - } - } - - let mut diffs = HashMap::default(); - let mut attached_at = HashMap::default(); - let dummy_loc = Location { - block: mir::START_BLOCK, - statement_index: 0, - }; - - diffs.insert( - body_open_brace, - ( - MirSegment { - from: dummy_loc, - to: dummy_loc, - }, - first_diff, - ), - ); - - diff_subtree(self.ctxt, &self.mir_segments, &mut diffs, &mut attached_at); - - diffs - } - - fn body_segment(&self) -> &MirSegment { - match self.mir_segments.as_ref() { - SegmentTree::Split { reach, .. } => reach, - SegmentTree::Single { segment, .. } => segment, - } - } -} - -macro_rules! split_with_control_flow { - ($this:tt, $ids:expr) => { - split_with_control_flow!($this, $ids, "CF-SPLIT ") - }; - - ($this:tt, $ids:expr, $msg:expr) => { - let f = format!("{}\nsplitting the control flow with:\n{:#?}", $msg, $ids); - $ids - .into_iter() - .map(|id| { - $this - .ir_mapper - .get_mir_locations(id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.entry_location().map(|entry| { - let span = $this.span_of(id).shrink_to_lo(); - (entry, span) - }) - }) - }) - .fold(Some(Vec::default()), |acc, step| { - if let (Some(mut acc), Some(step)) = (acc, step) { - acc.push(step); - Some(acc) - } else { - None - } - }) - .and_then(|steps| $this.insert_cf_step_at(steps).ok()) - .unwrap_or_else(|| { - $this.report_fatal(&f); - }); - }; -} - -macro_rules! 
split_with_linear { - ($this:tt, $id:expr) => { - split_with_linear!($this, $id, "splitting linearly") - }; - - ($this:tt, $id:expr, $msg:expr) => { - split_with_linear!($this, $id, $msg, vec![]) - }; - - ($this:tt, $id:expr, $msg:expr, $attached:expr) => { - $this - .ir_mapper - .get_mir_locations($id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.exit_location().map(|exit| { - let span = $this.span_of($id); - let exit = $this - .ir_mapper - .cleaned_graph - .location_successor(exit) - .unwrap_or(exit); - $this - .insert_linear_step_at(span, exit, $attached) - .expect(""); - }) - }) - .unwrap_or_else(|| { - log::warn!( - "Expected entry / exit locations but none were found: {:?}", - $msg - ); - }); - }; -} - -impl<'a, 'tcx: 'a> HirVisitor<'tcx> for HirStepPoints<'a, 'tcx> { - type NestedFilter = nested_filter::All; - - fn nested_visit_map(&mut self) -> Self::Map { - self.ctxt.tcx.hir() - } - - fn visit_stmt(&mut self, stmt: &'tcx hir::Stmt) { - use rustc_hir::StmtKind as SK; - let hir = self.nested_visit_map(); - let error_msg = - format!("Analyzing statement : {}", hir.node_to_string(stmt.hir_id)); - - let locals = match stmt.kind { - SK::Local(local) => { - let places = self.ir_mapper.local_assigned_place(local); - places.into_iter().map(|p| p.local).collect::>() - } - _ => vec![], - }; - - split_with_linear!(self, stmt.hir_id, error_msg, locals); - - if self.should_visit_nested(stmt.hir_id, stmt.span) { - intravisit::walk_stmt(self, stmt); - } - } - - fn visit_block(&mut self, block: &'tcx hir::Block) { - let hir = self.ctxt.tcx.hir(); - - for stmt in block.stmts.iter() { - self.visit_stmt(stmt); - } - - if let Some(expr) = block.expr { - let error_msg = - format!("end-of-statement expr: {}", hir.node_to_string(expr.hir_id)); - split_with_linear!(self, expr.hir_id, error_msg); - self.visit_expr(expr); - } - } - - fn visit_expr(&mut self, expr: &'tcx hir::Expr) { - use hir::{ExprKind as EK, LoopSource, StmtKind as SK}; - - let hir = 
self.nested_visit_map(); - let error_msg = - format!("Analyzing expr : {}", hir.node_to_string(expr.hir_id)); - - match expr.kind { - // Special case for While Loop desugaring, this shouldn't be necessary - // when generic loops are handled. - EK::Loop( - hir::Block { - stmts: [], - expr: - Some(hir::Expr { - kind: EK::If(cnd, then, Some(els)), - .. - }), - .. - }, - _label, - LoopSource::While, - _loop_span, - ) => { - self - .ir_mapper - .get_mir_locations(then.hir_id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.entry_location().map(|then_entry| { - self - .ir_mapper - .get_mir_locations(els.hir_id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.exit_location().map(|else_exit| { - let loop_end = self.span_of(expr.hir_id).shrink_to_hi(); - let if_start = self.span_of(then.hir_id).shrink_to_lo(); - - let ls = - vec![(then_entry, if_start), (else_exit, loop_end)]; - - self.insert_cf_step_at(ls).expect(""); - }) - }) - .unwrap(); - }) - }) - .unwrap(); - - // Skip the else block, it only contains the break statement. - intravisit::walk_expr(self, cnd); - intravisit::walk_expr(self, then); - } - - // Special case for For Loop desugaring, this shouldn't be necessary - // when generic loops are handled. - EK::Loop( - hir::Block { - stmts: - [hir::Stmt { - kind: - SK::Expr(hir::Expr { - kind: EK::Match(cnd, [none, some], _), - .. - }), - .. - }], - expr: None, - .. 
- }, - _label, - LoopSource::ForLoop, - _loop_span, - ) => { - self - .ir_mapper - .get_mir_locations(some.body.hir_id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.entry_location().map(|then_entry| { - self - .ir_mapper - .get_mir_locations(none.body.hir_id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.exit_location().map(|else_exit| { - let loop_end = self.span_of(expr.hir_id).shrink_to_hi(); - let loop_start = - self.span_of(some.body.hir_id).shrink_to_lo(); - - let ls = - vec![(then_entry, loop_start), (else_exit, loop_end)]; - - self.insert_cf_step_at(ls).expect(""); - }) - }) - .unwrap(); - }) - }) - .unwrap(); - - // ignore the none branch as it just contains the break. - intravisit::walk_expr(self, cnd); - intravisit::walk_arm(self, some); - } - - // TODO: have a split strategy for bare loops. They could be infinite, and - // thus have no exit block. This shouldn't be an issue but it currently is. - EK::Loop(_block, _label, LoopSource::Loop, _span) => { - self.report_unsupported(expr.hir_id, "Bare loops aren't working yet, sorry! Can I interest you in a `for` or `while` loop?"); - } - - EK::If(cnd, then, else_opt) => { - // NOTE: first we need to walk and split the condition. In the - // case of a more complex condition expression, splitting this - // first will result in a split location closest to the `SwitchInt`. - intravisit::walk_expr(self, cnd); - - let ids = [Some(then), else_opt] - .iter() - .flatten() - .map(|n| n.hir_id) - .collect::>(); - - split_with_control_flow!(self, ids, error_msg); - - intravisit::walk_expr(self, then); - if let Some(els) = else_opt { - intravisit::walk_expr(self, els); - } - } - - EK::Match(swtch, arms, _source) => { - // NOTE: first we need to walk and split the condition. In the - // case of a more complex condition expression, splitting this - // first will result in a split location closest to the `SwitchInt`. 
- intravisit::walk_expr(self, swtch); - - let ids = arms - .iter() - .map(|arm| { - if arm.guard.is_some() { - self.report_unsupported( - arm.hir_id, - "Arm guards are not supported, sorry!", - ) - } - - arm.body.hir_id - }) - .collect::>(); - - split_with_control_flow!(self, ids, error_msg); - - for arm in arms.iter() { - intravisit::walk_arm(self, arm); - } - } - _ => { - intravisit::walk_expr(self, expr); - } - } - } -} diff --git a/crates/aquascope/src/analysis/stepper/hir_steps.rs b/crates/aquascope/src/analysis/stepper/hir_steps.rs new file mode 100644 index 000000000..14d112800 --- /dev/null +++ b/crates/aquascope/src/analysis/stepper/hir_steps.rs @@ -0,0 +1,1226 @@ +//! HIR-level stepper (and entry point) for computing permissions steps. +//! +//! The permissions stepper computes the differences in permissions +//! between two "states". These differences are computed per [`mir::Place`], +//! to read how they are aggregated and displayed see [super::table_builder]. +//! +//! Computing these permissions steps takes a surprising amount of coordination +//! between the HIR and the MIR. Fundamentally, the HIR has the information we +//! need about the _source program_ while the MIR holds the information +//! about control-flow and code points. Because permissions steps are associated +//! with a source span, we need the HIR to communicate this down to the MIR, but +//! we need the MIR to ensure that created steps are valid. To understand +//! the validation of creating permissions steps see [`super::segmented_mir`]. +//! +//! At a (very) high-level, we insert steps after anything interesting +//! could happen. Interesting in this case means (1) it's visible at the source- +//! level, and (2) a change in permissions could be captured. The three main places +//! where this could happen are: +//! +//! 1. After statements. +//! 2. After the final expression in blocks. +//! 3. Entering a block, potentially from a conditional branch +//! 
which can cause liveness permissions changes. +//! +//! For most of the process, the [`SegmentedMirBuilder`] handles all the +//! tough work of making sure steps are valid. There are a few cases when +//! the HIR knows more about the structure of a program and they all have to +//! do with placing spans. Life would be much better if we didn't have to +//! place spans, or if the Rust compiler had a richer model for tracking spans +//! but that's not the case (_stares longingly out the window_). The main places +//! where this happens are loop desugarings and branches. The reasons why are +//! touched on briefly below. +//! +//! Several constructs as they appear in the HIR are desugared compared to the +//! language constructs one uses in Rust source code. For example, a `while cnd { ... }` +//! loop will get desugared into `loop { if cnd { ... } else { break; }}`. These +//! desugarings have to be special cased by the stepper so that we get the span +//! placement _just right_. +//! +//! Branches again require the HIR to make some decisions about step locations. +//! When a match expression is encountered, it might look like the following: +//! +//! ```ignore +//! match Some(10) { +//! None => 0, +//! Some(n) => { +//! n * 2 +//! }, +//! } +//! ``` +//! +//! When computing steps over the arms of the match, the `SegmentedMirBuilder` would +//! insert a step at the very beginning of each branch target. However, that's not +//! quite what we want: if the user thinks of the opening curly brace as the beginning +//! of the branch, then in the `Some` case `n` is _already bound_. We can use info +//! at the HIR level to find this micro adjustment which computes the branch target +//! as being after the code initializing all bound variables in a match pattern. 
+ +use anyhow::{anyhow, Result}; +use rustc_data_structures::{self, fx::FxHashMap as HashMap}; +use rustc_hir::{ + self as hir, + intravisit::{self, Visitor as HirVisitor}, + BodyId, HirId, +}; +use rustc_middle::{ + hir::nested_filter, + mir::{self, Body, Local, Location}, + ty::TyCtxt, +}; +use rustc_span::Span; +use rustc_utils::SpanExt; + +use super::{segmented_mir::*, table_builder::*, *}; +use crate::analysis::ir_mapper::{GatherDepth, IRMapper}; + +/// Visitor for creating permission steps in the HIR. +/// +/// Visits the HIR in a Nested order, splitting the MIR and accumulating permission steps. +pub(super) struct HirStepPoints<'a, 'tcx> +where + 'tcx: 'a, +{ + tcx: &'a TyCtxt<'tcx>, + body: &'a Body<'tcx>, + body_id: BodyId, + ir_mapper: &'a IRMapper<'a, 'tcx>, + + // Error reporting counters + unsupported_features: Vec, + fatal_errors: Vec, + + // Actual state of the analysis + /// Entry location of the body under analysis. + start_loc: Location, + locals_at_scope: HashMap>, + /// Stack of the current branch entry points, used + /// for hinting path steps to the `SegmentedMir`. + current_branch_start: Vec, + mir_segments: SegmentedMirBuilder<'a, 'tcx>, +} + +/// Makes calling functions on the SegmentedMir easier. +/// All functions on the `SegmentedMir` return a Result in +/// the case that the internal state gets off. When it does, +/// we should save the error and stop the current computation. +/// As with most error-relevant things, if internally an error +/// state is entered more errors are likely to occur, but it's +/// really the first we care about. +macro_rules! 
invoke_internal { + (on_fail -> $ret:expr, $this:ident, $call:ident, $($param:expr),*) => { + match $this.mir_segments.$call($( $param ),*) { + Err(e) => { + $this.fatal_errors.push(e); + return $ret; + }, + Ok(v) => v, + } + }; + (on_fail -> $ret:expr, $this:ident, $call:ident) => { + invoke_internal!(on_fail -> $ret, $this, $call,) + }; + (on_fail -> $ret:expr, $this:ident, $call:ident, $($param:expr),*) => { + invoke_internal!(on_fail -> $ret, $this, $call, $($param:expr),*) + }; + ($this:ident, $call:ident) => { + invoke_internal!(on_fail -> (), $this, $call,) + }; + ($this:ident, $call:ident, $( $param:expr ),*) => { + invoke_internal!(on_fail -> (), $this, $call, $( $param ),*) + }; +} + +macro_rules! report_unexpected { + ($this:ident, $($param:expr),*) => { + $this.fatal_errors.push(anyhow!($( $param ),*)) + } +} + +macro_rules! report_unsupported { + ($this:ident, $($param:expr),*) => { + $this.unsupported_features.push(anyhow!($( $param ),*)) + } +} + +impl<'a, 'tcx: 'a> HirStepPoints<'a, 'tcx> { + pub(super) fn make( + tcx: &'a TyCtxt<'tcx>, + body: &'a Body<'tcx>, + body_id: BodyId, + ir_mapper: &'a IRMapper<'a, 'tcx>, + ) -> Result { + let mir_segments = SegmentedMirBuilder::make(ir_mapper); + let start_loc = mir::START_BLOCK.start_location(); + + Ok(HirStepPoints { + tcx, + body, + body_id, + ir_mapper, + unsupported_features: Vec::default(), + fatal_errors: Vec::default(), + start_loc, + locals_at_scope: HashMap::default(), + current_branch_start: Vec::default(), + mir_segments, + }) + } + + fn process_error(stack: &[anyhow::Error]) -> Option { + use itertools::Itertools; + if stack.is_empty() { + return None; + } + + Some( + stack + .iter() + .map(|e: &anyhow::Error| e.to_string()) + .join("\n"), + ) + } + + pub(super) fn get_unsupported_feature(&self) -> Option { + Self::process_error(&self.unsupported_features) + } + + pub(super) fn get_internal_error(&self) -> Option { + Self::process_error(&self.fatal_errors) + } + + pub(super) fn finalize( + 
self, + analysis: &AquascopeAnalysis<'_, 'tcx>, + mode: PermIncludeMode, + ) -> Result> { + let body_hir_id = self.body_value_id(); + let body_span = self.span_of(body_hir_id); + + let mir_segments = self.mir_segments.freeze()?; + + log::debug!( + "Steps analysis found these steps: {:#?}", + mir_segments.segments().collect::>() + ); + + let finalizer = TableBuilder { + analysis, + ctxt: &analysis.permissions, + mir: &mir_segments, + locals_at_scope: self.locals_at_scope, + }; + + Ok(finalizer.finalize_body(self.start_loc, body_span, mode)) + } + + // Used for tracking path hints of the current branches. + + fn get_path_hint(&self) -> Option { + self.current_branch_start.last().copied() + } + + fn push_branch_start(&mut self, location: Location) { + self.current_branch_start.push(location) + } + + fn pop_branch_start(&mut self, expecting: Location) { + if let Some(popped) = self.current_branch_start.pop() && popped != expecting { + report_unexpected!(self, "expecting popped location {expecting:?} but got {popped:?}") + } + } + + /// Get the source span of the HIR node `id`. + /// + /// The span is passed through `SpanExt::as_local` relative to the span of the body + /// under analysis; if that yields nothing, the node's raw HIR span is returned unchanged. + fn span_of(&self, id: HirId) -> Span { + let hir = self.tcx.hir(); + let span = hir.span(id); + span.as_local(self.body.span).unwrap_or(span) + } + + fn body_value_id(&self) -> HirId { + let hir = self.tcx.hir(); + hir.body(self.body_id).value.hir_id + } + + fn get_node_entry(&self, hir_id: HirId) -> Option { + let mir_order = self + .ir_mapper + .get_mir_locations(hir_id, GatherDepth::Nested)?; + mir_order.entry_location() + } + + fn get_node_exit(&self, hir_id: HirId) -> Option { + let mir_order = self + .ir_mapper + .get_mir_locations(hir_id, GatherDepth::Nested)?; + + // HACK: shift the exit to the next successor if available. 
+ // this way we capture the state changes for a single + // operation rather than having an off by one. + // TODO: a more elegant solution would be to have a way to + // specify at which execution point you want the permission + // state, before, middle, or after an instruction. This is + // similar to what the MIR does but it doesn't provide an + // after point, only a start and mid. + mir_order.exit_location().map(|e| { + self + .ir_mapper + .cleaned_graph + .location_successor(e) + .unwrap_or(e) + }) + } + + fn prettify_node(&self, hir_id: HirId) -> String { + let hir = self.tcx.hir(); + hir.node_to_string(hir_id) + } + + /// Open a conditional expression for branching. On success, returns + /// the exit `Location` of the given condition. + /// + /// For example, given an `EK::If(Expr, Expr, Option)`, the given condition expression should + /// be the first expression in the tuple, which is the condition. + /// For a `EK::Match(Expr, [Arm], ...)` the given condition should be the first expression + /// in the tuple which is the match condition. + fn expr_condition_prelude( + &mut self, + cnd: &'tcx hir::Expr, + expr: &'tcx hir::Expr, + ) -> Option { + // NOTE: first we need to walk and split the condition. In the + // case of a more complex condition expression, splitting this + // first will result in a split location closest to the `SwitchInt`. + self.visit_expr(cnd); + let Some(cnd_exit) = self.get_node_exit(cnd.hir_id).or_else(|| { + log::warn!( + "EXPR condition has no exit {} looking at expr entry", + self.prettify_node(cnd.hir_id) + ); + self.get_node_entry(expr.hir_id) + }) else { + log::warn!("cannot do EXPR prelude, aborting"); + return None; + }; + + invoke_internal!( + on_fail -> None, + self, + insert, + cnd_exit, + self.get_path_hint(), + self.span_of(cnd.hir_id) + ); + + Some(cnd_exit) + } + + /// Close the entire branching expression which had the condition exit. + /// + /// Here, the given expression should be the _entire_ `EK::If` or `EK::Match`. 
+ fn expr_condition_postlude(&mut self, bid: BranchId, hir_id: HirId) { + log::warn!( + "flushing and closing branch steps:\n{}", + self.prettify_node(hir_id) + ); + + invoke_internal!(self, close_branch, bid); + } + + /// Inserts a step point after the specified `HirId`. This + /// method is generic and takes the raw span returned by the + /// `IRMapper`, if a node requires tweaking for the span this + /// should not be used. + fn insert_step_at_node_exit(&mut self, hir_id: HirId) { + if let Some(exit) = self.get_node_exit(hir_id) { + invoke_internal!( + self, + insert, + exit, + self.get_path_hint(), + self.span_of(hir_id) + ); + } else { + log::warn!( + "Node {} doesn't have an exit location.", + self.prettify_node(hir_id) + ); + } + } + + fn condition_produced_switchint(&self, expr: &'tcx hir::Expr) -> bool { + if let Some(exit) = self.get_node_exit(expr.hir_id) { + log::debug!( + "checking location {exit:?} to see if terminator is switchInt" + ); + self.ir_mapper.is_terminator_switchint(exit) + } else { + // If the IRMapper can't determine a single exit location, that + // is most often caused by branching; in this case we just assume + // that a switchInt was produced. We could do something more robust + // if we see the need for it. + true + } + } + + // Factored out of the Visitor because this same logic is needed for + // EK::If and while loop desugarings, just with a different location + // to span mapping. 
+ fn handle_expr_if( + &mut self, + expr: &'tcx hir::Expr, + cnd: &'tcx hir::Expr, + then: &'tcx hir::Expr, + else_opt: Option<&'tcx hir::Expr>, + entry_locs_to_spans: HashMap, + ) { + log::debug!( + "visiting EXPR-IF\n\tCND: {}\n\t\tTHEN: {}\n\t\tELSE: {}", + self.prettify_node(cnd.hir_id), + self.prettify_node(then.hir_id), + else_opt.map_or(String::from(""), |e| self.prettify_node(e.hir_id)) + ); + let expr_id = expr.hir_id; + let Some(cnd_exit) = self.expr_condition_prelude(cnd, expr) else { + return; + }; + + let mapper = self.ir_mapper; + // We use this default span because an ExprKind::If can produce branches + // that "don't exist" at the HIR-level. This happens when no else-branch + // is provided, therefore we chose this default span to match the end + // of the If expression itself. + let default_span = self.span_of(expr_id).shrink_to_hi(); + let branch_id = invoke_internal!( + self, + open_branch, + cnd_exit, + move |to: &mut Location| { + entry_locs_to_spans + .iter() + .find_map(|(&l, &span)| { + if mapper.ldominates(*to, l) { + *to = l; + Some(span) + } else { + None + } + }) + .unwrap_or(default_span) + } + ); + + if let Some(then_entry) = self.get_node_entry(then.hir_id) { + self.push_branch_start(then_entry); + self.visit_expr(then); + self.pop_branch_start(then_entry); + } else { + log::warn!( + "then-branch doesn't have entry {}", + self.prettify_node(then.hir_id) + ); + } + + if let Some(els) = else_opt { + if let Some(els_entry) = self.get_node_entry(els.hir_id) { + self.push_branch_start(els_entry); + self.visit_expr(els); + self.pop_branch_start(els_entry); + } else { + log::warn!( + "else-branch doesn't have entry {}", + self.prettify_node(els.hir_id) + ); + } + } + + self.expr_condition_postlude(branch_id, expr_id); + } + + fn handle_expr_match( + &mut self, + expr: &'tcx hir::Expr, + cnd: &'tcx hir::Expr, + arms: &'tcx [hir::Arm], + entry_locs_to_spans: HashMap, + ) { + let expr_id = expr.hir_id; + let Some(cnd_exit) = 
self.expr_condition_prelude(cnd, expr) else { + return; + }; + let mapper = self.ir_mapper; + let branch_id = invoke_internal!( + self, + open_branch, + cnd_exit, + move |to: &mut Location| { + entry_locs_to_spans + .iter() + .find_map(|(&l, &span)| { + if mapper.ldominates(*to, l) { + // Update the location to be the entry of the arm. + *to = l; + Some(span) + } else { + None + } + }) + .unwrap_or(Span::default()) + } + ); + + for arm in arms { + self.visit_arm(arm); + } + + self.expr_condition_postlude(branch_id, expr_id); + } +} + +impl<'a, 'tcx: 'a> HirVisitor<'tcx> for HirStepPoints<'a, 'tcx> { + type NestedFilter = nested_filter::All; + + fn nested_visit_map(&mut self) -> Self::Map { + self.tcx.hir() + } + + fn visit_body(&mut self, body: &'tcx hir::Body) { + intravisit::walk_body(self, body); + self.insert_step_at_node_exit(body.value.hir_id); + } + + fn visit_block(&mut self, block: &'tcx hir::Block) { + let scope = invoke_internal!(self, open_scope); + for stmt in block.stmts.iter() { + self.visit_stmt(stmt); + } + + if let Some(expr) = block.expr { + log::debug!("BLOCK contains final EXPR"); + self.visit_expr(expr); + self.insert_step_at_node_exit(expr.hir_id); + } + invoke_internal!(self, close_scope, scope); + } + + fn visit_stmt(&mut self, stmt: &'tcx hir::Stmt) { + use rustc_hir::StmtKind as SK; + + log::debug!( + "Starting analysis of STMT {}\n", + self.prettify_node(stmt.hir_id), + ); + + let scope = invoke_internal!(self, open_scope); + + if let SK::Local(local) = stmt.kind { + let places = self.ir_mapper.local_assigned_place(local); + let locals = places.into_iter().map(|p| p.local).collect::>(); + if !locals.is_empty() { + log::debug!("storing locals at scope {scope:?} {locals:?}"); + self.locals_at_scope.insert(scope, locals); + } + } + + intravisit::walk_stmt(self, stmt); + + // Close the scope before inserting the final steps. 
+ invoke_internal!(self, close_scope, scope); + + self.insert_step_at_node_exit(stmt.hir_id); + } + + fn visit_expr(&mut self, expr: &'tcx hir::Expr) { + use hir::{ExprKind as EK, LoopSource, MatchSource, StmtKind as SK}; + match expr.kind { + EK::If(cnd, then, else_opt) => { + // For the generic case we can take the use the opening brace of each branch + // target as the span. + let mut entry_to_spans = HashMap::default(); + + // Insert the location and span for the then branch + if let Some(then_entry) = self.get_node_entry(then.hir_id) { + let then_span = self.span_of(then.hir_id).shrink_to_lo(); + entry_to_spans.insert(then_entry, then_span); + } + + // Insert the location and span for the else branch + if let Some(els) = else_opt && let Some(else_entry) = self.get_node_entry(els.hir_id) { + let else_span = self.span_of(els.hir_id).shrink_to_lo(); + entry_to_spans.insert(else_entry, else_span); + } + + self.handle_expr_if(expr, cnd, then, else_opt, entry_to_spans); + } + + // HACK: Special cases for ForLoop and While desugarings. + // + // These special cases are needed to _adjust the spans_. + // Example: + // ```ignore + // fn foo(mut s: String) { + // s.push_str("looping ") + // let b = &mut s; // - Table 1 - + // // b: +R +W + // // s: -R -W -O + // while true { /* open */ + // b.push_str("again... and "); + // } /* close */ // - Table 2 - + // // b: -R -W + // // s: +R +W +O + // s.push_str("done!"); + // println!("{s}"); + // } + // ``` + // If we don't adjust for the desugaring, "Table 2" would + // be placed on the line labeled "/* open */", but we want + // it to actually get placed at the end of the loop where + // it is depicted above. A similar adjustment is needed + // for `for` loops. + + // While loops need to be detected with the surrounding loop. + EK::Loop( + hir::Block { + stmts: [], + expr: + Some(hir::Expr { + kind: EK::If(cnd, then, Some(els)), + .. + }), + .. 
+ }, + _, + LoopSource::While, + _, + ) => { + // For the generic case we can take the use the opening brace of each branch + // target as the span. + let mut entry_to_spans = HashMap::default(); + + // Insert the location and span for the then branch + if let Some(then_entry) = self.get_node_entry(then.hir_id) { + let then_span = self.span_of(then.hir_id).shrink_to_lo(); + entry_to_spans.insert(then_entry, then_span); + } + + // Insert the location and span for the else branch + if let Some(else_entry) = self.get_node_entry(els.hir_id) { + // NOTE: we adjust the span of the break block to + // be _after_ the loop. + let else_span = self.span_of(expr.hir_id).shrink_to_hi(); + entry_to_spans.insert(else_entry, else_span); + } + + self.handle_expr_if(expr, cnd, then, Some(els), entry_to_spans); + } + + EK::Loop( + hir::Block { + stmts: + [hir::Stmt { + kind: + SK::Expr(hir::Expr { + kind: EK::Match(cnd, arms @ [none, some], _), + .. + }), + .. + }], + expr: None, + .. + }, + _, + LoopSource::ForLoop, + _, + ) => { + let mut entry_to_spans = HashMap::default(); + + let loop_start = self.span_of(some.body.hir_id).shrink_to_lo(); + let loop_end = self.span_of(expr.hir_id).shrink_to_hi(); + + // Iterator::next => None, breaking out of the loop + if let Some(none_entry) = self.get_node_entry(none.body.hir_id) { + entry_to_spans.insert(none_entry, loop_end); + } + + // Iterator::next => Some(_), execute loop body + if let Some(some_entry) = self.get_node_entry(some.body.hir_id) { + entry_to_spans.insert(some_entry, loop_start); + } + + #[allow(clippy::needless_borrow)] + self.handle_expr_match(expr, cnd, &arms, entry_to_spans); + } + + // NOTE: if a match condition doesn't produce a `switchInt`, there + // is no need to open a scope for this case. This most + // commonly happens when there is a single arm (common for desugarings) + // but it can also happen if future arms are elided. However, we + // still want to show the steps at the arm locations. 
+ EK::Match(cnd, [_], MatchSource::ForLoopDesugar) + if !self.condition_produced_switchint(cnd) => + { + log::debug!( + "Match condition didn't produce switchInt {}", + self.prettify_node(cnd.hir_id) + ); + intravisit::walk_expr(self, expr); + } + + // TODO this view of how a match branches is too simplistic, and + // doesn't accurately reflect reality. There could be many + // generated `switchInt`s or there could be none. + // Example: + // ```ignore + // match x { + // 0 => 1, + // 1 => 1, + // x => x, + // } + // ``` + // the above match block would generate NO `switchInt`, just + // a series of `goto`s. Contrasted with something such as: + // + // ```ignore + // match x { + // None => 1, + // Some(1) => 1, + // Some(x) => x, + // } + // ``` + // + // which will actually generate two `switchInt`s, one for the + // discriminant match and another for the inner integer check. + // These two cases are relatively simple, but branching for a + // generic match is complicated with the current internal API. + // What we would want, is automatic opening of a branch, + // this would make closing branches more difficult ... + // I'm(gavin) currently in thinking mode for this. + EK::Match(cnd, arms, _) => { + // This is the generic case and assumes no desugaring. + // For the span we want to pick the END of the matched pattern, + // but we choose the location as the entry to the arm body + // (after all bound variables have been assigned). + let entry_to_spans = arms + .iter() + .filter_map(|arm| { + let id = arm.body.hir_id; + self + .get_node_entry(id) + .map(|entry| (entry, self.span_of(arm.pat.hir_id).shrink_to_hi())) + }) + .collect::>(); + + self.handle_expr_match(expr, cnd, arms, entry_to_spans); + } + _ => { + intravisit::walk_expr(self, expr); + } + } + } + + // NOTE: it's important that arms handle path hinting + fn visit_arm(&mut self, arm: &'tcx hir::Arm) { + if arm.guard.is_some() { + // TODO: NYI.
+ report_unsupported!( + self, + "match arm guards are not yet supported {}", + self.prettify_node(arm.hir_id) + ); + } + + // We use the arm_entry for path hinting, because it's + // closer to the `switchInt`. + if let Some(arm_entry) = self.get_node_entry(arm.hir_id) { + self.push_branch_start(arm_entry); + + // We get the entry of the arm body (or before the arm guard), + // this is where any arm patterns will be initialized and bound. + if let Some(entry) = self.get_node_entry(arm.body.hir_id) { + let span = self.span_of(arm.hir_id).shrink_to_lo(); + invoke_internal!(self, insert, entry, self.get_path_hint(), span); + self.visit_expr(arm.body); + // self.insert_step_at_node_exit(arm.hir_id); + } else { + intravisit::walk_arm(self, arm); + } + + self.pop_branch_start(arm_entry); + } else { + log::warn!( + "match-arm doesn't have entry {}", + self.prettify_node(arm.hir_id) + ); + } + } +} + +#[cfg(test)] +mod tests { + use super::{super::segmented_mir::test_exts::SegmentedMirTestExt, *}; + use crate::{analysis::ir_mapper::GatherMode, test_utils as tu}; + + macro_rules!
compile_and_run { + ($code:expr) => { + tu::compile_normal($code, |tcx| { + tu::for_each_body(tcx, |body_id, wfacts| { + let body = &wfacts.body; + let mapper = IRMapper::new(tcx, body, GatherMode::IgnoreCleanup); + let mut visitor = HirStepPoints::make(&tcx, body, body_id, &mapper) + .expect("Failed to create stepper"); + visitor.visit_nested_body(body_id); + + if let Some(uf) = visitor.get_unsupported_feature() { + eprintln!("unsupported feature: {uf:?}"); + panic!("unsupported feature"); + } + + if let Some(ie) = visitor.get_internal_error() { + eprintln!("internal error: {ie:?}"); + panic!("internal error"); + } + + let smir = visitor + .mir_segments + .freeze() + .expect("Failed to freeze SegmentedMirBuilder"); + + if let Err(invalid) = smir.validate(&mapper) { + eprintln!("invalid reason: {invalid:?}"); + panic!("invalid smir"); + } + }) + }) + }; + } + + // Compile a piece of Rust code and assert that the generated SegmentedMir + // structure is valid. See `is_valid` for more details on what that means. + macro_rules! 
test_valid_segmented_mir { + (panics_with $s:expr => $name:ident, $code:expr) => { + #[test] + #[should_panic(expected = $s)] + fn $name() { + compile_and_run!($code); + } + }; + (should_panic => $name:ident, $code:expr) => { + #[test] + #[should_panic] + fn $name() { + compile_and_run!($code); + } + }; + ($name:ident, $code:expr) => { + #[test] + fn $name() { + compile_and_run!($code); + } + }; + } + + test_valid_segmented_mir!( + linear_stmts, + r#" +fn test() { + let a = String::from(""); + let b = &a; + let c = &&b; + println!("{c}"); + let d = &&&&&&c; + println!("{d} {}", 1 + 1 + 1 + 1 + 1 + 1); +} +"# + ); + + test_valid_segmented_mir!( + branch_simple, + r#" +fn test() { + let s = String::from(""); + + if true { + let b1 = &mut s; + b1.push_str("No!"); + } else { + let b1 = &mut s; + b1.push_str("Never!"); + } + + println!("{s}"); +} +"# + ); + + test_valid_segmented_mir!( + match_simple, + r#" +fn test(n: Option) -> i32 { + match n { + Some(n) => 1, + None => 0, + } +} +"# + ); + + test_valid_segmented_mir!( + match_with_child, + r#" +fn test(n: Option) -> i32 { + match n { + Some(0) => 1, + Some(n) => test(Some(n - 1)) * n, + None => 0, + } +} +"# + ); + + // ----------------------------------- + // Functions taken from weird_exprs.rs + // + // These merely test the resilience of + // the stepper, and none of them have + // been inspected to see if the visual + // output is worth anything. 
+ + test_valid_segmented_mir!( + weird_exprs_strange, + r#" +fn strange() -> bool { let _x: bool = return true; } +"# + ); + + test_valid_segmented_mir!( + weird_exprs_funny, + r#" +fn funny() { + fn f(_x: ()) { } + f(return); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_what, + r#" +use std::cell::Cell; +fn what() { + fn the(x: &Cell) { + return while !x.get() { x.set(true); }; + } + let i = &Cell::new(false); + let dont = {||the(i)}; + dont(); + assert!((i.get())); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_zombiejesus, + r#" +fn zombiejesus() { + loop { + while (return) { + if (return) { + match (return) { + 1 => { + if (return) { + return + } else { + return + } + } + _ => { return } + }; + } else if (return) { + return; + } + } + if (return) { break; } + } +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_notsure, + r#" +use std::mem::swap; +fn notsure() { + let mut _x: isize; + let mut _y = (_x = 0) == (_x = 0); + let mut _z = (_x = 0) < (_x = 0); + let _a = (_x += 0) == (_x = 0); + let _b = swap(&mut _y, &mut _z) == swap(&mut _y, &mut _z); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_canttouchthis, + r#" +fn canttouchthis() -> usize { + fn p() -> bool { true } + let _a = (assert!((true)) == (assert!(p()))); + let _c = (assert!((p())) == ()); + let _b: bool = (println!("{}", 0) == (return 0)); +} +"# + ); + + // XXX: The HIR constructs that turn into NOPs, e.g., the + // `loop { if break {} }` are not present in the + // simplified MIR, which currently causes a few issues. 
+ test_valid_segmented_mir!( + panics_with "invalid smir" => + weird_exprs_angrydome, + r#" +fn angrydome() { + loop { if break { } } + let mut i = 0; + loop { i += 1; if i == 1 { match (continue) { 1 => { }, _ => panic!("wat") } } + break; } +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_evil_lincoln, + r#" +fn evil_lincoln() { let _evil = println!("lincoln"); } +"# + ); + + test_valid_segmented_mir!( + weird_exprs_dots, + r#" +fn dots() { + assert_eq!(String::from(".................................................."), + format!("{:?}", .. .. .. .. .. .. .. .. .. .. .. .. .. + .. .. .. .. .. .. .. .. .. .. .. ..)); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_u8, + r#" +fn u8(u8: u8) { + if u8 != 0u8 { + assert_eq!(8u8, { + macro_rules! u8 { + (u8) => { + mod u8 { + pub fn u8<'u8: 'u8 + 'u8>(u8: &'u8 u8) -> &'u8 u8 { + "u8"; + u8 + } + } + }; + } + + u8!(u8); + let &u8: &u8 = u8::u8(&8u8); + ::u8(0u8); + u8 + }); + } +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_fishy, + r#" +fn fishy() { + assert_eq!(String::from("><>"), + String::<>::from::<>("><>").chars::<>().rev::<>().collect::()); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_union, + r#" +fn union() { + union union<'union> { union: &'union union<'union>, } +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_punch_card, + r#" +fn punch_card() -> impl std::fmt::Debug { + ..=..=.. .. .. .. .. .. .. .. .. .. .. ..=.. .. + ..=.. ..=.. .. .. .. .. .. .. .. .. ..=..=..=.. + ..=.. ..=.. ..=.. ..=.. .. ..=..=.. .. ..=.. .. + ..=..=.. .. ..=.. ..=.. ..=.. .. .. .. ..=.. .. + ..=.. ..=.. ..=.. ..=.. .. ..=.. .. .. ..=.. .. + ..=.. ..=.. ..=.. ..=.. .. .. ..=.. .. ..=.. .. + ..=.. ..=.. .. ..=..=.. ..=..=.. .. .. ..=.. .. 
+} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_rmatch, + r#" + fn r#match() { + let val = match match match match match () { + () => () + } { + () => () + } { + () => () + } { + () => () + } { + () => () + }; + assert_eq!(val, ()); + } + "# + ); + + test_valid_segmented_mir!( + weird_exprs_i_yield, + r#" +fn i_yield() { + static || { + yield yield yield yield yield yield yield yield yield; + }; +} +"# + ); + + // XXX: arm guards are not currently supported. + test_valid_segmented_mir!( + panics_with "unsupported feature" => + weird_exprs_match_nested_if, + r#" +fn match_nested_if() { + let val = match () { + () if if if if true {true} else {false} {true} else {false} {true} else {false} => true, + _ => false, + }; + assert!(val); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_monkey_barrel, + r#" +fn monkey_barrel() { + let val = ()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=(); + assert_eq!(val, ()); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_𝚌𝚘𝚗𝚝𝚒𝚗𝚞𝚎, + r#" +fn 𝚌𝚘𝚗𝚝𝚒𝚗𝚞𝚎() { + type 𝚕𝚘𝚘𝚙 = i32; + fn 𝚋𝚛𝚎𝚊𝚔() -> 𝚕𝚘𝚘𝚙 { + let 𝚛𝚎𝚝𝚞𝚛𝚗 = 42; + return 𝚛𝚎𝚝𝚞𝚛𝚗; + } + assert_eq!(loop { + break 𝚋𝚛𝚎𝚊𝚔 (); + }, 42); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_function, + r#" +fn function() { + struct foo; + impl FnOnce<()> for foo { + type Output = foo; + extern "rust-call" fn call_once(self, _args: ()) -> Self::Output { + foo + } + } + let foo = foo () ()() ()()() ()()()() ()()()()(); +} +"# + ); + + // The match will desugar to something with an + // arm guard which are NYI. + test_valid_segmented_mir!( + panics_with "unsupported feature" => + weird_exprs_bathroom_stall, + r#" +fn bathroom_stall() { + let mut i = 1; + matches!(2, _|_|_|_|_|_ if (i+=1) != (i+=1)); + assert_eq!(i, 13); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_closure_matching, + r#" +fn closure_matching() { + let x = |_| Some(1); + let (|x| x) = match x(..) 
{ + |_| Some(2) => |_| Some(3), + |_| _ => unreachable!(), + }; + assert!(matches!(x(..), |_| Some(4))); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_semisemisemisemisemi, + r#" +fn semisemisemisemisemi() { + ;;;;;;; ;;;;;;; ;;; ;;; ;; + ;; ;; ;;;; ;;;; ;; + ;;;;;;; ;;;;; ;; ;;;; ;; ;; + ;; ;; ;; ;; ;; ;; + ;;;;;;; ;;;;;;; ;; ;; ;; +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_useful_syntax, + r#" +fn useful_syntax() { + use {{std::{{collections::{{HashMap}}}}}}; + use ::{{{{core}, {std}}}}; + use {{::{{core as core2}}}}; +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_infcx, + r#" +fn infcx() { + pub mod cx { + pub mod cx { + pub use super::cx; + pub struct Cx; + } + } + let _cx: cx::cx::Cx = cx::cx::cx::cx::cx::Cx; +} +"# + ); +} diff --git a/crates/aquascope/src/analysis/stepper/mod.rs b/crates/aquascope/src/analysis/stepper/mod.rs index a7c1eccc8..f1be6929f 100644 --- a/crates/aquascope/src/analysis/stepper/mod.rs +++ b/crates/aquascope/src/analysis/stepper/mod.rs @@ -1,27 +1,32 @@ //! Analysis for the “Missing-at” relations. 
-mod find_steps; -mod segment_tree; +mod hir_steps; +#[allow(clippy::similar_names)] +mod segmented_mir; +mod table_builder; use std::collections::hash_map::Entry; -use anyhow::Result; -pub use find_steps::compute_permission_steps; +use anyhow::{bail, Result}; use fluid_let::fluid_let; -use rustc_data_structures::fx::FxHashMap as HashMap; -use rustc_middle::mir::Place; +use rustc_data_structures::{self, fx::FxHashMap as HashMap}; +use rustc_hir::intravisit::Visitor as HirVisitor; +use rustc_middle::mir::{Location, Place}; +use rustc_span::Span; use rustc_utils::source_map::range::CharRange; use serde::{Deserialize, Serialize}; use ts_rs::TS; use crate::analysis::{ - permissions::{Permissions, PermissionsData, PermissionsDomain}, + permissions::{ + Permissions, PermissionsCtxt, PermissionsData, PermissionsDomain, + }, AquascopeAnalysis, LoanKey, MoveKey, }; fluid_let!(pub static INCLUDE_MODE: PermIncludeMode); -#[derive(Debug, PartialEq, Eq, Clone, Copy, Deserialize, Serialize, Hash)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Hash)] pub enum PermIncludeMode { Changes, All, @@ -61,18 +66,34 @@ pub struct PermissionsLineDisplay { pub state: Vec, } -#[derive(Clone, PartialEq, Eq, Serialize, TS)] -#[serde(tag = "type")] -#[ts(export)] -pub enum ValueStep -where - A: Clone +pub trait Stepable: + Copy + + Clone + + std::fmt::Debug + + std::cmp::PartialEq + + std::cmp::Eq + + std::hash::Hash + + Serialize + + TS +{ +} + +impl Stepable for A where + A: Copy + + Clone + std::fmt::Debug + std::cmp::PartialEq + std::cmp::Eq + + std::hash::Hash + Serialize - + TS, + + TS { +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, Serialize, TS)] +#[serde(tag = "type")] +#[ts(export)] +pub enum ValueStep { High { value: A, }, @@ -83,15 +104,19 @@ where }, } -impl std::fmt::Debug for ValueStep -where - A: Clone - + std::fmt::Debug - + std::cmp::PartialEq - + std::cmp::Eq - + Serialize - + TS, -{ +impl ValueStep { + // TODO: this is a loose surface-level notion 
of symmetry. + fn is_symmetric_diff(&self, rhs: &Self) -> bool { + matches!( + (self, rhs), + (ValueStep::High { .. }, ValueStep::Low { .. }) + | (ValueStep::Low { .. }, ValueStep::High { .. }) + | (ValueStep::None { .. }, ValueStep::None { .. }) + ) + } +} + +impl std::fmt::Debug for ValueStep { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { ValueStep::High { .. } => write!(f, "↑"), @@ -110,7 +135,7 @@ where // the default BoolStep can be taken. macro_rules! make_diff { ($base:ident => $diff:ident { $($i:ident),* }) => { - #[derive(Clone, PartialEq, Eq, Serialize, TS)] + #[derive(Copy, Clone, PartialEq, Eq, Hash, Serialize, TS)] #[ts(export)] pub struct $diff { $( pub $i: ValueStep, )* @@ -134,7 +159,7 @@ impl std::fmt::Debug for PermissionsDiff { } } -#[derive(Clone, Serialize, TS, PartialEq, Eq)] +#[derive(Copy, Clone, Serialize, TS, PartialEq, Eq, Hash)] #[ts(export)] pub struct PermissionsDataDiff { pub is_live: ValueStep, @@ -166,6 +191,15 @@ impl PermissionsDataDiff { fn is_empty(&self) -> bool { self.permissions.is_empty() } + + fn is_symmetric_diff(&self, rhs: &PermissionsDataDiff) -> bool { + let p1 = &self.permissions; + let p2 = &rhs.permissions; + + p1.read.is_symmetric_diff(&p2.read) + && p1.write.is_symmetric_diff(&p2.write) + && p1.drop.is_symmetric_diff(&p2.drop) + } } impl Difference for bool { @@ -181,19 +215,13 @@ impl Difference for bool { } } -impl ValueStep -where - T: Clone + std::fmt::Debug + std::cmp::PartialEq + Eq + Serialize + TS, -{ +impl ValueStep { fn is_empty(&self) -> bool { matches!(self, Self::None { .. 
}) } } -impl Difference for Option -where - A: Clone + PartialEq + Eq + std::fmt::Debug + Serialize + TS, -{ +impl Difference for Option { type Diff = ValueStep; fn diff(&self, rhs: Option) -> Self::Diff { @@ -246,22 +274,91 @@ impl Difference for PermissionsData { impl<'tcx> Difference for &PermissionsDomain<'tcx> { type Diff = HashMap, PermissionsDataDiff>; fn diff(&self, rhs: &PermissionsDomain<'tcx>) -> Self::Diff { - self - .iter() - .fold(HashMap::default(), |mut acc, (place, p1)| { - let p2 = rhs.get(place).unwrap(); - let diff = p1.diff(*p2); - - match acc.entry(*place) { - Entry::Occupied(_) => { - panic!("Permissions step already in output for {place:?}"); - } - Entry::Vacant(entry) => { - entry.insert(diff); - } + let mut diffs = HashMap::default(); + + for (place, p1) in self.iter() { + let p2 = rhs.get(place).unwrap(); + let diff = p1.diff(*p2); + + match diffs.entry(*place) { + Entry::Occupied(_) => { + panic!("Permissions step already in output for {place:?}"); + } + Entry::Vacant(entry) => { + entry.insert(diff); } + } + } + + diffs + } +} - acc - }) +/// Represents a segment of the MIR control-flow graph. +/// +/// A `MirSegment` corresponds directly to locations where a permissions step +/// will be made. However, a segment is also control-flow specific. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct MirSegment { + pub from: Location, + pub to: Location, +} + +impl std::fmt::Debug for MirSegment { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "MirSegment({:?} -> {:?})", self.from, self.to) } } + +impl MirSegment { + pub fn new(l1: Location, l2: Location) -> Self { + MirSegment { from: l1, to: l2 } + } + + /// A _rough_ approximation of the source span of the step. 
+ pub fn span(&self, ctxt: &PermissionsCtxt) -> Span { + let lo = ctxt.location_to_span(self.from); + let hi = ctxt.location_to_span(self.to); + lo.with_hi(hi.hi()) + } + + pub fn into_diff<'tcx>( + self, + ctxt: &PermissionsCtxt<'_, 'tcx>, + ) -> HashMap, PermissionsDataDiff> { + let p0 = ctxt.location_to_point(self.from); + let p1 = ctxt.location_to_point(self.to); + let before = &ctxt.permissions_domain_at_point(p0); + let after = &ctxt.permissions_domain_at_point(p1); + before.diff(after) + } +} + +// ---------- +// Main entry + +pub fn compute_permission_steps<'a, 'tcx>( + analysis: &AquascopeAnalysis<'a, 'tcx>, +) -> Result> +where + 'tcx: 'a, +{ + let mode = INCLUDE_MODE.copied().unwrap_or(PermIncludeMode::Changes); + let ctxt = &analysis.permissions; + let ir_mapper = &analysis.ir_mapper; + let body = &ctxt.body_with_facts.body; + let _basic_blocks = body.basic_blocks.indices(); + let mut hir_visitor = + hir_steps::HirStepPoints::make(&ctxt.tcx, body, ctxt.body_id, ir_mapper)?; + hir_visitor.visit_nested_body(ctxt.body_id); + + if let Some(msg) = hir_visitor.get_unsupported_feature() { + bail!(msg); + } + + if let Some(fatal_error) = hir_visitor.get_internal_error() { + bail!(fatal_error); + } + + hir_visitor.finalize(analysis, mode) +} diff --git a/crates/aquascope/src/analysis/stepper/segment_tree.rs b/crates/aquascope/src/analysis/stepper/segment_tree.rs deleted file mode 100644 index 9ff5d09ab..000000000 --- a/crates/aquascope/src/analysis/stepper/segment_tree.rs +++ /dev/null @@ -1,375 +0,0 @@ -use anyhow::{bail, Result}; -use rustc_data_structures::{ - self, - fx::{FxHashMap as HashMap, FxHashSet as HashSet}, -}; -use rustc_middle::mir::{BasicBlock, BasicBlocks, Local, Location, Place}; -use rustc_span::Span; - -use super::*; -use crate::analysis::{ir_mapper::CleanedBody, permissions::PermissionsCtxt}; - -/// Represents a segment of the MIR control-flow graph. 
-/// -/// A `MirSegment` corresponds directly to locations where a permissions step -/// will be made. However, a segment is also control-flow specific. -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub struct MirSegment { - pub from: Location, - pub to: Location, -} - -/// The types of splits that can be performed on a [`SegmentTre::Single`]. -#[derive(Clone)] -pub enum SplitType { - /// A split of a segment that is not due to control flow. - /// Example, after each `Stmt` a step is created, this is simply - /// a step in a linear sequence. - Linear { - first: Box, - second: Box, - }, - - /// Split of a complex control flow. - /// For example, the `split_segments` of an `ExprKind::If` would be the segments - /// from the if condition, to the start of the then / else blocks. - /// The `join_segments` are all the those that end at the same join point. - /// - /// NOTE: any segment stored in the `splits` of a SplitType::ControlFlow - /// can not be split again, these are *atomic*. - ControlFlow { - splits: Vec, - joins: Vec, - }, -} - -/// A `SegmentTree` represents the control flow graph of a MIR `Body`. -/// It's used to keep track of the entire graph as it is sliced during -/// the permission steps analysis. -#[derive(Clone)] -pub enum SegmentTree { - /// An inner tree node with children. - Split { - segments: SplitType, - reach: MirSegment, - span: Span, - attached: Vec, - }, - - /// A leaf segment that is expected to be split again later. - Single { - segment: MirSegment, - span: Span, - attached: Vec, - }, -} - -/// Search result when trying to find the smallest enclosing segment for a location. -/// -/// NOTE: this is used under the assumption that the location cannot be the -/// ending location of a step (this would result in a zero distance step). 
-#[derive(Clone, Debug)] -pub enum SegmentSearchResult<'a> { - Enclosing(&'a SegmentTree), - StepExists(MirSegment, Span), - NotFound, -} - -// ------------------------------------------------ -// Debugging pretty printers - -impl std::fmt::Debug for MirSegment { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "MirSegment({:?} -> {:?})", self.from, self.to) - } -} - -impl std::fmt::Debug for SegmentTree { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - fn print_loop( - f: &mut std::fmt::Formatter, - tree: &SegmentTree, - spaces: usize, - ) -> std::fmt::Result { - let indent_size = 4; - match tree { - SegmentTree::Single { - segment, - attached, - span, - .. - } => { - writeln!( - f, - "{}SegmentTree::Single: {segment:?} {span:?}", - " ".repeat(spaces) - )?; - writeln!( - f, - "{}-locals attached to end {:?}", - " ".repeat(spaces), - attached, - ) - } - SegmentTree::Split { - segments: SplitType::Linear { first, second }, - reach, - attached, - .. - } => { - writeln!( - f, - "{}SegmentTree::Split [LINEAR]: {reach:?}", - " ".repeat(spaces) - )?; - writeln!( - f, - "{}-locals attached to end {:?}", - " ".repeat(spaces), - attached, - )?; - print_loop(f, first, spaces + indent_size)?; - writeln!(f)?; - print_loop(f, second, spaces + indent_size)?; - writeln!(f)?; - - Ok(()) - } - - SegmentTree::Split { - segments: SplitType::ControlFlow { splits, joins }, - reach, - attached, - .. 
- } => { - writeln!( - f, - "{}SegmentTree::Split [CF]: {reach:?}", - " ".repeat(spaces) - )?; - writeln!( - f, - "{}-locals attached to end {:?}", - " ".repeat(spaces), - attached, - )?; - writeln!(f, "{}Splits:", " ".repeat(spaces))?; - for tree in splits.iter() { - print_loop(f, tree, spaces + indent_size)?; - writeln!(f)?; - } - writeln!(f)?; - - writeln!(f, "{}Joins:", " ".repeat(spaces))?; - for tree in joins.iter() { - print_loop(f, tree, spaces + indent_size)?; - writeln!(f)?; - } - - Ok(()) - } - } - } - - print_loop(f, self, 0) - } -} - -// ------------------------------------------------ -// Impls - -impl MirSegment { - pub fn new(l1: Location, l2: Location) -> Self { - MirSegment { from: l1, to: l2 } - } - - /// Expand the path through the segment to a full set of [`Location`]s. - fn squash_block_path( - &self, - basic_blocks: &BasicBlocks, - path: impl Iterator, - ) -> Vec { - path - .flat_map(|bb| { - let bbd = &basic_blocks[bb]; - let from = if bb == self.from.block { - self.from.statement_index - } else { - 0 - }; - - let to = if bb == self.to.block { - self.to.statement_index - } else { - bbd.statements.len() - }; - - (from ..= to).map(move |idx| Location { - block: bb, - statement_index: idx, - }) - }) - .collect::>() - } - - pub(crate) fn paths_along_segment( - &self, - graph: &CleanedBody, - ) -> Vec> { - graph.paths_from_to(self.from.block, self.to.block) - } - - fn spanned_locations(&self, graph: &CleanedBody) -> HashSet { - let block_paths = self.paths_along_segment(graph); - let body = graph.body(); - block_paths - .into_iter() - .flat_map(|path| { - self.squash_block_path(&body.basic_blocks, path.into_iter()) - }) - .collect::>() - } - - pub fn into_diff<'tcx>( - self, - ctxt: &PermissionsCtxt<'_, 'tcx>, - ) -> HashMap, PermissionsDataDiff> { - let p0 = ctxt.location_to_point(self.from); - let p1 = ctxt.location_to_point(self.to); - let before = &ctxt.permissions_domain_at_point(p0); - let after = &ctxt.permissions_domain_at_point(p1); - 
before.diff(after) - } -} - -impl SegmentTree { - pub fn new(body: MirSegment, span: Span) -> Self { - Self::Single { - segment: body, - span, - attached: vec![], - } - } - - /// Find a [`SegmentTree::Single`] node which matches *exactly* the given segment. - pub fn find_single( - &mut self, - segment: MirSegment, - ) -> Option<&mut SegmentTree> { - let node = &mut *self; - - match node { - SegmentTree::Single { segment: seg, .. } if *seg == segment => Some(node), - SegmentTree::Single { .. } => None, - SegmentTree::Split { - segments: SplitType::ControlFlow { joins, .. }, - .. - } => { - // NOTE: the split set is regarded as atomic so - // it isn't included in the search. - for s in joins.iter_mut() { - let r = s.find_single(segment); - if r.is_some() { - return r; - } - } - - None - } - - SegmentTree::Split { - segments: SplitType::Linear { first, second }, - .. - } => first - .as_mut() - .find_single(segment) - .or_else(|| second.as_mut().find_single(segment)), - } - } - - /// Replace a [`SegmentTree::Single`] node which matches *exactly* the given segment. - /// The subtree must fragment the [`MirSegment`] correctly, otherwise the tree - /// will enter an invalid state. - pub fn replace_single( - &mut self, - to_replace: MirSegment, - subtree: SegmentTree, - ) -> Result<()> { - // TODO better error handling here. - let node = self.find_single(to_replace); - - if node.is_none() { - bail!("the provided mir segment to replace doesn't exist {to_replace:?}"); - } - - let node = node.unwrap(); - - if let SegmentTree::Single { segment, .. } = node { - assert_eq!(to_replace, *segment); - } else { - bail!("SegmentTree::find_single can only return a Single variant. This is an implementation bug"); - } - - *node = subtree; - - Ok(()) - } - - pub(crate) fn subtree_contains( - &self, - location: Location, - graph: &CleanedBody, - ) -> bool { - let segment = match self { - SegmentTree::Split { reach, .. } => reach, - SegmentTree::Single { segment, .. 
} => segment, - }; - let locs = segment.spanned_locations(graph); - locs.contains(&location) - } - - /// Find the /leaf/ [`MirSegment`] and it's corresponding `Span` that enclose - /// `location`. The `location` is expected to be used as the end of step. - pub(crate) fn find_segment_for_end<'a>( - &'a self, - location: Location, - graph: &CleanedBody, - ) -> SegmentSearchResult<'a> { - match self { - SegmentTree::Single { segment, .. } if segment.to != location => { - SegmentSearchResult::Enclosing(self) - } - - SegmentTree::Single { segment, span, .. } => { - SegmentSearchResult::StepExists(*segment, *span) - } - - SegmentTree::Split { - segments: SplitType::Linear { first, second }, - .. - } => { - if first.subtree_contains(location, graph) { - first.find_segment_for_end(location, graph) - } else if second.subtree_contains(location, graph) { - second.find_segment_for_end(location, graph) - } else { - SegmentSearchResult::NotFound - } - } - - SegmentTree::Split { - segments: SplitType::ControlFlow { joins, .. }, - .. - } => - // NOTE: the split locations are atomic and cannot be split. - { - joins - .iter() - .find(|s| s.subtree_contains(location, graph)) - .map_or(SegmentSearchResult::NotFound, |next| { - next.find_segment_for_end(location, graph) - }) - } - } - } -} diff --git a/crates/aquascope/src/analysis/stepper/segmented_mir.rs b/crates/aquascope/src/analysis/stepper/segmented_mir.rs new file mode 100644 index 000000000..8ca7ed178 --- /dev/null +++ b/crates/aquascope/src/analysis/stepper/segmented_mir.rs @@ -0,0 +1,1030 @@ +//! Internal state for managing permissions steps. +//! +//! The `SegmentedMir` aids the stepper in making sure that +//! steps made are always _valid_. In this context a step is defined +//! as a `MirSegment`, a simple struct that contains a `from` and `to` +//! location defining the step. The finished segmented mir is valid if +//! it satisfies the following criteria: +//! +//! 1. All segments are valid (more on this later). +//! 2. 
Segments form a total cover of the body under analysis. +//! 3. No location is included in multiple steps (see exceptions to this below). +//! +//! Segment validity is the main crux of the above definition and this is +//! split into three separate definitions. There exist three different kinds +//! of segments (spiritually, they are the same in the code): +//! +//! - Linear segments: a segment representing a linear piece of control flow. +//! A linear segment has a single point of entry and a single exit. Formally, +//! this is defined as: +//! Given a `MirSegment { from, to }`, it is linear iff: +//! `from` dominates `to` and `to` post-dominates `from` +//! These segments are what we ultimately want. +//! +//! - Split segments: a segment representing the start of conditional control-flow. +//! These segments relax the definition of a linear segment, in that the `to` +//! location *does not* post-dominate `from`. These segments are important when +//! representing control-flow given by a `switchInt`. In brief, a `switchInt` +//! will have multiple jump targets based on its argument, and each one of these +//! targets will be made into a split segment, stepping `from` the `switchInt` +//! and stepping `to` the target location. +//! +//! - Join segments: a segment representing the close of conditional control-flow. +//! These segments are the opposite of split segments, and relax the definition +//! of a linear segment by lifting the requirement that `from` dominates `to`. After +//! control-flow has been split (by say, a `switchInt`) join segments represent the +//! steps needed to unify the control-flow again. +//! +//! Unless specified, the word "segment" or "step" always refers to a linear segment. +//! Whenever the stepper says "insert a step ending at location L", this will _always_ +//! result in a linear step as the other two variants need to be explicitly handled. +//! +//! 
To maintain validity we use a recursive tree that incrementally builds up sequences +//! of linear steps. The tree layout looks (roughly) as follows: +//! +//! ```text +//! type LinearSegment = MirSegment +//! type SplitSegment = MirSegment +//! type JoinSegment = MirSegment +//! +//! data ControlFlow = Linear LinearSegment +//! | Branch +//! { splits :: [SplitSegment] +//! , joins :: [JoinSegment] +//! , nested :: Collection +//! } +//! +//! data Collection = [ControlFlow] +//! ``` +//! +//! To build this tree we manage a set of `CollectionBuilder`s, these +//! store the last `Location` from a step, and only allow inserting a +//! linear step into a collection. The exact process won't be outlined here, +//! but the stepper will open a branch when it encounters an `if` or `match`, +//! this opening will then create a new builder for each branch target. Builders +//! are then destroyed when either (1) it has reached a stopping point as +//! previously specified by the stepper, or (2) the branch that spawned the builder +//! is being closed. +//! +//! There is a little more to the process than this, for example: making sure that +//! branches and segments are created within the natural structure of the MIR and only +//! inserting steps in previously "unstepped" areas. But for those really curious +//! feel free to start at the [`SegmentedMirBuilder::insert`] function and explore +//! from there. + +use anyhow::{anyhow, bail, ensure, Result}; +use rustc_data_structures::{ + frozen::Frozen, + fx::{FxHashMap as HashMap, FxHashSet as HashSet}, + graph::*, + transitive_relation::{TransitiveRelation, TransitiveRelationBuilder}, + unify::{InPlaceUnificationTable, UnifyKey}, +}; +use rustc_index::vec::{Idx, IndexVec}; +use rustc_middle::mir::{BasicBlock, Location}; +use rustc_span::Span; + +use super::MirSegment; +use crate::analysis::ir_mapper::IRMapper; + +// -------------------------- +// Decls sections + +rustc_index::newtype_index! 
{ + pub(super) struct SegmentId {} +} + +rustc_index::newtype_index! { + pub(super) struct BranchId {} +} + +rustc_index::newtype_index! { + /// Collections are groups of segments thare nest. + /// E.g., when a branch contains another branch. + /// These are controlled internally. + pub(super) struct CollectionId {} +} + +rustc_index::newtype_index! { + /// Scopes are controlled at the segment-level + /// and controlled by the caller. + pub(super) struct ScopeId {} +} + +rustc_index::newtype_index! { + pub(super) struct TableId {} +} + +impl UnifyKey for TableId { + type Value = (); + + fn index(&self) -> u32 { + self.as_u32() + } + + fn from_index(i: u32) -> Self { + Self::from_u32(i) + } + + fn tag() -> &'static str { + "TableId" + } +} + +lazy_static::lazy_static! { + static ref BASE_SCOPE: ScopeId = ScopeId::new(0); +} + +#[derive(Copy, Clone, Debug)] +#[allow(dead_code)] +enum LengthKind { + Bounded { + /// Entry location for the collection, location + /// must dominate all locations contained within the collection. + root: Location, + phi: Location, + }, + Unbounded { + /// Exit location (if it exists) where control flow must leave, + /// if a phi exists then it must post-dominate all locations + /// contained within the collection. + root: Location, + }, +} + +#[derive(Debug)] +pub(super) struct SegmentData { + pub(super) segment: MirSegment, + pub(super) span: Span, + pub(super) scope: ScopeId, +} + +#[derive(Debug)] +pub(super) struct BranchData { + table_id: TableId, + pub(super) reach: MirSegment, + + /// Split segments, `from` dominates `to` but `to` does not post-dominate `from`. + pub(super) splits: Vec, + + // NOTE: join segments aren't currently used for anything. Previously we + // had lots of complex logic dictating when the join steps should be + // included but through lots of testing it seemed that the visual results + // we wanted _never_ used the join steps. 
We still keep them around in + // case a counterexample to that is found, or until I(gavinleroy) can + // come up with a sufficient formal reason why we don't need them. + // See the documentation in `table_builder` for more details. + /// Join segments, `to` post-dominates `from` but `from` does not post-dominate `to`. + #[allow(dead_code)] + pub(super) joins: Vec, + + pub(super) nested: Vec, +} + +#[derive(Copy, Clone, Debug)] +pub(super) enum CFKind { + Linear(SegmentId), + Branch(BranchId), +} + +#[derive(Debug)] +pub(super) struct Collection { + pub(super) data: Vec, + kind: LengthKind, +} + +#[derive(Copy, Clone, Debug)] +struct CollectionBuilder { + collection: CollectionId, + current_location: Location, +} + +#[derive(Copy, Clone, Debug)] +struct BuilderIdx(usize); + +#[derive(Copy, Clone)] +enum FindResult { + None, + NonLinear(BranchId, Location), + Linear(BuilderIdx), +} + +#[derive(Debug, Default)] +struct OpenCollections(Vec); + +type BranchSpannerMap<'a> = + HashMap Span + 'a>>; + +pub(super) struct SegmentedMirBuilder<'a, 'tcx: 'a> { + mapper: &'a IRMapper<'a, 'tcx>, + first_collection: CollectionId, + root_mappings: BranchSpannerMap<'a>, + collections: IndexVec, + branches: IndexVec, + segments: IndexVec, + processing: OpenCollections, + branch_roots: InPlaceUnificationTable, + scope_graph: TransitiveRelationBuilder, + open_scopes: Vec, + next_scope: ScopeId, +} + +pub(super) struct SegmentedMir { + pub(super) first_collection: CollectionId, + collections: Frozen>, + branches: Frozen>, + segments: Frozen>, + scopes: TransitiveRelation, +} + +// -------------------------- +// Impl sections + +impl BranchData { + pub fn new(tid: TableId, root: Location, phi: Option) -> Self { + let to = phi.unwrap_or(root); + BranchData { + table_id: tid, + reach: MirSegment::new(root, to), + splits: Vec::default(), + joins: Vec::default(), + nested: Vec::default(), + } + } +} + +#[allow(dead_code)] +impl OpenCollections { + pub fn push(&mut self, c: 
CollectionBuilder) { + self.0.push(c) + } + + pub fn iter(&self) -> impl Iterator + '_ { + // Open collections are pushed on the end, but we want to search + // in the most recently pushed by reverse the Vec::iter + self.0.iter().rev() + } + + pub fn enumerate( + &self, + ) -> impl Iterator + '_ { + // Open collections are pushed on the end, but we want to search + // in the most recently pushed by reverse the Vec::iter + self + .0 + .iter() + .enumerate() + .map(|(i, o)| (BuilderIdx(i), o)) + .rev() + } + + pub fn iter_mut( + &mut self, + ) -> impl Iterator + '_ { + // Open collections are pushed on the end, but we want to search + // in the most recently pushed, thus using reversing. + self.0.iter_mut().rev() + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn drain_collections<'a, 'this: 'a>( + &'this mut self, + cids: &'a HashSet, + ) -> impl Iterator + 'a { + self.0.drain_filter(|cb| cids.contains(&cb.collection)) + } + + pub fn get(&self, i: BuilderIdx) -> &CollectionBuilder { + &self.0[i.0] + } + + pub fn get_mut(&mut self, i: BuilderIdx) -> &mut CollectionBuilder { + &mut self.0[i.0] + } + + pub fn clear(&mut self) { + self.0.clear() + } +} + +impl std::fmt::Debug for SegmentedMirBuilder<'_, '_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "#") + } +} + +impl SegmentedMir { + pub(super) fn segments(&self) -> impl Iterator + '_ { + self.segments.iter().map(|sd| sd.segment) + } + + pub fn get_branch_scope(&self, bid: BranchId) -> ScopeId { + let branch = self.get_branch(bid); + let sid = branch.splits[0]; + let segment = self.get_segment(sid); + segment.scope + } + + pub fn get_collection(&self, cid: CollectionId) -> &Collection { + &self.collections[cid] + } + + pub fn get_segment(&self, sid: SegmentId) -> &SegmentData { + &self.segments[sid] + } + + pub fn get_branch(&self, bid: BranchId) -> &BranchData { + &self.branches[bid] + } + + /// 
Returns all ancestor scopes excluding `scope`. + pub fn parent_scopes( + &self, + scope: ScopeId, + ) -> impl Iterator + '_ { + self.scopes.reachable_from(scope).into_iter() + } +} + +enum GetSpanner<'a> { + GetFrom(BranchId), + InsertNew(Box Span + 'a>), +} + +impl<'a, 'tcx: 'a> SegmentedMirBuilder<'a, 'tcx> { + pub fn make(mapper: &'a IRMapper<'a, 'tcx>) -> Self { + let from = mapper.cleaned_graph.start_node().start_location(); + + let mut collections = IndexVec::new(); + + // We start with an empty linear collection. + // XXX: we could also try to find the exit location for the + // entire body but having this information isn't useful + // for the end of the body. Phi nodes are used to make + // sure we don't accidentally jump past the end of a + // join but with the return there isn't anything after. + let first_collection = collections.push(Collection { + data: Vec::default(), + kind: LengthKind::Unbounded { root: from }, + }); + + let mut this = Self { + first_collection, + mapper, + root_mappings: HashMap::default(), + collections, + branches: IndexVec::default(), + segments: IndexVec::default(), + processing: OpenCollections::default(), + branch_roots: InPlaceUnificationTable::default(), + scope_graph: TransitiveRelationBuilder::default(), + // NOTE: this maintains that there is always + // an open scope that the visitor cannot close. 
+ open_scopes: vec![*BASE_SCOPE], + next_scope: BASE_SCOPE.plus(1), + }; + + this.processing.push(CollectionBuilder { + collection: first_collection, + current_location: mapper.cleaned_graph.start_node().start_location(), + }); + + this + } + + fn finish_first_collection(&mut self) -> Result<()> { + ensure!(self.processing.len() == 1, "More than one collection open"); + self.processing.clear(); + Ok(()) + } + + pub fn freeze(mut self) -> Result { + self.finish_first_collection()?; + + Ok(SegmentedMir { + first_collection: self.first_collection, + segments: Frozen::freeze(self.segments), + branches: Frozen::freeze(self.branches), + collections: Frozen::freeze(self.collections), + scopes: self.scope_graph.freeze(), + }) + } + + fn next_scope(&mut self) -> ScopeId { + let next = self.next_scope; + // The scope graph is used to find _parent scopes_. + self.scope_graph.add(next, self.current_scope()); + self.next_scope.increment_by(1); + next + } + + // ------------------------------------------------ + // Scope operations + // + // NOTE: scopes are controlled by the HIR Visitor + // so we don't need to sanitize them at all. + // They return Results to match the interface + // of everything else though. + + // NOTE: After starting a body analysis this should never be None. + fn current_scope(&self) -> ScopeId { + *self.open_scopes.last().unwrap() + } + + pub fn open_scope(&mut self) -> Result { + let next_scope = self.next_scope(); + self.open_scopes.push(next_scope); + Ok(next_scope) + } + + pub fn close_scope(&mut self, idx: ScopeId) -> Result<()> { + ensure!(idx != *BASE_SCOPE, "cannot close base scope"); + + let last_open = self.open_scopes.last().ok_or(anyhow!("no open scopes"))?; + + ensure!( + *last_open == idx, + "closing wrong scope expected: {last_open:?} given: {idx:?}" + ); + + self.open_scopes.pop(); + Ok(()) + } + + // ----------------- + // Branch operations + + /// Finds the basic block that is the last post-dominator of the successors of `root`. 
+  ///
+  /// Only blocks that are reachable from, and dominated by, `root` are
+  /// considered. Returns `None` when no block post-dominates all of them,
+  /// or when no candidate is left after excluding `root` and false edges.
+  fn least_post_dominator(&self, root: BasicBlock) -> Option<BasicBlock> {
+    log::debug!("Finding the least post-dominator for root {root:?}");
+    let mapper = &self.mapper;
+
+    // Find all basic blocks that are reachable from (and dominated by)
+    // the root.
+    let reachable = mapper
+      .cleaned_graph
+      .depth_first_search(root)
+      .filter(|&to| mapper.dominates(root, to))
+      .collect::<Vec<_>>();
+
+    // Find the block that is the _most_ post-dominating,
+    // this is a point that must post-dominate everything else.
+    let most_post_dominating = reachable
+      .iter()
+      .copied()
+      .find(|&can| reachable.iter().all(|&n| mapper.post_dominates(can, n)))?;
+
+    // If a block dominates the "most post-dominator" that means that this
+    // block also post-dominates all branches that occur after the root.
+    // We exclude the (1) root itself, and (2) any false edges. False edges
+    // are common in loop lowering but the borrowck semantics indicate that
+    // we should consider points *after* the false edges as having left the
+    // branches.
+    let candidate_leasts = reachable
+      .iter()
+      .copied()
+      .filter(|&can| {
+        can != root
+          && !mapper.cleaned_graph.is_false_edge(can)
+          && mapper.dominates(can, most_post_dominating)
+      })
+      .collect::<Vec<_>>();
+
+    // The least post-dominator dominates all the other post-dominators.
+    candidate_leasts
+      .iter()
+      .copied()
+      .find(|&can| candidate_leasts.iter().all(|&n| mapper.dominates(can, n)))
+  }
+
+  /// Creates a branch rooted at `location` and opens one nested collection
+  /// (with its own builder) per successor block of `location.block`.
+  ///
+  /// `get_span` either registers a new spanner for the branch
+  /// ([`GetSpanner::InsertNew`]) or re-uses the one registered for an
+  /// existing branch ([`GetSpanner::GetFrom`]).
+  ///
+  /// # Errors
+  ///
+  /// Fails, without modifying the builder, when
+  /// - no open collection dominates `location`,
+  /// - the dominating builder is not currently positioned at `location`, or
+  /// - `GetSpanner::GetFrom` names a branch with no registered spanner.
+  fn mk_branch(
+    &mut self,
+    location: Location,
+    get_span: GetSpanner<'a>,
+  ) -> Result<BranchId> {
+    let mapper = &self.mapper;
+    let scope = self.current_scope();
+
+    // The convergence of all branching paths.
+    let phi_opt = self
+      .least_post_dominator(location.block)
+      .map(|bb| bb.start_location());
+
+    log::debug!("Chosen least-post-dominator: {phi_opt:?}");
+
+    let builder_opt = self
+      .processing
+      .iter_mut()
+      .find(|cb| mapper.ldominates(cb.current_location, location));
+
+    let Some(builder) = builder_opt else {
+      bail!("no open collection dominates root location {location:?}");
+    };
+
+    ensure!(
+      builder.current_location == location,
+      "opening a branch missed a step, expected {:?} given: {:?}",
+      builder.current_location,
+      location
+    );
+
+    // Spanners are only stored for branches opened with `InsertNew`.
+    // Verify the lookup will succeed *before* any state is mutated so a
+    // missing entry surfaces as an error rather than an indexing panic.
+    if let GetSpanner::GetFrom(parent) = &get_span {
+      ensure!(
+        self.root_mappings.contains_key(parent),
+        "no span mapping registered for parent branch {parent:?}"
+      );
+    }
+
+    // Make a new branch
+    let tid = self.branch_roots.new_key(());
+    let bid = self.branches.push(BranchData::new(tid, location, phi_opt));
+    let branch = &mut self.branches[bid];
+
+    // Save the Location -> Span mappings under this root BranchId.
+    let get_span = match get_span {
+      GetSpanner::InsertNew(spanner) => {
+        self.root_mappings.insert(bid, spanner);
+        &self.root_mappings[&bid]
+      }
+      // Presence was checked above.
+      GetSpanner::GetFrom(parent) => &self.root_mappings[&parent],
+    };
+
+    // Push the new Branch as a control flow kind on
+    // the current collection's data set.
+    self.collections[builder.collection]
+      .data
+      .push(CFKind::Branch(bid));
+
+    let length_kind = if let Some(phi) = phi_opt {
+      // The enclosing builder resumes at the join point of the branch.
+      builder.current_location = phi;
+      LengthKind::Bounded {
+        root: location,
+        phi,
+      }
+    } else {
+      // TODO: how should we update the collection if there
+      // isn't a phi? My current feeling is that we should
+      // just close the collection.
+      LengthKind::Unbounded { root: location }
+    };
+
+    // For each of the target BasicBlocks of the switchInt:
+    for sblock in mapper.cleaned_graph.successors(location.block) {
+      // 1. insert the split segment into the branch
+      //    (the spanner receives `&mut to` and may therefore adjust it).
+      let mut to = sblock.start_location();
+      let span = get_span(&mut to);
+      let sid = self.segments.push(SegmentData {
+        segment: MirSegment::new(location, to),
+        span,
+        scope,
+      });
+      branch.splits.push(sid);
+
+      // 2. Open a new Collection with its starting
+      //    location at the branch target location.
+      let cid = self.collections.push(Collection {
+        data: Vec::default(),
+        kind: length_kind,
+      });
+
+      // 3. Store this new collection in the branch middle section.
+      branch.nested.push(cid);
+
+      // 4. Put a new collection builder on the open collection stack.
+      self.processing.push(CollectionBuilder {
+        collection: cid,
+        current_location: to,
+      });
+    }
+
+    Ok(bid)
+  }
+
+  /// Opens a branch of control flow rooted at `location`.
+  ///
+  /// The function implicitly adds a new segment for all split steps
+  /// and `get_span` should return the associated Span for these split steps.
+  pub fn open_branch(
+    &mut self,
+    location: Location,
+    get_span: impl Fn(&mut Location) -> Span + 'a,
+  ) -> Result<BranchId> {
+    log::debug!("opening user initiated branch at {location:?}");
+    log::debug!("open branches BEFORE {:#?}", self.processing);
+    let r = self.mk_branch(location, GetSpanner::InsertNew(Box::new(get_span)));
+    log::debug!("open branches AFTER {:#?}", self.processing);
+    r
+  }
+
+  /// Opens an implicit branch at `root` that re-uses the spanner of
+  /// `parent`, and unions the two branches in `branch_roots`.
+  fn open_child_branch(
+    &mut self,
+    parent: BranchId,
+    root: Location,
+  ) -> Result<()> {
+    log::debug!("opening implicit branch at {root:?}");
+    let child = self.mk_branch(root, GetSpanner::GetFrom(parent))?;
+    let parent_tid = self.branches[parent].table_id;
+    let child_tid = self.branches[child].table_id;
+    self.branch_roots.union(parent_tid, child_tid);
+    Ok(())
+  }
+
+  /// Closes the branch `bid` together with every branch that was unioned
+  /// with it (i.e., the implicit child branches opened on its behalf).
+  ///
+  /// Contrary to previous implementations, the function does not implicitly
+  /// add a new segment for all split steps.
+  pub fn close_branch(&mut self, bid: BranchId) -> Result<()> {
+    // Compare canonical representatives on *both* sides: after a `union`
+    // the representative of the set is not necessarily the key of the
+    // branch that was opened first.
+    let table_root = self.branch_roots.find(self.branches[bid].table_id);
+
+    let branch_roots = &mut self.branch_roots;
+    let branches_to_close = self
+      .branches
+      .iter_enumerated()
+      .filter_map(|(bid, bd)| {
+        (branch_roots.find(bd.table_id) == table_root).then_some(bid)
+      })
+      .collect::<Vec<_>>();
+
+    for bid in branches_to_close {
+      let nested_collections = self.branches[bid]
+        .nested
+        .iter()
+        .copied()
+        .collect::<HashSet<_>>();
+
+      // Drain eagerly. `log::debug!` does not evaluate its arguments when
+      // the level is disabled, so the removal must not live inside it.
+      let closed_builders = self
+        .processing
+        .drain_collections(&nested_collections)
+        .collect::<Vec<_>>();
+
+      log::debug!("closing builders: {:#?}", closed_builders);
+    }
+
+    log::debug!("State after closing branches {:#?}", self.processing);
+
+    Ok(())
+  }
+
+  /// Returns the first branch whose `nested` collections include `cid`.
+  fn find_containing_branch(&self, cid: CollectionId) -> Option<BranchId> {
+    self
+      .branches
+      .iter_enumerated()
+      .find_map(|(bid, branch)| branch.nested.contains(&cid).then_some(bid))
+  }
+
+  /// Search through the list of open builders and return the one that can
+  /// be used to insert a new step ending at `location`.
+  fn find_suitable_collection(&mut self, location: Location) -> FindResult {
+    let mapper = &self.mapper;
+
+    // We can insert into a collection whose last location
+    // dominates the new location to insert.
+    let builder_opt = self.processing.enumerate().find_map(|(i, cb)| {
+      log::debug!("Trying to find open collection: {cb:?}");
+      mapper
+        .ldominates(cb.current_location, location)
+        .then_some((i, cb))
+    });
+
+    // No collection found
+    let Some((builder_i, builder)) = builder_opt else {
+      return FindResult::None;
+    };
+
+    // Return the found builder to create a new linear step.
+    if mapper.lpost_dominates(location, builder.current_location) {
+      log::debug!(
+        "location post-dominates builder: {location:?} {:?} {:?}",
+        builder.current_location,
+        builder_i
+      );
+      return FindResult::Linear(builder_i);
+    }
+
+    // Fallback case for when we want to open an implicit branch.
However, + // if there doesn't exist a parent branch, this is just an internal error. + match self.find_containing_branch(builder.collection) { + None => { + log::error!("couldn't find branch containing {:?}", builder.collection); + FindResult::None + } + Some(bid) => FindResult::NonLinear(bid, builder.current_location), + } + } + + // ---------- + // Insertions + + /// Insert a step ending at the given `Location`. + /// + /// It's the `SegmentedMir`s job to find out where the step came from, + /// in the case of ambiguity the given path hint can be used, this + /// proves most usefull when an implicit branch child needs to be spawned. + /// See the doc comment for further details. + pub fn insert( + &mut self, + location: Location, + path_hint: Option, + span: Span, + ) -> Result<()> { + log::debug!( + "starting insertion with hint {path_hint:?} at {location:?} \ninto: {:?}", + self.processing + ); + + match self.find_suitable_collection(location) { + // BAD case, no dominating locations where we can insert. + // + // XXX: returning an internal error here is too limiting. It seems + // that if control-flow constructs are (mis)-used, then the MIR + // is already more simplified than we would expect. This approach + // siliently ignores these insertions, but we leave a log warning + // to help debugging if something bad happens. + // + // This was changed from an Error with the introduction + // of the weird expr test cases. Making this change has not + // knowingly made previously failing test cases pass, nor has it + // affected the steps produced by the test suite. + FindResult::None => { + log::warn!( + "no suitable collection for location {location:?} {:#?}", + self.processing + ); + + Ok(()) + } + + // RARE case: spawn a new child branch and retry the insert. + // These automatic branches are used to handle match expressions + // that compile to a series of `switchInt`s. 
+ FindResult::NonLinear(parent, branch_loc) => { + self.open_child_branch(parent, branch_loc)?; + self.insert(location, path_hint, span) + } + + // COMMON case: we can insert a linear segment into the found builder. + FindResult::Linear(builder_idx) => { + let scope = self.current_scope(); + let builder = self.processing.get_mut(builder_idx); + let collection = &mut self.collections[builder.collection]; + + let mut insert_to = |to| { + let segment = MirSegment::new(builder.current_location, to); + let segment_data = SegmentData { + segment, + span, + scope, + }; + log::debug!( + "Inserting {segment:?} into builder {builder:?} {builder_idx:?}" + ); + + let segid = self.segments.push(segment_data); + collection.data.push(CFKind::Linear(segid)); + builder.current_location = to; + }; + + match collection.kind { + // If the step attempts to go past its previously computed bound + // we will cut it short. I(gavinleroy) haven't yet seen this happen, + // but in theory it's possible and is bad because it bypasses the + // branching mechanisms. + LengthKind::Bounded { phi, .. 
} + if self.mapper.ldominates(phi, location) => + { + log::error!( + "Linear insert is stepping past the join point {location:?} {phi:?}" + ); + + insert_to(phi) + } + + _ => insert_to(location), + } + + Ok(()) + } + } + } +} + +#[cfg(test)] +pub(crate) mod test_exts { + use rustc_data_structures::{ + captures::Captures, graph::iterate::post_order_from_to, + }; + use rustc_middle::mir::BasicBlockData; + + use super::*; + + pub trait SegmentedMirTestExt { + fn validate(&self, mapper: &IRMapper) -> Result<(), InvalidReason>; + } + + #[derive(Debug)] + pub enum InvalidReason { + MissingLocations { + missing: Vec, + }, + // DuplicateLocation { + // at: Location, + // }, + InvalidSegment { + segment: MirSegment, + kind: BadSegmentKind, + }, + } + + #[derive(Debug)] + #[allow(clippy::enum_variant_names)] + pub enum BadSegmentKind { + SplitNoDom, + JoinNoPostDom, + LinearNoDom, + LinearNoPostDom, + } + + fn explode_block<'a, 'tcx: 'a>( + bb: BasicBlock, + block: &'a BasicBlockData<'tcx>, + from: Option, + to: Option, + ) -> impl Iterator + Captures<'tcx> + 'a { + // End is an inclusive index. + let start = from.unwrap_or(0); + let end = to.unwrap_or(block.statements.len()); + (start ..= end).map(move |i| Location { + block: bb, + statement_index: i, + }) + } + + impl MirSegment { + fn explode<'a, 'tcx: 'a>( + self, + mapper: &'a IRMapper<'a, 'tcx>, + ) -> impl Iterator + Captures<'tcx> + 'a { + let sb = self.from.block; + let eb = self.to.block; + let graph = &mapper.cleaned_graph; + let mut block_path = post_order_from_to(graph, sb, Some(eb)); + // The target block is never added in the post-order. 
+ block_path.push(eb); + + block_path.into_iter().flat_map(move |bb| { + let body = &mapper.cleaned_graph.body(); + let from = (bb == sb).then_some(self.from.statement_index); + let to = (bb == eb).then_some(self.to.statement_index); + explode_block(bb, &body.basic_blocks[bb], from, to) + }) + } + } + + impl SegmentedMir { + fn is_valid_collection( + &self, + cid: CollectionId, + ssf: &mut HashSet, + mapper: &IRMapper, + ) -> Result<(), InvalidReason> { + let collection = self.get_collection(cid); + for kind in collection.data.iter() { + match kind { + CFKind::Linear(sid) => self.is_valid_segment(*sid, ssf, mapper)?, + CFKind::Branch(bid) => self.is_valid_branch(*bid, ssf, mapper)?, + } + } + + Ok(()) + } + + fn is_valid_split_segment( + &self, + sid: SegmentId, + ssf: &mut HashSet, + mapper: &IRMapper, + ) -> Result<(), InvalidReason> { + let SegmentData { segment: s, .. } = self.get_segment(sid); + + if !mapper.ldominates(s.from, s.to) { + return Err(InvalidReason::InvalidSegment { + segment: *s, + kind: BadSegmentKind::SplitNoDom, + }); + } + + for at in s.explode(mapper) { + ssf.insert(at); + } + + Ok(()) + } + + fn is_valid_join_segment( + &self, + sid: SegmentId, + ssf: &mut HashSet, + mapper: &IRMapper, + ) -> Result<(), InvalidReason> { + let SegmentData { segment: s, .. } = self.get_segment(sid); + + if !mapper.lpost_dominates(s.to, s.from) { + return Err(InvalidReason::InvalidSegment { + segment: *s, + kind: BadSegmentKind::JoinNoPostDom, + }); + } + + for at in s.explode(mapper) { + ssf.insert(at); + } + + Ok(()) + } + + fn is_valid_segment( + &self, + sid: SegmentId, + ssf: &mut HashSet, + mapper: &IRMapper, + ) -> Result<(), InvalidReason> { + let SegmentData { segment: s, .. 
} = self.get_segment(sid); + if !mapper.ldominates(s.from, s.to) { + return Err(InvalidReason::InvalidSegment { + segment: *s, + kind: BadSegmentKind::LinearNoDom, + }); + } + + if !mapper.lpost_dominates(s.to, s.from) { + return Err(InvalidReason::InvalidSegment { + segment: *s, + kind: BadSegmentKind::LinearNoPostDom, + }); + } + + for at in s.explode(mapper) { + ssf.insert(at); + } + + Ok(()) + } + + fn is_valid_branch( + &self, + bid: BranchId, + ssf: &mut HashSet, + mapper: &IRMapper, + ) -> Result<(), InvalidReason> { + let branch = self.get_branch(bid); + + for &sid in branch.splits.iter() { + self.is_valid_split_segment(sid, ssf, mapper)?; + } + + for &sid in branch.joins.iter() { + self.is_valid_join_segment(sid, ssf, mapper)?; + } + + for &cid in branch.nested.iter() { + self.is_valid_collection(cid, ssf, mapper)?; + } + + Ok(()) + } + } + + impl SegmentedMirTestExt for SegmentedMir { + /// See the module documentation for a sense of what valid means. Here + /// the below three basic things are checked. In the future, these guarantees + /// will hopefully only ever get stronger, and never weaker. + /// + /// 1. All segments are valid regarding where they appear in the collection. + /// 2. The segments form a total cover of the body. + /// 3. At each branch location (`switchInt`) there must exist a split segment + /// for each possible branch target. 
+    fn validate(&self, mapper: &IRMapper) -> Result<(), InvalidReason> {
+      let body = &mapper.cleaned_graph.body();
+
+      // Every location of every block kept by the cleaned graph.
+      let all_locations = mapper
+        .cleaned_graph
+        .blocks()
+        .flat_map(|block| {
+          explode_block(block, &body.basic_blocks[block], None, None)
+        })
+        .collect::<HashSet<_>>();
+
+      // Walking the tree checks each segment and records, in
+      // `seen_so_far`, all locations covered by some segment.
+      let mut seen_so_far = HashSet::default();
+      self.is_valid_collection(
+        self.first_collection,
+        &mut seen_so_far,
+        mapper,
+      )?;
+
+      // Total cover: no location of the body may be left unstepped.
+      let missing = all_locations
+        .difference(&seen_so_far)
+        .copied()
+        .collect::<Vec<_>>();
+
+      if missing.is_empty() {
+        Ok(())
+      } else {
+        Err(InvalidReason::MissingLocations { missing })
+      }
+    }
+  }
+}
diff --git a/crates/aquascope/src/analysis/stepper/table_builder.rs b/crates/aquascope/src/analysis/stepper/table_builder.rs
new file mode 100644
index 000000000..5625d18b3
--- /dev/null
+++ b/crates/aquascope/src/analysis/stepper/table_builder.rs
@@ -0,0 +1,403 @@
+//! Convert permissions steps into tables viewable by the frontend.
+
+use rustc_data_structures::{
+  self,
+  fx::{FxHashMap as HashMap, FxHashSet as HashSet},
+};
+use rustc_middle::mir::{Local, Location, Place};
+use rustc_span::Span;
+use rustc_utils::{test_utils::DUMMY_CHAR_RANGE, PlaceExt, SpanExt};
+
+use super::{segmented_mir::*, *};
+use crate::{analysis::permissions::PermissionsCtxt, errors};
+
+/// A single unprocessed table, mapping Places to their differences for a
+/// MirSegment.
+#[derive(Debug)]
+pub(super) struct Table<'tcx> {
+  /// Source span associated with the step.
+  span: Span,
+  /// The step (`from` -> `to`) this table was computed for.
+  segment: MirSegment,
+  /// Per-place permission differences across the segment.
+  data: HashMap<Place<'tcx>, PermissionsDataDiff>,
+}
+
+/// A series of tables, identified by the _ending location_ of the step.
+///
+/// Except in branches, an ending location should only contain a
+/// single table. These tables are currently collapsed into a single
+/// larger table and shown per line, though this restriction could
+/// be relaxed in the future.
+///
+/// See [`prettify_permission_steps`] for how tables get merged.
+pub(super) type Tables<'tcx> = HashMap>>; + +pub(super) struct TableBuilder<'a, 'tcx: 'a> { + pub(super) analysis: &'a AquascopeAnalysis<'a, 'tcx>, + pub(super) ctxt: &'a PermissionsCtxt<'a, 'tcx>, + pub(super) mir: &'a SegmentedMir, + pub(super) locals_at_scope: HashMap>, +} + +#[allow(clippy::similar_names)] +impl<'a, 'tcx: 'a> TableBuilder<'a, 'tcx> { + pub(super) fn finalize_body( + &self, + start_loc: Location, + body_span: Span, + mode: PermIncludeMode, + ) -> Vec { + let first_point = self.ctxt.location_to_point(start_loc); + let first_domain = &self.ctxt.permissions_domain_at_point(first_point); + let empty_domain = &self.ctxt.domain_bottom(); + let body_open_brace = body_span.shrink_to_lo(); + + // Upon entry, the function parameters are already "live". But we want to + // special case this, and show that they "come alive" at the opening brace. + let first_diff = empty_domain.diff(first_domain); + + // Tables accumulated for the whole body, keyed by each segment's ending location. + let mut diffs = Tables::default(); + + // We do an unchecked insert here to keep + // the segment from getting filtered because the + // segment's from and to locations are equal. 
+ let seg = MirSegment::new(start_loc, start_loc); + diffs.entry(seg.to).or_default().push(Table { + segment: seg, + span: body_open_brace, + data: first_diff, + }); + self.insert_collection(&mut diffs, self.mir.first_collection); + + prettify_permission_steps(self.analysis, diffs, mode) + } + + fn locals_to_filter(&self, scope: ScopeId) -> HashSet { + self + .mir + .parent_scopes(scope) + .filter_map(|sid| self.locals_at_scope.get(&sid)) + .flatten() + .copied() + .collect::>() + } + + fn insert_collection(&self, result: &mut Tables<'tcx>, cid: CollectionId) { + let collection = self.mir.get_collection(cid); + + for &part in collection.data.iter() { + match part { + CFKind::Linear(seg_id) => self.insert_segment(result, seg_id), + CFKind::Branch(branch_id) => self.insert_branch(result, branch_id), + } + } + } + + fn insert_segment(&self, result: &mut Tables<'tcx>, sid: SegmentId) { + let ctxt = &self.ctxt; + let &SegmentData { + segment, + span, + scope, + } = self.mir.get_segment(sid); + + let to_filter = self.locals_to_filter(scope); + + if segment.from == segment.to { + return; + } + + let p0 = ctxt.location_to_point(segment.from); + let p1 = ctxt.location_to_point(segment.to); + let before = &ctxt.permissions_domain_at_point(p0); + let after = &ctxt.permissions_domain_at_point(p1); + let mut diff = before.diff(after); + + let removed = diff + .drain_filter(|place, _| to_filter.contains(&place.local)) + .collect::>(); + + if !removed.is_empty() { + log::debug!( + "removed domain places due to attached filter at {:?} {:?}", + segment.to, + removed + ); + } + + let table = Table { + segment, + span, + data: diff, + }; + + log::info!("saving segment diff {segment:?}"); + result.entry(segment.to).or_default().push(table); + } + + // NOTE: when inserting a branch we currently ignore join steps. Within the + // function the previous code is left commented out. 
It was left in case + // we need to quickly bring it back, but through testing I found + // it was a lot of complex logic that removed all join steps, every time. + // Therefore, to save time, we just ignore them! We did this filtering + // to remove any weird permissions changes that were branch sensitive in + // order to avoid showing the same change in permissions multiple times. + // Should we decide to change this then this code will become relevant again. + fn insert_branch(&self, result: &mut Tables<'tcx>, bid: BranchId) { + let BranchData { + reach, + splits, + // joins, + nested, + .. + } = self.mir.get_branch(bid); + + let mut entire_diff = reach.into_diff(self.ctxt); + + log::debug!( + "Inserting Branched Collection {:?}:\n\tsplits: {:?}\n\tmiddle: {:?}", + reach, + splits, + nested + ); + + let mut temp_middle = Tables::default(); + // let mut temp_joins = Tables::default(); + + for &sid in splits.iter() { + self.insert_segment(&mut temp_middle, sid); + } + + for &cid in nested.iter() { + self.insert_collection(&mut temp_middle, cid); + } + + // for &sid in joins.iter() { + // self.insert_segment(&mut temp_joins, sid); + // } + + // Find the locals which were filtered from all scopes. In theory, + // `all_scopes` should contain the same scope, copied over, + // but the SegmentedMir doesn't enforce this and there's no + // scope attached to collections. + let scope_here = self.mir.get_branch_scope(bid); + let all_attached = self + .locals_at_scope + .get(&scope_here) + .map(|v| v.iter()) + .unwrap_or_default() + .collect::>(); + + let attached_here = entire_diff + .drain_filter(|place: &Place, _| all_attached.contains(&place.local)) + .collect::>(); + + // let diffs_in_tables = |tbls: &Tables| { + // tbls + // .iter() + // .flat_map(|(_, v)| v.iter().flat_map(|tbl| tbl.data.values())) + // .copied() + // .collect::>() + // }; + + // Flatten all tables to the unique `PermissionsDataDiff`s + // that exist within them. 
+ + // let diffs_in_branches = diffs_in_tables(&mut temp_middle); + // for (_, v) in temp_joins.iter_mut() { + // for tbl in v.iter_mut() { + // let drained = tbl + // .data + // .drain_filter(|_, diff| diffs_in_branches.contains(diff)) + // .map(|(p, _)| p) + // .collect::>(); + // log::debug!("diffs at join loc removed for redundancy {drained:#?}"); + // } + // } + + result.extend(temp_middle); + // result.extend(temp_joins); + + // Attach filtered locals + result.entry(reach.to).or_default().push(Table { + span: reach.span(self.ctxt), + segment: *reach, + data: attached_here, + }); + } +} + +// Prettify means: +// - Remove all places that are not source visible +// - Remove all tables which are empty +// - Convert Spans to Ranges +#[allow(clippy::if_not_else)] +pub(super) fn prettify_permission_steps<'tcx>( + analysis: &AquascopeAnalysis<'_, 'tcx>, + perm_steps: Tables<'tcx>, + mode: PermIncludeMode, +) -> Vec { + let ctxt = &analysis.permissions; + let tcx = ctxt.tcx; + let body = &ctxt.body_with_facts.body; + + let should_keep = |p: &PermissionsDataDiff| -> bool { + !(matches!(p.is_live, ValueStep::None { value: Some(false) }) + || (mode == PermIncludeMode::Changes && p.is_empty())) + }; + + macro_rules! place_to_string { + ($p:expr) => { + $p.to_string(tcx, body) + .unwrap_or_else(|| String::from("")) + }; + } + + let first_error_span_opt = + errors::get_span_of_first_error(ctxt.def_id.expect_local()) + .and_then(|s| s.as_local(ctxt.body_with_facts.body.span)); + let source_map = tcx.sess.source_map(); + + let mut semi_filtered = HashMap::< + usize, + Vec<(MirSegment, Span, Vec<(Place<'tcx>, PermissionsDataDiff)>)>, + >::default(); + + // Goal: filter out differences for Places that + // aren't source-visible, as well as those that come + // after the first error span. + // Group these intermediate tables by line numbers to make + // collapsing them easier. 
+ for (_, v) in perm_steps.into_iter() { + for Table { + segment, + span, + data, + } in v.into_iter() + { + // Attach the span to the end of the line. Later, all permission + // steps appearing on the same line will be combined. + let span = source_map.span_extend_to_line(span).shrink_to_hi(); + let entries = data + .into_iter() + .filter(|(place, diff)| { + place.is_source_visible(tcx, body) && should_keep(diff) + }) + .collect::>(); + + // This could be a little more graceful. The idea is that + // we want to remove all permission steps which occur after + // the first error, but the steps involved with the first + // error could still be helpful. This is why we filter all + // spans with a LO BytePos greater than the error + // span HI BytePos. + if !(entries.is_empty() + || first_error_span_opt + .is_some_and(|err_span| err_span.hi() < span.lo())) + { + // We'll store things by line number + let line_num = source_map.lookup_line(span.hi()).unwrap().line; + semi_filtered + .entry(line_num) + .or_default() + .push((segment, span, entries)); + } else { + log::debug!( + "segment diff at {segment:?} was empty or follows an error" + ); + } + } + } + + // NOTE: we're at odds with the multi-table setup. This quick + // hack combines table entries into a single table until the + // visual explanation gets up-to-speed. + // Another weird thing about this is that you can have a single + // table with two changes for one place. + // ```example + // # fn main() { + // let closure = |s: &str| s.len(); // s: +R+O + // // s: -R-O + // // closure: +R+O + // # } + // ``` + // imagine that the comments to the right of the Let represent + // a pseudo combined table. The path `s` gains and loses the same + // set of permissions in the same table. This is kind of weird, we'd + // rather just show *no change*. + + semi_filtered + .into_iter() + .filter_map(|(line, entries)| { + + // Conforming to the above HACK this just takes any (from, to) pair. 
+ let dummy_char_range = DUMMY_CHAR_RANGE.with(|range| *range); + let (from, to, range) = entries.first().map_or_else( + || (dummy_char_range, dummy_char_range, dummy_char_range), + |(MirSegment { from, to }, span, _)| { + let range = analysis.span_to_range(*span); + let from = analysis.span_to_range(ctxt.location_to_span(*from)); + let to = analysis.span_to_range(ctxt.location_to_span(*to)); + (from, to, range) + }, + ); + + let mut combined_table = + HashMap::, PermissionsDataDiff>::default(); + + // For all tables which fall on the same line, we combine them into a single table + // and remove all *SYMMETRIC* differences. That is, if you have permission changes such as: + // - path: +R+O + // - path: -R-O + // these are exactly symmetric, and will be removed. + log::debug!("Finishing the combined table for line {line}"); + for (segment, _, diffs) in entries.into_iter() { + for (place, diff) in diffs.into_iter() { + match combined_table.entry(place) { + Entry::Vacant(o) => { + log::debug!("- Place: {place:?} Segment {segment:?}\n\t\t{diff:?}"); + o.insert(diff); + } + Entry::Occupied(o) => { + let old_diff = o.get(); + if diff.is_symmetric_diff(old_diff) { + log::debug!( + "X Place {place:?} had a symmetric difference." + ); + o.remove(); + // master_table.remove(idx); + continue; + } else { + log::warn!("Clashing places on a step table were not symmetric: {place:?}"); + } + } + }; + } + } + + // This means the tables were symmetric and all were removed. 
+ if combined_table.is_empty() { + return None; + } + + let mut master_table_vec = combined_table + .into_iter() + .collect::>(); + + master_table_vec + .sort_by_key(|(place, _)| (place.local.as_usize(), place.projection)); + + let master_table = PermissionsStepTable { + from, + to, + state: master_table_vec + .into_iter() + .map(|(place, diff)| (place_to_string!(place), diff)) + .collect::>(), + }; + + Some(PermissionsLineDisplay { + location: range, + state: vec![master_table], + }) + }) + .collect::>() +} diff --git a/crates/aquascope/src/lib.rs b/crates/aquascope/src/lib.rs index d5e515bba..a9bedcff8 100644 --- a/crates/aquascope/src/lib.rs +++ b/crates/aquascope/src/lib.rs @@ -49,6 +49,9 @@ clippy::option_option, clippy::similar_names )] +// Only used for testing purposes; can we disallow +// uncommon codepoints when not testing? +#![allow(uncommon_codepoints)] extern crate datafrog; extern crate either; @@ -61,7 +64,6 @@ extern crate rustc_data_structures; extern crate rustc_driver; extern crate rustc_error_messages; extern crate rustc_errors; -extern crate rustc_graphviz; extern crate rustc_hir; extern crate rustc_hir_pretty; extern crate rustc_index; diff --git a/crates/aquascope/tests/snapshots/stepper__add_big_strings@closure_0.test.snap b/crates/aquascope/tests/snapshots/stepper__add_big_strings@closure_0.test.snap index 7fc90a7bc..483d67b89 100644 --- a/crates/aquascope/tests/snapshots/stepper__add_big_strings@closure_0.test.snap +++ b/crates/aquascope/tests/snapshots/stepper__add_big_strings@closure_0.test.snap @@ -326,7 +326,7 @@ description: add_big_strings@closure_0.test type: None value: false path_moved: - type: None + type: Low path_uninitialized: type: None value: false @@ -386,7 +386,7 @@ description: add_big_strings@closure_0.test type: None value: false path_moved: - type: None + type: Low path_uninitialized: type: Low loan_read_refined: diff --git a/crates/aquascope/tests/snapshots/stepper__reverse@vec_0.test.snap 
b/crates/aquascope/tests/snapshots/stepper__reverse@vec_0.test.snap index 344048534..2c0066b47 100644 --- a/crates/aquascope/tests/snapshots/stepper__reverse@vec_0.test.snap +++ b/crates/aquascope/tests/snapshots/stepper__reverse@vec_0.test.snap @@ -109,7 +109,7 @@ description: reverse@vec_0.test type: None value: false path_moved: - type: None + type: Low path_uninitialized: type: Low loan_read_refined: