From 1e741cd6a4db31bffc22dbe603e40286d274ec0f Mon Sep 17 00:00:00 2001 From: Gavin Gray Date: Tue, 6 Jun 2023 18:26:42 +0200 Subject: [PATCH] Creating permission steps incrementally (#78) * initial incremental stepper variant. * track collection structure. * Minor tweaks with ForLoopDesugaring & branch joins * Minor refactor and table entry filters. Fixed table entry filtering scheme that was causing some of the loop examples to fail. Table entries are now first grouped by line number which is also how they are collapsed, this seems to have broken a different example, and the issue with closures still remains. Needed still is documentation and addressing comments. * Fix loop spans and ifs without and else. Removed old algorithm alongside other useless utilities like the DFSFinder that was unstable. No "join" segments are inserted for branches which I will need to revisit. * Add weird_exprs tests, updated loops. * Updated tests and documentation. * Update test doc for SegmentedMir. --- crates/aquascope/Cargo.toml | 4 +- .../src/analysis/ir_mapper/body_graph.rs | 263 ++-- .../src/analysis/ir_mapper/mir_locations.rs | 30 +- .../aquascope/src/analysis/ir_mapper/mod.rs | 74 +- .../src/analysis/ir_mapper/post_dominators.rs | 142 ++ crates/aquascope/src/analysis/mod.rs | 1 + .../src/analysis/permissions/context.rs | 22 + .../src/analysis/stepper/find_steps.rs | 1234 ----------------- .../src/analysis/stepper/hir_steps.rs | 1226 ++++++++++++++++ crates/aquascope/src/analysis/stepper/mod.rs | 195 ++- .../src/analysis/stepper/segment_tree.rs | 375 ----- .../src/analysis/stepper/segmented_mir.rs | 1030 ++++++++++++++ .../src/analysis/stepper/table_builder.rs | 403 ++++++ crates/aquascope/src/lib.rs | 4 +- ...epper__add_big_strings@closure_0.test.snap | 4 +- .../stepper__reverse@vec_0.test.snap | 2 +- 16 files changed, 3187 insertions(+), 1822 deletions(-) delete mode 100644 crates/aquascope/src/analysis/stepper/find_steps.rs create mode 100644 
crates/aquascope/src/analysis/stepper/hir_steps.rs delete mode 100644 crates/aquascope/src/analysis/stepper/segment_tree.rs create mode 100644 crates/aquascope/src/analysis/stepper/segmented_mir.rs create mode 100644 crates/aquascope/src/analysis/stepper/table_builder.rs diff --git a/crates/aquascope/Cargo.toml b/crates/aquascope/Cargo.toml index fdde4db7c..3e0674ba3 100644 --- a/crates/aquascope/Cargo.toml +++ b/crates/aquascope/Cargo.toml @@ -15,7 +15,7 @@ publish = false rustc_private = true [features] -testing = ["lazy_static"] +testing = [] [dependencies] anyhow = "1.0.0" @@ -32,7 +32,7 @@ miri = {git = "https://github.com/rust-lang/miri.git", rev = "35d6927663065d7fde aquascope_workspace_utils = { version = "0.2", path = "../aquascope_workspace_utils" } # testing utils -lazy_static = { version = "1.4", optional = true } +lazy_static = { version = "1.4" } [dev-dependencies] insta = { version = "1.22.0", features = ["json", "yaml", "redactions"] } diff --git a/crates/aquascope/src/analysis/ir_mapper/body_graph.rs b/crates/aquascope/src/analysis/ir_mapper/body_graph.rs index 7de9080ee..db9d14f25 100644 --- a/crates/aquascope/src/analysis/ir_mapper/body_graph.rs +++ b/crates/aquascope/src/analysis/ir_mapper/body_graph.rs @@ -1,7 +1,7 @@ -use itertools::Itertools; -use rustc_data_structures::{fx::FxHashMap as HashMap, graph::*}; +use rustc_data_structures::{captures::Captures, graph::*}; use rustc_middle::mir::{ - BasicBlock, BasicBlockData, BasicBlocks, Body, Location, + BasicBlock, BasicBlockData, BasicBlocks, Body, Location, Terminator, + TerminatorKind, }; use smallvec::SmallVec; @@ -19,15 +19,6 @@ impl<'a, 'tcx: 'a> CleanedBody<'a, 'tcx> { self.0 } - // TODO: cache the results - pub(crate) fn paths_from_to( - &self, - from: BasicBlock, - to: BasicBlock, - ) -> Vec> { - DFSFinder::find_paths_from_to(self, from, to) - } - /// Compute the locations successor. 
/// /// If the specified location lies in the middle of a `BasicBlock`, @@ -56,14 +47,49 @@ impl<'a, 'tcx: 'a> CleanedBody<'a, 'tcx> { statement_index: 0, }) } else { + log::debug!("No Location (or too many) successor(s) found: {nexts:?}"); None } } } + pub fn terminator_in_block(&self, block: BasicBlock) -> &Terminator<'tcx> { + self.body().basic_blocks[block].terminator() + } + + pub fn blocks( + &self, + ) -> impl Iterator + Captures<'a> + Captures<'tcx> + '_ { + self + .0 + .basic_blocks + .postorder() + .iter() + .filter(|bb| CleanedBody::keep_block(&self.0.basic_blocks[**bb])) + .copied() + } + + pub fn is_false_edge(&self, bb: BasicBlock) -> bool { + matches!( + self.0.basic_blocks[bb].terminator().kind, + TerminatorKind::FalseEdge { .. } + ) + } + fn keep_block(bb: &BasicBlockData) -> bool { !bb.is_cleanup && !bb.is_empty_unreachable() } + + fn is_imaginary_target( + from_data: &BasicBlockData, + target: BasicBlock, + ) -> bool { + let TerminatorKind::FalseEdge { imaginary_target, .. } = from_data.terminator().kind else { + return false; + }; + + imaginary_target == target + } } // ----------- @@ -96,7 +122,11 @@ impl<'tcx> WithSuccessors for CleanedBody<'_, 'tcx> { node: Self::Node, ) -> >::Iter { ::successors(&self.0.basic_blocks, node) - .filter(|bb| CleanedBody::keep_block(&self.0.basic_blocks[*bb])) + .filter(|bb| { + let from_data = &self.0.basic_blocks[*bb]; + CleanedBody::keep_block(from_data) + && !CleanedBody::is_imaginary_target(from_data, *bb) + }) .collect::>() .into_iter() } @@ -119,126 +149,103 @@ impl<'tcx> WithPredecessors for CleanedBody<'_, 'tcx> { } } -/// Finds all paths between two nodes. -/// -/// This DFS will find all unique paths between two nodes. This -/// includes allowing loops to be traversed (at most once). -/// This is quite a HACK to briefly satisfy the needs of the -/// [stepper](crate::analysis::stepper::compute_permission_steps). 
-struct DFSFinder<'graph, G> -where - G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, -{ - graph: &'graph G, - paths: Vec>, - stack: Vec, - visited: HashMap, -} - -impl<'graph, G> DFSFinder<'graph, G> -where - G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, -{ - pub fn new(graph: &'graph G) -> Self { - Self { - graph, - paths: vec![], - stack: vec![], - visited: HashMap::default(), - } - } - - pub fn find_paths_from_to( - graph: &'graph G, - from: G::Node, - to: G::Node, - ) -> Vec> { - let mut dfs = Self::new(graph); - dfs.search(from, to); - dfs.paths.into_iter().unique().collect::>() - } - - fn insert(&mut self, n: G::Node) -> bool { - let v = self.visited.entry(n).or_default(); - if *v >= 2 { - return false; - } - *v += 1; - true - } - - fn remove(&mut self, n: G::Node) { - let v = self.visited.entry(n).or_default(); - assert!(*v > 0); - *v -= 1; - } - - fn search(&mut self, from: G::Node, to: G::Node) { - if !self.insert(from) { - return; - } - - self.stack.push(from); - - if from == to { - self.paths.push(self.stack.clone()); - self.remove(to); - self.stack.pop().unwrap(); - return; - } - - for v in self.graph.successors(from) { - self.search(v, to); - } - - self.stack.pop().unwrap(); - self.remove(from); - } -} - #[cfg(test)] mod test { - use rustc_data_structures::graph::vec_graph::VecGraph; + use rustc_utils::BodyExt; - use super::*; + use super::{super::AllPostDominators, *}; + use crate::test_utils as tu; - #[test] - fn if_shape() { - // Diamond shaped IF. 
- let graph = VecGraph::new(6, vec![ - (0u32, 1u32), - (1u32, 2u32), - (1u32, 3u32), - (2u32, 4u32), - (3u32, 4u32), - (4u32, 5u32), - ]); - - let paths_0_5 = vec![vec![0, 1, 2, 4, 5], vec![0, 1, 3, 4, 5]]; - - assert_eq!(DFSFinder::find_paths_from_to(&graph, 0, 5), paths_0_5); - } + // CleanedBody tests #[test] - fn while_loop_shape() { - // While loop shape: - // 0 -> 1 -> 2 -> 3 -> 5 - // ^ | - // | v - // |-- 4 - let graph = VecGraph::new(6, vec![ - (0u32, 1u32), - (1u32, 2u32), - (2u32, 3u32), - (3u32, 5u32), - (3u32, 4u32), - (4u32, 2u32), - ]); - - let paths_0_5 = vec![vec![0, 1, 2, 3, 5], vec![0, 1, 2, 3, 4, 2, 3, 5]]; - let mut paths = DFSFinder::find_paths_from_to(&graph, 0, 5); - paths.sort_by_key(|l| l.len()); - - assert_eq!(paths, paths_0_5); + fn cleaned_body_simple_if() { + // EXPECTED MIR: + // ------------- + // bb0: { + // StorageLive(_2); + // _2 = const 0_i32; + // FakeRead(ForLet(None), _2); + // StorageLive(_3); + // StorageLive(_4); + // _4 = const true; + // switchInt(move _4) -> [0: bb3, otherwise: bb1]; + // } + // + // bb1: { + // _5 = CheckedAdd(_2, const 1_i32); + // assert(!move (_5.1: bool), ) -> [success: bb2, unwind: bb5]; + // } + // + // bb2: { + // _2 = move (_5.0: i32); + // _3 = const (); + // goto -> bb4; + // } + // + // bb3: { + // _3 = const (); + // goto -> bb4; + // } + // + // bb4: { + // StorageDead(_4); + // StorageDead(_3); + // _0 = _2; + // StorageDead(_2); + // return; + // } + // + // bb5 (cleanup): { + // resume; + // } + + tu::compile_normal( + r#" +fn foo() -> i32 { + let mut v1 = 0; + if true { + v1 += 1; + } + return v1; +} +"#, + |tcx| { + tu::for_each_body(tcx, |_, wfacts| { + let cleaned_graph = CleanedBody(&wfacts.body); + + let post_doms = AllPostDominators::::build( + &cleaned_graph, + wfacts.body.all_returns().map(|loc| loc.block), + ); + + let cleaned_blocks = cleaned_graph.blocks().collect::>(); + + let bb0 = BasicBlock::from_usize(0); + let bb1 = BasicBlock::from_usize(1); + let bb2 = 
BasicBlock::from_usize(2); + let bb3 = BasicBlock::from_usize(3); + let bb4 = BasicBlock::from_usize(4); + let bb5 = BasicBlock::from_usize(5); + + assert!(cleaned_blocks.contains(&bb0)); + assert!(cleaned_blocks.contains(&bb1)); + assert!(cleaned_blocks.contains(&bb2)); + assert!(cleaned_blocks.contains(&bb3)); + assert!(cleaned_blocks.contains(&bb4)); + // Cleanup blocks + assert!(!cleaned_blocks.contains(&bb5)); + + for &bb in vec![bb0, bb1, bb2, bb3, bb4].iter() { + assert!(post_doms.is_postdominated_by(bb, bb4)); + } + + assert!(!post_doms.is_postdominated_by(bb0, bb2)); + assert!(!post_doms.is_postdominated_by(bb0, bb3)); + assert!(post_doms.is_postdominated_by(bb1, bb2)); + assert!(!post_doms.is_postdominated_by(bb1, bb3)); + }) + }, + ); } } diff --git a/crates/aquascope/src/analysis/ir_mapper/mir_locations.rs b/crates/aquascope/src/analysis/ir_mapper/mir_locations.rs index 170b1b6e5..4337460a7 100644 --- a/crates/aquascope/src/analysis/ir_mapper/mir_locations.rs +++ b/crates/aquascope/src/analysis/ir_mapper/mir_locations.rs @@ -26,24 +26,24 @@ impl MirOrderedLocations { } pub fn exit_location(&self) -> Option { - self.exit_block.map(|block| { - let statement_index = *self - .locations - .get(&block) - .expect("Block with no associated locations") - .last() - .unwrap(); - Location { - block, - statement_index, - } - }) + let block = self.exit_block?; + self.locations.get(&block).map_or_else( + // Block has no associated index then default to the start + || Some(block.start_location()), + // Get the last associated index of the block + |vs| { + vs.last().map(|&statement_index| Location { + block, + statement_index, + }) + }, + ) } pub fn get_entry_exit_locations(&self) -> Option<(Location, Location)> { - self - .entry_location() - .and_then(|mn| self.exit_location().map(|mx| (mn, mx))) + let entry = self.entry_location()?; + let exit = self.exit_location()?; + Some((entry, exit)) } pub fn values(&self) -> impl Iterator + Captures<'_> { diff --git 
a/crates/aquascope/src/analysis/ir_mapper/mod.rs b/crates/aquascope/src/analysis/ir_mapper/mod.rs index 20e933071..a4c222a02 100644 --- a/crates/aquascope/src/analysis/ir_mapper/mod.rs +++ b/crates/aquascope/src/analysis/ir_mapper/mod.rs @@ -27,8 +27,8 @@ pub struct IRMapper<'a, 'tcx> { body: &'a Body<'tcx>, hir_to_mir: HashMap>, gather_mode: GatherMode, - dominators: Dominators, - post_dominators: AllPostDominators, + pub(crate) dominators: Dominators, + pub(crate) post_dominators: AllPostDominators, } // TODO: I want to decompose this into more specific regions. @@ -107,6 +107,36 @@ where ir_map } + pub fn ldominates(&self, dom: Location, node: Location) -> bool { + if dom.block == node.block { + return dom.statement_index <= node.statement_index; + } + self.dominates(dom.block, node.block) + } + + pub fn lpost_dominates(&self, pdom: Location, node: Location) -> bool { + if pdom.block == node.block { + return pdom.statement_index >= node.statement_index; + } + self.post_dominates(pdom.block, node.block) + } + + pub fn dominates(&self, dom: BasicBlock, node: BasicBlock) -> bool { + self.dominators.is_reachable(node) && self.dominators.dominates(dom, node) + } + + pub fn post_dominates(&self, pdom: BasicBlock, node: BasicBlock) -> bool { + self.post_dominators.is_postdominated_by(node, pdom) + } + + /// Returns true if the terminator in the location's block is a `switchInt`. + pub fn is_terminator_switchint(&self, location: Location) -> bool { + matches!( + self.cleaned_graph.terminator_in_block(location.block).kind, + mir::TerminatorKind::SwitchInt { .. 
} + ) + } + pub fn local_assigned_place(&self, local: &hir::Local) -> Vec> { use either::Either; use mir::{FakeReadCause as FRC, StatementKind as SK}; @@ -192,20 +222,34 @@ where idxs.sort_unstable(); } - let basic_blocks = total_location_map.keys().collect::>(); + let basic_blocks = total_location_map.keys().copied().collect::>(); - let entry_block = basic_blocks.iter().find(|&&&b1| { - basic_blocks.iter().all(|&&b2| { - self.dominators.is_reachable(b2) - && (b1 == b2 || self.dominators.dominates(b1, b2)) + let entry_block = basic_blocks + .iter() + .find(|&&candidate_dom| { + basic_blocks.iter().all(|&block| { + self.dominators.is_reachable(block) + && self.dominators.dominates(candidate_dom, block) + }) }) - }); + .copied(); - let exit_block = basic_blocks.iter().find(|&&&b1| { - basic_blocks.iter().all(|&&b2| { - b1 == b2 || self.post_dominators.is_postdominated_by(b2, b1) - }) - }); + let find_exit_from = |basic_blocks: &[BasicBlock]| -> Option { + basic_blocks + .iter() + .find(|&&candidate_postdom| { + basic_blocks.iter().all(|&block| { + self + .post_dominators + .is_postdominated_by(block, candidate_postdom) + }) + }) + .copied() + }; + + let exit_block = find_exit_from(&basic_blocks); + + log::debug!("Gathering MIR location entry / exit blocks: {entry_block:?}{exit_block:?}"); if exit_block.is_none() { log::debug!("Found locations: {total_location_map:#?}"); @@ -215,8 +259,8 @@ where } Some(MirOrderedLocations { - entry_block: entry_block.map(|b| **b), - exit_block: exit_block.map(|b| **b), + entry_block, + exit_block, locations: total_location_map, }) } diff --git a/crates/aquascope/src/analysis/ir_mapper/post_dominators.rs b/crates/aquascope/src/analysis/ir_mapper/post_dominators.rs index 9f40ff48d..dae93bf67 100644 --- a/crates/aquascope/src/analysis/ir_mapper/post_dominators.rs +++ b/crates/aquascope/src/analysis/ir_mapper/post_dominators.rs @@ -40,3 +40,145 @@ impl AllPostDominators { } } } + +#[cfg(test)] +mod tests { + use 
rustc_data_structures::graph::{vec_graph::VecGraph, *}; + + use super::*; + + struct VG { + source: N, + forward: VecGraph, + backward: VecGraph, + } + + impl VG { + fn make(size: usize, source: N, edges: Vec<(N, N)>) -> Self { + let rev = edges.iter().map(|&(f, s)| (s, f)).collect::>(); + VG { + source, + forward: VecGraph::new(size, edges), + backward: VecGraph::new(size, rev), + } + } + } + + impl DirectedGraph for VG { + type Node = N; + } + + impl<'graph, N: Idx> GraphSuccessors<'graph> for VG { + type Item = N; + type Iter = smallvec::IntoIter<[N; 10]>; + } + + impl<'graph, N: Idx> GraphPredecessors<'graph> for VG { + type Item = N; + type Iter = smallvec::IntoIter<[N; 10]>; + } + + impl WithStartNode for VG { + fn start_node(&self) -> N { + self.source + } + } + + impl WithNumNodes for VG { + fn num_nodes(&self) -> usize { + self.forward.num_nodes() + } + } + + impl WithSuccessors for VG { + fn successors( + &self, + node: Self::Node, + ) -> >::Iter { + self + .forward + .successors(node) + .iter() + .copied() + .collect::>() + .into_iter() + } + } + + impl WithPredecessors for VG { + fn predecessors( + &self, + node: Self::Node, + ) -> >::Iter { + self + .backward + .successors(node) + .iter() + .copied() + .collect::>() + .into_iter() + } + } + + #[test] + fn pdom_diamond() { + let diamond = VG::::make(4, 0, vec![(0, 1), (0, 2), (1, 3), (2, 3)]); + let post_doms = AllPostDominators::build(&diamond, std::iter::once(3)); + for b in 0 ..= 2 { + assert!(post_doms.is_postdominated_by(b, 3)); + } + } + + #[test] + fn pdom_linear() { + let nodes = 100; + let edges = (0 .. nodes).zip(1 ..).collect::>(); + let line = VG::::make(nodes, 0, edges); + let post_doms = AllPostDominators::build(&line, std::iter::once(nodes - 1)); + for i in 0 .. nodes { + for j in i + 1 .. 
nodes { + assert!( + post_doms.is_postdominated_by(i, j), + "{j} should post-dominate {i}" + ); + } + } + } + + #[test] + fn pdom_double_diamond() { + // 2 5 + // 0 -> 1 4 7 -> + // 3 6 + let dd = VG::::make(8, 0, vec![ + (0, 1), + (1, 2), + (1, 3), + (2, 4), + (3, 4), + (4, 5), + (4, 6), + (5, 7), + (6, 7), + ]); + let post_doms = AllPostDominators::build(&dd, std::iter::once(7)); + + assert!(post_doms.is_postdominated_by(0, 1)); + assert!(post_doms.is_postdominated_by(0, 4)); + assert!(post_doms.is_postdominated_by(0, 7)); + assert!(post_doms.is_postdominated_by(1, 4)); + assert!(post_doms.is_postdominated_by(1, 7)); + assert!(post_doms.is_postdominated_by(4, 7)); + + for i in 0 .. 8 { + for &bad in &[2, 3, 5, 6] { + if i != bad { + assert!( + !post_doms.is_postdominated_by(i, bad), + "{bad} should NOT post-dominate {i}" + ); + } + } + } + } +} diff --git a/crates/aquascope/src/analysis/mod.rs b/crates/aquascope/src/analysis/mod.rs index 9e6ab4fee..a07ddbb41 100644 --- a/crates/aquascope/src/analysis/mod.rs +++ b/crates/aquascope/src/analysis/mod.rs @@ -540,6 +540,7 @@ impl<'a, 'tcx: 'a> AquascopeAnalysis<'a, 'tcx> { smooth_elements(spans) } + /// Convert a potentially non-contiguous collection of [`Point`]s into [`Span`]s. 
fn points_to_spans( &self, points: impl IntoIterator, diff --git a/crates/aquascope/src/analysis/permissions/context.rs b/crates/aquascope/src/analysis/permissions/context.rs index 42d7e570b..2df868c1d 100644 --- a/crates/aquascope/src/analysis/permissions/context.rs +++ b/crates/aquascope/src/analysis/permissions/context.rs @@ -429,6 +429,28 @@ impl<'a, 'tcx> PermissionsCtxt<'a, 'tcx> { body.all_places(tcx, def_id).collect::>() } + pub fn domain_bottom(&self) -> PermissionsDomain<'tcx> { + self + .domain_places() + .into_iter() + .map(|place| { + (place, PermissionsData { + is_live: false, + type_droppable: false, + type_writeable: false, + type_copyable: false, + path_moved: None, + path_uninitialized: false, + loan_read_refined: None, + loan_write_refined: None, + loan_drop_refined: None, + permissions: Permissions::bottom(), + }) + }) + .collect::>() + .into() + } + pub fn permissions_domain_at_point( &self, point: Point, diff --git a/crates/aquascope/src/analysis/stepper/find_steps.rs b/crates/aquascope/src/analysis/stepper/find_steps.rs deleted file mode 100644 index 242599f07..000000000 --- a/crates/aquascope/src/analysis/stepper/find_steps.rs +++ /dev/null @@ -1,1234 +0,0 @@ -//! Core analysis for creating permission steps. -//! -//! # Overview -//! -//! Defined on the MIR, a *permissions step* is the difference in permissions between -//! two adjacent MIR [`Point`]s. The difference represents the gains and losses that -//! occur between adjacent permissions states. Permission steps on the MIR are useless -//! to the average user, thus we cluster subsequences of MIR instructions and take the -//! difference between the first and last point to create a larger step. -//! -//! At a high-level, the strategy is to partition the MIR into subsequences (referred to as segments), -//! such that each segment represents a single permission step. I.E. each segment is a cluster -//! of instructions representing one source-level permissions step. -//! 
After clustering, the steps are easily computed in isolation to create the final permissions -//! steps. As we'll see later, the “isolation” is broken down a little to prevent some specific -//! visual effects. -//! -//! # Splitting Strategy -//! -//! The main goal of the permission stepper is to provide steps that map to logical “steps” in -//! the source code. First, the steps will be determined using HIR language constructs, which are -//! subsequently lowered to fit the more granular MIR language constructs. -//! Starting with the HIR, a so-called “logical step” is roughly defined to be a [`Stmt`](rustc_hir::Stmt). -//! Typically statements fall on their own line and they mark the beginning and end -//! of some potentially permissions-altering operation. This makes up the first loose for -//! finding permissions steps. -//! -//! Statements however, do not cover how permissions change in a control-flow sensitive construct. -//! For example, the statements at the beginning of the then and else branch might execute with -//! different permissions, this sudden change of permissions needs to be communicated to the user, -//! rather than happening on instructions of the CFG these permissions are changed on the *edges*. -//! This forms the second rule of creating a step, namely, a branch in control flow is also a -//! permissions-altering “operation”. The full rules for tracking permissions steps at each -//! respective level of granularity are outlined below. -//! -//! ## Source to HIR -//! -//! In the source code, we'd like a permissions step to be shown after each line and at the -//! opening brace (`{`) of a new block. This requires us to take a permissions step at the -//! following HIR locations. -//! -//! * From before to after each [`Stmt`](rustc_hir::Stmt). -//! * From the last [`Block`](rustc_hir::Block) statement to after the `expr` of a [`Block`](rustc_hir::Block). -//! * From before a branch to *before* the first expression of a branch target. 
For example, at the source-level, -//! this would map to a step from before an `if` to the directly after the opening `{` of the -//! then / else block. -//! -//! Each node in the HIR generates several MIR locations. For information on how to map between the -//! two see the [`IRMapper`]. Important for the stepper, is the ability to find the first, and -//! last, MIR [`Location`] which came from a HIR node. First and last are used in the sense of -//! a [dominator](https://en.wikipedia.org/wiki/Dominator_(graph_theory)) and [post-dominator](https://en.wikipedia.org/wiki/Dominator_(graph_theory)#Postdominance) -//! respectively. The main idea is that the HIR traversal allows us to find the proper *slice points* -//! for the MIR graph. -//! -//! ## HIR to MIR -//! -//! When forming permission steps in the MIR, the most crucial invariant is that the permissions steps -//! form a total cover of the control-flow graph. This invariant remains to ensure that no change in -//! permissions is *missed*. If a change in permissions is not shown (at the source-level), -//! this is due to segmenting steps at the wrong boundaries or at too coarse a granularity. -//! Because of this invariant, the stepper uses a strategy to "slice" the MIR into segments, such that -//! these segments always form a total cover. -//! -//! ### Data Structures Summary -//! -//! The key data structures involvled are the [`MirSegment`] and [`SegmentTree`]. -//! The [`MirSegment`] is a simple struct storing the two points, where a permimssion step -//! will step `from` and where steps `to`. This means that a `MirSegment` must lie on a -//! valid path within the MIR. -//! -//! The [`SegmentTree`] (not to be confused with a [segment tree](https://en.wikipedia.org/wiki/Segment_tree)) -//! is a tree which holds [`MirSegment`]s in its leaves. -//! -//! ### Slicing -//! -//! The core operation performed on the [`SegmentTree`] is taking a *slice*. There are two kinds of -//! slices: -//! -//! 1. 
linear slices, those that **do not** contain permissions-altering CFG edges. -//! 2. control-flow slices, those that **only** contain permissions-altering CFG edges. -//! -//! These two slices exist to maintain the invariants of the [`MirSegment`] and [`SegmentTree`]. -//! Fundamentally, these slices work on different *shapes* of the underlying graph. -//! -//! #### Linear Slices -//! -//! A *linear slice* slices a portion of the graph which forms a continuous subsequence. -//! -//! Example: -//! -//! ```text -//! before slice: -//! -//! slice point -//! | -//! [segment 1] | -//! ⬤ [l1] ----> ⬤ [l2] ----> ⬤ [l3] -//! | -//! | -//! -//! -//! after slice: -//! -//! [segment 1] -//! ⬤ [l1] ----> ⬤ [l2] -//! -//! [segment 2] -//! ⬤ [l2] ----> ⬤ [l3] -//! -//! ``` -//! -//! In the above example there exists a linear sequence of control-flow from `l1 ⟶ l2 ⟶ l3`. -//! Depicted, is a *linear slice* of this segment at location `l2`. Linear slices *always* -//! split a single segment, into two new segments which maintain the [`MirSegment`] invariant. -//! These slices are used after [`Stmt`s](rustc_hir::Stmt) and the end of a [`Block` expression](rustc_hir::Block). -//! -//! #### Control-flow slices -//! -//! A *control-flow* slice, then does not slice a continuous subsequence but multiple that -//! /span across/ branches of control flow. -//! -//! Example: -//! ```text -//! before slice: -//! -//! slice point -//! | -//! | -//! | -//! ------> ⬤ [l2] ------- -//! | | | -//! [segment 1] | | v -//! ----> ⬤ [l1] | ⬤ [l4] ----> -//! | | ^ -//! | | | -//! ------> ⬤ [l3] ------- -//! | -//! | -//! | -//! -//! -//! after slice: -//! -//! [segment 1] -//! ⬤ [l1] ----> ⬤ [l2] -//! -//! [segment 2] -//! ⬤ [l1] ----> ⬤ [l3] -//! -//! [segment 3] -//! ⬤ [l2] ----> ⬤ [l4] -//! -//! [segment 4] -//! ⬤ [l3] ----> ⬤ [l4] -//! -//! ``` -//! Before the slice in segment 1 there is a graph which roughly captures the shape -//! of an if expression. 
location `l1` would be the branch point (corresponding -//! to a `SwitchInt`), `l2` and `l3` would be the then and else branches. Here these -//! branches are abstracted to a single point, but in practice they can be any valid -//! [`MirSegment`]. Then location `l4` joins the branches and control flow continues -//! again linearly. -//! -//! In order to slice a control-flow segment properly, a set of locations is required -//! and the function mapping a location to a control-flow path must be bijective. -//! In the above example, the possible paths through this segment (the usliced segment 1) are: -//! 1. `l1`, `l2`, `l4` -//! 2. `l1`, `l3`, `l4` -//! -//! Therefore, in order to perform a proper slice, the set (`l2`, `l3`) is provided. -//! Luckily, these locations are easy to obtain from the structure of the HIR and correspond -//! to the opening block of each branch. -//! -//! After slicing, the result is four segments that form a total cover of the original -//! segment, and each has a clear entry / exit point for *it's specific control flow*. -//! -//! NOTE: one small semantic difference between the resulting segments. The segments -//! which form the so-called "split set" (segments 1 and 2 in the above example) *cannot* -//! be further split. They are treated as **atomic**. This is intuitive if you image that -//! they only contains edges in the CFG (there would be nothing left to spliti). -//! -//! # Finalizing Differences -//! -//! Slicing the MIR into segments is the core task for the stepper and results -//! in a proper [`SegmentTree`]. The last task of the stepper engine is to take the -//! permissions difference between the domain after the segment, and that before. -//! See the [`PermissionsCtxt`] for more information about computing a [`PermissionsDomain`]. -//! -//! When computing the differences however, there is an edge case when handling liveness. -//! As a result of the generated MIR, it's possible for the left-hand-side of an assignment -//! 
to gain permissions before it seems it should. This occurs when the initializer expression -//! is more complex (e.g. an [`If`](rustc_hir::Expr) or [`Block`](rustc_hir::Expr) expression). -//! To ensure initialized places don't gain permissions before the end of the let statement, -//! these places are marked as /attached/ to a specific MIR location, and they are filtered -//! from any nested segment step results. -//! -//! # Known Shortcomings -//! -//! There are a few major known limitations, they can be resolved we just need the time: -//! -//! - Function bodies that contain infinite loops `loop {}` cannot be analyzed. -//! More general, if there does not contain an exit point to the function the -//! current algorithm will report this limitation to the user. -//! -//! - The control-flow slicing is too strict, if there exists an `if` without -//! and `else`, or if there are multiple returns, the algorithm also fails. - -use anyhow::{bail, Result}; -use rustc_data_structures::{self, fx::FxHashMap as HashMap}; -use rustc_hir::{ - self as hir, - intravisit::{self, Visitor as HirVisitor}, - HirId, -}; -use rustc_middle::{ - hir::nested_filter, - mir::{self, Local, Location, Place}, -}; -use rustc_span::Span; -use rustc_utils::{ - source_map::range::CharRange, test_utils::DUMMY_CHAR_RANGE, PlaceExt, SpanExt, -}; - -use super::{ - segment_tree::{MirSegment, SegmentSearchResult, SegmentTree, SplitType}, - *, -}; -use crate::{ - analysis::{ - ir_mapper::{GatherDepth, IRMapper}, - permissions::{ - Permissions, PermissionsCtxt, PermissionsData, PermissionsDomain, - }, - }, - errors, -}; - -pub fn compute_permission_steps<'a, 'tcx>( - analysis: &AquascopeAnalysis<'a, 'tcx>, -) -> Result> -where - 'tcx: 'a, -{ - let mode = INCLUDE_MODE.copied().unwrap_or(PermIncludeMode::Changes); - let ctxt = &analysis.permissions; - let ir_mapper = &analysis.ir_mapper; - let body = &ctxt.body_with_facts.body; - let _basic_blocks = body.basic_blocks.indices(); - let mut hir_visitor = 
HirStepPoints::make(ctxt, ir_mapper)?; - hir_visitor.visit_nested_body(ctxt.body_id); - - log::debug!( - "Final tree for permission steps\n{:?}", - hir_visitor.mir_segments - ); - - if let Some((_, msg)) = hir_visitor.unsupported_encounter { - bail!(msg); - } - - if !hir_visitor.fatal_error.is_empty() { - bail!(hir_visitor.fatal_error); - } - - Ok(prettify_permission_steps( - analysis, - hir_visitor.finalize_diffs(), - mode, - )) -} - -// Prettify, means: -// - Remove all places that are not source visible -// - Remove all tables which are empty -// - Convert Spans to Ranges -fn prettify_permission_steps<'tcx>( - analysis: &AquascopeAnalysis<'_, 'tcx>, - perm_steps: HashMap< - Span, - (MirSegment, HashMap, PermissionsDataDiff>), - >, - mode: PermIncludeMode, -) -> Vec { - let ctxt = &analysis.permissions; - let tcx = ctxt.tcx; - let body = &ctxt.body_with_facts.body; - - let should_keep = |p: &PermissionsDataDiff| -> bool { - !(matches!(p.is_live, ValueStep::None { value: Some(false) }) - || (mode == PermIncludeMode::Changes && p.is_empty())) - }; - - macro_rules! place_to_string { - ($p:expr) => { - $p.to_string(tcx, body) - .unwrap_or_else(|| String::from("")) - }; - } - - let first_error_span_opt = - errors::get_span_of_first_error(ctxt.def_id.expect_local()) - .and_then(|s| s.as_local(ctxt.body_with_facts.body.span)); - let source_map = tcx.sess.source_map(); - - perm_steps - .into_iter() - .fold( - HashMap::< - CharRange, - Vec<(MirSegment, Vec<(Place<'tcx>, PermissionsDataDiff)>)>, - >::default(), - |mut acc, (span, (segment, place_to_diffs))| { - // Attach the span to the end of the line. Later, all permission - // steps appearing on the same line will be combined. - let span = source_map.span_extend_to_line(span).shrink_to_hi(); - let entries = place_to_diffs - .into_iter() - .filter(|(place, diff)| { - place.is_source_visible(tcx, body) && should_keep(diff) - }) - .collect::>(); - - // This could be a little more graceful. 
The idea is that - // we want to remove all permission steps which occur after - // the first error, but the steps involved with the first - // error could still be helpful. This is why we filter all - // spans with a LO BytePos greater than the error - // span HI BytePos. - if !(entries.is_empty() - || first_error_span_opt - .is_some_and(|err_span| err_span.hi() < span.lo())) - { - let range = analysis.span_to_range(span); - acc.entry(range).or_default().push((segment, entries)); - } - - acc - }, - ) - .into_iter() - // HACK FIXME: we're at odds with the multi-table setup. This quick - // hack combines table entries into a single table until the - // visual explanation gets up-to-speed. - // Another weird thing about this is that you can have a single - // table with two changes for one place. - // ```example - // # fn main() { - // let closure = |s: &str| s.len(); // s: +R+O - // // s: -R-O - // // closure: +R+O - // # } - // ``` - // imagine that the comments to the right of the Let represent - // a pseudo combined table. The path `s` gains and loses the same - // set of permissions in the same table. This is kind of weird, we'd - // rather just show *no change*. - .filter_map(|(range, mut entries)| { - for (_, v) in entries.iter_mut() { - v.sort_by_key(|(place, _)| (place.local.as_usize(), place.projection)) - } - - // let state = entries - // .into_iter() - // .map(|(MirSegment { from, to }, diffs)| { - // let state = diffs - // .into_iter() - // .map(|(place, diff)| { - // let s = place_to_string!(place); - // (s, diff) - // }) - // .collect::>(); - // let from = analysis.span_to_range(ctxt.location_to_span(from)); - // let to = analysis.span_to_range(ctxt.location_to_span(to)); - // PermissionsStepTable { from, to, state } - // }) - // .collect::>(); - - // Conforming to the above HACK this just takes any (from, to) pair. 
- let dummy_char_range = DUMMY_CHAR_RANGE.with(|range| *range); - let (from, to) = entries.first().map_or_else( - || (dummy_char_range, dummy_char_range), - |(MirSegment { from, to }, _)| { - let from = analysis.span_to_range(ctxt.location_to_span(*from)); - let to = analysis.span_to_range(ctxt.location_to_span(*to)); - (from, to) - }, - ); - - let mut master_table: Vec<(Place<'tcx>, PermissionsDataDiff)> = - Vec::default(); - - let is_symmetric_diff = - |diff1: &PermissionsDataDiff, diff2: &PermissionsDataDiff| -> bool { - macro_rules! is_symmetric { - ($v1:expr, $v2:expr) => { - matches!( - (&$v1, &$v2), - (ValueStep::High { .. }, ValueStep::Low { .. }) - | (ValueStep::Low { .. }, ValueStep::High { .. }) - | (ValueStep::None { .. }, ValueStep::None { .. }) - ) - }; - } - let p1 = &diff1.permissions; - let p2 = &diff2.permissions; - is_symmetric!(p1.read, p2.read) - && is_symmetric!(p1.write, p2.write) - && is_symmetric!(p1.drop, p2.drop) - }; - - // For all tables which fall on the same line, we combine them into a single table - // and remove all *SYMMETRIC* differences. That is, if you have permission changes such as: - // - path: +R+O - // - path: -R-O - // these are exactly symmetric, and will be removed. - for (_, diffs) in entries.into_iter() { - for (place, diff) in diffs.into_iter() { - let i_opt = master_table.iter().position(|(p, _)| *p == place); - if let Some(idx) = i_opt { - let (_, old_diff) = &master_table[idx]; - if is_symmetric_diff(&diff, old_diff) { - log::debug!( - "REMOVING place {place:?} with diff {diff:?} into the MT." - ); - master_table.remove(idx); - continue; - } - } - - log::debug!("ADDING place {place:?} with diff {diff:?} into the MT."); - master_table.push((place, diff)); - } - } - - // This means the tables were symmetric and all were removed. 
- if master_table.is_empty() { - return None; - } - - let master_table = PermissionsStepTable { - from, - to, - state: master_table - .into_iter() - .map(|(place, diff)| (place_to_string!(place), diff)) - .collect::>(), - }; - - Some(PermissionsLineDisplay { - location: range, - state: vec![master_table], - }) - }) - .collect::>() -} - -// ------------------------------------------------ - -macro_rules! fatal { - ($this:expr, $( $rest:tt ),*) => { - let f = format!( $($rest)*); - $this.report_fatal(&f); - bail!(f); - } -} - -/// Visitor for creating permission steps in the HIR. -/// -/// Visits the HIR in a Nested order, splitting the MIR and accumulating permission steps. -struct HirStepPoints<'a, 'tcx> -where - 'tcx: 'a, -{ - ctxt: &'a PermissionsCtxt<'a, 'tcx>, - ir_mapper: &'a IRMapper<'a, 'tcx>, - mir_segments: Box, - unsupported_encounter: Option<(Span, String)>, - fatal_error: String, -} - -impl<'a, 'tcx: 'a> HirStepPoints<'a, 'tcx> { - fn make( - ctxt: &'a PermissionsCtxt<'a, 'tcx>, - ir_mapper: &'a IRMapper<'a, 'tcx>, - ) -> Result { - let tcx = ctxt.tcx; - let hir = tcx.hir(); - let body = &hir.body(ctxt.body_id); - let body_hir_id = body.value.hir_id; - let body_span = body.value.span; - - let mol = ir_mapper - .get_mir_locations(body_hir_id, GatherDepth::Nested) - .unwrap(); - - // A body must have an entry location. - let from = mol.entry_location().unwrap(); - - // A body with an infinite loop will not generate MIR that - // contains an exit location. - let Some(to) = mol.exit_location() else { - bail!("The function body under analysis has zero (or many) exit points. 
This currently isn't supported by the permissions stepper; I suggest trying to rewrite the function to contain a single `return`."); - }; - - let body_segment = MirSegment::new(from, to); - let mir_segments = Box::new(SegmentTree::new(body_segment, body_span)); - - Ok(HirStepPoints { - ctxt, - ir_mapper, - mir_segments, - unsupported_encounter: None, - fatal_error: String::default(), - }) - } - - fn report_unsupported(&mut self, id: HirId, msg: &str) { - if self.unsupported_encounter.is_none() { - let span = self.span_of(id); - self.unsupported_encounter = Some((span, String::from(msg))); - } - } - - fn report_fatal(&mut self, msg: &str) { - self.fatal_error.push_str(&"-".repeat(5)); - self.fatal_error.push('\n'); - self.fatal_error.push_str(msg); - } - - /// Determine whether the traversal should visited nested HIR nodes. - /// - /// This method is a sort of HACK to avoid picking apart nodes expanded from - /// macros, while visiting nodes expanded from expected desugarings (e.g. for / while loops). - fn should_visit_nested(&self, _id: HirId, span: Span) -> bool { - use rustc_span::hygiene::DesugaringKind as DK; - !span.from_expansion() - || span.is_desugaring(DK::ForLoop) - || span.is_desugaring(DK::WhileLoop) - } - - /// Split an already linear segment into two segments. - /// - /// Example, a block of statements will produce a graph with the following shape: - /// - /// ```text - /// ⬤ l1 --> ⬤ l2 --> ⬤ l3 - /// ``` - /// - /// The above linear sequence could be split at any of the location `l1, l2, l3` and it - /// would produce two valid segments. For example, splitting the above at `l2` would produce: - /// - /// ```text - /// SegmentTree::Split { - /// segments: SplitType::Linear { - /// first: MirSegment(l1, l2), - /// second: MirSegment(l2, l3), - /// }, - /// reach: MirSegment(l1, l3), - /// ... 
- /// } - /// ``` - fn insert_linear_step_at( - &mut self, - span: Span, - location: Location, - attached_here: Vec, - ) -> Result<()> { - let enclosing_segment = self - .mir_segments - .as_ref() - .find_segment_for_end(location, &self.ir_mapper.cleaned_graph); - - match enclosing_segment { - SegmentSearchResult::NotFound => { - fatal!(self, "{location:?} should always be enclosed in the graph"); - } - SegmentSearchResult::StepExists(segment, ..) => { - log::warn!( - "linear step had slice conflict at {location:?} with {segment:?}" - ); - Ok(()) - } - - SegmentSearchResult::Enclosing(SegmentTree::Single { - segment, - span: old_span, - attached, - }) => { - let mut paths = - segment.paths_along_segment(&self.ir_mapper.cleaned_graph); - - let first_step = SegmentTree::Single { - segment: MirSegment::new(segment.from, location), - attached: attached_here, - span, - }; - - let second_step = SegmentTree::Single { - segment: MirSegment::new(location, segment.to), - attached: vec![], - span: *old_span, - }; - - let _ = paths - .drain_filter(|path| path.contains(&location.block)) - .collect::>(); - - if !paths.is_empty() { - fatal!(self, "Inserting a linear segment should not result in fragmentation.\nSplitting segment: {segment:?} at {location:?}. Remaining paths: {paths:#?}"); - } - - let subtree = SegmentTree::Split { - segments: SplitType::Linear { - first: Box::new(first_step), - second: Box::new(second_step), - }, - reach: *segment, - span: *old_span, - attached: attached.clone(), - }; - - let segment = *segment; - self.mir_segments.as_mut().replace_single(segment, subtree) - } - - _ => { - fatal!(self, "Enclosing segments can only be a `Single` variant, this is a stepper bug!"); - } - } - } - - /// Split a segment into a series of split / join segments for a piece of control flow. - /// - /// Example, a simple `if ... { ... } else { ... }` expression will produce a diamond shaped CFG. 
- /// - /// ```text - /// ⬤ l1 - /// / \ - /// ⬤ l2 ⬤ l3 - /// \ / - /// ⬤ l4 - /// ``` - /// - /// In this diagram, the initial `MirSegment` is `l1` -> `l4`. To produce a well-formed - /// `SegmentTree::Split` node, the locations `[l2, l3]` should be provided as arguments. - /// - /// The specified locations for splitting should satisfy the following properties. - /// 1. All locations are enclosed by the same MirSegment, (in the above example `(l1, l4)`). - /// 2. Each location should correspond to a single path through the control flow. In the above - /// example, the two possible paths are `[l1, l2, l4]` and `[l1, l3, l4]`. - /// 3. The locations should be bijective wrt the possible control-flow paths. - /// - /// The above example would produce a SegmentTree with the following shape: - /// - /// ```text - /// SegmentTree::Split { - /// segments: SegmentType::ControlFlow { - /// splits: vec![ - /// MirSegment::new(l1, l2), - /// MirSegment::new(l1, l3) - /// ], - /// joins: vec![ - /// MirSegment::new(l2, l4), - /// MirSegment::new(l3, l4) - /// ], - /// }, - /// reach: ..., - /// span: ..., - /// } - /// ``` - fn insert_cf_step_at(&mut self, steps: Vec<(Location, Span)>) -> Result<()> { - if steps.is_empty() { - return Ok(()); - } - - let graph = &self.ir_mapper.cleaned_graph; - - let enclosings = steps - .iter() - .filter_map(|(location, _)| { - let res = self.mir_segments.find_segment_for_end(*location, graph); - if let SegmentSearchResult::Enclosing(SegmentTree::Single { - segment, - span, - attached, - }) = res - { - Some((*segment, *span, attached.clone())) - } else { - log::error!( - "searching for {location:?} came up with no result {res:?}" - ); - None - } - }) - .collect::>(); - - if enclosings.len() < steps.len() { - fatal!(self, "not every locations step had an enclosing segment."); - } - - let (segment, old_span, attached) = enclosings.first().unwrap(); - - if !enclosings.iter().all(|(s, _, _)| s == segment) { - fatal!(self, "not all provided 
locations map to the same enclosing segment: {enclosings:#?}"); - } - - let mut paths = segment.paths_along_segment(&self.ir_mapper.cleaned_graph); - - let mut splits = Vec::default(); - let mut joins = Vec::default(); - - for (location, span) in steps.into_iter() { - let split_step = SegmentTree::Single { - segment: MirSegment::new(segment.from, location), - attached: vec![], - span, - }; - - let join_step = SegmentTree::Single { - segment: MirSegment::new(location, segment.to), - attached: vec![], - span: *old_span, - }; - - let _removed_paths = paths - .drain_filter(|path| path.contains(&location.block)) - .collect::>(); - - splits.push(split_step); - joins.push(join_step); - } - - let subtree = SegmentTree::Split { - segments: SplitType::ControlFlow { splits, joins }, - reach: *segment, - span: *old_span, - attached: attached.clone(), - }; - - self.mir_segments.replace_single(*segment, subtree) - } - - fn span_of(&self, id: HirId) -> Span { - let hir = self.ctxt.tcx.hir(); - let span = hir.span(id); - span - .as_local(self.ctxt.body_with_facts.body.span) - .unwrap_or(span) - } - - fn body_value_id(&self) -> HirId { - let hir = self.ctxt.tcx.hir(); - hir.body(self.ctxt.body_id).value.hir_id - } - - /// The [`PermissionsDomain`] ⊥. - /// - /// No permissions, anywhere. - fn domain_bottom(&self) -> PermissionsDomain<'tcx> { - self - .ctxt - .domain_places() - .into_iter() - .map(|place| { - (place, PermissionsData { - is_live: false, - type_droppable: false, - type_writeable: false, - type_copyable: false, - path_moved: None, - path_uninitialized: false, - loan_read_refined: None, - loan_write_refined: None, - loan_drop_refined: None, - permissions: Permissions::bottom(), - }) - }) - .collect::>() - .into() - } - - /// Convert the current [`SegmentTree`] into permission steps. 
- fn finalize_diffs( - self, - ) -> HashMap, PermissionsDataDiff>)> - { - let body_hir_id = self.body_value_id(); - let body_open_brace = self.span_of(body_hir_id).shrink_to_lo(); - let first_point = self.ctxt.location_to_point(self.body_segment().from); - let first_domain = &self.ctxt.permissions_domain_at_point(first_point); - let empty_domain = &self.domain_bottom(); - - // Upon entry, the function parameters are already "live". But we want to - // special case this, and show that they "come alive" at the opening brace. - let first_diff = empty_domain.diff(first_domain); - - fn diff_subtree<'tcx>( - ctxt: &PermissionsCtxt<'_, 'tcx>, - tree: &SegmentTree, - result: &mut HashMap< - Span, - (MirSegment, HashMap, PermissionsDataDiff>), - >, - attached_at: &mut HashMap, - ) { - log::trace!( - "\ndiff_subtree\n[FILTERS]:\n{attached_at:?}\n[TREE]:{tree:?}" - ); - - macro_rules! is_attached { - ($set:expr, $place:expr, $loc:expr) => { - $set.get(&$place.local).map(|l| *l == $loc).unwrap_or(false) - }; - } - - let mut insert_segment = |segment: MirSegment, span: Span| { - if segment.from != segment.to { - let p0 = ctxt.location_to_point(segment.from); - let p1 = ctxt.location_to_point(segment.to); - let before = &ctxt.permissions_domain_at_point(p0); - let after = &ctxt.permissions_domain_at_point(p1); - let mut diff = before.diff(after); - - let removed = diff - .drain_filter(|place, _| { - is_attached!(attached_at, place, segment.to) - }) - .collect::>(); - - log::debug!( - "removed domain places due to attached filter at {:?} {:?}", - segment.to, - removed - ); - - result.insert(span, (segment, diff)); - } - }; - - match tree { - SegmentTree::Single { segment, span, .. 
} => { - insert_segment(*segment, *span) - } - SegmentTree::Split { - segments, - attached, - reach, - span, - } => { - // Add the attached places filter - for local in attached.iter() { - log::debug!( - "filtering Local {local:?} not attached to {:?}", - reach.to - ); - - let old = attached_at.insert(*local, reach.to); - assert!(old.is_none()); - } - - match segments { - SplitType::Linear { first, second } => { - diff_subtree(ctxt, first, result, attached_at); - diff_subtree(ctxt, second, result, attached_at); - } - - // CF Splits with exactly one branch / join are considered linear - // This happens frequently when there is ForLoop desugaring. - SplitType::ControlFlow { splits, joins } - if splits.len() == 1 && joins.len() == 1 => - { - diff_subtree(ctxt, &splits[0], result, attached_at); - diff_subtree(ctxt, &joins[0], result, attached_at); - } - - SplitType::ControlFlow { splits, joins } => { - for subtree in splits.iter() { - diff_subtree(ctxt, subtree, result, attached_at); - } - - let mut joined_diff = HashMap::default(); - let mut entire_diff = reach.into_diff(ctxt); - - // Rules for joining two domain differences. - // 1. We always insert the attached locals. - let attached_here = entire_diff - .drain_filter(|place, _| { - is_attached!(attached_at, place, reach.to) - }) - .collect::>(); - - // 2. Differences not found in *any* of the join segments are ignored - for subtree in joins.iter() { - let mut temp = HashMap::default(); - diff_subtree(ctxt, subtree, &mut temp, attached_at); - - // HACK: remove any differences that were attached to this span. - temp.remove(span); - - // HACK: manually remove any attached places which got added. - for (_, (_, diffs)) in temp.iter_mut() { - diffs - .drain_filter(|place, _| attached_here.contains_key(place)); - } - - joined_diff.extend(temp); - } - - assert!(!result.contains_key(span)); - assert!(joined_diff.get(span).is_none()); - - // FIXME: the reach is not the correct set of points here. 
- // But we don't currently have a good semantic model for - // what it should be. They aren't currently being - // displayed by the frontend so this isn't a problem (yet). - result.insert(*span, (*reach, attached_here)); - result.extend(joined_diff); - } - } - - // Remove the attached places filter. - for local in attached.iter() { - attached_at.remove(local); - } - } - } - } - - let mut diffs = HashMap::default(); - let mut attached_at = HashMap::default(); - let dummy_loc = Location { - block: mir::START_BLOCK, - statement_index: 0, - }; - - diffs.insert( - body_open_brace, - ( - MirSegment { - from: dummy_loc, - to: dummy_loc, - }, - first_diff, - ), - ); - - diff_subtree(self.ctxt, &self.mir_segments, &mut diffs, &mut attached_at); - - diffs - } - - fn body_segment(&self) -> &MirSegment { - match self.mir_segments.as_ref() { - SegmentTree::Split { reach, .. } => reach, - SegmentTree::Single { segment, .. } => segment, - } - } -} - -macro_rules! split_with_control_flow { - ($this:tt, $ids:expr) => { - split_with_control_flow!($this, $ids, "CF-SPLIT ") - }; - - ($this:tt, $ids:expr, $msg:expr) => { - let f = format!("{}\nsplitting the control flow with:\n{:#?}", $msg, $ids); - $ids - .into_iter() - .map(|id| { - $this - .ir_mapper - .get_mir_locations(id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.entry_location().map(|entry| { - let span = $this.span_of(id).shrink_to_lo(); - (entry, span) - }) - }) - }) - .fold(Some(Vec::default()), |acc, step| { - if let (Some(mut acc), Some(step)) = (acc, step) { - acc.push(step); - Some(acc) - } else { - None - } - }) - .and_then(|steps| $this.insert_cf_step_at(steps).ok()) - .unwrap_or_else(|| { - $this.report_fatal(&f); - }); - }; -} - -macro_rules! 
split_with_linear { - ($this:tt, $id:expr) => { - split_with_linear!($this, $id, "splitting linearly") - }; - - ($this:tt, $id:expr, $msg:expr) => { - split_with_linear!($this, $id, $msg, vec![]) - }; - - ($this:tt, $id:expr, $msg:expr, $attached:expr) => { - $this - .ir_mapper - .get_mir_locations($id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.exit_location().map(|exit| { - let span = $this.span_of($id); - let exit = $this - .ir_mapper - .cleaned_graph - .location_successor(exit) - .unwrap_or(exit); - $this - .insert_linear_step_at(span, exit, $attached) - .expect(""); - }) - }) - .unwrap_or_else(|| { - log::warn!( - "Expected entry / exit locations but none were found: {:?}", - $msg - ); - }); - }; -} - -impl<'a, 'tcx: 'a> HirVisitor<'tcx> for HirStepPoints<'a, 'tcx> { - type NestedFilter = nested_filter::All; - - fn nested_visit_map(&mut self) -> Self::Map { - self.ctxt.tcx.hir() - } - - fn visit_stmt(&mut self, stmt: &'tcx hir::Stmt) { - use rustc_hir::StmtKind as SK; - let hir = self.nested_visit_map(); - let error_msg = - format!("Analyzing statement : {}", hir.node_to_string(stmt.hir_id)); - - let locals = match stmt.kind { - SK::Local(local) => { - let places = self.ir_mapper.local_assigned_place(local); - places.into_iter().map(|p| p.local).collect::>() - } - _ => vec![], - }; - - split_with_linear!(self, stmt.hir_id, error_msg, locals); - - if self.should_visit_nested(stmt.hir_id, stmt.span) { - intravisit::walk_stmt(self, stmt); - } - } - - fn visit_block(&mut self, block: &'tcx hir::Block) { - let hir = self.ctxt.tcx.hir(); - - for stmt in block.stmts.iter() { - self.visit_stmt(stmt); - } - - if let Some(expr) = block.expr { - let error_msg = - format!("end-of-statement expr: {}", hir.node_to_string(expr.hir_id)); - split_with_linear!(self, expr.hir_id, error_msg); - self.visit_expr(expr); - } - } - - fn visit_expr(&mut self, expr: &'tcx hir::Expr) { - use hir::{ExprKind as EK, LoopSource, StmtKind as SK}; - - let hir = 
self.nested_visit_map(); - let error_msg = - format!("Analyzing expr : {}", hir.node_to_string(expr.hir_id)); - - match expr.kind { - // Special case for While Loop desugaring, this shouldn't be necessary - // when generic loops are handled. - EK::Loop( - hir::Block { - stmts: [], - expr: - Some(hir::Expr { - kind: EK::If(cnd, then, Some(els)), - .. - }), - .. - }, - _label, - LoopSource::While, - _loop_span, - ) => { - self - .ir_mapper - .get_mir_locations(then.hir_id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.entry_location().map(|then_entry| { - self - .ir_mapper - .get_mir_locations(els.hir_id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.exit_location().map(|else_exit| { - let loop_end = self.span_of(expr.hir_id).shrink_to_hi(); - let if_start = self.span_of(then.hir_id).shrink_to_lo(); - - let ls = - vec![(then_entry, if_start), (else_exit, loop_end)]; - - self.insert_cf_step_at(ls).expect(""); - }) - }) - .unwrap(); - }) - }) - .unwrap(); - - // Skip the else block, it only contains the break statement. - intravisit::walk_expr(self, cnd); - intravisit::walk_expr(self, then); - } - - // Special case for For Loop desugaring, this shouldn't be necessary - // when generic loops are handled. - EK::Loop( - hir::Block { - stmts: - [hir::Stmt { - kind: - SK::Expr(hir::Expr { - kind: EK::Match(cnd, [none, some], _), - .. - }), - .. - }], - expr: None, - .. 
- }, - _label, - LoopSource::ForLoop, - _loop_span, - ) => { - self - .ir_mapper - .get_mir_locations(some.body.hir_id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.entry_location().map(|then_entry| { - self - .ir_mapper - .get_mir_locations(none.body.hir_id, GatherDepth::Nested) - .and_then(|mir_order| { - mir_order.exit_location().map(|else_exit| { - let loop_end = self.span_of(expr.hir_id).shrink_to_hi(); - let loop_start = - self.span_of(some.body.hir_id).shrink_to_lo(); - - let ls = - vec![(then_entry, loop_start), (else_exit, loop_end)]; - - self.insert_cf_step_at(ls).expect(""); - }) - }) - .unwrap(); - }) - }) - .unwrap(); - - // ignore the none branch as it just contains the break. - intravisit::walk_expr(self, cnd); - intravisit::walk_arm(self, some); - } - - // TODO: have a split strategy for bare loops. They could be infinite, and - // thus have no exit block. This shouldn't be an issue but it currently is. - EK::Loop(_block, _label, LoopSource::Loop, _span) => { - self.report_unsupported(expr.hir_id, "Bare loops aren't working yet, sorry! Can I interest you in a `for` or `while` loop?"); - } - - EK::If(cnd, then, else_opt) => { - // NOTE: first we need to walk and split the condition. In the - // case of a more complex condition expression, splitting this - // first will result in a split location closest to the `SwitchInt`. - intravisit::walk_expr(self, cnd); - - let ids = [Some(then), else_opt] - .iter() - .flatten() - .map(|n| n.hir_id) - .collect::>(); - - split_with_control_flow!(self, ids, error_msg); - - intravisit::walk_expr(self, then); - if let Some(els) = else_opt { - intravisit::walk_expr(self, els); - } - } - - EK::Match(swtch, arms, _source) => { - // NOTE: first we need to walk and split the condition. In the - // case of a more complex condition expression, splitting this - // first will result in a split location closest to the `SwitchInt`. 
- intravisit::walk_expr(self, swtch); - - let ids = arms - .iter() - .map(|arm| { - if arm.guard.is_some() { - self.report_unsupported( - arm.hir_id, - "Arm guards are not supported, sorry!", - ) - } - - arm.body.hir_id - }) - .collect::>(); - - split_with_control_flow!(self, ids, error_msg); - - for arm in arms.iter() { - intravisit::walk_arm(self, arm); - } - } - _ => { - intravisit::walk_expr(self, expr); - } - } - } -} diff --git a/crates/aquascope/src/analysis/stepper/hir_steps.rs b/crates/aquascope/src/analysis/stepper/hir_steps.rs new file mode 100644 index 000000000..14d112800 --- /dev/null +++ b/crates/aquascope/src/analysis/stepper/hir_steps.rs @@ -0,0 +1,1226 @@ +//! HIR-level stepper (and entry point) for computing permissions steps. +//! +//! The permissions stepper computes the differences in permissions +//! between two "states". These differences are computed per [`mir::Place`], +//! to read how they are aggregated and displayed see [super::table_builder]. +//! +//! Computing these permissions steps takes a surprising amount of coordination +//! between the HIR and the MIR. Fundamentally, the HIR has the information we +//! need about the _source program_ while the MIR holds the information +//! about control-flow and code points. Because permissions steps are associated +//! with a source span, we need the HIR to communicate this down to the MIR, but +//! we need the MIR to ensure that created steps are valid. To understand +//! the validation of creating permissions steps see [`super::segmented_mir`]. +//! +//! At a (very) high-level, we insert steps after anything interesting +//! could happen. Interesting in this case means (1) it's visible at the source- +//! level, and (2) a change in permissions could be captured. The three main places +//! where this could happen are: +//! +//! 1. After statements. +//! 2. After the final expression in blocks. +//! 3. Entering a block, potentially from a conditional branch +//! 
which can cause liveness permissions changes. +//! +//! For most of the process, the [`SegmentedMirBuilder`] handles all the +//! tough work of making sure steps are valid. There are a few cases when +//! the HIR knows more about the structure of a program and they all have to +//! do with placing spans. Life would be much better if we didn't have to +//! place spans, or if the Rust compiler had a richer model for tracking spans, +//! but that's not the case (_stares longingly out the window_). The main places +//! where this happens are loop desugarings and branches. The reasons why are +//! touched on briefly below. +//! +//! Several constructs as they appear in the HIR are desugared compared to the +//! language constructs one uses in Rust source code. For example, a `while cnd { ... }` +//! loop will get desugared into `loop { if cnd { ... } else { break; }}`. These +//! desugarings have to be special-cased by the stepper so that we get the span +//! placement _just right_. +//! +//! Branches again require the HIR to make some decisions about step locations. +//! When a match expression is encountered, it might look like the following: +//! +//! ```ignore +//! match Some(10) { +//! None => 0, +//! Some(n) => { +//! n * 2 +//! }, +//! } +//! ``` +//! +//! When computing steps over the arms of the match, the `SegmentedMirBuilder` would +//! insert a step at the very beginning of each branch target. However, that's not +//! quite what we want: if the user thinks of the opening curly brace as the beginning +//! of the branch, then in the `Some` case `n` is _already bound_. We can use info +//! at the HIR level to make this micro-adjustment, which computes the branch target +//! as being after the code initializing all bound variables in a match pattern. 
+ +use anyhow::{anyhow, Result}; +use rustc_data_structures::{self, fx::FxHashMap as HashMap}; +use rustc_hir::{ + self as hir, + intravisit::{self, Visitor as HirVisitor}, + BodyId, HirId, +}; +use rustc_middle::{ + hir::nested_filter, + mir::{self, Body, Local, Location}, + ty::TyCtxt, +}; +use rustc_span::Span; +use rustc_utils::SpanExt; + +use super::{segmented_mir::*, table_builder::*, *}; +use crate::analysis::ir_mapper::{GatherDepth, IRMapper}; + +/// Visitor for creating permission steps in the HIR. +/// +/// Visits the HIR in a Nested order, splitting the MIR and accumulating permission steps. +pub(super) struct HirStepPoints<'a, 'tcx> +where + 'tcx: 'a, +{ + tcx: &'a TyCtxt<'tcx>, + body: &'a Body<'tcx>, + body_id: BodyId, + ir_mapper: &'a IRMapper<'a, 'tcx>, + + // Error reporting counters + unsupported_features: Vec, + fatal_errors: Vec, + + // Actual state of the analysis + /// Entry location of the body under analysis. + start_loc: Location, + locals_at_scope: HashMap>, + /// Stack of the current branch entry points, used + /// for hinting path steps to the `SegmentedMir`. + current_branch_start: Vec, + mir_segments: SegmentedMirBuilder<'a, 'tcx>, +} + +/// Makes calling functions on the SegmentedMir easier. +/// All functions on the `SegmentedMir` return a Result in +/// the case that the internal state gets off. When it does, +/// we should save the error and stop the current computation. +/// As with most error-relevant things, if internally an error +/// state is entered more errors are likely to occur, but it's +/// really the first we care about. +macro_rules! 
invoke_internal { + (on_fail -> $ret:expr, $this:ident, $call:ident, $($param:expr),*) => { + match $this.mir_segments.$call($( $param ),*) { + Err(e) => { + $this.fatal_errors.push(e); + return $ret; + }, + Ok(v) => v, + } + }; + (on_fail -> $ret:expr, $this:ident, $call:ident) => { + invoke_internal!(on_fail -> $ret, $this, $call,) + }; + (on_fail -> $ret:expr, $this:ident, $call:ident, $($param:expr),*) => { + invoke_internal!(on_fail -> $ret, $this, $call, $($param:expr),*) + }; + ($this:ident, $call:ident) => { + invoke_internal!(on_fail -> (), $this, $call,) + }; + ($this:ident, $call:ident, $( $param:expr ),*) => { + invoke_internal!(on_fail -> (), $this, $call, $( $param ),*) + }; +} + +macro_rules! report_unexpected { + ($this:ident, $($param:expr),*) => { + $this.fatal_errors.push(anyhow!($( $param ),*)) + } +} + +macro_rules! report_unsupported { + ($this:ident, $($param:expr),*) => { + $this.unsupported_features.push(anyhow!($( $param ),*)) + } +} + +impl<'a, 'tcx: 'a> HirStepPoints<'a, 'tcx> { + pub(super) fn make( + tcx: &'a TyCtxt<'tcx>, + body: &'a Body<'tcx>, + body_id: BodyId, + ir_mapper: &'a IRMapper<'a, 'tcx>, + ) -> Result { + let mir_segments = SegmentedMirBuilder::make(ir_mapper); + let start_loc = mir::START_BLOCK.start_location(); + + Ok(HirStepPoints { + tcx, + body, + body_id, + ir_mapper, + unsupported_features: Vec::default(), + fatal_errors: Vec::default(), + start_loc, + locals_at_scope: HashMap::default(), + current_branch_start: Vec::default(), + mir_segments, + }) + } + + fn process_error(stack: &[anyhow::Error]) -> Option { + use itertools::Itertools; + if stack.is_empty() { + return None; + } + + Some( + stack + .iter() + .map(|e: &anyhow::Error| e.to_string()) + .join("\n"), + ) + } + + pub(super) fn get_unsupported_feature(&self) -> Option { + Self::process_error(&self.unsupported_features) + } + + pub(super) fn get_internal_error(&self) -> Option { + Self::process_error(&self.fatal_errors) + } + + pub(super) fn finalize( + 
self, + analysis: &AquascopeAnalysis<'_, 'tcx>, + mode: PermIncludeMode, + ) -> Result> { + let body_hir_id = self.body_value_id(); + let body_span = self.span_of(body_hir_id); + + let mir_segments = self.mir_segments.freeze()?; + + log::debug!( + "Steps analysis found these steps: {:#?}", + mir_segments.segments().collect::>() + ); + + let finalizer = TableBuilder { + analysis, + ctxt: &analysis.permissions, + mir: &mir_segments, + locals_at_scope: self.locals_at_scope, + }; + + Ok(finalizer.finalize_body(self.start_loc, body_span, mode)) + } + + // Used for tracking path hints of the current branches. + + fn get_path_hint(&self) -> Option { + self.current_branch_start.last().copied() + } + + fn push_branch_start(&mut self, location: Location) { + self.current_branch_start.push(location) + } + + fn pop_branch_start(&mut self, expecting: Location) { + if let Some(popped) = self.current_branch_start.pop() && popped != expecting { + report_unexpected!(self, "expecting popped location {expecting:?} but got {popped:?}") + } + } + + /// Determine whether the traversal should visited nested HIR nodes. + /// + /// This method is a sort of HACK to avoid picking apart nodes expanded from + /// macros, while visiting nodes expanded from expected desugarings (e.g. for / while loops). + fn span_of(&self, id: HirId) -> Span { + let hir = self.tcx.hir(); + let span = hir.span(id); + span.as_local(self.body.span).unwrap_or(span) + } + + fn body_value_id(&self) -> HirId { + let hir = self.tcx.hir(); + hir.body(self.body_id).value.hir_id + } + + fn get_node_entry(&self, hir_id: HirId) -> Option { + let mir_order = self + .ir_mapper + .get_mir_locations(hir_id, GatherDepth::Nested)?; + mir_order.entry_location() + } + + fn get_node_exit(&self, hir_id: HirId) -> Option { + let mir_order = self + .ir_mapper + .get_mir_locations(hir_id, GatherDepth::Nested)?; + + // HACK: shift the exit to the next successor if available. 
+ // this way we capture the state changes for a single + // operation rather than having an off by one. + // TODO: a more elegant solution would be to have a way to + // specify at which execution point you want the permission + // state, before, middle, or after an instruction. This is + // similar to what the MIR does but it doesn't provide an + // after point, only a start and mid. + mir_order.exit_location().map(|e| { + self + .ir_mapper + .cleaned_graph + .location_successor(e) + .unwrap_or(e) + }) + } + + fn prettify_node(&self, hir_id: HirId) -> String { + let hir = self.tcx.hir(); + hir.node_to_string(hir_id) + } + + /// Open a conditional expression for branching. On success, returns + /// the exit `Location` of the given conditon. + /// + /// Examples, given a `EK::If(Expr, Expr, Option)`, the given condition expression should + /// be the first expression in the tuple, which is the condition. + /// For a `EK::Match(Expr, [Arm], ...)` the given condition should be the first expression + /// in the tuple which is the match condition. + fn expr_condition_prelude( + &mut self, + cnd: &'tcx hir::Expr, + expr: &'tcx hir::Expr, + ) -> Option { + // NOTE: first we need to walk and split the condition. In the + // case of a more complex condition expression, splitting this + // first will result in a split location closest to the `SwitchInt`. + self.visit_expr(cnd); + let Some(cnd_exit) = self.get_node_exit(cnd.hir_id).or_else(|| { + log::warn!( + "EXPR condition has no exit {} looking at expr entry", + self.prettify_node(cnd.hir_id) + ); + self.get_node_entry(expr.hir_id) + }) else { + log::warn!("cannot do EXPR prelude, aborting"); + return None; + }; + + invoke_internal!( + on_fail -> None, + self, + insert, + cnd_exit, + self.get_path_hint(), + self.span_of(cnd.hir_id) + ); + + Some(cnd_exit) + } + + /// Close the entire branching expression which had the condition exit. + /// + /// Here, the given expression should be the _entire_ `EK::If` or `EK::Match`. 
+ fn expr_condition_postlude(&mut self, bid: BranchId, hir_id: HirId) { + log::warn!( + "flushing and closing branch steps:\n{}", + self.prettify_node(hir_id) + ); + + invoke_internal!(self, close_branch, bid); + } + + /// Inserts a step point after the specified `HirId`. This + /// method is generic and takes the raw span returned by the + /// `IRMapper`, if a node requires tweaking for the span this + /// should not be used. + fn insert_step_at_node_exit(&mut self, hir_id: HirId) { + if let Some(exit) = self.get_node_exit(hir_id) { + invoke_internal!( + self, + insert, + exit, + self.get_path_hint(), + self.span_of(hir_id) + ); + } else { + log::warn!( + "Node {} doesn't have an exit location.", + self.prettify_node(hir_id) + ); + } + } + + fn condition_produced_switchint(&self, expr: &'tcx hir::Expr) -> bool { + if let Some(exit) = self.get_node_exit(expr.hir_id) { + log::debug!( + "checking location {exit:?} to see if terminator is switchInt" + ); + self.ir_mapper.is_terminator_switchint(exit) + } else { + // If the IRMapper can't determine a single exit location that + // is most often caused by branching, in this case we just assume + // that a switchInt was produced. We could do something more robust + // if we see the need for it. + true + } + } + + // Factored out of the Visitor because this same logic is needed for + // EK::If and while loop desugarings, just with a different location + // to span mapping.
+ fn handle_expr_if( + &mut self, + expr: &'tcx hir::Expr, + cnd: &'tcx hir::Expr, + then: &'tcx hir::Expr, + else_opt: Option<&'tcx hir::Expr>, + entry_locs_to_spans: HashMap, + ) { + log::debug!( + "visiting EXPR-IF\n\tCND: {}\n\t\tTHEN: {}\n\t\tELSE: {}", + self.prettify_node(cnd.hir_id), + self.prettify_node(then.hir_id), + else_opt.map_or(String::from(""), |e| self.prettify_node(e.hir_id)) + ); + let expr_id = expr.hir_id; + let Some(cnd_exit) = self.expr_condition_prelude(cnd, expr) else { + return; + }; + + let mapper = self.ir_mapper; + // We use this default span because an ExprKind::If can produce branches + // that "don't exist" at the HIR-level. This happens when no else-branch + // is provided, therefore we chose this default span to match the end + // of the If expression itself. + let default_span = self.span_of(expr_id).shrink_to_hi(); + let branch_id = invoke_internal!( + self, + open_branch, + cnd_exit, + move |to: &mut Location| { + entry_locs_to_spans + .iter() + .find_map(|(&l, &span)| { + if mapper.ldominates(*to, l) { + *to = l; + Some(span) + } else { + None + } + }) + .unwrap_or(default_span) + } + ); + + if let Some(then_entry) = self.get_node_entry(then.hir_id) { + self.push_branch_start(then_entry); + self.visit_expr(then); + self.pop_branch_start(then_entry); + } else { + log::warn!( + "then-branch doesn't have entry {}", + self.prettify_node(then.hir_id) + ); + } + + if let Some(els) = else_opt { + if let Some(els_entry) = self.get_node_entry(els.hir_id) { + self.push_branch_start(els_entry); + self.visit_expr(els); + self.pop_branch_start(els_entry); + } else { + log::warn!( + "else-branch doesn't have entry {}", + self.prettify_node(els.hir_id) + ); + } + } + + self.expr_condition_postlude(branch_id, expr_id); + } + + fn handle_expr_match( + &mut self, + expr: &'tcx hir::Expr, + cnd: &'tcx hir::Expr, + arms: &'tcx [hir::Arm], + entry_locs_to_spans: HashMap, + ) { + let expr_id = expr.hir_id; + let Some(cnd_exit) = 
self.expr_condition_prelude(cnd, expr) else { + return; + }; + let mapper = self.ir_mapper; + let branch_id = invoke_internal!( + self, + open_branch, + cnd_exit, + move |to: &mut Location| { + entry_locs_to_spans + .iter() + .find_map(|(&l, &span)| { + if mapper.ldominates(*to, l) { + // Update the location to be the entry of the arm. + *to = l; + Some(span) + } else { + None + } + }) + .unwrap_or(Span::default()) + } + ); + + for arm in arms { + self.visit_arm(arm); + } + + self.expr_condition_postlude(branch_id, expr_id); + } +} + +impl<'a, 'tcx: 'a> HirVisitor<'tcx> for HirStepPoints<'a, 'tcx> { + type NestedFilter = nested_filter::All; + + fn nested_visit_map(&mut self) -> Self::Map { + self.tcx.hir() + } + + fn visit_body(&mut self, body: &'tcx hir::Body) { + intravisit::walk_body(self, body); + self.insert_step_at_node_exit(body.value.hir_id); + } + + fn visit_block(&mut self, block: &'tcx hir::Block) { + let scope = invoke_internal!(self, open_scope); + for stmt in block.stmts.iter() { + self.visit_stmt(stmt); + } + + if let Some(expr) = block.expr { + log::debug!("BLOCK contains final EXPR"); + self.visit_expr(expr); + self.insert_step_at_node_exit(expr.hir_id); + } + invoke_internal!(self, close_scope, scope); + } + + fn visit_stmt(&mut self, stmt: &'tcx hir::Stmt) { + use rustc_hir::StmtKind as SK; + + log::debug!( + "Starting analysis of STMT {}\n", + self.prettify_node(stmt.hir_id), + ); + + let scope = invoke_internal!(self, open_scope); + + if let SK::Local(local) = stmt.kind { + let places = self.ir_mapper.local_assigned_place(local); + let locals = places.into_iter().map(|p| p.local).collect::>(); + if !locals.is_empty() { + log::debug!("storing locals at scope {scope:?} {locals:?}"); + self.locals_at_scope.insert(scope, locals); + } + } + + intravisit::walk_stmt(self, stmt); + + // Close the scope before inserting the final steps. 
+ invoke_internal!(self, close_scope, scope); + + self.insert_step_at_node_exit(stmt.hir_id); + } + + fn visit_expr(&mut self, expr: &'tcx hir::Expr) { + use hir::{ExprKind as EK, LoopSource, MatchSource, StmtKind as SK}; + match expr.kind { + EK::If(cnd, then, else_opt) => { + // For the generic case we can use the opening brace of each branch + // target as the span. + let mut entry_to_spans = HashMap::default(); + + // Insert the location and span for the then branch + if let Some(then_entry) = self.get_node_entry(then.hir_id) { + let then_span = self.span_of(then.hir_id).shrink_to_lo(); + entry_to_spans.insert(then_entry, then_span); + } + + // Insert the location and span for the else branch + if let Some(els) = else_opt && let Some(else_entry) = self.get_node_entry(els.hir_id) { + let else_span = self.span_of(els.hir_id).shrink_to_lo(); + entry_to_spans.insert(else_entry, else_span); + } + + self.handle_expr_if(expr, cnd, then, else_opt, entry_to_spans); + } + + // HACK: Special cases for ForLoop and While desugarings. + // + // These special cases are needed to _adjust the spans_. + // Example: + // ```ignore + // fn foo(mut s: String) { + // s.push_str("looping ") + // let b = &mut s; // - Table 1 - + // // b: +R +W + // // s: -R -W -O + // while true { /* open */ + // b.push_str("again... and "); + // } /* close */ // - Table 2 - + // // b: -R -W + // // s: +R +W +O + // s.push_str("done!"); + // println!("{s}"); + // } + // ``` + // If we don't adjust for the desugaring, "Table 2" would + // be placed on the line labeled "/* open */", but we want + // it to actually get placed at the end of the loop where + // it is depicted above. A similar adjustment is needed + // for `for` loops. + + // While loops need to be detected with the surrounding loop. + EK::Loop( + hir::Block { + stmts: [], + expr: + Some(hir::Expr { + kind: EK::If(cnd, then, Some(els)), + .. + }), + ..
+ }, + _, + LoopSource::While, + _, + ) => { + // For the generic case we can use the opening brace of each branch + // target as the span. + let mut entry_to_spans = HashMap::default(); + + // Insert the location and span for the then branch + if let Some(then_entry) = self.get_node_entry(then.hir_id) { + let then_span = self.span_of(then.hir_id).shrink_to_lo(); + entry_to_spans.insert(then_entry, then_span); + } + + // Insert the location and span for the else branch + if let Some(else_entry) = self.get_node_entry(els.hir_id) { + // NOTE: we adjust the span of the break block to + // be _after_ the loop. + let else_span = self.span_of(expr.hir_id).shrink_to_hi(); + entry_to_spans.insert(else_entry, else_span); + } + + self.handle_expr_if(expr, cnd, then, Some(els), entry_to_spans); + } + + EK::Loop( + hir::Block { + stmts: + [hir::Stmt { + kind: + SK::Expr(hir::Expr { + kind: EK::Match(cnd, arms @ [none, some], _), + .. + }), + .. + }], + expr: None, + .. + }, + _, + LoopSource::ForLoop, + _, + ) => { + let mut entry_to_spans = HashMap::default(); + + let loop_start = self.span_of(some.body.hir_id).shrink_to_lo(); + let loop_end = self.span_of(expr.hir_id).shrink_to_hi(); + + // Iterator::next => None, breaking out of the loop + if let Some(none_entry) = self.get_node_entry(none.body.hir_id) { + entry_to_spans.insert(none_entry, loop_end); + } + + // Iterator::next => Some(_), execute loop body + if let Some(some_entry) = self.get_node_entry(some.body.hir_id) { + entry_to_spans.insert(some_entry, loop_start); + } + + #[allow(clippy::needless_borrow)] + self.handle_expr_match(expr, cnd, &arms, entry_to_spans); + } + + // NOTE: if a match condition doesn't produce a `switchInt`, there + // is no need to open a scope for this case. This most + // commonly happens when there is a single arm (common for desugarings) + // but it can also happen if future arms are elided. However, we + // still want to show the steps at the arm locations.
+ EK::Match(cnd, [_], MatchSource::ForLoopDesugar) + if !self.condition_produced_switchint(cnd) => + { + log::debug!( + "Match condition didn't produce switchInt {}", + self.prettify_node(cnd.hir_id) + ); + intravisit::walk_expr(self, expr); + } + + // TODO this view of how a match branches is too simplistic, and + // doesn't accurately reflect reality. There could be many + // generated `switchInt`s or there could be none. + // Example: + // ```ignore + // match x { + // 0 => 1, + // 1 => 1, + // x => x, + // } + // ``` + // the above match block would generate NO `switchInt`, just + // a series of `goto`s. Contrasted with something such as: + // + // ```ignore + // match x { + // None => 1, + // Some(1) => 1, + // Some(x) => x, + // } + // ``` + // + // which will actually generate two `switchInt`s, one for the + // discriminant match and another for the inner integer check. + // These two cases are relatively simple, but branching for a + // generic match is complicated with the current internal API. + // What we would want, is automatic opening of a branch, + // this would make closing branches more difficult ... + // I'm(gavin) currently in thinking mode for this. + EK::Match(cnd, arms, _) => { + // This is the generic case and assumes no desugaring. + // For the span we want to pick the END of the matched pattern, + // but we choose the location as the entry to the arm body + // (after all bound variables have been assigned). + let entry_to_spans = arms + .iter() + .filter_map(|arm| { + let id = arm.body.hir_id; + self + .get_node_entry(id) + .map(|entry| (entry, self.span_of(arm.pat.hir_id).shrink_to_hi())) + }) + .collect::>(); + + self.handle_expr_match(expr, cnd, arms, entry_to_spans); + } + _ => { + intravisit::walk_expr(self, expr); + } + } + } + + // NOTE: it's important that arms handle path hinting + fn visit_arm(&mut self, arm: &'tcx hir::Arm) { + if arm.guard.is_some() { + // TODO: NYI.
+ report_unsupported!( + self, + "match arm guards are not yet supported {}", + self.prettify_node(arm.hir_id) + ); + } + + // We use the arm_entry for path hinting, because it's + // closer to the `switchInt`. + if let Some(arm_entry) = self.get_node_entry(arm.hir_id) { + self.push_branch_start(arm_entry); + + // We get the entry of the arm body (or before the arm guard), + // this is where any arm patterns will be initialized and bound. + if let Some(entry) = self.get_node_entry(arm.body.hir_id) { + let span = self.span_of(arm.hir_id).shrink_to_lo(); + invoke_internal!(self, insert, entry, self.get_path_hint(), span); + self.visit_expr(arm.body); + // self.insert_step_at_node_exit(arm.hir_id); + } else { + intravisit::walk_arm(self, arm); + } + + self.pop_branch_start(arm_entry); + } else { + log::warn!( + "match-arm doesn't have entry {}", + self.prettify_node(arm.hir_id) + ); + } + } +} + +#[cfg(test)] +mod tests { + use super::{super::segmented_mir::test_exts::SegmentedMirTestExt, *}; + use crate::{analysis::ir_mapper::GatherMode, test_utils as tu}; + + macro_rules!
compile_and_run { + ($code:expr) => { + tu::compile_normal($code, |tcx| { + tu::for_each_body(tcx, |body_id, wfacts| { + let body = &wfacts.body; + let mapper = IRMapper::new(tcx, body, GatherMode::IgnoreCleanup); + let mut visitor = HirStepPoints::make(&tcx, body, body_id, &mapper) + .expect("Failed to create stepper"); + visitor.visit_nested_body(body_id); + + if let Some(uf) = visitor.get_unsupported_feature() { + eprintln!("unsupported feature: {uf:?}"); + panic!("unsupported feature"); + } + + if let Some(ie) = visitor.get_internal_error() { + eprintln!("internal error: {ie:?}"); + panic!("internal error"); + } + + let smir = visitor + .mir_segments + .freeze() + .expect("Failed to freeze SegmentedMirBuilder"); + + if let Err(invalid) = smir.validate(&mapper) { + eprintln!("invalid reason: {invalid:?}"); + panic!("invalid smir"); + } + }) + }) + }; + } + + // Compile a piece of Rust code and assert that the generated SegmentedMir + // structure is valid. See `is_valid` for more details on what that means. + macro_rules! 
test_valid_segmented_mir { + (panics_with $s:expr => $name:ident, $code:expr) => { + #[test] + #[should_panic(expected = $s)] + fn $name() { + compile_and_run!($code); + } + }; + (should_panic => $name:ident, $code:expr) => { + #[test] + #[should_panic] + fn $name() { + compile_and_run!($code); + } + }; + ($name:ident, $code:expr) => { + #[test] + fn $name() { + compile_and_run!($code); + } + }; + } + + test_valid_segmented_mir!( + linear_stmts, + r#" +fn test() { + let a = String::from(""); + let b = &a; + let c = &&b; + println!("{c}"); + let d = &&&&&&c; + println!("{d} {}", 1 + 1 + 1 + 1 + 1 + 1); +} +"# + ); + + test_valid_segmented_mir!( + branch_simple, + r#" +fn test() { + let s = String::from(""); + + if true { + let b1 = &mut s; + b1.push_str("No!"); + } else { + let b1 = &mut s; + b1.push_str("Never!"); + } + + println!("{s}"); +} +"# + ); + + test_valid_segmented_mir!( + match_simple, + r#" +fn test(n: Option) -> i32 { + match n { + Some(n) => 1, + None => 0, + } +} +"# + ); + + test_valid_segmented_mir!( + match_with_child, + r#" +fn test(n: Option) -> i32 { + match n { + Some(0) => 1, + Some(n) => test(Some(n - 1)) * n, + None => 0, + } +} +"# + ); + + // ----------------------------------- + // Functions taken from weird_exprs.rs + // + // These merely test the resilience of + // the stepper, and none of them have + // been inspected to see if the visual + // output is worth anything. 
+ + test_valid_segmented_mir!( + weird_exprs_strange, + r#" +fn strange() -> bool { let _x: bool = return true; } +"# + ); + + test_valid_segmented_mir!( + weird_exprs_funny, + r#" +fn funny() { + fn f(_x: ()) { } + f(return); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_what, + r#" +use std::cell::Cell; +fn what() { + fn the(x: &Cell) { + return while !x.get() { x.set(true); }; + } + let i = &Cell::new(false); + let dont = {||the(i)}; + dont(); + assert!((i.get())); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_zombiejesus, + r#" +fn zombiejesus() { + loop { + while (return) { + if (return) { + match (return) { + 1 => { + if (return) { + return + } else { + return + } + } + _ => { return } + }; + } else if (return) { + return; + } + } + if (return) { break; } + } +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_notsure, + r#" +use std::mem::swap; +fn notsure() { + let mut _x: isize; + let mut _y = (_x = 0) == (_x = 0); + let mut _z = (_x = 0) < (_x = 0); + let _a = (_x += 0) == (_x = 0); + let _b = swap(&mut _y, &mut _z) == swap(&mut _y, &mut _z); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_canttouchthis, + r#" +fn canttouchthis() -> usize { + fn p() -> bool { true } + let _a = (assert!((true)) == (assert!(p()))); + let _c = (assert!((p())) == ()); + let _b: bool = (println!("{}", 0) == (return 0)); +} +"# + ); + + // XXX: The HIR constructs that turn into NOPs, e.g., the + // `loop { if break {} }` are not present in the + // simplified MIR, which currently causes a few issues. 
+ test_valid_segmented_mir!( + panics_with "invalid smir" => + weird_exprs_angrydome, + r#" +fn angrydome() { + loop { if break { } } + let mut i = 0; + loop { i += 1; if i == 1 { match (continue) { 1 => { }, _ => panic!("wat") } } + break; } +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_evil_lincoln, + r#" +fn evil_lincoln() { let _evil = println!("lincoln"); } +"# + ); + + test_valid_segmented_mir!( + weird_exprs_dots, + r#" +fn dots() { + assert_eq!(String::from(".................................................."), + format!("{:?}", .. .. .. .. .. .. .. .. .. .. .. .. .. + .. .. .. .. .. .. .. .. .. .. .. ..)); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_u8, + r#" +fn u8(u8: u8) { + if u8 != 0u8 { + assert_eq!(8u8, { + macro_rules! u8 { + (u8) => { + mod u8 { + pub fn u8<'u8: 'u8 + 'u8>(u8: &'u8 u8) -> &'u8 u8 { + "u8"; + u8 + } + } + }; + } + + u8!(u8); + let &u8: &u8 = u8::u8(&8u8); + ::u8(0u8); + u8 + }); + } +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_fishy, + r#" +fn fishy() { + assert_eq!(String::from("><>"), + String::<>::from::<>("><>").chars::<>().rev::<>().collect::()); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_union, + r#" +fn union() { + union union<'union> { union: &'union union<'union>, } +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_punch_card, + r#" +fn punch_card() -> impl std::fmt::Debug { + ..=..=.. .. .. .. .. .. .. .. .. .. .. ..=.. .. + ..=.. ..=.. .. .. .. .. .. .. .. .. ..=..=..=.. + ..=.. ..=.. ..=.. ..=.. .. ..=..=.. .. ..=.. .. + ..=..=.. .. ..=.. ..=.. ..=.. .. .. .. ..=.. .. + ..=.. ..=.. ..=.. ..=.. .. ..=.. .. .. ..=.. .. + ..=.. ..=.. ..=.. ..=.. .. .. ..=.. .. ..=.. .. + ..=.. ..=.. .. ..=..=.. ..=..=.. .. .. ..=.. .. 
+} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_rmatch, + r#" + fn r#match() { + let val = match match match match match () { + () => () + } { + () => () + } { + () => () + } { + () => () + } { + () => () + }; + assert_eq!(val, ()); + } + "# + ); + + test_valid_segmented_mir!( + weird_exprs_i_yield, + r#" +fn i_yield() { + static || { + yield yield yield yield yield yield yield yield yield; + }; +} +"# + ); + + // XXX: arm guards are not currently supported. + test_valid_segmented_mir!( + panics_with "unsupported feature" => + weird_exprs_match_nested_if, + r#" +fn match_nested_if() { + let val = match () { + () if if if if true {true} else {false} {true} else {false} {true} else {false} => true, + _ => false, + }; + assert!(val); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_monkey_barrel, + r#" +fn monkey_barrel() { + let val = ()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=()=(); + assert_eq!(val, ()); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_𝚌𝚘𝚗𝚝𝚒𝚗𝚞𝚎, + r#" +fn 𝚌𝚘𝚗𝚝𝚒𝚗𝚞𝚎() { + type 𝚕𝚘𝚘𝚙 = i32; + fn 𝚋𝚛𝚎𝚊𝚔() -> 𝚕𝚘𝚘𝚙 { + let 𝚛𝚎𝚝𝚞𝚛𝚗 = 42; + return 𝚛𝚎𝚝𝚞𝚛𝚗; + } + assert_eq!(loop { + break 𝚋𝚛𝚎𝚊𝚔 (); + }, 42); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_function, + r#" +fn function() { + struct foo; + impl FnOnce<()> for foo { + type Output = foo; + extern "rust-call" fn call_once(self, _args: ()) -> Self::Output { + foo + } + } + let foo = foo () ()() ()()() ()()()() ()()()()(); +} +"# + ); + + // The match will desugar to something with an + // arm guard which are NYI. + test_valid_segmented_mir!( + panics_with "unsupported feature" => + weird_exprs_bathroom_stall, + r#" +fn bathroom_stall() { + let mut i = 1; + matches!(2, _|_|_|_|_|_ if (i+=1) != (i+=1)); + assert_eq!(i, 13); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_closure_matching, + r#" +fn closure_matching() { + let x = |_| Some(1); + let (|x| x) = match x(..) 
{ + |_| Some(2) => |_| Some(3), + |_| _ => unreachable!(), + }; + assert!(matches!(x(..), |_| Some(4))); +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_semisemisemisemisemi, + r#" +fn semisemisemisemisemi() { + ;;;;;;; ;;;;;;; ;;; ;;; ;; + ;; ;; ;;;; ;;;; ;; + ;;;;;;; ;;;;; ;; ;;;; ;; ;; + ;; ;; ;; ;; ;; ;; + ;;;;;;; ;;;;;;; ;; ;; ;; +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_useful_syntax, + r#" +fn useful_syntax() { + use {{std::{{collections::{{HashMap}}}}}}; + use ::{{{{core}, {std}}}}; + use {{::{{core as core2}}}}; +} +"# + ); + + test_valid_segmented_mir!( + weird_exprs_infcx, + r#" +fn infcx() { + pub mod cx { + pub mod cx { + pub use super::cx; + pub struct Cx; + } + } + let _cx: cx::cx::Cx = cx::cx::cx::cx::cx::Cx; +} +"# + ); +} diff --git a/crates/aquascope/src/analysis/stepper/mod.rs b/crates/aquascope/src/analysis/stepper/mod.rs index a7c1eccc8..f1be6929f 100644 --- a/crates/aquascope/src/analysis/stepper/mod.rs +++ b/crates/aquascope/src/analysis/stepper/mod.rs @@ -1,27 +1,32 @@ //! Analysis for the “Missing-at” relations. 
-mod find_steps; -mod segment_tree; +mod hir_steps; +#[allow(clippy::similar_names)] +mod segmented_mir; +mod table_builder; use std::collections::hash_map::Entry; -use anyhow::Result; -pub use find_steps::compute_permission_steps; +use anyhow::{bail, Result}; use fluid_let::fluid_let; -use rustc_data_structures::fx::FxHashMap as HashMap; -use rustc_middle::mir::Place; +use rustc_data_structures::{self, fx::FxHashMap as HashMap}; +use rustc_hir::intravisit::Visitor as HirVisitor; +use rustc_middle::mir::{Location, Place}; +use rustc_span::Span; use rustc_utils::source_map::range::CharRange; use serde::{Deserialize, Serialize}; use ts_rs::TS; use crate::analysis::{ - permissions::{Permissions, PermissionsData, PermissionsDomain}, + permissions::{ + Permissions, PermissionsCtxt, PermissionsData, PermissionsDomain, + }, AquascopeAnalysis, LoanKey, MoveKey, }; fluid_let!(pub static INCLUDE_MODE: PermIncludeMode); -#[derive(Debug, PartialEq, Eq, Clone, Copy, Deserialize, Serialize, Hash)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Hash)] pub enum PermIncludeMode { Changes, All, @@ -61,18 +66,34 @@ pub struct PermissionsLineDisplay { pub state: Vec, } -#[derive(Clone, PartialEq, Eq, Serialize, TS)] -#[serde(tag = "type")] -#[ts(export)] -pub enum ValueStep -where - A: Clone +pub trait Stepable: + Copy + + Clone + + std::fmt::Debug + + std::cmp::PartialEq + + std::cmp::Eq + + std::hash::Hash + + Serialize + + TS +{ +} + +impl Stepable for A where + A: Copy + + Clone + std::fmt::Debug + std::cmp::PartialEq + std::cmp::Eq + + std::hash::Hash + Serialize - + TS, + + TS { +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, Serialize, TS)] +#[serde(tag = "type")] +#[ts(export)] +pub enum ValueStep { High { value: A, }, @@ -83,15 +104,19 @@ where }, } -impl std::fmt::Debug for ValueStep -where - A: Clone - + std::fmt::Debug - + std::cmp::PartialEq - + std::cmp::Eq - + Serialize - + TS, -{ +impl ValueStep { + // TODO: this is a loose surface-level notion 
of symmetry. + fn is_symmetric_diff(&self, rhs: &Self) -> bool { + matches!( + (self, rhs), + (ValueStep::High { .. }, ValueStep::Low { .. }) + | (ValueStep::Low { .. }, ValueStep::High { .. }) + | (ValueStep::None { .. }, ValueStep::None { .. }) + ) + } +} + +impl std::fmt::Debug for ValueStep { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { ValueStep::High { .. } => write!(f, "↑"), @@ -110,7 +135,7 @@ where // the default BoolStep can be taken. macro_rules! make_diff { ($base:ident => $diff:ident { $($i:ident),* }) => { - #[derive(Clone, PartialEq, Eq, Serialize, TS)] + #[derive(Copy, Clone, PartialEq, Eq, Hash, Serialize, TS)] #[ts(export)] pub struct $diff { $( pub $i: ValueStep, )* @@ -134,7 +159,7 @@ impl std::fmt::Debug for PermissionsDiff { } } -#[derive(Clone, Serialize, TS, PartialEq, Eq)] +#[derive(Copy, Clone, Serialize, TS, PartialEq, Eq, Hash)] #[ts(export)] pub struct PermissionsDataDiff { pub is_live: ValueStep, @@ -166,6 +191,15 @@ impl PermissionsDataDiff { fn is_empty(&self) -> bool { self.permissions.is_empty() } + + fn is_symmetric_diff(&self, rhs: &PermissionsDataDiff) -> bool { + let p1 = &self.permissions; + let p2 = &rhs.permissions; + + p1.read.is_symmetric_diff(&p2.read) + && p1.write.is_symmetric_diff(&p2.write) + && p1.drop.is_symmetric_diff(&p2.drop) + } } impl Difference for bool { @@ -181,19 +215,13 @@ impl Difference for bool { } } -impl ValueStep -where - T: Clone + std::fmt::Debug + std::cmp::PartialEq + Eq + Serialize + TS, -{ +impl ValueStep { fn is_empty(&self) -> bool { matches!(self, Self::None { .. 
}) } } -impl Difference for Option -where - A: Clone + PartialEq + Eq + std::fmt::Debug + Serialize + TS, -{ +impl Difference for Option { type Diff = ValueStep; fn diff(&self, rhs: Option) -> Self::Diff { @@ -246,22 +274,91 @@ impl Difference for PermissionsData { impl<'tcx> Difference for &PermissionsDomain<'tcx> { type Diff = HashMap, PermissionsDataDiff>; fn diff(&self, rhs: &PermissionsDomain<'tcx>) -> Self::Diff { - self - .iter() - .fold(HashMap::default(), |mut acc, (place, p1)| { - let p2 = rhs.get(place).unwrap(); - let diff = p1.diff(*p2); - - match acc.entry(*place) { - Entry::Occupied(_) => { - panic!("Permissions step already in output for {place:?}"); - } - Entry::Vacant(entry) => { - entry.insert(diff); - } + let mut diffs = HashMap::default(); + + for (place, p1) in self.iter() { + let p2 = rhs.get(place).unwrap(); + let diff = p1.diff(*p2); + + match diffs.entry(*place) { + Entry::Occupied(_) => { + panic!("Permissions step already in output for {place:?}"); + } + Entry::Vacant(entry) => { + entry.insert(diff); } + } + } + + diffs + } +} - acc - }) +/// Represents a segment of the MIR control-flow graph. +/// +/// A `MirSegment` corresponds directly to locations where a permissions step +/// will be made. However, a segment is also control-flow specific. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct MirSegment { + pub from: Location, + pub to: Location, +} + +impl std::fmt::Debug for MirSegment { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "MirSegment({:?} -> {:?})", self.from, self.to) } } + +impl MirSegment { + pub fn new(l1: Location, l2: Location) -> Self { + MirSegment { from: l1, to: l2 } + } + + /// A _rough_ approximation of the source span of the step. 
+ pub fn span(&self, ctxt: &PermissionsCtxt) -> Span { + let lo = ctxt.location_to_span(self.from); + let hi = ctxt.location_to_span(self.to); + lo.with_hi(hi.hi()) + } + + pub fn into_diff<'tcx>( + self, + ctxt: &PermissionsCtxt<'_, 'tcx>, + ) -> HashMap, PermissionsDataDiff> { + let p0 = ctxt.location_to_point(self.from); + let p1 = ctxt.location_to_point(self.to); + let before = &ctxt.permissions_domain_at_point(p0); + let after = &ctxt.permissions_domain_at_point(p1); + before.diff(after) + } +} + +// ---------- +// Main entry + +pub fn compute_permission_steps<'a, 'tcx>( + analysis: &AquascopeAnalysis<'a, 'tcx>, +) -> Result> +where + 'tcx: 'a, +{ + let mode = INCLUDE_MODE.copied().unwrap_or(PermIncludeMode::Changes); + let ctxt = &analysis.permissions; + let ir_mapper = &analysis.ir_mapper; + let body = &ctxt.body_with_facts.body; + let _basic_blocks = body.basic_blocks.indices(); + let mut hir_visitor = + hir_steps::HirStepPoints::make(&ctxt.tcx, body, ctxt.body_id, ir_mapper)?; + hir_visitor.visit_nested_body(ctxt.body_id); + + if let Some(msg) = hir_visitor.get_unsupported_feature() { + bail!(msg); + } + + if let Some(fatal_error) = hir_visitor.get_internal_error() { + bail!(fatal_error); + } + + hir_visitor.finalize(analysis, mode) +} diff --git a/crates/aquascope/src/analysis/stepper/segment_tree.rs b/crates/aquascope/src/analysis/stepper/segment_tree.rs deleted file mode 100644 index 9ff5d09ab..000000000 --- a/crates/aquascope/src/analysis/stepper/segment_tree.rs +++ /dev/null @@ -1,375 +0,0 @@ -use anyhow::{bail, Result}; -use rustc_data_structures::{ - self, - fx::{FxHashMap as HashMap, FxHashSet as HashSet}, -}; -use rustc_middle::mir::{BasicBlock, BasicBlocks, Local, Location, Place}; -use rustc_span::Span; - -use super::*; -use crate::analysis::{ir_mapper::CleanedBody, permissions::PermissionsCtxt}; - -/// Represents a segment of the MIR control-flow graph. 
-/// -/// A `MirSegment` corresponds directly to locations where a permissions step -/// will be made. However, a segment is also control-flow specific. -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub struct MirSegment { - pub from: Location, - pub to: Location, -} - -/// The types of splits that can be performed on a [`SegmentTre::Single`]. -#[derive(Clone)] -pub enum SplitType { - /// A split of a segment that is not due to control flow. - /// Example, after each `Stmt` a step is created, this is simply - /// a step in a linear sequence. - Linear { - first: Box, - second: Box, - }, - - /// Split of a complex control flow. - /// For example, the `split_segments` of an `ExprKind::If` would be the segments - /// from the if condition, to the start of the then / else blocks. - /// The `join_segments` are all the those that end at the same join point. - /// - /// NOTE: any segment stored in the `splits` of a SplitType::ControlFlow - /// can not be split again, these are *atomic*. - ControlFlow { - splits: Vec, - joins: Vec, - }, -} - -/// A `SegmentTree` represents the control flow graph of a MIR `Body`. -/// It's used to keep track of the entire graph as it is sliced during -/// the permission steps analysis. -#[derive(Clone)] -pub enum SegmentTree { - /// An inner tree node with children. - Split { - segments: SplitType, - reach: MirSegment, - span: Span, - attached: Vec, - }, - - /// A leaf segment that is expected to be split again later. - Single { - segment: MirSegment, - span: Span, - attached: Vec, - }, -} - -/// Search result when trying to find the smallest enclosing segment for a location. -/// -/// NOTE: this is used under the assumption that the location cannot be the -/// ending location of a step (this would result in a zero distance step). 
-#[derive(Clone, Debug)] -pub enum SegmentSearchResult<'a> { - Enclosing(&'a SegmentTree), - StepExists(MirSegment, Span), - NotFound, -} - -// ------------------------------------------------ -// Debugging pretty printers - -impl std::fmt::Debug for MirSegment { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "MirSegment({:?} -> {:?})", self.from, self.to) - } -} - -impl std::fmt::Debug for SegmentTree { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - fn print_loop( - f: &mut std::fmt::Formatter, - tree: &SegmentTree, - spaces: usize, - ) -> std::fmt::Result { - let indent_size = 4; - match tree { - SegmentTree::Single { - segment, - attached, - span, - .. - } => { - writeln!( - f, - "{}SegmentTree::Single: {segment:?} {span:?}", - " ".repeat(spaces) - )?; - writeln!( - f, - "{}-locals attached to end {:?}", - " ".repeat(spaces), - attached, - ) - } - SegmentTree::Split { - segments: SplitType::Linear { first, second }, - reach, - attached, - .. - } => { - writeln!( - f, - "{}SegmentTree::Split [LINEAR]: {reach:?}", - " ".repeat(spaces) - )?; - writeln!( - f, - "{}-locals attached to end {:?}", - " ".repeat(spaces), - attached, - )?; - print_loop(f, first, spaces + indent_size)?; - writeln!(f)?; - print_loop(f, second, spaces + indent_size)?; - writeln!(f)?; - - Ok(()) - } - - SegmentTree::Split { - segments: SplitType::ControlFlow { splits, joins }, - reach, - attached, - .. 
- } => { - writeln!( - f, - "{}SegmentTree::Split [CF]: {reach:?}", - " ".repeat(spaces) - )?; - writeln!( - f, - "{}-locals attached to end {:?}", - " ".repeat(spaces), - attached, - )?; - writeln!(f, "{}Splits:", " ".repeat(spaces))?; - for tree in splits.iter() { - print_loop(f, tree, spaces + indent_size)?; - writeln!(f)?; - } - writeln!(f)?; - - writeln!(f, "{}Joins:", " ".repeat(spaces))?; - for tree in joins.iter() { - print_loop(f, tree, spaces + indent_size)?; - writeln!(f)?; - } - - Ok(()) - } - } - } - - print_loop(f, self, 0) - } -} - -// ------------------------------------------------ -// Impls - -impl MirSegment { - pub fn new(l1: Location, l2: Location) -> Self { - MirSegment { from: l1, to: l2 } - } - - /// Expand the path through the segment to a full set of [`Location`]s. - fn squash_block_path( - &self, - basic_blocks: &BasicBlocks, - path: impl Iterator, - ) -> Vec { - path - .flat_map(|bb| { - let bbd = &basic_blocks[bb]; - let from = if bb == self.from.block { - self.from.statement_index - } else { - 0 - }; - - let to = if bb == self.to.block { - self.to.statement_index - } else { - bbd.statements.len() - }; - - (from ..= to).map(move |idx| Location { - block: bb, - statement_index: idx, - }) - }) - .collect::>() - } - - pub(crate) fn paths_along_segment( - &self, - graph: &CleanedBody, - ) -> Vec> { - graph.paths_from_to(self.from.block, self.to.block) - } - - fn spanned_locations(&self, graph: &CleanedBody) -> HashSet { - let block_paths = self.paths_along_segment(graph); - let body = graph.body(); - block_paths - .into_iter() - .flat_map(|path| { - self.squash_block_path(&body.basic_blocks, path.into_iter()) - }) - .collect::>() - } - - pub fn into_diff<'tcx>( - self, - ctxt: &PermissionsCtxt<'_, 'tcx>, - ) -> HashMap, PermissionsDataDiff> { - let p0 = ctxt.location_to_point(self.from); - let p1 = ctxt.location_to_point(self.to); - let before = &ctxt.permissions_domain_at_point(p0); - let after = &ctxt.permissions_domain_at_point(p1); - 
before.diff(after) - } -} - -impl SegmentTree { - pub fn new(body: MirSegment, span: Span) -> Self { - Self::Single { - segment: body, - span, - attached: vec![], - } - } - - /// Find a [`SegmentTree::Single`] node which matches *exactly* the given segment. - pub fn find_single( - &mut self, - segment: MirSegment, - ) -> Option<&mut SegmentTree> { - let node = &mut *self; - - match node { - SegmentTree::Single { segment: seg, .. } if *seg == segment => Some(node), - SegmentTree::Single { .. } => None, - SegmentTree::Split { - segments: SplitType::ControlFlow { joins, .. }, - .. - } => { - // NOTE: the split set is regarded as atomic so - // it isn't included in the search. - for s in joins.iter_mut() { - let r = s.find_single(segment); - if r.is_some() { - return r; - } - } - - None - } - - SegmentTree::Split { - segments: SplitType::Linear { first, second }, - .. - } => first - .as_mut() - .find_single(segment) - .or_else(|| second.as_mut().find_single(segment)), - } - } - - /// Replace a [`SegmentTree::Single`] node which matches *exactly* the given segment. - /// The subtree must fragment the [`MirSegment`] correctly, otherwise the tree - /// will enter an invalid state. - pub fn replace_single( - &mut self, - to_replace: MirSegment, - subtree: SegmentTree, - ) -> Result<()> { - // TODO better error handling here. - let node = self.find_single(to_replace); - - if node.is_none() { - bail!("the provided mir segment to replace doesn't exist {to_replace:?}"); - } - - let node = node.unwrap(); - - if let SegmentTree::Single { segment, .. } = node { - assert_eq!(to_replace, *segment); - } else { - bail!("SegmentTree::find_single can only return a Single variant. This is an implementation bug"); - } - - *node = subtree; - - Ok(()) - } - - pub(crate) fn subtree_contains( - &self, - location: Location, - graph: &CleanedBody, - ) -> bool { - let segment = match self { - SegmentTree::Split { reach, .. } => reach, - SegmentTree::Single { segment, .. 
} => segment, - }; - let locs = segment.spanned_locations(graph); - locs.contains(&location) - } - - /// Find the /leaf/ [`MirSegment`] and it's corresponding `Span` that enclose - /// `location`. The `location` is expected to be used as the end of step. - pub(crate) fn find_segment_for_end<'a>( - &'a self, - location: Location, - graph: &CleanedBody, - ) -> SegmentSearchResult<'a> { - match self { - SegmentTree::Single { segment, .. } if segment.to != location => { - SegmentSearchResult::Enclosing(self) - } - - SegmentTree::Single { segment, span, .. } => { - SegmentSearchResult::StepExists(*segment, *span) - } - - SegmentTree::Split { - segments: SplitType::Linear { first, second }, - .. - } => { - if first.subtree_contains(location, graph) { - first.find_segment_for_end(location, graph) - } else if second.subtree_contains(location, graph) { - second.find_segment_for_end(location, graph) - } else { - SegmentSearchResult::NotFound - } - } - - SegmentTree::Split { - segments: SplitType::ControlFlow { joins, .. }, - .. - } => - // NOTE: the split locations are atomic and cannot be split. - { - joins - .iter() - .find(|s| s.subtree_contains(location, graph)) - .map_or(SegmentSearchResult::NotFound, |next| { - next.find_segment_for_end(location, graph) - }) - } - } - } -} diff --git a/crates/aquascope/src/analysis/stepper/segmented_mir.rs b/crates/aquascope/src/analysis/stepper/segmented_mir.rs new file mode 100644 index 000000000..8ca7ed178 --- /dev/null +++ b/crates/aquascope/src/analysis/stepper/segmented_mir.rs @@ -0,0 +1,1030 @@ +//! Internal state for managing permissions steps. +//! +//! The `SegmentedMir` aids the stepper in making sure that +//! steps made are always _valid_. In this context a step is defined +//! as a `MirSegment`, a simple struct that contains a `from` and `to` +//! location defining the step. The finished segmented mir is valid if +//! it satisfies the following criteria: +//! +//! 1. All segments are valid (more on this later). +//! 2. 
Segments form a total cover of the body under analysis. +//! 3. No location is included in multiple steps (see exceptions to this below). +//! +//! Segment validity is the main crux of the above definition and this is +//! split into three separate definitions. There exist three different kinds +//! of segments (spiritually, they are the same in the code): +//! +//! - Linear segments: a segment representing a linear piece of control flow. +//! A linear segment has a single point of entry and a single exit. Formally, +//! this is defined as: +//! Given a `MirSegment { from, to }`, it is linear iff: +//! `from` dominates `to` and `to` post-dominates `from` +//! These segments are what we ultimately want. +//! +//! - Split segments: a segment representing the start of conditional control-flow. +//! These segments relax the definition of a linear segment, in that the `to` +//! location *does not* post-dominate `from`. These segments are important when +//! representing control-flow given by a `switchInt`. In brief, a `switchInt` +//! will have multiple jump targets based on its argument, and each one of these +//! targets will be made into a split segment, stepping `from` the `switchInt` +//! and stepping `to` the target location. +//! +//! - Join segments: a segment representing the close of conditional control-flow. +//! These segments are the opposite of split segments, and relax the definition +//! of a linear segment by lifting the requirement that `from` dominates `to`. After +//! control-flow has been split (by say, a `switchInt`) join segments represent the +//! steps needed to unify the control-flow again. +//! +//! Unless specified, the word "segment" or "step" always refers to a linear segment. +//! Whenever the stepper says "insert a step ending at location L", this will _always_ +//! result in a linear step as the other two variants need to be explicitly handled. +//! +//! 
To maintain validity we use a recursive tree that incrementally builds up sequences +//! of linear steps. The tree layout looks (roughly) as follows: +//! +//! ```text +//! type LinearSegment = MirSegment +//! type SplitSegment = MirSegment +//! type JoinSegment = MirSegment +//! +//! data ControlFlow = Linear LinearSegment +//! | Branch +//! { splits :: [SplitSegment] +//! , joins :: [JoinSegment] +//! , nested :: Collection +//! } +//! +//! data Collection = [ControlFlow] +//! ``` +//! +//! To build this tree we manage a set of `CollectionBuilder`s, these +//! store the last `Location` from a step, and only allow inserting a +//! linear step into a collection. The exact process won't be outlined here, +//! but the stepper will open a branch when it encounters an `if` or `match`, +//! this opening will then create a new builder for each branch target. Builders +//! are then destroyed when either (1) it has reached a stopping point as +//! previously specified by the stepper, or (2) the branch that spawned the builder +//! is being closed. +//! +//! There is a little more to the process than this, for example: making sure that +//! branches and segments are created within the natural structure of the MIR and only +//! inserting steps in previously "unstepped" areas. But for those really curious +//! feel free to start at the [`SegmentedMirBuilder::insert`] function and explore +//! from there. + +use anyhow::{anyhow, bail, ensure, Result}; +use rustc_data_structures::{ + frozen::Frozen, + fx::{FxHashMap as HashMap, FxHashSet as HashSet}, + graph::*, + transitive_relation::{TransitiveRelation, TransitiveRelationBuilder}, + unify::{InPlaceUnificationTable, UnifyKey}, +}; +use rustc_index::vec::{Idx, IndexVec}; +use rustc_middle::mir::{BasicBlock, Location}; +use rustc_span::Span; + +use super::MirSegment; +use crate::analysis::ir_mapper::IRMapper; + +// -------------------------- +// Decls sections + +rustc_index::newtype_index! 
{ + pub(super) struct SegmentId {} +} + +rustc_index::newtype_index! { + pub(super) struct BranchId {} +} + +rustc_index::newtype_index! { + /// Collections are groups of segments that nest. + /// E.g., when a branch contains another branch. + /// These are controlled internally. + pub(super) struct CollectionId {} +} + +rustc_index::newtype_index! { + /// Scopes are controlled at the segment-level + /// and controlled by the caller. + pub(super) struct ScopeId {} +} + +rustc_index::newtype_index! { + pub(super) struct TableId {} +} + +impl UnifyKey for TableId { + type Value = (); + + fn index(&self) -> u32 { + self.as_u32() + } + + fn from_index(i: u32) -> Self { + Self::from_u32(i) + } + + fn tag() -> &'static str { + "TableId" + } +} + +lazy_static::lazy_static! { + static ref BASE_SCOPE: ScopeId = ScopeId::new(0); +} + +#[derive(Copy, Clone, Debug)] +#[allow(dead_code)] +enum LengthKind { + Bounded { + /// Entry location for the collection, location + /// must dominate all locations contained within the collection. + root: Location, + /// Exit location where control flow must leave, it must + /// post-dominate all locations contained within the collection. + phi: Location, + }, + Unbounded { + /// Entry location for the collection, no exit location (phi) is known. + root: Location, + }, +} + +#[derive(Debug)] +pub(super) struct SegmentData { + pub(super) segment: MirSegment, + pub(super) span: Span, + pub(super) scope: ScopeId, +} + +#[derive(Debug)] +pub(super) struct BranchData { + table_id: TableId, + pub(super) reach: MirSegment, + + /// Split segments, `from` dominates `to` but `to` does not post-dominate `from`. + pub(super) splits: Vec, + + // NOTE: join segments aren't currently used for anything. Previously we + // had lots of complex logic dictating when the join steps should be + // included but through lots of testing it seemed that the visual results + // we wanted _never_ used the join steps. 
We still keep them around in + // case a counterexample to that is found, or until I(gavinleroy) can + // come up with a sufficient formal reason why we don't need them. + // See the documentation in `table_builder` for more details. + /// Join segments, `to` post-dominates `from` but `from` does not dominate `to`. + #[allow(dead_code)] + pub(super) joins: Vec, + + pub(super) nested: Vec, +} + +#[derive(Copy, Clone, Debug)] +pub(super) enum CFKind { + Linear(SegmentId), + Branch(BranchId), +} + +#[derive(Debug)] +pub(super) struct Collection { + pub(super) data: Vec, + kind: LengthKind, +} + +#[derive(Copy, Clone, Debug)] +struct CollectionBuilder { + collection: CollectionId, + current_location: Location, +} + +#[derive(Copy, Clone, Debug)] +struct BuilderIdx(usize); + +#[derive(Copy, Clone)] +enum FindResult { + None, + NonLinear(BranchId, Location), + Linear(BuilderIdx), +} + +#[derive(Debug, Default)] +struct OpenCollections(Vec); + +type BranchSpannerMap<'a> = + HashMap Span + 'a>>; + +pub(super) struct SegmentedMirBuilder<'a, 'tcx: 'a> { + mapper: &'a IRMapper<'a, 'tcx>, + first_collection: CollectionId, + root_mappings: BranchSpannerMap<'a>, + collections: IndexVec, + branches: IndexVec, + segments: IndexVec, + processing: OpenCollections, + branch_roots: InPlaceUnificationTable, + scope_graph: TransitiveRelationBuilder, + open_scopes: Vec, + next_scope: ScopeId, +} + +pub(super) struct SegmentedMir { + pub(super) first_collection: CollectionId, + collections: Frozen>, + branches: Frozen>, + segments: Frozen>, + scopes: TransitiveRelation, +} + +// -------------------------- +// Impl sections + +impl BranchData { + pub fn new(tid: TableId, root: Location, phi: Option) -> Self { + let to = phi.unwrap_or(root); + BranchData { + table_id: tid, + reach: MirSegment::new(root, to), + splits: Vec::default(), + joins: Vec::default(), + nested: Vec::default(), + } + } +} + +#[allow(dead_code)] +impl OpenCollections { + pub fn push(&mut self, c: 
CollectionBuilder) { + self.0.push(c) + } + + pub fn iter(&self) -> impl Iterator + '_ { + // Open collections are pushed on the end, but we want to search + // in the most recently pushed by reverse the Vec::iter + self.0.iter().rev() + } + + pub fn enumerate( + &self, + ) -> impl Iterator + '_ { + // Open collections are pushed on the end, but we want to search + // in the most recently pushed by reverse the Vec::iter + self + .0 + .iter() + .enumerate() + .map(|(i, o)| (BuilderIdx(i), o)) + .rev() + } + + pub fn iter_mut( + &mut self, + ) -> impl Iterator + '_ { + // Open collections are pushed on the end, but we want to search + // in the most recently pushed, thus using reversing. + self.0.iter_mut().rev() + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn drain_collections<'a, 'this: 'a>( + &'this mut self, + cids: &'a HashSet, + ) -> impl Iterator + 'a { + self.0.drain_filter(|cb| cids.contains(&cb.collection)) + } + + pub fn get(&self, i: BuilderIdx) -> &CollectionBuilder { + &self.0[i.0] + } + + pub fn get_mut(&mut self, i: BuilderIdx) -> &mut CollectionBuilder { + &mut self.0[i.0] + } + + pub fn clear(&mut self) { + self.0.clear() + } +} + +impl std::fmt::Debug for SegmentedMirBuilder<'_, '_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "#") + } +} + +impl SegmentedMir { + pub(super) fn segments(&self) -> impl Iterator + '_ { + self.segments.iter().map(|sd| sd.segment) + } + + pub fn get_branch_scope(&self, bid: BranchId) -> ScopeId { + let branch = self.get_branch(bid); + let sid = branch.splits[0]; + let segment = self.get_segment(sid); + segment.scope + } + + pub fn get_collection(&self, cid: CollectionId) -> &Collection { + &self.collections[cid] + } + + pub fn get_segment(&self, sid: SegmentId) -> &SegmentData { + &self.segments[sid] + } + + pub fn get_branch(&self, bid: BranchId) -> &BranchData { + &self.branches[bid] + } + + /// 
Returns all ancestor scopes excluding `scope`. + pub fn parent_scopes( + &self, + scope: ScopeId, + ) -> impl Iterator + '_ { + self.scopes.reachable_from(scope).into_iter() + } +} + +enum GetSpanner<'a> { + GetFrom(BranchId), + InsertNew(Box Span + 'a>), +} + +impl<'a, 'tcx: 'a> SegmentedMirBuilder<'a, 'tcx> { + pub fn make(mapper: &'a IRMapper<'a, 'tcx>) -> Self { + let from = mapper.cleaned_graph.start_node().start_location(); + + let mut collections = IndexVec::new(); + + // We start with an empty linear collection. + // XXX: we could also try to find the exit location for the + // entire body but having this information isn't useful + // for the end of the body. Phi nodes are used to make + // sure we don't accidentally jump past the end of a + // join but with the return there isn't anything after. + let first_collection = collections.push(Collection { + data: Vec::default(), + kind: LengthKind::Unbounded { root: from }, + }); + + let mut this = Self { + first_collection, + mapper, + root_mappings: HashMap::default(), + collections, + branches: IndexVec::default(), + segments: IndexVec::default(), + processing: OpenCollections::default(), + branch_roots: InPlaceUnificationTable::default(), + scope_graph: TransitiveRelationBuilder::default(), + // NOTE: this maintains that there is always + // an open scope that the visitor cannot close. 
+ open_scopes: vec![*BASE_SCOPE], + next_scope: BASE_SCOPE.plus(1), + }; + + this.processing.push(CollectionBuilder { + collection: first_collection, + current_location: mapper.cleaned_graph.start_node().start_location(), + }); + + this + } + + fn finish_first_collection(&mut self) -> Result<()> { + ensure!(self.processing.len() == 1, "More than one collection open"); + self.processing.clear(); + Ok(()) + } + + pub fn freeze(mut self) -> Result { + self.finish_first_collection()?; + + Ok(SegmentedMir { + first_collection: self.first_collection, + segments: Frozen::freeze(self.segments), + branches: Frozen::freeze(self.branches), + collections: Frozen::freeze(self.collections), + scopes: self.scope_graph.freeze(), + }) + } + + fn next_scope(&mut self) -> ScopeId { + let next = self.next_scope; + // The scope graph is used to find _parent scopes_. + self.scope_graph.add(next, self.current_scope()); + self.next_scope.increment_by(1); + next + } + + // ------------------------------------------------ + // Scope operations + // + // NOTE: scopes are controlled by the HIR Visitor + // so we don't need to sanitize them at all. + // They return Results to match the interface + // of everything else though. + + // NOTE: After starting a body analysis this should never be None. + fn current_scope(&self) -> ScopeId { + *self.open_scopes.last().unwrap() + } + + pub fn open_scope(&mut self) -> Result { + let next_scope = self.next_scope(); + self.open_scopes.push(next_scope); + Ok(next_scope) + } + + pub fn close_scope(&mut self, idx: ScopeId) -> Result<()> { + ensure!(idx != *BASE_SCOPE, "cannot close base scope"); + + let last_open = self.open_scopes.last().ok_or(anyhow!("no open scopes"))?; + + ensure!( + *last_open == idx, + "closing wrong scope expected: {last_open:?} given: {idx:?}" + ); + + self.open_scopes.pop(); + Ok(()) + } + + // ----------------- + // Branch operations + + /// Finds the basic block that is the last post-dominator of the successors of `root`. 
+ fn least_post_dominator(&self, root: BasicBlock) -> Option { + log::debug!("Finding the least post-dominator for root {root:?}"); + let mapper = &self.mapper; + + // Find all basic blocks that are reachable from the root. + let reachable = mapper + .cleaned_graph + .depth_first_search(root) + .filter(|&to| mapper.dominates(root, to)) + .collect::>(); + + // Find the blocks that is the _most_ post-dominating, + // this is a point that must post-dominate everything else. + let most_post_dominating = reachable + .iter() + .find(|&can| reachable.iter().all(|&n| mapper.post_dominates(*can, n)))?; + + // If a block dominates the "most post-dominator" that means that this + // block also post-dominates all branches that occur after the root. + // We exclude the (1) root itself, and (2) any false edges. False edges + // are common in loop lowering but the borrowck semantics indicate that + // we should consider points *after* the false edges as having left the branches. + let candidate_leasts = reachable + .iter() + .filter(|&can| { + *can != root + && !mapper.cleaned_graph.is_false_edge(*can) + && mapper.dominates(*can, *most_post_dominating) + }) + .collect::>(); + + // The least post-dominator dominates all the other post-dominators. + candidate_leasts + .iter() + .find(|&can| { + candidate_leasts + .iter() + .all(|&n| mapper.dominates(**can, *n)) + }) + .copied() + .copied() + } + + fn mk_branch( + &mut self, + location: Location, + get_span: GetSpanner<'a>, + ) -> Result { + let mapper = &self.mapper; + let scope = self.current_scope(); + + // The convergence of all branching paths. 
+ let phi_opt = self + .least_post_dominator(location.block) + .map(|bb| bb.start_location()); + + log::debug!("Chosen least-post-dominator: {phi_opt:?}"); + + let builder_opt = self + .processing + .iter_mut() + .find(|cb| mapper.ldominates(cb.current_location, location)); + + let Some(builder) = builder_opt else { + bail!("no open collection dominates root location {location:?}"); + }; + + ensure!( + builder.current_location == location, + "opening a branch missed a step, expected {:?} given: {:?}", + builder.current_location, + location + ); + + // Make a new branch + let tid = self.branch_roots.new_key(()); + let bid = self.branches.push(BranchData::new(tid, location, phi_opt)); + let branch = &mut self.branches[bid]; + + // Save the Location -> Span mappings under this root BranchId. + let get_span = match get_span { + GetSpanner::InsertNew(b) => { + self.root_mappings.insert(bid, b); + &self.root_mappings[&bid] + } + GetSpanner::GetFrom(bid) => &self.root_mappings[&bid], + }; + + // Push the new Branch as a control flow kind on + // the current collection's data set. + self.collections[builder.collection] + .data + .push(CFKind::Branch(bid)); + + let length_kind = if let Some(phi) = phi_opt { + builder.current_location = phi; + LengthKind::Bounded { + root: location, + phi, + } + } else { + // TODO: how should we update the collection if there + // isn't a phi? My current feeling is that we should + // just close the collection. + LengthKind::Unbounded { root: location } + }; + + // For each of the target BasicBlocks of the switchInt: + for sblock in mapper.cleaned_graph.successors(location.block) { + // 1. insert the split segment into the branch + let mut to = sblock.start_location(); + let span = get_span(&mut to); + let sid = self.segments.push(SegmentData { + segment: MirSegment::new(location, to), + span, + scope, + }); + branch.splits.push(sid); + + // 2. Open a new Collection with its starting + // location at the branch target location. 
+ let cid = self.collections.push(Collection { + data: Vec::default(), + kind: length_kind, + }); + + // 3. Store this new collection in the branch middle section. + branch.nested.push(cid); + + // 4. Put a new collection builder on the open collection stack. + self.processing.push(CollectionBuilder { + collection: cid, + current_location: to, + }); + } + + Ok(bid) + } + + /// Opens a branch of control flow rooted at `location`. + /// + /// The function implicitly adds a new segment for all split steps + /// and `get_span` should return the associated Span for these split steps. + pub fn open_branch( + &mut self, + location: Location, + get_span: impl Fn(&mut Location) -> Span + 'a, + ) -> Result { + log::debug!("opening user initiated branch at {location:?}"); + log::debug!("open branches BEFORE {:#?}", self.processing); + let r = self.mk_branch(location, GetSpanner::InsertNew(Box::new(get_span))); + log::debug!("open branches AFTER {:#?}", self.processing); + r + } + + fn open_child_branch( + &mut self, + parent: BranchId, + root: Location, + ) -> Result<()> { + log::debug!("opening implicit branch at {root:?}"); + let child = self.mk_branch(root, GetSpanner::GetFrom(parent))?; + let parent_tid = self.branches[parent].table_id; + let child_tid = self.branches[child].table_id; + self.branch_roots.union(parent_tid, child_tid); + Ok(()) + } + + /// Closes a branch of control flow with an origin root of `location`. + /// + /// Contrary to previous implementations, the function does not implicitly + /// add a new segment for all split steps. 
+ pub fn close_branch(&mut self, bid: BranchId) -> Result<()> { + let table_root = self.branches[bid].table_id; + + let branches_to_close = self + .branches + .iter_enumerated() + .filter_map(|(bid, bd)| { + (table_root == self.branch_roots.find(bd.table_id)).then_some(bid) + }) + .collect::>(); + + for bid in branches_to_close.into_iter() { + let branch = &mut self.branches[bid]; + + let nested_collections = + branch.nested.iter().copied().collect::>(); + + let closed_builders = + self.processing.drain_collections(&nested_collections); + + log::debug!( + "closing builders: {:#?}", + closed_builders.collect::>() + ); + } + + log::debug!("State after closing branches {:#?}", self.processing); + + Ok(()) + } + + fn find_containing_branch(&self, cid: CollectionId) -> Option { + self + .branches + .iter_enumerated() + .find_map(|(bid, branch)| branch.nested.contains(&cid).then_some(bid)) + } + + /// Search through the list of open builders and return the one that can + /// be used to insert a new step ending at `location`. + fn find_suitable_collection(&mut self, location: Location) -> FindResult { + let mapper = &self.mapper; + + // We can insert into a collection where the last location + // dominates the new location to insert. + let builder_opt = self.processing.enumerate().find_map(|(i, cb)| { + log::debug!("Trying to find open collection: {cb:?}"); + mapper + .ldominates(cb.current_location, location) + .then_some((i, cb)) + }); + + // No collection found + let Some((builder_i, builder)) = builder_opt else { + return FindResult::None; + }; + + // Return the found builder to create a new linear step. + if mapper.lpost_dominates(location, builder.current_location) { + log::debug!( + "location post-dominates builder: {location:?} {:?} {:?}", + builder.current_location, + builder_i + ); + return FindResult::Linear(builder_i); + } + + // Fallback case for when we want to open an implicit branch. 
However, + // if there doesn't exist a parent branch, this is just an internal error. + match self.find_containing_branch(builder.collection) { + None => { + log::error!("couldn't find branch containing {:?}", builder.collection); + FindResult::None + } + Some(bid) => FindResult::NonLinear(bid, builder.current_location), + } + } + + // ---------- + // Insertions + + /// Insert a step ending at the given `Location`. + /// + /// It's the `SegmentedMir`'s job to find out where the step came from, + /// in the case of ambiguity the given path hint can be used, this + /// proves most useful when an implicit branch child needs to be spawned. + /// See the doc comment for further details. + pub fn insert( + &mut self, + location: Location, + path_hint: Option, + span: Span, + ) -> Result<()> { + log::debug!( + "starting insertion with hint {path_hint:?} at {location:?} \ninto: {:?}", + self.processing + ); + + match self.find_suitable_collection(location) { + // BAD case, no dominating locations where we can insert. + // + // XXX: returning an internal error here is too limiting. It seems + // that if control-flow constructs are (mis)-used, then the MIR + // is already more simplified than we would expect. This approach + // silently ignores these insertions, but we leave a log warning + // to help debugging if something bad happens. + // + // This was changed from an Error with the introduction + // of the weird expr test cases. Making this change has not + // knowingly made previously failing test cases pass, nor has it + // affected the steps produced by the test suite. + FindResult::None => { + log::warn!( + "no suitable collection for location {location:?} {:#?}", + self.processing + ); + + Ok(()) + } + + // RARE case: spawn a new child branch and retry the insert. + // These automatic branches are used to handle match expressions + // that compile to a series of `switchInt`s. 
+ FindResult::NonLinear(parent, branch_loc) => { + self.open_child_branch(parent, branch_loc)?; + self.insert(location, path_hint, span) + } + + // COMMON case: we can insert a linear segment into the found builder. + FindResult::Linear(builder_idx) => { + let scope = self.current_scope(); + let builder = self.processing.get_mut(builder_idx); + let collection = &mut self.collections[builder.collection]; + + let mut insert_to = |to| { + let segment = MirSegment::new(builder.current_location, to); + let segment_data = SegmentData { + segment, + span, + scope, + }; + log::debug!( + "Inserting {segment:?} into builder {builder:?} {builder_idx:?}" + ); + + let segid = self.segments.push(segment_data); + collection.data.push(CFKind::Linear(segid)); + builder.current_location = to; + }; + + match collection.kind { + // If the step attempts to go past its previously computed bound + // we will cut it short. I(gavinleroy) haven't yet seen this happen, + // but in theory it's possible and is bad because it bypasses the + // branching mechanisms. + LengthKind::Bounded { phi, .. 
} + if self.mapper.ldominates(phi, location) => + { + log::error!( + "Linear insert is stepping past the join point {location:?} {phi:?}" + ); + + insert_to(phi) + } + + _ => insert_to(location), + } + + Ok(()) + } + } + } +} + +#[cfg(test)] +pub(crate) mod test_exts { + use rustc_data_structures::{ + captures::Captures, graph::iterate::post_order_from_to, + }; + use rustc_middle::mir::BasicBlockData; + + use super::*; + + pub trait SegmentedMirTestExt { + fn validate(&self, mapper: &IRMapper) -> Result<(), InvalidReason>; + } + + #[derive(Debug)] + pub enum InvalidReason { + MissingLocations { + missing: Vec, + }, + // DuplicateLocation { + // at: Location, + // }, + InvalidSegment { + segment: MirSegment, + kind: BadSegmentKind, + }, + } + + #[derive(Debug)] + #[allow(clippy::enum_variant_names)] + pub enum BadSegmentKind { + SplitNoDom, + JoinNoPostDom, + LinearNoDom, + LinearNoPostDom, + } + + fn explode_block<'a, 'tcx: 'a>( + bb: BasicBlock, + block: &'a BasicBlockData<'tcx>, + from: Option, + to: Option, + ) -> impl Iterator + Captures<'tcx> + 'a { + // End is an inclusive index. + let start = from.unwrap_or(0); + let end = to.unwrap_or(block.statements.len()); + (start ..= end).map(move |i| Location { + block: bb, + statement_index: i, + }) + } + + impl MirSegment { + fn explode<'a, 'tcx: 'a>( + self, + mapper: &'a IRMapper<'a, 'tcx>, + ) -> impl Iterator + Captures<'tcx> + 'a { + let sb = self.from.block; + let eb = self.to.block; + let graph = &mapper.cleaned_graph; + let mut block_path = post_order_from_to(graph, sb, Some(eb)); + // The target block is never added in the post-order. 
+ block_path.push(eb); + + block_path.into_iter().flat_map(move |bb| { + let body = &mapper.cleaned_graph.body(); + let from = (bb == sb).then_some(self.from.statement_index); + let to = (bb == eb).then_some(self.to.statement_index); + explode_block(bb, &body.basic_blocks[bb], from, to) + }) + } + } + + impl SegmentedMir { + fn is_valid_collection( + &self, + cid: CollectionId, + ssf: &mut HashSet, + mapper: &IRMapper, + ) -> Result<(), InvalidReason> { + let collection = self.get_collection(cid); + for kind in collection.data.iter() { + match kind { + CFKind::Linear(sid) => self.is_valid_segment(*sid, ssf, mapper)?, + CFKind::Branch(bid) => self.is_valid_branch(*bid, ssf, mapper)?, + } + } + + Ok(()) + } + + fn is_valid_split_segment( + &self, + sid: SegmentId, + ssf: &mut HashSet, + mapper: &IRMapper, + ) -> Result<(), InvalidReason> { + let SegmentData { segment: s, .. } = self.get_segment(sid); + + if !mapper.ldominates(s.from, s.to) { + return Err(InvalidReason::InvalidSegment { + segment: *s, + kind: BadSegmentKind::SplitNoDom, + }); + } + + for at in s.explode(mapper) { + ssf.insert(at); + } + + Ok(()) + } + + fn is_valid_join_segment( + &self, + sid: SegmentId, + ssf: &mut HashSet, + mapper: &IRMapper, + ) -> Result<(), InvalidReason> { + let SegmentData { segment: s, .. } = self.get_segment(sid); + + if !mapper.lpost_dominates(s.to, s.from) { + return Err(InvalidReason::InvalidSegment { + segment: *s, + kind: BadSegmentKind::JoinNoPostDom, + }); + } + + for at in s.explode(mapper) { + ssf.insert(at); + } + + Ok(()) + } + + fn is_valid_segment( + &self, + sid: SegmentId, + ssf: &mut HashSet, + mapper: &IRMapper, + ) -> Result<(), InvalidReason> { + let SegmentData { segment: s, .. 
} = self.get_segment(sid); + if !mapper.ldominates(s.from, s.to) { + return Err(InvalidReason::InvalidSegment { + segment: *s, + kind: BadSegmentKind::LinearNoDom, + }); + } + + if !mapper.lpost_dominates(s.to, s.from) { + return Err(InvalidReason::InvalidSegment { + segment: *s, + kind: BadSegmentKind::LinearNoPostDom, + }); + } + + for at in s.explode(mapper) { + ssf.insert(at); + } + + Ok(()) + } + + fn is_valid_branch( + &self, + bid: BranchId, + ssf: &mut HashSet, + mapper: &IRMapper, + ) -> Result<(), InvalidReason> { + let branch = self.get_branch(bid); + + for &sid in branch.splits.iter() { + self.is_valid_split_segment(sid, ssf, mapper)?; + } + + for &sid in branch.joins.iter() { + self.is_valid_join_segment(sid, ssf, mapper)?; + } + + for &cid in branch.nested.iter() { + self.is_valid_collection(cid, ssf, mapper)?; + } + + Ok(()) + } + } + + impl SegmentedMirTestExt for SegmentedMir { + /// See the module documentation for a sense of what valid means. Here + /// the below three basic things are checked. In the future, these guarantees + /// will hopefully only ever get stronger, and never weaker. + /// + /// 1. All segments are valid regarding where they appear in the collection. + /// 2. The segments form a total cover of the body. + /// 3. At each branch location (`switchInt`) there must exist a split segment + /// for each possible branch target. 
+ fn validate(&self, mapper: &IRMapper) -> Result<(), InvalidReason> { + let body = &mapper.cleaned_graph.body(); + let seen_so_far = &mut HashSet::default(); + + let all_locations = mapper + .cleaned_graph + .blocks() + .flat_map(|block| { + explode_block(block, &body.basic_blocks[block], None, None) + }) + .collect::>(); + + self.is_valid_collection(self.first_collection, seen_so_far, mapper)?; + let missing = all_locations + .difference(&*seen_so_far) + .copied() + .collect::>(); + if missing.is_empty() { + Ok(()) + } else { + Err(InvalidReason::MissingLocations { missing }) + } + } + } +} diff --git a/crates/aquascope/src/analysis/stepper/table_builder.rs b/crates/aquascope/src/analysis/stepper/table_builder.rs new file mode 100644 index 000000000..5625d18b3 --- /dev/null +++ b/crates/aquascope/src/analysis/stepper/table_builder.rs @@ -0,0 +1,403 @@ +//! Convert permissions steps into tables viewable by the frontend. + +use rustc_data_structures::{ + self, + fx::{FxHashMap as HashMap, FxHashSet as HashSet}, +}; +use rustc_middle::mir::{Local, Location, Place}; +use rustc_span::Span; +use rustc_utils::{test_utils::DUMMY_CHAR_RANGE, PlaceExt, SpanExt}; + +use super::{segmented_mir::*, *}; +use crate::{analysis::permissions::PermissionsCtxt, errors}; + +/// A single unprocessed table, mapping Places to their differences for a MirSegment. +#[derive(Debug)] +pub(super) struct Table<'tcx> { + span: Span, + segment: MirSegment, + data: HashMap, PermissionsDataDiff>, +} + +/// A series of tables, identified by the _ending location_ of the step. +/// +/// Except in branches, ending locations should only contain a +/// single table. These tables are currently collapsed into a single +/// larger table and shown per-line, though, this restriction could +/// be relaxed in the future. +/// +/// See [`prettify_permission_steps`] for how tables get merged. 
+pub(super) type Tables<'tcx> = HashMap>>; + +pub(super) struct TableBuilder<'a, 'tcx: 'a> { + pub(super) analysis: &'a AquascopeAnalysis<'a, 'tcx>, + pub(super) ctxt: &'a PermissionsCtxt<'a, 'tcx>, + pub(super) mir: &'a SegmentedMir, + pub(super) locals_at_scope: HashMap>, +} + +#[allow(clippy::similar_names)] +impl<'a, 'tcx: 'a> TableBuilder<'a, 'tcx> { + pub(super) fn finalize_body( + &self, + start_loc: Location, + body_span: Span, + mode: PermIncludeMode, + ) -> Vec { + let first_point = self.ctxt.location_to_point(start_loc); + let first_domain = &self.ctxt.permissions_domain_at_point(first_point); + let empty_domain = &self.ctxt.domain_bottom(); + let body_open_brace = body_span.shrink_to_lo(); + + // Upon entry, the function parameters are already "live". But we want to + // special case this, and show that they "come alive" at the opening brace. + let first_diff = empty_domain.diff(first_domain); + + // Insert a segment into a table filtering defined places. + let mut diffs = Tables::default(); + + // We do an unchecked insert here to avoid + // the segment from getting filtered because the + // segment from and to locations are equal. 
+ let seg = MirSegment::new(start_loc, start_loc); + diffs.entry(seg.to).or_default().push(Table { + segment: seg, + span: body_open_brace, + data: first_diff, + }); + self.insert_collection(&mut diffs, self.mir.first_collection); + + prettify_permission_steps(self.analysis, diffs, mode) + } + + fn locals_to_filter(&self, scope: ScopeId) -> HashSet { + self + .mir + .parent_scopes(scope) + .filter_map(|sid| self.locals_at_scope.get(&sid)) + .flatten() + .copied() + .collect::>() + } + + fn insert_collection(&self, result: &mut Tables<'tcx>, cid: CollectionId) { + let collection = self.mir.get_collection(cid); + + for &part in collection.data.iter() { + match part { + CFKind::Linear(seg_id) => self.insert_segment(result, seg_id), + CFKind::Branch(branch_id) => self.insert_branch(result, branch_id), + } + } + } + + fn insert_segment(&self, result: &mut Tables<'tcx>, sid: SegmentId) { + let ctxt = &self.ctxt; + let &SegmentData { + segment, + span, + scope, + } = self.mir.get_segment(sid); + + let to_filter = self.locals_to_filter(scope); + + if segment.from == segment.to { + return; + } + + let p0 = ctxt.location_to_point(segment.from); + let p1 = ctxt.location_to_point(segment.to); + let before = &ctxt.permissions_domain_at_point(p0); + let after = &ctxt.permissions_domain_at_point(p1); + let mut diff = before.diff(after); + + let removed = diff + .drain_filter(|place, _| to_filter.contains(&place.local)) + .collect::>(); + + if !removed.is_empty() { + log::debug!( + "removed domain places due to attached filter at {:?} {:?}", + segment.to, + removed + ); + } + + let table = Table { + segment, + span, + data: diff, + }; + + log::info!("saving segment diff {segment:?}"); + result.entry(segment.to).or_default().push(table); + } + + // NOTE: when inserting a branch we currently ignore join steps. Within the + // function the previous code is left commented out. 
It was left in case + // we need to quickly bring it back, but through testing I found + // it was a lot of complex logic that removed all join steps, every time. + // Therefore, to save time, we just ignore them! We did this filtering + // to remove any weird permissions changes that were branch sensitive in + // order to avoid showing the same change in permissions multiple times. + // Should we decide to change this then this code will become relevant again. + fn insert_branch(&self, result: &mut Tables<'tcx>, bid: BranchId) { + let BranchData { + reach, + splits, + // joins, + nested, + .. + } = self.mir.get_branch(bid); + + let mut entire_diff = reach.into_diff(self.ctxt); + + log::debug!( + "Inserting Branched Collection {:?}:\n\tsplits: {:?}\n\tmiddle: {:?}", + reach, + splits, + nested + ); + + let mut temp_middle = Tables::default(); + // let mut temp_joins = Tables::default(); + + for &sid in splits.iter() { + self.insert_segment(&mut temp_middle, sid); + } + + for &cid in nested.iter() { + self.insert_collection(&mut temp_middle, cid); + } + + // for &sid in joins.iter() { + // self.insert_segment(&mut temp_joins, sid); + // } + + // Find the locals which were filtered from all scopes. In theory, + // `all_scopes` should contain the same scope, copied over, + // but the SegmentedMir doesn't enforce this and there's no + // scope attached to collections. + let scope_here = self.mir.get_branch_scope(bid); + let all_attached = self + .locals_at_scope + .get(&scope_here) + .map(|v| v.iter()) + .unwrap_or_default() + .collect::>(); + + let attached_here = entire_diff + .drain_filter(|place: &Place, _| all_attached.contains(&place.local)) + .collect::>(); + + // let diffs_in_tables = |tbls: &Tables| { + // tbls + // .iter() + // .flat_map(|(_, v)| v.iter().flat_map(|tbl| tbl.data.values())) + // .copied() + // .collect::>() + // }; + + // Flatten all tables to the unique `PermissionsDataDiff`s + // that exist within them.
+ + // let diffs_in_branches = diffs_in_tables(&mut temp_middle); + // for (_, v) in temp_joins.iter_mut() { + // for tbl in v.iter_mut() { + // let drained = tbl + // .data + // .drain_filter(|_, diff| diffs_in_branches.contains(diff)) + // .map(|(p, _)| p) + // .collect::>(); + // log::debug!("diffs at join loc removed for redundancy {drained:#?}"); + // } + // } + + result.extend(temp_middle); + // result.extend(temp_joins); + + // Attach filtered locals + result.entry(reach.to).or_default().push(Table { + span: reach.span(self.ctxt), + segment: *reach, + data: attached_here, + }); + } +} + +// Prettify, means: +// - Remove all places that are not source visible +// - Remove all tables which are empty +// - Convert Spans to Ranges +#[allow(clippy::if_not_else)] +pub(super) fn prettify_permission_steps<'tcx>( + analysis: &AquascopeAnalysis<'_, 'tcx>, + perm_steps: Tables<'tcx>, + mode: PermIncludeMode, +) -> Vec { + let ctxt = &analysis.permissions; + let tcx = ctxt.tcx; + let body = &ctxt.body_with_facts.body; + + let should_keep = |p: &PermissionsDataDiff| -> bool { + !(matches!(p.is_live, ValueStep::None { value: Some(false) }) + || (mode == PermIncludeMode::Changes && p.is_empty())) + }; + + macro_rules! place_to_string { + ($p:expr) => { + $p.to_string(tcx, body) + .unwrap_or_else(|| String::from("")) + }; + } + + let first_error_span_opt = + errors::get_span_of_first_error(ctxt.def_id.expect_local()) + .and_then(|s| s.as_local(ctxt.body_with_facts.body.span)); + let source_map = tcx.sess.source_map(); + + let mut semi_filtered = HashMap::< + usize, + Vec<(MirSegment, Span, Vec<(Place<'tcx>, PermissionsDataDiff)>)>, + >::default(); + + // Goal: filter out differences for Places that + // aren't source-visible. As well as those that come + // after the first error span. + // Group these intermediate tables by line numbers to make + // collapsing them easier. 
+ for (_, v) in perm_steps.into_iter() { + for Table { + segment, + span, + data, + } in v.into_iter() + { + // Attach the span to the end of the line. Later, all permission + // steps appearing on the same line will be combined. + let span = source_map.span_extend_to_line(span).shrink_to_hi(); + let entries = data + .into_iter() + .filter(|(place, diff)| { + place.is_source_visible(tcx, body) && should_keep(diff) + }) + .collect::>(); + + // This could be a little more graceful. The idea is that + // we want to remove all permission steps which occur after + // the first error, but the steps involved with the first + // error could still be helpful. This is why we filter all + // spans with a LO BytePos greater than the error + // span HI BytePos. + if !(entries.is_empty() + || first_error_span_opt + .is_some_and(|err_span| err_span.hi() < span.lo())) + { + // We'll store things by line number + let line_num = source_map.lookup_line(span.hi()).unwrap().line; + semi_filtered + .entry(line_num) + .or_default() + .push((segment, span, entries)); + } else { + log::debug!( + "segment diff at {segment:?} was empty or follows an error" + ); + } + } + } + + // NOTE: we're at odds with the multi-table setup. This quick + // hack combines table entries into a single table until the + // visual explanation gets up-to-speed. + // Another weird thing about this is that you can have a single + // table with two changes for one place. + // ```example + // # fn main() { + // let closure = |s: &str| s.len(); // s: +R+O + // // s: -R-O + // // closure: +R+O + // # } + // ``` + // imagine that the comments to the right of the Let represent + // a pseudo combined table. The path `s` gains and loses the same + // set of permissions in the same table. This is kind of weird, we'd + // rather just show *no change*. + + semi_filtered + .into_iter() + .filter_map(|(line, entries)| { + + // Conforming to the above HACK this just takes any (from, to) pair. 
+ let dummy_char_range = DUMMY_CHAR_RANGE.with(|range| *range); + let (from, to, range) = entries.first().map_or_else( + || (dummy_char_range, dummy_char_range, dummy_char_range), + |(MirSegment { from, to }, span, _)| { + let range = analysis.span_to_range(*span); + let from = analysis.span_to_range(ctxt.location_to_span(*from)); + let to = analysis.span_to_range(ctxt.location_to_span(*to)); + (from, to, range) + }, + ); + + let mut combined_table = + HashMap::, PermissionsDataDiff>::default(); + + // For all tables which fall on the same line, we combine them into a single table + // and remove all *SYMMETRIC* differences. That is, if you have permission changes such as: + // - path: +R+O + // - path: -R-O + // these are exactly symmetric, and will be removed. + log::debug!("Finishing the combined table for line {line}"); + for (segment, _, diffs) in entries.into_iter() { + for (place, diff) in diffs.into_iter() { + match combined_table.entry(place) { + Entry::Vacant(o) => { + log::debug!("- Place: {place:?} Segment {segment:?}\n\t\t{diff:?}"); + o.insert(diff); + } + Entry::Occupied(o) => { + let old_diff = o.get(); + if diff.is_symmetric_diff(old_diff) { + log::debug!( + "X Place {place:?} had a symmetric difference." + ); + o.remove(); + // master_table.remove(idx); + continue; + } else { + log::warn!("Clashing places on a step table were not symmetric: {place:?}"); + } + } + }; + } + } + + // This means the tables were symmetric and all were removed. 
+ if combined_table.is_empty() { + return None; + } + + let mut master_table_vec = combined_table + .into_iter() + .collect::>(); + + master_table_vec + .sort_by_key(|(place, _)| (place.local.as_usize(), place.projection)); + + let master_table = PermissionsStepTable { + from, + to, + state: master_table_vec + .into_iter() + .map(|(place, diff)| (place_to_string!(place), diff)) + .collect::>(), + }; + + Some(PermissionsLineDisplay { + location: range, + state: vec![master_table], + }) + }) + .collect::>() +} diff --git a/crates/aquascope/src/lib.rs b/crates/aquascope/src/lib.rs index d5e515bba..a9bedcff8 100644 --- a/crates/aquascope/src/lib.rs +++ b/crates/aquascope/src/lib.rs @@ -49,6 +49,9 @@ clippy::option_option, clippy::similar_names )] +// Only used for testing purposes, can we disallow +// uncommon codepoints when not testing? +#![allow(uncommon_codepoints)] extern crate datafrog; extern crate either; @@ -61,7 +64,6 @@ extern crate rustc_data_structures; extern crate rustc_driver; extern crate rustc_error_messages; extern crate rustc_errors; -extern crate rustc_graphviz; extern crate rustc_hir; extern crate rustc_hir_pretty; extern crate rustc_index; diff --git a/crates/aquascope/tests/snapshots/stepper__add_big_strings@closure_0.test.snap b/crates/aquascope/tests/snapshots/stepper__add_big_strings@closure_0.test.snap index 7fc90a7bc..483d67b89 100644 --- a/crates/aquascope/tests/snapshots/stepper__add_big_strings@closure_0.test.snap +++ b/crates/aquascope/tests/snapshots/stepper__add_big_strings@closure_0.test.snap @@ -326,7 +326,7 @@ description: add_big_strings@closure_0.test type: None value: false path_moved: - type: None + type: Low path_uninitialized: type: None value: false @@ -386,7 +386,7 @@ description: add_big_strings@closure_0.test type: None value: false path_moved: - type: None + type: Low path_uninitialized: type: Low loan_read_refined: diff --git a/crates/aquascope/tests/snapshots/stepper__reverse@vec_0.test.snap
b/crates/aquascope/tests/snapshots/stepper__reverse@vec_0.test.snap index 344048534..2c0066b47 100644 --- a/crates/aquascope/tests/snapshots/stepper__reverse@vec_0.test.snap +++ b/crates/aquascope/tests/snapshots/stepper__reverse@vec_0.test.snap @@ -109,7 +109,7 @@ description: reverse@vec_0.test type: None value: false path_moved: - type: None + type: Low path_uninitialized: type: Low loan_read_refined: