From 9725139b9d5639ac34edbd34efc68a441eccbb27 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Fri, 10 May 2024 16:27:22 -0700 Subject: [PATCH 01/95] Untangling the child constructor from recursive loading --- .../src/async_support.rs | 4 +- .../src/construct.rs | 84 +++++++++++-------- 2 files changed, 52 insertions(+), 36 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index e7fa19d89e..a3b0469ba1 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -12,7 +12,7 @@ use rustc_middle::{ ty::{GenericArgsRef, TyCtxt}, }; -use crate::construct::{CallKind, PartialGraph}; +use crate::construct::{CallKind, PartialGraph, SubgraphDescriptor}; use super::construct::GraphConstructor; use super::utils::{self, FnResolution}; @@ -167,7 +167,7 @@ pub enum AsyncDeterminationResult { } impl<'tcx> GraphConstructor<'tcx> { - pub(crate) fn try_handle_as_async(&self) -> Option> { + pub(crate) fn try_handle_as_async(&self) -> Option> { let (generator_fn, location) = determine_async(self.tcx, self.def_id, &self.body)?; let calling_context = self.calling_context_for(generator_fn.def_id(), location); diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index f06e82df38..5c327bea60 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -22,7 +22,7 @@ use rustc_middle::{ }; use rustc_mir_dataflow::{self as df}; use rustc_span::ErrorGuaranteed; -use rustc_utils::cache::Cache; +use rustc_utils::{cache::Cache, source_map::find_bodies::find_bodies}; use rustc_utils::{ mir::{borrowck_facts, control_dependencies::ControlDependencies}, BodyExt, PlaceExt, @@ -192,9 +192,12 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> }; let constructor = results.analysis.0; - let 
(child_constructor, calling_convention) = + let (child_descriptor, calling_convention) = match constructor.determine_call_handling(location, func, args)? { - CallHandling::Ready(one, two) => (one, two), + CallHandling::Ready { + calling_convention, + descriptor, + } => (descriptor, calling_convention), CallHandling::ApproxAsyncFn => { // Register a synthetic assignment of `future = (arg0, arg1, ...)`. let rvalue = Rvalue::Aggregate( @@ -220,12 +223,11 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> } }; - let child_graph = child_constructor.construct_partial_cached(); - - let parentable_srcs = - child_graph.parentable_srcs(child_constructor.def_id, &child_constructor.body); - let parentable_dsts = - child_graph.parentable_dsts(child_constructor.def_id, &child_constructor.body); + let SubgraphDescriptor { + graph: child_graph, + parentable_srcs, + parentable_dsts, + } = &*child_descriptor; // For each source node CHILD that is parentable to PLACE, // add an edge from PLACE -> CHILD. @@ -245,7 +247,7 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> Inputs::Unresolved { places: vec![(parent_place, None)], }, - Either::Right(child_src), + Either::Right(*child_src), location, TargetUse::Assign, ); @@ -271,7 +273,7 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> results, state, Inputs::Resolved { - node: child_dst, + node: *child_dst, node_use: SourceUse::Operand, }, Either::Left(parent_place), @@ -335,7 +337,7 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> if matches!( constructor.determine_call_handling(location, func, args), - Some(CallHandling::Ready(_, _)) + Some(CallHandling::Ready { .. 
}) ) { return; } @@ -496,7 +498,7 @@ pub(crate) struct CallingContext<'tcx> { pub(crate) call_stack: Vec, } -type PdgCache<'tcx> = Rc>>>; +type PdgCache<'tcx> = Rc>>>; pub struct GraphConstructor<'tcx> { pub(crate) tcx: TyCtxt<'tcx>, @@ -1056,7 +1058,11 @@ impl<'tcx> GraphConstructor<'tcx> { self.async_info.clone(), &self.pdg_cache, ); - Some(CallHandling::Ready(child_constructor, calling_convention)) + let graph = child_constructor.construct_partial_cached(); + Some(CallHandling::Ready { + descriptor: graph, + calling_convention, + }) } /// Attempt to inline a call to a function, returning None if call is not inline-able. @@ -1074,9 +1080,10 @@ impl<'tcx> GraphConstructor<'tcx> { let preamble = self.determine_call_handling(location, func, args)?; let (child_constructor, calling_convention) = match preamble { - CallHandling::Ready(child_constructor, calling_convention) => { - (child_constructor, calling_convention) - } + CallHandling::Ready { + descriptor, + calling_convention, + } => (descriptor, calling_convention), CallHandling::ApproxAsyncFn => { // Register a synthetic assignment of `future = (arg0, arg1, ...)`. 
let rvalue = Rvalue::Aggregate( @@ -1099,10 +1106,7 @@ impl<'tcx> GraphConstructor<'tcx> { } }; - let child_graph = child_constructor.construct_partial_cached(); - - let parentable_dsts = - child_graph.parentable_dsts(child_constructor.def_id, &child_constructor.body); + let parentable_dsts = &child_constructor.parentable_dsts; let parent_body = &self.body; let translate_to_parent = |child: Place<'tcx>| -> Option> { calling_convention.translate_to_parent( @@ -1126,10 +1130,6 @@ impl<'tcx> GraphConstructor<'tcx> { self.apply_mutation(state, location, parent_place); } } - trace!( - " Inlined {}", - self.fmt_fn(child_constructor.def_id.to_def_id()) - ); Some(()) } @@ -1175,15 +1175,14 @@ impl<'tcx> GraphConstructor<'tcx> { } } - fn construct_partial_cached(&self) -> Rc> { + fn construct_partial_cached(&self) -> Rc> { let key = self.make_call_string(RichLocation::Start); - let pdg = self - .pdg_cache - .get(key, move |_| Rc::new(self.construct_partial())); - Rc::clone(pdg) + self.pdg_cache + .get(key, move |_| Rc::new(self.construct_partial())) + .clone() } - pub(crate) fn construct_partial(&self) -> PartialGraph<'tcx> { + pub(crate) fn construct_partial(&self) -> SubgraphDescriptor<'tcx> { if let Some(g) = self.try_handle_as_async() { return g; } @@ -1225,7 +1224,15 @@ impl<'tcx> GraphConstructor<'tcx> { } } - final_state + SubgraphDescriptor { + parentable_dsts: final_state + .parentable_dsts(self.def_id, &self.body) + .collect(), + parentable_srcs: final_state + .parentable_srcs(self.def_id, &self.body) + .collect(), + graph: final_state, + } } fn domain_to_petgraph(self, domain: &PartialGraph<'tcx>) -> DepGraph<'tcx> { @@ -1252,7 +1259,7 @@ impl<'tcx> GraphConstructor<'tcx> { pub fn construct(self) -> DepGraph<'tcx> { let partial = self.construct_partial_cached(); - self.domain_to_petgraph(&partial) + self.domain_to_petgraph(&partial.graph) } /// Determine the type of call-site. 
@@ -1309,9 +1316,18 @@ pub enum CallKind<'tcx> { type ApproximationHandler<'tcx> = fn(&GraphConstructor<'tcx>, &mut dyn Visitor<'tcx>, &[Operand<'tcx>], Place<'tcx>, Location); +pub(crate) struct SubgraphDescriptor<'tcx> { + graph: PartialGraph<'tcx>, + parentable_srcs: Vec<(DepNode<'tcx>, Option)>, + parentable_dsts: Vec<(DepNode<'tcx>, Option)>, +} + enum CallHandling<'tcx, 'a> { ApproxAsyncFn, - Ready(GraphConstructor<'tcx>, CallingConvention<'tcx, 'a>), + Ready { + calling_convention: CallingConvention<'tcx, 'a>, + descriptor: Rc>, + }, ApproxAsyncSM(ApproximationHandler<'tcx>), } From a187aae6ddb38f2666e478933860963f0c850200 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Fri, 10 May 2024 18:46:21 -0700 Subject: [PATCH 02/95] Independence from calling context --- crates/flowistry_pdg/src/pdg.rs | 4 + .../src/async_support.rs | 38 +- .../src/construct.rs | 460 ++++++++---------- crates/flowistry_pdg_construction/src/lib.rs | 10 +- .../flowistry_pdg_construction/tests/pdg.rs | 37 +- .../paralegal-flow/src/ana/graph_converter.rs | 23 +- crates/paralegal-flow/src/ana/mod.rs | 27 +- 7 files changed, 279 insertions(+), 320 deletions(-) diff --git a/crates/flowistry_pdg/src/pdg.rs b/crates/flowistry_pdg/src/pdg.rs index 76cf784ffe..169f2fa965 100644 --- a/crates/flowistry_pdg/src/pdg.rs +++ b/crates/flowistry_pdg/src/pdg.rs @@ -153,6 +153,10 @@ impl CallString { CallString::new(string) } + pub fn push_front(self, loc: GlobalLocation) -> Self { + CallString::new([loc].into_iter().chain(self.0.iter().copied()).collect()) + } + pub fn is_at_root(self) -> bool { self.0.len() == 1 } diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index a3b0469ba1..588eedf5bd 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -166,29 +166,19 @@ pub enum AsyncDeterminationResult { NotAsync, } -impl<'tcx> GraphConstructor<'tcx> { - 
pub(crate) fn try_handle_as_async(&self) -> Option> { - let (generator_fn, location) = determine_async(self.tcx, self.def_id, &self.body)?; +impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { + pub(crate) fn try_handle_as_async(&self) -> Option>> { + let (generator_fn, location) = determine_async(self.tcx(), self.def_id, &self.body)?; - let calling_context = self.calling_context_for(generator_fn.def_id(), location); - let params = self.pdg_params_for_call(generator_fn); - Some( - GraphConstructor::new( - params, - Some(calling_context), - self.async_info.clone(), - &self.pdg_cache, - ) - .construct_partial(), - ) + self.memo.construct_for(generator_fn) } - pub(crate) fn try_poll_call_kind<'a>( - &'a self, + pub(crate) fn try_poll_call_kind<'b>( + &'b self, def_id: DefId, - original_args: &'a [Operand<'tcx>], + original_args: &'b [Operand<'tcx>], ) -> AsyncDeterminationResult> { - let lang_items = self.tcx.lang_items(); + let lang_items = self.tcx().lang_items(); if lang_items.future_poll_fn() == Some(def_id) { match self.find_async_args(original_args) { Ok((fun, loc, args)) => { @@ -202,9 +192,9 @@ impl<'tcx> GraphConstructor<'tcx> { } /// Given the arguments to a `Future::poll` call, walk back through the /// body to find the original future being polled, and get the arguments to the future. - fn find_async_args<'a>( - &'a self, - args: &'a [Operand<'tcx>], + fn find_async_args<'b>( + &'b self, + args: &'b [Operand<'tcx>], ) -> Result<(FnResolution<'tcx>, Location, Place<'tcx>), String> { macro_rules! 
let_assert { ($p:pat = $e:expr, $($arg:tt)*) => { @@ -241,7 +231,7 @@ impl<'tcx> GraphConstructor<'tcx> { debug_assert!(new_pin_args.len() == 1); let future_aliases = self - .aliases(self.tcx.mk_place_deref(new_pin_args[0].place().unwrap())) + .aliases(self.tcx().mk_place_deref(new_pin_args[0].place().unwrap())) .collect_vec(); debug_assert!(future_aliases.len() == 1); let future = *future_aliases.first().unwrap(); @@ -308,9 +298,9 @@ impl<'tcx> GraphConstructor<'tcx> { let (op, generics, calling_convention, async_fn_call_loc) = chase_target.unwrap(); let resolution = utils::try_resolve_function( - self.tcx, + self.tcx(), op, - self.tcx.param_env_reveal_all_normalized(self.def_id), + self.tcx().param_env_reveal_all_normalized(self.def_id), generics, ); diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 5c327bea60..558fec446e 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -43,77 +43,53 @@ use crate::{ try_resolve_function, CallChangeCallback, CallChanges, CallInfo, }; -/// Top-level parameters to PDG construction. -#[derive(Clone)] -pub struct PdgParams<'tcx> { - tcx: TyCtxt<'tcx>, - root: FnResolution<'tcx>, - call_change_callback: Option + 'tcx>>, - dump_mir: bool, +pub struct MemoPdgConstructor<'tcx> { + pub(crate) tcx: TyCtxt<'tcx>, + pub(crate) call_change_callback: Option + 'tcx>>, + pub(crate) dump_mir: bool, + pub(crate) async_info: Rc, + pub(crate) pdg_cache: PdgCache<'tcx>, } -impl<'tcx> PdgParams<'tcx> { - /// Must provide the [`TyCtxt`] and the [`LocalDefId`] of the function that is the root of the PDG. 
- pub fn new(tcx: TyCtxt<'tcx>, root: LocalDefId) -> Result { - let root = try_resolve_function( +impl<'tcx> MemoPdgConstructor<'tcx> { + pub fn new(tcx: TyCtxt<'tcx>) -> Self { + Self { tcx, - root.to_def_id(), - tcx.param_env_reveal_all_normalized(root), - manufacture_substs_for(tcx, root)?, - ); - Ok(PdgParams { - tcx, - root, call_change_callback: None, dump_mir: false, - }) + async_info: AsyncInfo::make(tcx).expect("Async functions are not defined"), + pdg_cache: Default::default(), + } } - pub fn with_dump_mir(mut self, dump_mir: bool) -> Self { + pub fn with_dump_mir(&mut self, dump_mir: bool) -> &mut Self { self.dump_mir = dump_mir; self } - /// Provide a callback for changing the behavior of how the PDG generator manages function calls. - /// - /// Currently, this callback can either indicate that a function call should be skipped (i.e., not recursed into), - /// or indicate that a set of fake effects should occur at the function call. See [`CallChanges`] for details. - /// - /// For example, in this code: - /// - /// ``` - /// fn incr(x: i32) -> i32 { x + 1 } - /// fn main() { - /// let a = 0; - /// let b = incr(a); - /// } - /// ``` - /// - /// When inspecting the call `incr(a)`, the callback will be called with `f({callee: incr, call_string: [main]})`. 
- /// You could apply a hard limit on call string length like this: - /// - /// ``` - /// # #![feature(rustc_private)] - /// # extern crate rustc_middle; - /// # use flowistry_pdg_construction::{PdgParams, SkipCall, CallChanges, CallChangeCallbackFn}; - /// # use rustc_middle::ty::TyCtxt; - /// # const THRESHOLD: usize = 5; - /// # fn f<'tcx>(tcx: TyCtxt<'tcx>, params: PdgParams<'tcx>) -> PdgParams<'tcx> { - /// params.with_call_change_callback(CallChangeCallbackFn::new(|info| { - /// let skip = if info.call_string.len() > THRESHOLD { - /// SkipCall::Skip - /// } else { - /// SkipCall::NoSkip - /// }; - /// CallChanges::default().with_skip(skip) - /// })) - /// # } - /// ``` - pub fn with_call_change_callback(self, f: impl CallChangeCallback<'tcx> + 'tcx) -> Self { - PdgParams { - call_change_callback: Some(Rc::new(f)), - ..self - } + pub fn with_call_change_callback( + &mut self, + callback: impl CallChangeCallback<'tcx> + 'tcx, + ) -> &mut Self { + self.call_change_callback.replace(Rc::new(callback)); + self + } + + pub(crate) fn construct_for( + &self, + resolution: FnResolution<'tcx>, + ) -> Option>> { + self.pdg_cache + .get_maybe_recursive(resolution, |_| { + GraphConstructor::new(self, resolution).construct_partial() + }) + .map(Rc::clone) + } + + pub fn construct_graph(&self, function: LocalDefId) -> DepGraph<'tcx> { + self.construct_for(FnResolution::Partial(function.to_def_id())) + .unwrap() + .to_petgraph() } } @@ -191,6 +167,15 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> return None; }; let constructor = results.analysis.0; + let gloc = GlobalLocation { + location: location.into(), + function: results.analysis.0.def_id, + }; + + let extend_node = |dep: DepNode<'tcx>| DepNode { + at: dep.at.push_front(gloc), + ..dep + }; let (child_descriptor, calling_convention) = match constructor.determine_call_handling(location, func, args)? 
{ @@ -235,8 +220,8 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> for (child_src, _kind) in parentable_srcs { if let Some(parent_place) = calling_convention.translate_to_parent( child_src.place, - &constructor.async_info, - constructor.tcx, + &constructor.async_info(), + constructor.tcx(), &constructor.body, constructor.def_id.to_def_id(), *destination, @@ -247,7 +232,7 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> Inputs::Unresolved { places: vec![(parent_place, None)], }, - Either::Right(*child_src), + Either::Right(extend_node(*child_src)), location, TargetUse::Assign, ); @@ -263,8 +248,8 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> for (child_dst, kind) in parentable_dsts { if let Some(parent_place) = calling_convention.translate_to_parent( child_dst.place, - &constructor.async_info, - constructor.tcx, + &constructor.async_info(), + constructor.tcx(), &constructor.body, constructor.def_id.to_def_id(), *destination, @@ -273,7 +258,7 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> results, state, Inputs::Resolved { - node: *child_dst, + node: extend_node(*child_dst), node_use: SourceUse::Operand, }, Either::Left(parent_place), @@ -282,8 +267,15 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> ); } } - self.nodes.extend(&child_graph.nodes); - self.edges.extend(&child_graph.edges); + self.nodes + .extend(child_graph.nodes.iter().copied().map(extend_node)); + self.edges.extend( + child_graph + .edges + .iter() + .copied() + .map(|(n1, n2, e)| (extend_node(n1), extend_node(n2), e)), + ); Some(()) }; @@ -363,6 +355,7 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> arg_vis.visit_terminator(terminator, location); } } + fn as_arg<'tcx>(node: &DepNode<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Option> { if node.at.leaf().function != def_id { return 
None; @@ -379,7 +372,7 @@ fn as_arg<'tcx>(node: &DepNode<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> impl<'tcx> PartialGraph<'tcx> { fn modular_mutation_visitor<'a>( &'a mut self, - results: &'a Results<'tcx, DfAnalysis<'_, 'tcx>>, + results: &'a Results<'tcx, DfAnalysis<'a, 'tcx>>, state: &'a InstructionState<'tcx>, ) -> ModularMutationVisitor<'a, 'tcx, impl FnMut(Location, Mutation<'tcx>) + 'a> { ModularMutationVisitor::new(&results.analysis.0.place_info, move |location, mutation| { @@ -498,21 +491,18 @@ pub(crate) struct CallingContext<'tcx> { pub(crate) call_stack: Vec, } -type PdgCache<'tcx> = Rc>>>; +type PdgCache<'tcx> = Rc, Rc>>>; -pub struct GraphConstructor<'tcx> { - pub(crate) tcx: TyCtxt<'tcx>, - pub(crate) params: PdgParams<'tcx>, +pub struct GraphConstructor<'tcx, 'a> { + pub(crate) memo: &'a MemoPdgConstructor<'tcx>, + root: FnResolution<'tcx>, body_with_facts: &'tcx BodyWithBorrowckFacts<'tcx>, pub(crate) body: Cow<'tcx, Body<'tcx>>, pub(crate) def_id: LocalDefId, place_info: PlaceInfo<'tcx>, control_dependencies: ControlDependencies, pub(crate) body_assignments: utils::BodyAssignments, - pub(crate) calling_context: Option>, start_loc: FxHashSet, - pub(crate) async_info: Rc, - pub(crate) pdg_cache: PdgCache<'tcx>, } fn other_as_arg<'tcx>(place: Place<'tcx>, body: &Body<'tcx>) -> Option { @@ -531,37 +521,23 @@ enum Inputs<'tcx> { }, } -impl<'tcx> GraphConstructor<'tcx> { - /// Creates a [`GraphConstructor`] at the root of the PDG. - pub fn root(params: PdgParams<'tcx>) -> Self { - let tcx = params.tcx; - GraphConstructor::new( - params, - None, - AsyncInfo::make(tcx).expect("async functions are not defined"), - &PdgCache::default(), - ) - } - +impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { /// Creates [`GraphConstructor`] for a function resolved as `fn_resolution` in a given `calling_context`. 
pub(crate) fn new( - params: PdgParams<'tcx>, - calling_context: Option>, - async_info: Rc, - pdg_cache: &PdgCache<'tcx>, - ) -> Self { - let tcx = params.tcx; - let def_id = params.root.def_id().expect_local(); + memo: &'a MemoPdgConstructor<'tcx>, + root: FnResolution<'tcx>, + ) -> GraphConstructor<'tcx, 'a> { + let tcx = memo.tcx; + let def_id = root.def_id().expect_local(); let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, def_id); - let param_env = match &calling_context { - Some(cx) => cx.param_env, - None => ParamEnv::reveal_all(), - }; - let body = params - .root - .try_monomorphize(tcx, param_env, &body_with_facts.body); - - if params.dump_mir { + let param_env = ParamEnv::reveal_all(); + // let param_env = match &calling_context { + // Some(cx) => cx.param_env, + // None => ParamEnv::reveal_all(), + // }; + let body = root.try_monomorphize(tcx, param_env, &body_with_facts.body); + + if memo.dump_mir { use std::io::Write; let path = tcx.def_path_str(def_id) + ".mir"; let mut f = std::fs::File::create(path.as_str()).unwrap(); @@ -576,21 +552,17 @@ impl<'tcx> GraphConstructor<'tcx> { start_loc.insert(RichLocation::Start); let body_assignments = utils::find_body_assignments(&body); - let pdg_cache = Rc::clone(pdg_cache); GraphConstructor { - tcx, - params, + memo, + root, body_with_facts, body, place_info, control_dependencies, start_loc, def_id, - calling_context, body_assignments, - async_info, - pdg_cache, } } @@ -602,48 +574,17 @@ impl<'tcx> GraphConstructor<'tcx> { } } - pub(crate) fn calling_context_for( - &self, - call_stack_extension: DefId, - location: Location, - ) -> CallingContext<'tcx> { - CallingContext { - call_string: self.make_call_string(location), - param_env: self.tcx.param_env_reveal_all_normalized(self.def_id), - call_stack: match &self.calling_context { - Some(cx) => { - let mut cx = cx.call_stack.clone(); - cx.push(call_stack_extension); - cx - } - None => vec![], - }, - } - } - - pub(crate) fn 
pdg_params_for_call(&self, root: FnResolution<'tcx>) -> PdgParams<'tcx> { - PdgParams { - root, - ..self.params.clone() - } - } - - /// Creates a [`CallString`] with the current function at the root, - /// with the rest of the string provided by the [`CallingContext`]. - fn make_call_string(&self, location: impl Into) -> CallString { - let global_loc = self.make_global_loc(location); - match &self.calling_context { - Some(cx) => cx.call_string.push(global_loc), - None => CallString::single(global_loc), - } - } - fn make_dep_node( &self, place: Place<'tcx>, location: impl Into, ) -> DepNode<'tcx> { - DepNode::new(place, self.make_call_string(location), self.tcx, &self.body) + DepNode::new( + place, + self.make_call_string(location), + self.tcx(), + &self.body, + ) } /// Returns all pairs of `(src, edge)`` such that the given `location` is control-dependent on `edge` @@ -670,7 +611,7 @@ impl<'tcx> GraphConstructor<'tcx> { continue; }; let at = self.make_call_string(ctrl_loc); - let src = DepNode::new(ctrl_place, at, self.tcx, &self.body); + let src = DepNode::new(ctrl_place, at, self.tcx(), &self.body); let edge = DepEdge::control(at, SourceUse::Operand, TargetUse::Assign); out.push((src, edge)); } @@ -679,8 +620,23 @@ impl<'tcx> GraphConstructor<'tcx> { out } + fn call_change_callback(&self) -> Option<&dyn CallChangeCallback<'tcx>> { + self.memo.call_change_callback.as_ref().map(Rc::as_ref) + } + + fn async_info(&self) -> &AsyncInfo { + &*self.memo.async_info + } + + fn make_call_string(&self, location: impl Into) -> CallString { + CallString::single(GlobalLocation { + function: self.root.def_id().expect_local(), + location: location.into(), + }) + } + /// Returns the aliases of `place`. See [`PlaceInfo::aliases`] for details. 
- pub(crate) fn aliases(&self, place: Place<'tcx>) -> impl Iterator> + '_ { + pub(crate) fn aliases(&'a self, place: Place<'tcx>) -> impl Iterator> + 'a { // MASSIVE HACK ALERT: // The issue is that monomorphization erases regions, due to how it's implemented in rustc. // However, Flowistry's alias analysis uses regions to figure out aliases. @@ -695,17 +651,21 @@ impl<'tcx> GraphConstructor<'tcx> { // This is a massive hack bc it's inefficient and I'm not certain that it's sound. let place_retyped = utils::retype_place( place, - self.tcx, + self.tcx(), &self.body_with_facts.body, self.def_id.to_def_id(), ); self.place_info.aliases(place_retyped).iter().map(|alias| { let mut projection = alias.projection.to_vec(); projection.extend(&place.projection[place_retyped.projection.len()..]); - Place::make(alias.local, &projection, self.tcx) + Place::make(alias.local, &projection, self.tcx()) }) } + pub(crate) fn tcx(&self) -> TyCtxt<'tcx> { + self.memo.tcx + } + /// Returns all nodes `src` such that `src` is: /// 1. Part of the value of `input` /// 2. The most-recently modified location for `src` @@ -717,7 +677,7 @@ impl<'tcx> GraphConstructor<'tcx> { // Include all sources of indirection (each reference in the chain) as relevant places. 
let provenance = input .refs_in_projection() - .map(|(place_ref, _)| Place::from_ref(place_ref, self.tcx)); + .map(|(place_ref, _)| Place::from_ref(place_ref, self.tcx())); let inputs = iter::once(input).chain(provenance); inputs @@ -743,8 +703,8 @@ impl<'tcx> GraphConstructor<'tcx> { let mut place = *place; if let Some((PlaceElem::Deref, rest)) = place.projection.split_last() { let mut new_place = place; - new_place.projection = self.tcx.mk_place_elems(rest); - if new_place.ty(self.body.as_ref(), self.tcx).ty.is_box() { + new_place.projection = self.tcx().mk_place_elems(rest); + if new_place.ty(self.body.as_ref(), self.tcx()).ty.is_box() { if new_place.is_indirect() { // TODO might be unsound: We assume that if // there are other indirections in here, @@ -756,7 +716,7 @@ impl<'tcx> GraphConstructor<'tcx> { } } places_conflict( - self.tcx, + self.tcx(), &self.body, place, alias, @@ -781,7 +741,7 @@ impl<'tcx> GraphConstructor<'tcx> { last_mut_locs.iter().map(move |last_mut_loc| { // Return @ as an input node. let at = self.make_call_string(*last_mut_loc); - DepNode::new(conflict, at, self.tcx, &self.body) + DepNode::new(conflict, at, self.tcx(), &self.body) }) }) }) @@ -808,7 +768,12 @@ impl<'tcx> GraphConstructor<'tcx> { // Create a destination node for (DST @ CURRENT_LOC). 
( *dst, - DepNode::new(*dst, self.make_call_string(location), self.tcx, &self.body), + DepNode::new( + *dst, + self.make_call_string(location), + self.tcx(), + &self.body, + ), ) }) .collect() @@ -843,10 +808,10 @@ impl<'tcx> GraphConstructor<'tcx> { let ty = match func { Operand::Constant(func) => func.literal.ty(), Operand::Copy(place) | Operand::Move(place) => { - place.ty(&self.body.local_decls, self.tcx).ty + place.ty(&self.body.local_decls, self.tcx()).ty } }; - let ty = utils::ty_resolve(ty, self.tcx); + let ty = utils::ty_resolve(ty, self.tcx()); match ty.kind() { TyKind::FnDef(def_id, generic_args) => Some((*def_id, generic_args)), TyKind::Generator(def_id, generic_args, _) => Some((*def_id, generic_args)), @@ -858,19 +823,22 @@ impl<'tcx> GraphConstructor<'tcx> { } fn fmt_fn(&self, def_id: DefId) -> String { - self.tcx.def_path_str(def_id) + self.tcx().def_path_str(def_id) } /// Special case behavior for calls to functions used in desugaring async functions. /// /// Ensures that functions like `Pin::new_unchecked` are not modularly-approximated. - fn can_approximate_async_functions(&self, def_id: DefId) -> Option> { - let lang_items = self.tcx.lang_items(); + fn can_approximate_async_functions( + &self, + def_id: DefId, + ) -> Option> { + let lang_items = self.tcx().lang_items(); if Some(def_id) == lang_items.new_unchecked_fn() { Some(Self::approximate_new_unchecked) } else if Some(def_id) == lang_items.into_future_fn() // FIXME: better way to get retrieve this stdlib DefId? 
- || self.tcx.def_path_str(def_id) == "::into_future" + || self.tcx().def_path_str(def_id) == "::into_future" { Some(Self::approximate_into_future) } else { @@ -899,13 +867,14 @@ impl<'tcx> GraphConstructor<'tcx> { destination: Place<'tcx>, location: Location, ) { - let lang_items = self.tcx.lang_items(); + let lang_items = self.tcx().lang_items(); let [op] = args else { unreachable!(); }; let mut operands = IndexVec::new(); operands.push(op.clone()); - let TyKind::Adt(adt_id, generics) = destination.ty(self.body.as_ref(), self.tcx).ty.kind() + let TyKind::Adt(adt_id, generics) = + destination.ty(self.body.as_ref(), self.tcx()).ty.kind() else { unreachable!() }; @@ -917,13 +886,13 @@ impl<'tcx> GraphConstructor<'tcx> { vis.visit_assign(&destination, &rvalue, location); } - fn determine_call_handling<'a>( + fn determine_call_handling<'b>( &self, location: Location, func: &Operand<'tcx>, - args: &'a [Operand<'tcx>], - ) -> Option> { - let tcx = self.tcx; + args: &'b [Operand<'tcx>], + ) -> Option> { + let tcx = self.tcx(); let (called_def_id, generic_args) = self.operand_to_def_id(func)?; trace!("Resolved call to function: {}", self.fmt_fn(called_def_id)); @@ -931,7 +900,7 @@ impl<'tcx> GraphConstructor<'tcx> { // Monomorphize the called function with the known generic_args. let param_env = tcx.param_env_reveal_all_normalized(self.def_id); let resolved_fn = - utils::try_resolve_function(self.tcx, called_def_id, param_env, generic_args); + utils::try_resolve_function(self.tcx(), called_def_id, param_env, generic_args); let resolved_def_id = resolved_fn.def_id(); if log_enabled!(Level::Trace) && called_def_id != resolved_def_id { let (called, resolved) = (self.fmt_fn(called_def_id), self.fmt_fn(resolved_def_id)); @@ -943,14 +912,6 @@ impl<'tcx> GraphConstructor<'tcx> { return None; } - // Don't inline recursive calls. 
- if let Some(cx) = &self.calling_context { - if cx.call_stack.contains(&resolved_def_id) { - trace!(" Bailing due to recursive call"); - return None; - } - } - if let Some(handler) = self.can_approximate_async_functions(resolved_def_id) { return Some(CallHandling::ApproxAsyncSM(handler)); }; @@ -966,12 +927,12 @@ impl<'tcx> GraphConstructor<'tcx> { let call_kind = match self.classify_call_kind(called_def_id, resolved_def_id, args) { Ok(cc) => cc, Err(async_err) => { - if let Some(cb) = self.params.call_change_callback.as_ref() { + if let Some(cb) = self.call_change_callback() { cb.on_inline_miss( resolved_fn, location, - self.params.root, - self.calling_context.as_ref().map(|s| s.call_string), + self.root, + None, InlineMissReason::Async(async_err), ) } @@ -991,21 +952,15 @@ impl<'tcx> GraphConstructor<'tcx> { ); // Recursively generate the PDG for the child function. - let params = self.pdg_params_for_call(resolved_fn); - let calling_context = self.calling_context_for(resolved_def_id, location); - let call_string = calling_context.call_string; - let cache_key = call_string.push(GlobalLocation { - function: resolved_fn.def_id().expect_local(), - location: RichLocation::Start, - }); + let cache_key = resolved_fn; - let is_cached = self.pdg_cache.is_in_cache(&cache_key); + let is_cached = self.memo.pdg_cache.is_in_cache(&cache_key); - let call_changes = self.params.call_change_callback.as_ref().map(|callback| { + let call_changes = self.call_change_callback().map(|callback| { let info = CallInfo { callee: resolved_fn, - call_string, + call_string: self.make_call_string(location), is_cached, async_parent: if let CallKind::AsyncPoll(resolution, _loc, _) = call_kind { // Special case for async. 
We ask for skipping not on the closure, but @@ -1051,16 +1006,9 @@ impl<'tcx> GraphConstructor<'tcx> { trace!(" Bailing because user callback said to bail"); return None; } - - let child_constructor = GraphConstructor::new( - params, - Some(calling_context), - self.async_info.clone(), - &self.pdg_cache, - ); - let graph = child_constructor.construct_partial_cached(); + let descriptor = self.memo.construct_for(cache_key)?; Some(CallHandling::Ready { - descriptor: graph, + descriptor, calling_convention, }) } @@ -1111,8 +1059,8 @@ impl<'tcx> GraphConstructor<'tcx> { let translate_to_parent = |child: Place<'tcx>| -> Option> { calling_convention.translate_to_parent( child, - &self.async_info, - self.tcx, + &self.async_info(), + self.tcx(), parent_body, self.def_id.to_def_id(), destination, @@ -1134,10 +1082,10 @@ impl<'tcx> GraphConstructor<'tcx> { Some(()) } - fn modular_mutation_visitor<'a>( - &'a self, + fn modular_mutation_visitor<'b: 'a>( + &'b self, state: &'a mut InstructionState<'tcx>, - ) -> ModularMutationVisitor<'a, 'tcx, impl FnMut(Location, Mutation<'tcx>) + 'a> { + ) -> ModularMutationVisitor<'b, 'tcx, impl FnMut(Location, Mutation<'tcx>) + 'b> { ModularMutationVisitor::new( &self.place_info, move |location, mutation: Mutation<'tcx>| { @@ -1175,20 +1123,13 @@ impl<'tcx> GraphConstructor<'tcx> { } } - fn construct_partial_cached(&self) -> Rc> { - let key = self.make_call_string(RichLocation::Start); - self.pdg_cache - .get(key, move |_| Rc::new(self.construct_partial())) - .clone() - } - - pub(crate) fn construct_partial(&self) -> SubgraphDescriptor<'tcx> { + pub(crate) fn construct_partial(&self) -> Rc> { if let Some(g) = self.try_handle_as_async() { return g; } let mut analysis = DfAnalysis(self) - .into_engine(self.tcx, &self.body) + .into_engine(self.tcx(), &self.body) .iterate_to_fixpoint(); let mut final_state = PartialGraph::default(); @@ -1224,7 +1165,7 @@ impl<'tcx> GraphConstructor<'tcx> { } } - SubgraphDescriptor { + Rc::new(SubgraphDescriptor { 
parentable_dsts: final_state .parentable_dsts(self.def_id, &self.body) .collect(), @@ -1232,34 +1173,7 @@ impl<'tcx> GraphConstructor<'tcx> { .parentable_srcs(self.def_id, &self.body) .collect(), graph: final_state, - } - } - - fn domain_to_petgraph(self, domain: &PartialGraph<'tcx>) -> DepGraph<'tcx> { - let mut graph: DiGraph, DepEdge> = DiGraph::new(); - let mut nodes = FxHashMap::default(); - macro_rules! add_node { - ($n:expr) => { - *nodes.entry($n).or_insert_with(|| graph.add_node($n)) - }; - } - - for node in &domain.nodes { - let _ = add_node!(*node); - } - - for (src, dst, kind) in &domain.edges { - let src_idx = add_node!(*src); - let dst_idx = add_node!(*dst); - graph.add_edge(src_idx, dst_idx, *kind); - } - - DepGraph::new(graph) - } - - pub fn construct(self) -> DepGraph<'tcx> { - let partial = self.construct_partial_cached(); - self.domain_to_petgraph(&partial.graph) + }) } /// Determine the type of call-site. @@ -1267,11 +1181,11 @@ impl<'tcx> GraphConstructor<'tcx> { /// The error case is if we tried to resolve this as async and failed. We /// know it *is* async but we couldn't determine the information needed to /// analyze the function, therefore we will have to approximate it. 
- fn classify_call_kind<'a>( - &'a self, + fn classify_call_kind<'b>( + &'b self, def_id: DefId, resolved_def_id: DefId, - original_args: &'a [Operand<'tcx>], + original_args: &'b [Operand<'tcx>], ) -> Result, String> { match self.try_poll_call_kind(def_id, original_args) { AsyncDeterminationResult::Resolved(r) => Ok(r), @@ -1290,14 +1204,14 @@ impl<'tcx> GraphConstructor<'tcx> { // || Some(my_trait) == lang_items.fn_mut_trait() // || Some(my_trait) == lang_items.fn_once_trait()) // .then_some(CallKind::Indirect) - self.tcx.is_closure(def_id).then_some(CallKind::Indirect) + self.tcx().is_closure(def_id).then_some(CallKind::Indirect) } - fn terminator_visitor<'a>( - &'a self, - state: &'a mut InstructionState<'tcx>, + fn terminator_visitor<'b: 'a>( + &'b self, + state: &'b mut InstructionState<'tcx>, time: Time, - ) -> ModularMutationVisitor<'a, 'tcx, impl FnMut(Location, Mutation<'tcx>) + 'a> { + ) -> ModularMutationVisitor<'b, 'tcx, impl FnMut(Location, Mutation<'tcx>) + 'b> { let mut vis = self.modular_mutation_visitor(state); vis.set_time(time); vis @@ -1313,13 +1227,43 @@ pub enum CallKind<'tcx> { AsyncPoll(FnResolution<'tcx>, Location, Place<'tcx>), } -type ApproximationHandler<'tcx> = - fn(&GraphConstructor<'tcx>, &mut dyn Visitor<'tcx>, &[Operand<'tcx>], Place<'tcx>, Location); +type ApproximationHandler<'tcx, 'a> = fn( + &GraphConstructor<'tcx, 'a>, + &mut dyn Visitor<'tcx>, + &[Operand<'tcx>], + Place<'tcx>, + Location, +); pub(crate) struct SubgraphDescriptor<'tcx> { - graph: PartialGraph<'tcx>, - parentable_srcs: Vec<(DepNode<'tcx>, Option)>, - parentable_dsts: Vec<(DepNode<'tcx>, Option)>, + pub(crate) graph: PartialGraph<'tcx>, + pub(crate) parentable_srcs: Vec<(DepNode<'tcx>, Option)>, + pub(crate) parentable_dsts: Vec<(DepNode<'tcx>, Option)>, +} + +impl<'tcx> SubgraphDescriptor<'tcx> { + pub(crate) fn to_petgraph(&self) -> DepGraph<'tcx> { + let domain = &self.graph; + let mut graph: DiGraph, DepEdge> = DiGraph::new(); + let mut nodes = 
FxHashMap::default(); + macro_rules! add_node { + ($n:expr) => { + *nodes.entry($n).or_insert_with(|| graph.add_node($n)) + }; + } + + for node in &domain.nodes { + let _ = add_node!(*node); + } + + for (src, dst, kind) in &domain.edges { + let src_idx = add_node!(*src); + let dst_idx = add_node!(*dst); + graph.add_edge(src_idx, dst_idx, *kind); + } + + DepGraph::new(graph) + } } enum CallHandling<'tcx, 'a> { @@ -1328,10 +1272,10 @@ enum CallHandling<'tcx, 'a> { calling_convention: CallingConvention<'tcx, 'a>, descriptor: Rc>, }, - ApproxAsyncSM(ApproximationHandler<'tcx>), + ApproxAsyncSM(ApproximationHandler<'tcx, 'a>), } -struct DfAnalysis<'a, 'tcx>(&'a GraphConstructor<'tcx>); +struct DfAnalysis<'a, 'tcx>(&'a GraphConstructor<'tcx, 'a>); impl<'tcx> df::AnalysisDomain<'tcx> for DfAnalysis<'_, 'tcx> { type Domain = InstructionState<'tcx>; diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index 4efc0f8988..23291dbc0b 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -17,12 +17,12 @@ pub use utils::FnResolution; use self::graph::DepGraph; pub use async_support::{determine_async, is_async_trait_fn, match_async_trait_assign}; -use construct::GraphConstructor; pub mod callback; +pub use crate::construct::MemoPdgConstructor; pub use callback::{ CallChangeCallback, CallChangeCallbackFn, CallChanges, CallInfo, InlineMissReason, SkipCall, }; -pub use construct::PdgParams; +use rustc_middle::ty::TyCtxt; pub use utils::{is_non_default_trait_method, try_resolve_function}; mod async_support; @@ -33,7 +33,7 @@ mod mutation; mod utils; /// Computes a global program dependence graph (PDG) starting from the root function specified by `def_id`. 
-pub fn compute_pdg(params: PdgParams<'_>) -> DepGraph<'_> { - let constructor = GraphConstructor::root(params); - constructor.construct() +pub fn compute_pdg<'tcx>(tcx: TyCtxt<'tcx>, params: FnResolution<'tcx>) -> DepGraph<'tcx> { + let constructor = MemoPdgConstructor::new(tcx); + constructor.construct_for(params).unwrap().to_petgraph() } diff --git a/crates/flowistry_pdg_construction/tests/pdg.rs b/crates/flowistry_pdg_construction/tests/pdg.rs index 5cbc603dc1..8d983d606a 100644 --- a/crates/flowistry_pdg_construction/tests/pdg.rs +++ b/crates/flowistry_pdg_construction/tests/pdg.rs @@ -9,7 +9,7 @@ use std::collections::HashSet; use either::Either; use flowistry_pdg_construction::{ graph::{DepEdge, DepGraph}, - CallChangeCallbackFn, CallChanges, PdgParams, SkipCall, + CallChangeCallbackFn, CallChanges, MemoPdgConstructor, SkipCall, }; use itertools::Itertools; use rustc_hir::def_id::LocalDefId; @@ -32,14 +32,15 @@ fn get_main(tcx: TyCtxt<'_>) -> LocalDefId { fn pdg( input: impl Into, - configure: impl for<'tcx> FnOnce(TyCtxt<'tcx>, PdgParams<'tcx>) -> PdgParams<'tcx> + Send, + configure: impl for<'tcx> FnOnce(TyCtxt<'tcx>, &mut MemoPdgConstructor<'tcx>) + Send, tests: impl for<'tcx> FnOnce(TyCtxt<'tcx>, DepGraph<'tcx>) + Send, ) { let _ = env_logger::try_init(); rustc_utils::test_utils::compile(input, move |tcx| { let def_id = get_main(tcx); - let params = configure(tcx, PdgParams::new(tcx, def_id).unwrap()); - let pdg = flowistry_pdg_construction::compute_pdg(params); + let mut memo = MemoPdgConstructor::new(tcx); + let params = configure(tcx, &mut memo); + let pdg = memo.construct_graph(def_id); tests(tcx, pdg) }) } @@ -166,7 +167,7 @@ macro_rules! pdg_constraint { macro_rules! pdg_test { ($name:ident, { $($i:item)* }, $($cs:tt),*) => { - pdg_test!($name, { $($i)* }, |_, params| params, $($cs),*); + pdg_test!($name, { $($i)* }, |_, params| (), $($cs),*); }; ($name:ident, { $($i:item)* }, $e:expr, $($cs:tt),*) => { #[test] @@ -610,7 +611,7 @@ pdg_test! 
{ |_, params| { params.with_call_change_callback(CallChangeCallbackFn::new( move |_| { CallChanges::default().with_skip(SkipCall::Skip) - })) + })); }, (recipients -/> sender) } @@ -636,17 +637,19 @@ pdg_test! { nested_layer_one(&mut w, z); } }, - |tcx, params| params.with_call_change_callback(CallChangeCallbackFn::new(move |info| { - let name = tcx.opt_item_name(info.callee.def_id()); - let skip = if !matches!(name.as_ref().map(|sym| sym.as_str()), Some("no_inline")) - && info.call_string.len() < 2 - { - SkipCall::NoSkip - } else { - SkipCall::Skip - }; - CallChanges::default().with_skip(skip) - })), + |tcx, params| { + params.with_call_change_callback(CallChangeCallbackFn::new(move |info| { + let name = tcx.opt_item_name(info.callee.def_id()); + let skip = if !matches!(name.as_ref().map(|sym| sym.as_str()), Some("no_inline")) + && info.call_string.len() < 2 + { + SkipCall::NoSkip + } else { + SkipCall::Skip + }; + CallChanges::default().with_skip(skip) + })); + }, (y -> x), (z -> w) } diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index c062dbef50..3bf4980f3d 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -29,7 +29,7 @@ use flowistry_pdg_construction::{ determine_async, graph::{DepEdge, DepEdgeKind, DepGraph, DepNode}, is_async_trait_fn, match_async_trait_assign, CallChangeCallback, CallChanges, CallInfo, - InlineMissReason, PdgParams, + InlineMissReason, SkipCall::Skip, }; use petgraph::{ @@ -389,15 +389,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { // Make sure we count outselves record_inlining(&stat_wrap, tcx, target, false); let stat_wrap_copy = stat_wrap.clone(); - let judge = generator.inline_judge.clone(); - let params = PdgParams::new(tcx, local_def_id) - .map_err(|_| anyhow!("unable to contruct PDG for {local_def_id:?}"))? 
- .with_call_change_callback(MyCallback { - judge, - stat_wrap, - tcx, - }) - .with_dump_mir(generator.opts.dbg().dump_mir()); if opts.dbg().dump_mir() { let mut file = std::fs::File::create(format!( @@ -414,7 +405,9 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { )? } let flowistry_time = Instant::now(); - let pdg = flowistry_pdg_construction::compute_pdg(params); + let pdg = generator + .flowistry_constructor + .construct_graph(local_def_id); let (mut stats, _) = Rc::into_inner(stat_wrap_copy).unwrap().into_inner(); stats.construction_time = flowistry_time.elapsed(); @@ -568,10 +561,10 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { } } -struct MyCallback<'tcx> { - judge: InlineJudge<'tcx>, - stat_wrap: StatStracker, - tcx: TyCtxt<'tcx>, +pub(super) struct MyCallback<'tcx> { + pub(super) judge: InlineJudge<'tcx>, + pub(super) stat_wrap: StatStracker, + pub(super) tcx: TyCtxt<'tcx>, } impl<'tcx> CallChangeCallback<'tcx> for MyCallback<'tcx> { diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 9b87d47829..d1ae7aa5ce 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -14,10 +14,13 @@ use crate::{ DefId, HashMap, HashSet, LogLevelConfig, MarkerCtx, Symbol, }; -use std::time::Instant; +use std::cell::RefCell; +use std::rc::Rc; +use std::time::{Duration, Instant}; use anyhow::Result; use either::Either; +use flowistry_pdg_construction::MemoPdgConstructor; use itertools::Itertools; use petgraph::visit::GraphBase; use rustc_span::{FileNameDisplayPreference, Span as RustSpan}; @@ -38,6 +41,7 @@ pub struct SPDGGenerator<'tcx> { pub tcx: TyCtxt<'tcx>, stats: Stats, place_info_cache: PlaceInfoCache<'tcx>, + flowistry_constructor: MemoPdgConstructor<'tcx>, } impl<'tcx> SPDGGenerator<'tcx> { @@ -47,12 +51,33 @@ impl<'tcx> SPDGGenerator<'tcx> { tcx: TyCtxt<'tcx>, stats: Stats, ) -> Self { + let mut flowistry_constructor = MemoPdgConstructor::new(tcx); + let stat_wrap = 
Rc::new(RefCell::new(( + SPDGStats { + unique_functions: 0, + unique_locs: 0, + analyzed_functions: 0, + analyzed_locs: 0, + inlinings_performed: 0, + construction_time: Duration::ZERO, + conversion_time: Duration::ZERO, + }, + Default::default(), + ))); + flowistry_constructor + .with_call_change_callback(graph_converter::MyCallback { + judge: InlineJudge::new(marker_ctx.clone(), tcx, opts.anactrl()), + stat_wrap, + tcx, + }) + .with_dump_mir(opts.dbg().dump_mir()); Self { inline_judge: InlineJudge::new(marker_ctx, tcx, opts.anactrl()), opts, tcx, stats, place_info_cache: Default::default(), + flowistry_constructor, } } From ff439f617c8c33c70ce26e70fd3b7f3ecc8821d5 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sat, 11 May 2024 12:49:23 -0700 Subject: [PATCH 03/95] Use the manufactured dyns again --- .../src/async_support.rs | 4 +- .../src/construct.rs | 33 ++++++-------- .../flowistry_pdg_construction/src/utils.rs | 1 + .../flowistry_pdg_construction/tests/pdg.rs | 6 +-- .../paralegal-flow/src/ana/graph_converter.rs | 45 ++++--------------- crates/paralegal-spdg/src/lib.rs | 2 - 6 files changed, 29 insertions(+), 62 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index 588eedf5bd..b12934d5cd 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -12,7 +12,7 @@ use rustc_middle::{ ty::{GenericArgsRef, TyCtxt}, }; -use crate::construct::{CallKind, PartialGraph, SubgraphDescriptor}; +use crate::construct::{CallKind, SubgraphDescriptor}; use super::construct::GraphConstructor; use super::utils::{self, FnResolution}; @@ -168,7 +168,7 @@ pub enum AsyncDeterminationResult { impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { pub(crate) fn try_handle_as_async(&self) -> Option>> { - let (generator_fn, location) = determine_async(self.tcx(), self.def_id, &self.body)?; + let (generator_fn, _) = 
determine_async(self.tcx(), self.def_id, &self.body)?; self.memo.construct_for(generator_fn) } diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 558fec446e..ba89fde269 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -22,8 +22,8 @@ use rustc_middle::{ }; use rustc_mir_dataflow::{self as df}; use rustc_span::ErrorGuaranteed; -use rustc_utils::{cache::Cache, source_map::find_bodies::find_bodies}; use rustc_utils::{ + cache::Cache, mir::{borrowck_facts, control_dependencies::ControlDependencies}, BodyExt, PlaceExt, }; @@ -35,12 +35,13 @@ use super::utils::{self, FnResolution}; use crate::{ graph::{SourceUse, TargetUse}, mutation::Time, + try_resolve_function, utils::{is_non_default_trait_method, manufacture_substs_for}, InlineMissReason, SkipCall, }; use crate::{ mutation::{ModularMutationVisitor, Mutation}, - try_resolve_function, CallChangeCallback, CallChanges, CallInfo, + CallChangeCallback, CallChanges, CallInfo, }; pub struct MemoPdgConstructor<'tcx> { @@ -86,10 +87,18 @@ impl<'tcx> MemoPdgConstructor<'tcx> { .map(Rc::clone) } - pub fn construct_graph(&self, function: LocalDefId) -> DepGraph<'tcx> { - self.construct_for(FnResolution::Partial(function.to_def_id())) + pub fn construct_graph(&self, function: LocalDefId) -> Result, ErrorGuaranteed> { + let args = manufacture_substs_for(self.tcx, function)?; + let g = self + .construct_for(try_resolve_function( + self.tcx, + function.to_def_id(), + ParamEnv::reveal_all(), + args, + )) .unwrap() - .to_petgraph() + .to_petgraph(); + Ok(g) } } @@ -485,12 +494,6 @@ impl<'tcx> PartialGraph<'tcx> { } } -pub(crate) struct CallingContext<'tcx> { - pub(crate) call_string: CallString, - pub(crate) param_env: ParamEnv<'tcx>, - pub(crate) call_stack: Vec, -} - type PdgCache<'tcx> = Rc, Rc>>>; pub struct GraphConstructor<'tcx, 'a> { @@ -566,14 +569,6 @@ impl<'tcx, 'a> 
GraphConstructor<'tcx, 'a> { } } - /// Creates a [`GlobalLocation`] at the current function. - fn make_global_loc(&self, location: impl Into) -> GlobalLocation { - GlobalLocation { - function: self.def_id, - location: location.into(), - } - } - fn make_dep_node( &self, place: Place<'tcx>, diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 492fb7f2ad..71e9176935 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -257,6 +257,7 @@ pub fn ty_resolve<'tcx>(ty: Ty<'tcx>, tcx: TyCtxt<'tcx>) -> Ty<'tcx> { } } +#[allow(unused)] pub fn manufacture_substs_for( tcx: TyCtxt<'_>, function: LocalDefId, diff --git a/crates/flowistry_pdg_construction/tests/pdg.rs b/crates/flowistry_pdg_construction/tests/pdg.rs index 8d983d606a..7ccac2c93f 100644 --- a/crates/flowistry_pdg_construction/tests/pdg.rs +++ b/crates/flowistry_pdg_construction/tests/pdg.rs @@ -39,8 +39,8 @@ fn pdg( rustc_utils::test_utils::compile(input, move |tcx| { let def_id = get_main(tcx); let mut memo = MemoPdgConstructor::new(tcx); - let params = configure(tcx, &mut memo); - let pdg = memo.construct_graph(def_id); + configure(tcx, &mut memo); + let pdg = memo.construct_graph(def_id).unwrap(); tests(tcx, pdg) }) } @@ -167,7 +167,7 @@ macro_rules! pdg_constraint { macro_rules! 
pdg_test { ($name:ident, { $($i:item)* }, $($cs:tt),*) => { - pdg_test!($name, { $($i)* }, |_, params| (), $($cs),*); + pdg_test!($name, { $($i)* }, |_, _| (), $($cs),*); }; ($name:ident, { $($i:item)* }, $e:expr, $($cs:tt),*) => { #[test] diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 3bf4980f3d..360ad8157b 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -13,17 +13,12 @@ use flowistry_pdg::SourceUse; use paralegal_spdg::{Node, SPDGStats}; use rustc_utils::cache::Cache; -use std::{ - cell::RefCell, - fmt::Display, - rc::Rc, - time::{Duration, Instant}, -}; +use std::{cell::RefCell, fmt::Display, rc::Rc, time::Instant}; use self::call_string_resolver::CallStringResolver; use super::{default_index, path_for_item, src_loc_for_span, SPDGGenerator}; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, bail, Result}; use either::Either; use flowistry_pdg_construction::{ determine_async, @@ -65,7 +60,6 @@ pub struct GraphConverter<'tcx, 'a, C> { spdg: SPDGImpl, marker_assignments: HashMap>, call_string_resolver: call_string_resolver::CallStringResolver<'tcx>, - stats: SPDGStats, place_info_cache: PlaceInfoCache<'tcx>, } @@ -81,7 +75,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { ) -> Result { let local_def_id = target.def_id.expect_local(); let start = Instant::now(); - let (dep_graph, stats) = Self::create_flowistry_graph(generator, local_def_id)?; + let dep_graph = Self::create_flowistry_graph(generator, local_def_id)?; generator .stats .record_timed(TimedStat::Flowistry, start.elapsed()); @@ -104,7 +98,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { spdg: Default::default(), marker_assignments: Default::default(), call_string_resolver: CallStringResolver::new(generator.tcx, local_def_id), - stats, place_info_cache, }) } @@ -368,27 +361,12 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { fn 
create_flowistry_graph( generator: &SPDGGenerator<'tcx>, local_def_id: LocalDefId, - ) -> Result<(DepGraph<'tcx>, SPDGStats)> { + ) -> Result> { let tcx = generator.tcx; let opts = generator.opts; - let stat_wrap = Rc::new(RefCell::new(( - SPDGStats { - unique_functions: 0, - unique_locs: 0, - analyzed_functions: 0, - analyzed_locs: 0, - inlinings_performed: 0, - construction_time: Duration::ZERO, - conversion_time: Duration::ZERO, - }, - Default::default(), - ))); // TODO: I don't like that I have to do that here. Clean this up let target = determine_async(tcx, local_def_id, &tcx.body_for_def_id(local_def_id)?.body) .map_or(local_def_id, |res| res.0.def_id().expect_local()); - // Make sure we count outselves - record_inlining(&stat_wrap, tcx, target, false); - let stat_wrap_copy = stat_wrap.clone(); if opts.dbg().dump_mir() { let mut file = std::fs::File::create(format!( @@ -404,14 +382,11 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { &mut file, )? } - let flowistry_time = Instant::now(); - let pdg = generator - .flowistry_constructor - .construct_graph(local_def_id); - let (mut stats, _) = Rc::into_inner(stat_wrap_copy).unwrap().into_inner(); - stats.construction_time = flowistry_time.elapsed(); - - Ok((pdg, stats)) + let Ok(pdg) = generator.flowistry_constructor.construct_graph(target) else { + bail!("Failed to construct the graph"); + }; + + Ok(pdg) } /// Consume the generator and compile the [`SPDG`]. 
@@ -423,7 +398,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { self.generator .stats .record_timed(TimedStat::Conversion, start.elapsed()); - self.stats.conversion_time = start.elapsed(); SPDG { path: path_for_item(self.local_def_id.to_def_id(), self.tcx()), graph: self.spdg, @@ -441,7 +415,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { .into_iter() .map(|(k, v)| (k, Types(v.into()))) .collect(), - statistics: self.stats, } } diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index 25954c91db..adb7fc9120 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -826,8 +826,6 @@ pub struct SPDG { /// that this contains multiple types for a single node, because it hold /// top-level types and subtypes that may be marked. pub type_assigns: HashMap, - /// Statistics - pub statistics: SPDGStats, } #[derive(Clone, Serialize, Deserialize, Debug)] From cad95965544a3debf53e3b29986a090c97fa5c19 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 13 May 2024 12:45:45 -0700 Subject: [PATCH 04/95] Fixing basic test cases --- .../src/construct.rs | 297 ++++++++++-------- .../flowistry_pdg_construction/tests/pdg.rs | 6 +- .../paralegal-flow/src/ana/graph_converter.rs | 65 ++-- crates/paralegal-flow/src/utils/mod.rs | 10 + 4 files changed, 215 insertions(+), 163 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index ba89fde269..55f22fc607 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -1,6 +1,5 @@ use std::{borrow::Cow, collections::HashSet, iter, rc::Rc}; -use df::{fmt::DebugWithContext, Analysis, AnalysisDomain, Results, ResultsVisitor}; use either::Either; use flowistry::mir::placeinfo::PlaceInfo; use flowistry_pdg::{CallString, GlobalLocation, RichLocation}; @@ -20,7 +19,9 @@ use rustc_middle::{ }, ty::{GenericArg, List, ParamEnv, 
TyCtxt, TyKind}, }; -use rustc_mir_dataflow::{self as df}; +use rustc_mir_dataflow::{ + self as df, fmt::DebugWithContext, Analysis, AnalysisDomain, Results, ResultsVisitor, +}; use rustc_span::ErrorGuaranteed; use rustc_utils::{ cache::Cache, @@ -28,20 +29,15 @@ use rustc_utils::{ BodyExt, PlaceExt, }; -use super::async_support::*; -use super::calling_convention::*; -use super::graph::{DepEdge, DepGraph, DepNode}; -use super::utils::{self, FnResolution}; use crate::{ + async_support::*, + calling_convention::*, + graph::{DepEdge, DepGraph, DepNode}, graph::{SourceUse, TargetUse}, - mutation::Time, + mutation::{ModularMutationVisitor, Mutation, Time}, try_resolve_function, - utils::{is_non_default_trait_method, manufacture_substs_for}, - InlineMissReason, SkipCall, -}; -use crate::{ - mutation::{ModularMutationVisitor, Mutation}, - CallChangeCallback, CallChanges, CallInfo, + utils::{self, is_non_default_trait_method, manufacture_substs_for, FnResolution}, + CallChangeCallback, CallChanges, CallInfo, InlineMissReason, SkipCall, }; pub struct MemoPdgConstructor<'tcx> { @@ -82,7 +78,9 @@ impl<'tcx> MemoPdgConstructor<'tcx> { ) -> Option>> { self.pdg_cache .get_maybe_recursive(resolution, |_| { - GraphConstructor::new(self, resolution).construct_partial() + let g = GraphConstructor::new(self, resolution).construct_partial(); + g.check_invariants(); + g }) .map(Rc::clone) } @@ -165,129 +163,6 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> terminator: &'mir rustc_middle::mir::Terminator<'tcx>, location: Location, ) { - let mut handle_as_inline = || { - let TerminatorKind::Call { - func, - args, - destination, - .. 
- } = &terminator.kind - else { - return None; - }; - let constructor = results.analysis.0; - let gloc = GlobalLocation { - location: location.into(), - function: results.analysis.0.def_id, - }; - - let extend_node = |dep: DepNode<'tcx>| DepNode { - at: dep.at.push_front(gloc), - ..dep - }; - - let (child_descriptor, calling_convention) = - match constructor.determine_call_handling(location, func, args)? { - CallHandling::Ready { - calling_convention, - descriptor, - } => (descriptor, calling_convention), - CallHandling::ApproxAsyncFn => { - // Register a synthetic assignment of `future = (arg0, arg1, ...)`. - let rvalue = Rvalue::Aggregate( - Box::new(AggregateKind::Tuple), - IndexVec::from_iter(args.iter().cloned()), - ); - self.modular_mutation_visitor(results, state).visit_assign( - destination, - &rvalue, - location, - ); - return Some(()); - } - CallHandling::ApproxAsyncSM(how) => { - how( - constructor, - &mut self.modular_mutation_visitor(results, state), - args, - *destination, - location, - ); - return Some(()); - } - }; - - let SubgraphDescriptor { - graph: child_graph, - parentable_srcs, - parentable_dsts, - } = &*child_descriptor; - - // For each source node CHILD that is parentable to PLACE, - // add an edge from PLACE -> CHILD. - trace!("PARENT -> CHILD EDGES:"); - for (child_src, _kind) in parentable_srcs { - if let Some(parent_place) = calling_convention.translate_to_parent( - child_src.place, - &constructor.async_info(), - constructor.tcx(), - &constructor.body, - constructor.def_id.to_def_id(), - *destination, - ) { - self.register_mutation( - results, - state, - Inputs::Unresolved { - places: vec![(parent_place, None)], - }, - Either::Right(extend_node(*child_src)), - location, - TargetUse::Assign, - ); - } - } - - // For each destination node CHILD that is parentable to PLACE, - // add an edge from CHILD -> PLACE. 
- // - // PRECISION TODO: for a given child place, we only want to connect - // the *last* nodes in the child function to the parent, not *all* of them. - trace!("CHILD -> PARENT EDGES:"); - for (child_dst, kind) in parentable_dsts { - if let Some(parent_place) = calling_convention.translate_to_parent( - child_dst.place, - &constructor.async_info(), - constructor.tcx(), - &constructor.body, - constructor.def_id.to_def_id(), - *destination, - ) { - self.register_mutation( - results, - state, - Inputs::Resolved { - node: extend_node(*child_dst), - node_use: SourceUse::Operand, - }, - Either::Left(parent_place), - location, - kind.map_or(TargetUse::Return, TargetUse::MutArg), - ); - } - } - self.nodes - .extend(child_graph.nodes.iter().copied().map(extend_node)); - self.edges.extend( - child_graph - .edges - .iter() - .copied() - .map(|(n1, n2, e)| (extend_node(n1), extend_node(n2), e)), - ); - Some(()) - }; - if let TerminatorKind::SwitchInt { discr, .. } = &terminator.kind { if let Some(place) = discr.place() { self.register_mutation( @@ -304,7 +179,10 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> return; } - if handle_as_inline().is_none() { + if self + .handle_as_inline(results, state, terminator, location) + .is_none() + { trace!("Handling terminator {:?} as not inlined", terminator.kind); let mut arg_vis = ModularMutationVisitor::new( &results.analysis.0.place_info, @@ -421,6 +299,139 @@ impl<'tcx> PartialGraph<'tcx> { .filter(|node| node.0.at.leaf().location.is_end()) } + fn handle_as_inline( + &mut self, + results: &Results<'tcx, DfAnalysis<'_, 'tcx>>, + state: & as AnalysisDomain<'tcx>>::Domain, + terminator: &Terminator<'tcx>, + location: Location, + ) -> Option<()> { + let TerminatorKind::Call { + func, + args, + destination, + .. 
+ } = &terminator.kind + else { + return None; + }; + let constructor = results.analysis.0; + let gloc = GlobalLocation { + location: location.into(), + function: constructor.def_id, + }; + + let extend_node = |dep: DepNode<'tcx>| DepNode { + at: dep.at.push_front(gloc), + ..dep + }; + + let (child_descriptor, calling_convention) = + match constructor.determine_call_handling(location, func, args)? { + CallHandling::Ready { + calling_convention, + descriptor, + } => (descriptor, calling_convention), + CallHandling::ApproxAsyncFn => { + // Register a synthetic assignment of `future = (arg0, arg1, ...)`. + let rvalue = Rvalue::Aggregate( + Box::new(AggregateKind::Tuple), + IndexVec::from_iter(args.iter().cloned()), + ); + self.modular_mutation_visitor(results, state).visit_assign( + destination, + &rvalue, + location, + ); + return Some(()); + } + CallHandling::ApproxAsyncSM(how) => { + how( + constructor, + &mut self.modular_mutation_visitor(results, state), + args, + *destination, + location, + ); + return Some(()); + } + }; + + let SubgraphDescriptor { + graph: child_graph, + parentable_srcs, + parentable_dsts, + } = &*child_descriptor; + + // For each source node CHILD that is parentable to PLACE, + // add an edge from PLACE -> CHILD. + trace!("PARENT -> CHILD EDGES:"); + for (child_src, _kind) in parentable_srcs { + if let Some(parent_place) = calling_convention.translate_to_parent( + child_src.place, + &constructor.async_info(), + constructor.tcx(), + &constructor.body, + constructor.def_id.to_def_id(), + *destination, + ) { + self.register_mutation( + results, + state, + Inputs::Unresolved { + places: vec![(parent_place, None)], + }, + Either::Right(extend_node(*child_src)), + location, + TargetUse::Assign, + ); + } + } + + // For each destination node CHILD that is parentable to PLACE, + // add an edge from CHILD -> PLACE. 
+ // + // PRECISION TODO: for a given child place, we only want to connect + // the *last* nodes in the child function to the parent, not *all* of them. + trace!("CHILD -> PARENT EDGES:"); + for (child_dst, kind) in parentable_dsts { + if let Some(parent_place) = calling_convention.translate_to_parent( + child_dst.place, + &constructor.async_info(), + constructor.tcx(), + &constructor.body, + constructor.def_id.to_def_id(), + *destination, + ) { + self.register_mutation( + results, + state, + Inputs::Resolved { + node: extend_node(*child_dst), + node_use: SourceUse::Operand, + }, + Either::Left(parent_place), + location, + kind.map_or(TargetUse::Return, TargetUse::MutArg), + ); + } + } + self.nodes + .extend(child_graph.nodes.iter().copied().map(extend_node)); + self.edges + .extend(child_graph.edges.iter().copied().map(|(n1, n2, e)| { + ( + extend_node(n1), + extend_node(n2), + DepEdge { + at: e.at.push_front(gloc), + ..e + }, + ) + })); + Some(()) + } + fn register_mutation( &mut self, results: &Results<'tcx, DfAnalysis<'_, 'tcx>>, @@ -1259,6 +1270,16 @@ impl<'tcx> SubgraphDescriptor<'tcx> { DepGraph::new(graph) } + + fn check_invariants(&self) { + let root_function = self.graph.nodes.iter().next().unwrap().at.root().function; + for n in &self.graph.nodes { + assert_eq!(n.at.root().function, root_function); + } + for (src, target, e) in &self.graph.edges { + assert_eq!(e.at.root().function, root_function); + } + } } enum CallHandling<'tcx, 'a> { diff --git a/crates/flowistry_pdg_construction/tests/pdg.rs b/crates/flowistry_pdg_construction/tests/pdg.rs index 7ccac2c93f..4a0a142c76 100644 --- a/crates/flowistry_pdg_construction/tests/pdg.rs +++ b/crates/flowistry_pdg_construction/tests/pdg.rs @@ -650,8 +650,10 @@ pdg_test! { CallChanges::default().with_skip(skip) })); }, - (y -> x), - (z -> w) + (y -> x) + // TODO the way that graphs are constructed currently doesn't allow limiting + // call string depth + // (z -> w) } pdg_test! 
{ diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 360ad8157b..baddcdaf5b 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -209,29 +209,10 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { .graph .edges_directed(old_node, Direction::Incoming) .any(|e| { - if weight.at != e.weight().at { - // Incoming edges are either from our operation or from control flow - let at = e.weight().at; - debug_assert!( - at.leaf().function == leaf_loc.function - && if let RichLocation::Location(loc) = - at.leaf().location - { - matches!( - body.stmt_at(loc), - Either::Right(mir::Terminator { - kind: mir::TerminatorKind::SwitchInt { .. }, - .. - }) - ) - } else { - false - } - ); - false - } else { - e.weight().target_use.is_return() - } + let at = e.weight().at; + #[cfg(debug_assertions)] + assert_edge_location_invariant(self.tcx(), at, body, weight.at); + weight.at == at && e.weight().target_use.is_return() }); if needs_return_markers { @@ -534,6 +515,44 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { } } +fn assert_edge_location_invariant<'tcx>( + tcx: TyCtxt<'tcx>, + at: CallString, + body: &mir::Body<'tcx>, + location: CallString, +) { + // Normal case. The edge is introduced where the operation happens + if location == at { + return; + } + // Control flow case. The edge is introduced at the `switchInt` + if let RichLocation::Location(loc) = at.leaf().location { + if at.leaf().function == location.leaf().function + && matches!( + body.stmt_at(loc), + Either::Right(mir::Terminator { + kind: mir::TerminatorKind::SwitchInt { .. }, + .. 
+ }) + ) + { + return; + } + } + let mut msg = tcx.sess.struct_span_fatal( + at.leaf().span(tcx), + format!( + "This operation is performed in a different location: {}", + at + ), + ); + msg.span_note( + location.leaf().span(tcx), + format!("Expected to originate here: {}", at), + ); + msg.emit() +} + pub(super) struct MyCallback<'tcx> { pub(super) judge: InlineJudge<'tcx>, pub(super) stat_wrap: StatStracker, diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index 31491edaea..9ebbf4a8b5 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -1,6 +1,7 @@ //! Utility functions, general purpose structs and extension traits extern crate smallvec; +use flowistry_pdg::{GlobalLocation, RichLocation}; use thiserror::Error; use smallvec::SmallVec; @@ -880,6 +881,15 @@ impl<'tcx> Spanned<'tcx> for (LocalDefId, mir::Location) { } } +impl<'tcx> Spanned<'tcx> for GlobalLocation { + fn span(&self, tcx: TyCtxt<'tcx>) -> RustSpan { + match self.location { + RichLocation::Location(loc) => (self.function, loc).span(tcx), + _ => self.function.to_def_id().span(tcx), + } + } +} + pub fn map_either( either: Either, f: impl FnOnce(A) -> C, From eb466f8ae120e9263926e265c0847fa1556c9557 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 13 May 2024 13:36:51 -0700 Subject: [PATCH 05/95] Fix async call strings --- .../src/async_support.rs | 15 ++- .../src/construct.rs | 95 +++++++++++++++---- .../flowistry_pdg_construction/src/utils.rs | 9 +- .../paralegal-flow/src/ana/graph_converter.rs | 9 +- 4 files changed, 105 insertions(+), 23 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index b12934d5cd..0eda5fbc8d 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -1,6 +1,7 @@ use std::rc::Rc; use either::Either; +use 
flowistry_pdg::GlobalLocation; use itertools::Itertools; use rustc_abi::{FieldIdx, VariantIdx}; use rustc_hir::def_id::{DefId, LocalDefId}; @@ -12,7 +13,7 @@ use rustc_middle::{ ty::{GenericArgsRef, TyCtxt}, }; -use crate::construct::{CallKind, SubgraphDescriptor}; +use crate::construct::{push_call_string_root, CallKind, SubgraphDescriptor}; use super::construct::GraphConstructor; use super::utils::{self, FnResolution}; @@ -168,9 +169,17 @@ pub enum AsyncDeterminationResult { impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { pub(crate) fn try_handle_as_async(&self) -> Option>> { - let (generator_fn, _) = determine_async(self.tcx(), self.def_id, &self.body)?; + let (generator_fn, location) = determine_async(self.tcx(), self.def_id, &self.body)?; - self.memo.construct_for(generator_fn) + let g = self.memo.construct_for(generator_fn)?; + let new_g = push_call_string_root( + g.as_ref(), + GlobalLocation { + function: self.def_id, + location: flowistry_pdg::RichLocation::Location(location), + }, + ); + Some(Rc::new(new_g)) } pub(crate) fn try_poll_call_kind<'b>( diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 55f22fc607..c3f0c5c71a 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -91,7 +91,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { .construct_for(try_resolve_function( self.tcx, function.to_def_id(), - ParamEnv::reveal_all(), + self.tcx.param_env_reveal_all_normalized(function), args, )) .unwrap() @@ -321,10 +321,7 @@ impl<'tcx> PartialGraph<'tcx> { function: constructor.def_id, }; - let extend_node = |dep: DepNode<'tcx>| DepNode { - at: dep.at.push_front(gloc), - ..dep - }; + let extend_node = |dep: &DepNode<'tcx>| push_call_string_root(dep, gloc); let (child_descriptor, calling_convention) = match constructor.determine_call_handling(location, func, args)? 
{ @@ -381,7 +378,7 @@ impl<'tcx> PartialGraph<'tcx> { Inputs::Unresolved { places: vec![(parent_place, None)], }, - Either::Right(extend_node(*child_src)), + Either::Right(extend_node(child_src)), location, TargetUse::Assign, ); @@ -407,7 +404,7 @@ impl<'tcx> PartialGraph<'tcx> { results, state, Inputs::Resolved { - node: extend_node(*child_dst), + node: extend_node(child_dst), node_use: SourceUse::Operand, }, Either::Left(parent_place), @@ -416,17 +413,13 @@ impl<'tcx> PartialGraph<'tcx> { ); } } - self.nodes - .extend(child_graph.nodes.iter().copied().map(extend_node)); + self.nodes.extend(child_graph.nodes.iter().map(extend_node)); self.edges - .extend(child_graph.edges.iter().copied().map(|(n1, n2, e)| { + .extend(child_graph.edges.iter().map(|(n1, n2, e)| { ( extend_node(n1), extend_node(n2), - DepEdge { - at: e.at.push_front(gloc), - ..e - }, + push_call_string_root(e, gloc), ) })); Some(()) @@ -544,7 +537,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { let tcx = memo.tcx; let def_id = root.def_id().expect_local(); let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, def_id); - let param_env = ParamEnv::reveal_all(); + let param_env = tcx.param_env_reveal_all_normalized(def_id); // let param_env = match &calling_context { // Some(cx) => cx.param_env, // None => ParamEnv::reveal_all(), @@ -1241,6 +1234,76 @@ type ApproximationHandler<'tcx, 'a> = fn( Location, ); +pub(crate) trait TransformCallString { + fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self; +} + +impl TransformCallString for CallString { + fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { + f(*self) + } +} + +impl TransformCallString for DepNode<'_> { + fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { + Self { + at: f(self.at), + ..*self + } + } +} + +impl TransformCallString for DepEdge { + fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { + Self { + at: 
f(self.at), + ..*self + } + } +} + +impl<'tcx> TransformCallString for PartialGraph<'tcx> { + fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { + let recurse_node = |n: &DepNode<'tcx>| n.transform_call_string(&f); + Self { + nodes: self.nodes.iter().map(recurse_node).collect(), + edges: self + .edges + .iter() + .map(|(from, to, e)| { + ( + recurse_node(from), + recurse_node(to), + e.transform_call_string(&f), + ) + }) + .collect(), + } + } +} + +impl<'tcx> TransformCallString for SubgraphDescriptor<'tcx> { + fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { + let transform_vec = |v: &Vec<(DepNode<'tcx>, Option)>| { + v.iter() + .map(|(n, idx)| (n.transform_call_string(&f), *idx)) + .collect::>() + }; + Self { + graph: self.graph.transform_call_string(&f), + parentable_dsts: transform_vec(&self.parentable_dsts), + parentable_srcs: transform_vec(&self.parentable_srcs), + } + } +} + +pub(crate) fn push_call_string_root( + old: &T, + new_root: GlobalLocation, +) -> T { + old.transform_call_string(|c| c.push_front(new_root)) +} + pub(crate) struct SubgraphDescriptor<'tcx> { pub(crate) graph: PartialGraph<'tcx>, pub(crate) parentable_srcs: Vec<(DepNode<'tcx>, Option)>, @@ -1276,7 +1339,7 @@ impl<'tcx> SubgraphDescriptor<'tcx> { for n in &self.graph.nodes { assert_eq!(n.at.root().function, root_function); } - for (src, target, e) in &self.graph.edges { + for (_, _, e) in &self.graph.edges { assert_eq!(e.at.root().function, root_function); } } diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 71e9176935..c3eb736ada 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -257,7 +257,6 @@ pub fn ty_resolve<'tcx>(ty: Ty<'tcx>, tcx: TyCtxt<'tcx>) -> Ty<'tcx> { } } -#[allow(unused)] pub fn manufacture_substs_for( tcx: TyCtxt<'_>, function: LocalDefId, @@ -267,8 +266,12 @@ pub fn 
manufacture_substs_for( ExistentialTraitRef, GenericParamDefKind, ImplPolarity, ParamTy, Region, TraitPredicate, }; + trace!("Manufacturing for {function:?}"); + let generics = tcx.generics_of(function); + trace!("Found generics {generics:?}"); let predicates = tcx.predicates_of(function).instantiate_identity(tcx); + trace!("Found predicates {predicates:?}"); let types = (0..generics.count()).map(|gidx| { let param = generics.param_at(gidx, tcx); if let Some(default_val) = param.default_value(tcx) { @@ -332,5 +335,7 @@ pub fn manufacture_substs_for( ); Ok(GenericArg::from(ty)) }); - tcx.mk_args_from_iter(types) + let args = tcx.mk_args_from_iter(types); + trace!("Created args {args:?}"); + args } diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index baddcdaf5b..6104a002fb 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -246,7 +246,9 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { let (last, mut rest) = locations.split_last().unwrap(); if self.entrypoint_is_async() { - let (first, tail) = rest.split_first().unwrap(); + let Some((first, tail)) = rest.split_first() else { + panic!("{at:?}.len() < 2"); + }; // The body of a top-level `async` function binds a closure to the // return place `_0`. Here we expect are looking at the statement // that does this binding. @@ -363,7 +365,10 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { &mut file, )? 
} - let Ok(pdg) = generator.flowistry_constructor.construct_graph(target) else { + let Ok(pdg) = generator + .flowistry_constructor + .construct_graph(local_def_id) + else { bail!("Failed to construct the graph"); }; From 26ed7c7f48619ee3ff6679cda7898658e46c29ac Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 13 May 2024 20:06:58 -0700 Subject: [PATCH 06/95] Decoder and Encoder instances --- crates/flowistry_pdg/src/lib.rs | 5 ++ crates/flowistry_pdg/src/pdg.rs | 77 ++++++++++++++++++- .../src/construct.rs | 6 +- .../flowistry_pdg_construction/src/graph.rs | 29 ++++++- crates/flowistry_pdg_construction/src/lib.rs | 2 + .../paralegal-flow/src/ana/graph_converter.rs | 4 - 6 files changed, 112 insertions(+), 11 deletions(-) diff --git a/crates/flowistry_pdg/src/lib.rs b/crates/flowistry_pdg/src/lib.rs index 1e77a45d93..9ff0656de5 100644 --- a/crates/flowistry_pdg/src/lib.rs +++ b/crates/flowistry_pdg/src/lib.rs @@ -11,6 +11,11 @@ pub(crate) mod rustc { pub use middle::mir; } +#[cfg(feature = "rustc")] +extern crate rustc_macros; +#[cfg(feature = "rustc")] +extern crate rustc_serialize; + mod pdg; #[cfg(feature = "rustc")] mod rustc_impls; diff --git a/crates/flowistry_pdg/src/pdg.rs b/crates/flowistry_pdg/src/pdg.rs index 169f2fa965..8b8a86f148 100644 --- a/crates/flowistry_pdg/src/pdg.rs +++ b/crates/flowistry_pdg/src/pdg.rs @@ -8,6 +8,10 @@ use serde::{Deserialize, Serialize}; use crate::rustc_portable::*; #[cfg(feature = "rustc")] use crate::rustc_proxies; +#[cfg(feature = "rustc")] +use rustc_macros::{Decodable, Encodable}; +#[cfg(feature = "rustc")] +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; /// Extends a MIR body's `Location` with `Start` (before the first instruction) and `End` (after all returns). 
#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug, Serialize, Deserialize)] @@ -26,6 +30,35 @@ pub enum RichLocation { End, } +#[cfg(feature = "rustc")] +impl Encodable for RichLocation { + fn encode(&self, s: &mut E) { + match self { + Self::Location(loc) => s.emit_enum_variant(0, |s| { + s.emit_u32(loc.block.as_u32()); + s.emit_usize(loc.statement_index); + }), + Self::Start => s.emit_enum_variant(1, |_| ()), + Self::End => s.emit_enum_variant(2, |_| ()), + } + } +} + +#[cfg(feature = "rustc")] +impl Decodable for RichLocation { + fn decode(d: &mut D) -> Self { + match d.read_usize() { + 0 => Self::Location(Location { + block: d.read_u32().into(), + statement_index: d.read_usize().into(), + }), + 1 => Self::Start, + 2 => Self::End, + v => panic!("Unknown variant index: {v}"), + } + } +} + impl RichLocation { /// Returns true if this is a `Start` location. pub fn is_start(self) -> bool { @@ -75,6 +108,7 @@ impl From for RichLocation { /// A [`RichLocation`] within a specific point in a codebase. #[derive(PartialEq, Eq, Hash, Clone, Copy, Debug, Serialize, Deserialize)] pub struct GlobalLocation { + // TODO Change to `DefId` /// The function containing the location. 
#[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::LocalDefId"))] pub function: LocalDefId, @@ -83,8 +117,32 @@ pub struct GlobalLocation { pub location: RichLocation, } -#[cfg(not(feature = "rustc"))] +#[cfg(feature = "rustc")] +impl Encodable for GlobalLocation { + fn encode(&self, e: &mut E) { + crate::rustc::middle::ty::tls::with(|tcx| { + tcx.def_path_hash(self.function.to_def_id()).encode(e); + self.location.encode(e); + }) + } +} + +#[cfg(feature = "rustc")] +impl Decodable for GlobalLocation { + fn decode(d: &mut D) -> Self { + use crate::rustc::span::def_id::DefPathHash; + crate::rustc::middle::ty::tls::with(|tcx| Self { + function: tcx + .def_path_hash_to_def_id(DefPathHash::decode(d), &mut || { + panic!("Could map hash to def id") + }) + .expect_local(), + location: RichLocation::decode(d), + }) + } +} +#[cfg(not(feature = "rustc"))] impl fmt::Display for GlobalLocation { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:?}::{}", self.function, self.location) @@ -103,6 +161,21 @@ impl fmt::Display for GlobalLocation { #[derive(PartialEq, Eq, Hash, Copy, Clone, Debug, Serialize, Deserialize)] pub struct CallString(Intern); +#[cfg(feature = "rustc")] +impl Encodable for CallString { + fn encode(&self, s: &mut S) { + let inner: &CallStringInner = &*self.0; + inner.encode(s); + } +} + +#[cfg(feature = "rustc")] +impl Decodable for CallString { + fn decode(d: &mut D) -> Self { + Self(Intern::new(CallStringInner::decode(d))) + } +} + type CallStringInner = Box<[GlobalLocation]>; impl CallString { @@ -203,6 +276,7 @@ impl fmt::Display for CallString { #[derive( PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Debug, Serialize, Deserialize, strum::EnumIs, )] +#[cfg_attr(feature = "rustc", derive(Decodable, Encodable))] pub enum SourceUse { Operand, Argument(u8), @@ -210,6 +284,7 @@ pub enum SourceUse { /// Additional information about this mutation. 
#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug, Serialize, Deserialize, strum::EnumIs)] +#[cfg_attr(feature = "rustc", derive(Decodable, Encodable))] pub enum TargetUse { /// A function returned, assigning to it's return destination Return, diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index c3f0c5c71a..9fef01e5df 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -12,12 +12,13 @@ use rustc_borrowck::consumers::{places_conflict, BodyWithBorrowckFacts, PlaceCon use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_index::IndexVec; +use rustc_macros::{TyDecodable, TyEncodable}; use rustc_middle::{ mir::{ visit::Visitor, AggregateKind, BasicBlock, Body, Location, Operand, Place, PlaceElem, Rvalue, Statement, Terminator, TerminatorEdges, TerminatorKind, RETURN_PLACE, }, - ty::{GenericArg, List, ParamEnv, TyCtxt, TyKind}, + ty::{GenericArg, List, TyCtxt, TyKind}, }; use rustc_mir_dataflow::{ self as df, fmt::DebugWithContext, Analysis, AnalysisDomain, Results, ResultsVisitor, @@ -117,7 +118,7 @@ impl<'tcx> df::JoinSemiLattice for InstructionState<'tcx> { } } -#[derive(Default, Debug)] +#[derive(Default, Debug, TyDecodable, TyEncodable)] pub struct PartialGraph<'tcx> { nodes: FxHashSet>, edges: FxHashSet<(DepNode<'tcx>, DepNode<'tcx>, DepEdge)>, @@ -1304,6 +1305,7 @@ pub(crate) fn push_call_string_root( old.transform_call_string(|c| c.push_front(new_root)) } +#[derive(TyEncodable)] pub(crate) struct SubgraphDescriptor<'tcx> { pub(crate) graph: PartialGraph<'tcx>, pub(crate) parentable_srcs: Vec<(DepNode<'tcx>, Option)>, diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index c64a0ded28..922e953c00 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -1,14 +1,16 @@ //! 
The representation of the PDG. -use std::{fmt, hash::Hash, path::Path}; +use std::{fmt, hash::Hash, ops::Deref, path::Path}; use flowistry_pdg::CallString; use internment::Intern; use petgraph::{dot, graph::DiGraph}; +use rustc_macros::{Decodable, Encodable}; use rustc_middle::{ mir::{Body, Place}, - ty::TyCtxt, + ty::{TyCtxt, TyDecoder, TyEncoder}, }; +use rustc_serialize::{Decodable, Encodable}; use rustc_utils::PlaceExt; pub use flowistry_pdg::{SourceUse, TargetUse}; @@ -31,6 +33,25 @@ pub struct DepNode<'tcx> { pub(crate) place_pretty: Option>, } +impl<'tcx, E: TyEncoder>> Encodable for DepNode<'tcx> { + fn encode(&self, e: &mut E) { + self.place.encode(e); + self.at.encode(e); + let str: Option<&String> = self.place_pretty.as_ref().map(Deref::deref); + str.encode(e); + } +} + +impl<'tcx, D: TyDecoder>> Decodable for DepNode<'tcx> { + fn decode(d: &mut D) -> Self { + Self { + place: Decodable::decode(d), + at: Decodable::decode(d), + place_pretty: >::decode(d).map(|s| Intern::new(s)), + } + } +} + impl PartialEq for DepNode<'_> { fn eq(&self, other: &Self) -> bool { // Using an explicit match here with all fields, so that should new @@ -93,7 +114,7 @@ impl fmt::Display for DepNode<'_> { } /// A kind of edge in the program dependence graph. -#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug)] +#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug, Decodable, Encodable)] pub enum DepEdgeKind { /// X is control-dependent on Y if the value of Y influences the execution /// of statements that affect the value of X. @@ -107,7 +128,7 @@ pub enum DepEdgeKind { /// An edge in the program dependence graph. /// /// Represents an operation that induces a dependency between places. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Decodable, Encodable)] pub struct DepEdge { /// Either data or control. 
pub kind: DepEdgeKind, diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index 23291dbc0b..f6603acd3b 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -7,8 +7,10 @@ extern crate rustc_borrowck; extern crate rustc_hash; extern crate rustc_hir; extern crate rustc_index; +extern crate rustc_macros; extern crate rustc_middle; extern crate rustc_mir_dataflow; +extern crate rustc_serialize; extern crate rustc_span; extern crate rustc_target; extern crate rustc_type_ir; diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 6104a002fb..15796e1a26 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -21,7 +21,6 @@ use super::{default_index, path_for_item, src_loc_for_span, SPDGGenerator}; use anyhow::{anyhow, bail, Result}; use either::Either; use flowistry_pdg_construction::{ - determine_async, graph::{DepEdge, DepEdgeKind, DepGraph, DepNode}, is_async_trait_fn, match_async_trait_assign, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, @@ -347,9 +346,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { ) -> Result> { let tcx = generator.tcx; let opts = generator.opts; - // TODO: I don't like that I have to do that here. 
Clean this up - let target = determine_async(tcx, local_def_id, &tcx.body_for_def_id(local_def_id)?.body) - .map_or(local_def_id, |res| res.0.def_id().expect_local()); if opts.dbg().dump_mir() { let mut file = std::fs::File::create(format!( From 89bacce3197dec529b6088db1fdfcb208d4ddc1e Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 14 May 2024 17:02:30 -0700 Subject: [PATCH 07/95] Output side --- Cargo.lock | 1 + crates/flowistry_pdg/src/pdg.rs | 14 +- crates/flowistry_pdg/src/rustc_impls.rs | 2 +- crates/flowistry_pdg_construction/Cargo.toml | 1 + .../src/async_support.rs | 35 +- .../src/construct.rs | 67 ++- .../flowistry_pdg_construction/src/graph.rs | 154 +++++-- crates/flowistry_pdg_construction/src/lib.rs | 6 +- crates/flowistry_pdg_construction/src/meta.rs | 56 +++ .../flowistry_pdg_construction/src/utils.rs | 3 +- .../flowistry_pdg_construction/tests/pdg.rs | 6 +- crates/paralegal-flow/src/ana/encoder.rs | 78 ++++ .../paralegal-flow/src/ana/graph_converter.rs | 299 ++++--------- crates/paralegal-flow/src/ana/mod.rs | 393 +++++++++++++----- crates/paralegal-flow/src/ann/db.rs | 33 +- crates/paralegal-flow/src/ann/mod.rs | 58 ++- crates/paralegal-flow/src/ann/parse.rs | 15 +- crates/paralegal-flow/src/dbg.rs | 2 +- crates/paralegal-flow/src/discover.rs | 44 +- crates/paralegal-flow/src/lib.rs | 78 ++-- crates/paralegal-flow/src/test_utils.rs | 8 +- crates/paralegal-flow/src/utils/mod.rs | 87 +--- crates/paralegal-flow/src/utils/resolve.rs | 7 +- crates/paralegal-policy/src/algo/ahb.rs | 6 +- crates/paralegal-policy/src/context.rs | 19 +- crates/paralegal-policy/src/diagnostics.rs | 12 +- crates/paralegal-policy/src/test_utils.rs | 12 +- crates/paralegal-spdg/src/dot.rs | 13 +- crates/paralegal-spdg/src/lib.rs | 74 +++- crates/paralegal-spdg/src/tiny_bitset.rs | 4 + 30 files changed, 950 insertions(+), 637 deletions(-) create mode 100644 crates/flowistry_pdg_construction/src/meta.rs create mode 100644 crates/paralegal-flow/src/ana/encoder.rs diff --git 
a/Cargo.lock b/Cargo.lock index c504b4234d..c1d3b183e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -492,6 +492,7 @@ dependencies = [ "log", "petgraph", "rustc_utils 0.7.4-nightly-2023-08-25 (git+https://github.com/JustusAdam/rustc_plugin?rev=e990ded60afc928f76293fb9ad265c58405da1a7)", + "serde", ] [[package]] diff --git a/crates/flowistry_pdg/src/pdg.rs b/crates/flowistry_pdg/src/pdg.rs index 8b8a86f148..71ec52cd3b 100644 --- a/crates/flowistry_pdg/src/pdg.rs +++ b/crates/flowistry_pdg/src/pdg.rs @@ -110,8 +110,8 @@ impl From for RichLocation { pub struct GlobalLocation { // TODO Change to `DefId` /// The function containing the location. - #[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::LocalDefId"))] - pub function: LocalDefId, + #[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::DefId"))] + pub function: DefId, /// The location of an instruction in the function, or the function's start. pub location: RichLocation, @@ -121,7 +121,7 @@ pub struct GlobalLocation { impl Encodable for GlobalLocation { fn encode(&self, e: &mut E) { crate::rustc::middle::ty::tls::with(|tcx| { - tcx.def_path_hash(self.function.to_def_id()).encode(e); + tcx.def_path_hash(self.function).encode(e); self.location.encode(e); }) } @@ -132,11 +132,9 @@ impl Decodable for GlobalLocation { fn decode(d: &mut D) -> Self { use crate::rustc::span::def_id::DefPathHash; crate::rustc::middle::ty::tls::with(|tcx| Self { - function: tcx - .def_path_hash_to_def_id(DefPathHash::decode(d), &mut || { - panic!("Could map hash to def id") - }) - .expect_local(), + function: tcx.def_path_hash_to_def_id(DefPathHash::decode(d), &mut || { + panic!("Could map hash to def id") + }), location: RichLocation::decode(d), }) } diff --git a/crates/flowistry_pdg/src/rustc_impls.rs b/crates/flowistry_pdg/src/rustc_impls.rs index 665f1d75d6..c74d5c4ff1 100644 --- a/crates/flowistry_pdg/src/rustc_impls.rs +++ b/crates/flowistry_pdg/src/rustc_impls.rs @@ -77,7 +77,7 @@ impl From for def_id::DefIndex { impl 
fmt::Display for GlobalLocation { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { tls::with_opt(|opt_tcx| match opt_tcx { - Some(tcx) => match tcx.opt_item_name(self.function.to_def_id()) { + Some(tcx) => match tcx.opt_item_name(self.function) { Some(name) => name.fmt(f), None => write!(f, ""), }, diff --git a/crates/flowistry_pdg_construction/Cargo.toml b/crates/flowistry_pdg_construction/Cargo.toml index d2bba5ed90..acc872a72a 100644 --- a/crates/flowistry_pdg_construction/Cargo.toml +++ b/crates/flowistry_pdg_construction/Cargo.toml @@ -23,6 +23,7 @@ flowistry_pdg = { version = "0.1.0", path = "../flowistry_pdg", features = [ ] } #flowistry = { path = "../../../flowistry/crates/flowistry", default-features = false } flowistry = { workspace = true } +serde = { workspace = true, features = ["derive"] } [dev-dependencies] rustc_utils = { workspace = true, features = ["indexical", "test"] } diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index 0eda5fbc8d..d5a2cf4b12 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -5,6 +5,7 @@ use flowistry_pdg::GlobalLocation; use itertools::Itertools; use rustc_abi::{FieldIdx, VariantIdx}; use rustc_hir::def_id::{DefId, LocalDefId}; +use rustc_macros::{Decodable, Encodable}; use rustc_middle::{ mir::{ AggregateKind, BasicBlock, Body, Location, Operand, Place, Rvalue, Statement, @@ -18,6 +19,19 @@ use crate::construct::{push_call_string_root, CallKind, SubgraphDescriptor}; use super::construct::GraphConstructor; use super::utils::{self, FnResolution}; +#[derive(Debug, Clone, Copy, Decodable, Encodable)] +pub enum Asyncness { + No, + AsyncFn, + AsyncTrait, +} + +impl Asyncness { + pub fn is_async(self) -> bool { + !matches!(self, Asyncness::No) + } +} + /// Stores ids that are needed to construct projections around async functions. 
pub(crate) struct AsyncInfo { pub poll_ready_variant_idx: VariantIdx, @@ -148,16 +162,19 @@ pub fn determine_async<'tcx>( tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>, -) -> Option<(FnResolution<'tcx>, Location)> { - let (generator_def_id, args, loc) = if tcx.asyncness(def_id).is_async() { - get_async_generator(body) +) -> Option<(FnResolution<'tcx>, Location, Asyncness)> { + let ((generator_def_id, args, loc), asyncness) = if tcx.asyncness(def_id).is_async() { + (get_async_generator(body), Asyncness::AsyncFn) } else { - try_as_async_trait_function(tcx, def_id.to_def_id(), body)? + ( + try_as_async_trait_function(tcx, def_id.to_def_id(), body)?, + Asyncness::AsyncTrait, + ) }; let param_env = tcx.param_env_reveal_all_normalized(def_id); let generator_fn = utils::try_resolve_function(tcx, generator_def_id.to_def_id(), param_env, args); - Some((generator_fn, loc)) + Some((generator_fn, loc, asyncness)) } #[derive(Debug, Clone, PartialEq, Eq)] @@ -169,16 +186,18 @@ pub enum AsyncDeterminationResult { impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { pub(crate) fn try_handle_as_async(&self) -> Option>> { - let (generator_fn, location) = determine_async(self.tcx(), self.def_id, &self.body)?; + let (generator_fn, location, asyncness) = + determine_async(self.tcx(), self.def_id, &self.body)?; let g = self.memo.construct_for(generator_fn)?; - let new_g = push_call_string_root( + let mut new_g = push_call_string_root( g.as_ref(), GlobalLocation { - function: self.def_id, + function: self.def_id.to_def_id(), location: flowistry_pdg::RichLocation::Location(location), }, ); + new_g.graph.asyncness = asyncness; Some(Rc::new(new_g)) } diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 9fef01e5df..a1a77df503 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -10,15 +10,16 @@ use petgraph::graph::DiGraph; use rustc_abi::VariantIdx; 
use rustc_borrowck::consumers::{places_conflict, BodyWithBorrowckFacts, PlaceConflictBias}; use rustc_hash::{FxHashMap, FxHashSet}; -use rustc_hir::def_id::{DefId, LocalDefId}; +use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId}; use rustc_index::IndexVec; use rustc_macros::{TyDecodable, TyEncodable}; use rustc_middle::{ mir::{ - visit::Visitor, AggregateKind, BasicBlock, Body, Location, Operand, Place, PlaceElem, - Rvalue, Statement, Terminator, TerminatorEdges, TerminatorKind, RETURN_PLACE, + visit::Visitor, AggregateKind, BasicBlock, Body, HasLocalDecls, Local, LocalDecl, + LocalDecls, LocalKind, Location, Operand, Place, PlaceElem, Rvalue, Statement, Terminator, + TerminatorEdges, TerminatorKind, RETURN_PLACE, }, - ty::{GenericArg, List, TyCtxt, TyKind}, + ty::{GenericArg, GenericArgsRef, List, TyCtxt, TyKind}, }; use rustc_mir_dataflow::{ self as df, fmt::DebugWithContext, Analysis, AnalysisDomain, Results, ResultsVisitor, @@ -33,12 +34,11 @@ use rustc_utils::{ use crate::{ async_support::*, calling_convention::*, - graph::{DepEdge, DepGraph, DepNode}, - graph::{SourceUse, TargetUse}, + graph::{DepEdge, DepGraph, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, try_resolve_function, utils::{self, is_non_default_trait_method, manufacture_substs_for, FnResolution}, - CallChangeCallback, CallChanges, CallInfo, InlineMissReason, SkipCall, + Asyncness, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, SkipCall, }; pub struct MemoPdgConstructor<'tcx> { @@ -118,12 +118,6 @@ impl<'tcx> df::JoinSemiLattice for InstructionState<'tcx> { } } -#[derive(Default, Debug, TyDecodable, TyEncodable)] -pub struct PartialGraph<'tcx> { - nodes: FxHashSet>, - edges: FxHashSet<(DepNode<'tcx>, DepNode<'tcx>, DepEdge)>, -} - impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx>>> for PartialGraph<'tcx> { @@ -245,7 +239,7 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, 
DfAnalysis<'mir, 'tcx> } fn as_arg<'tcx>(node: &DepNode<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Option> { - if node.at.leaf().function != def_id { + if node.at.leaf().function != def_id.to_def_id() { return None; } if node.place.local == RETURN_PLACE { @@ -319,7 +313,7 @@ impl<'tcx> PartialGraph<'tcx> { let constructor = results.analysis.0; let gloc = GlobalLocation { location: location.into(), - function: constructor.def_id, + function: constructor.def_id.to_def_id(), }; let extend_node = |dep: &DepNode<'tcx>| push_call_string_root(dep, gloc); @@ -329,7 +323,11 @@ impl<'tcx> PartialGraph<'tcx> { CallHandling::Ready { calling_convention, descriptor, - } => (descriptor, calling_convention), + generic_args, + } => { + self.monos.insert(CallString::single(gloc), generic_args); + (descriptor, calling_convention) + } CallHandling::ApproxAsyncFn => { // Register a synthetic assignment of `future = (arg0, arg1, ...)`. let rvalue = Rvalue::Aggregate( @@ -584,6 +582,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { self.make_call_string(location), self.tcx(), &self.body, + !self.place_info.children(place).is_empty(), ) } @@ -611,7 +610,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { continue; }; let at = self.make_call_string(ctrl_loc); - let src = DepNode::new(ctrl_place, at, self.tcx(), &self.body); + let src = self.make_dep_node(ctrl_place, ctrl_loc); let edge = DepEdge::control(at, SourceUse::Operand, TargetUse::Assign); out.push((src, edge)); } @@ -630,7 +629,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { fn make_call_string(&self, location: impl Into) -> CallString { CallString::single(GlobalLocation { - function: self.root.def_id().expect_local(), + function: self.root.def_id(), location: location.into(), }) } @@ -740,8 +739,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { // For each last mutated location: last_mut_locs.iter().map(move |last_mut_loc| { // Return @ as an input node. 
- let at = self.make_call_string(*last_mut_loc); - DepNode::new(conflict, at, self.tcx(), &self.body) + self.make_dep_node(conflict, *last_mut_loc) }) }) }) @@ -766,15 +764,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { .iter() .map(|dst| { // Create a destination node for (DST @ CURRENT_LOC). - ( - *dst, - DepNode::new( - *dst, - self.make_call_string(location), - self.tcx(), - &self.body, - ), - ) + (*dst, self.make_dep_node(*dst, location)) }) .collect() } @@ -1010,6 +1000,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { Some(CallHandling::Ready { descriptor, calling_convention, + generic_args, }) } @@ -1030,6 +1021,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { let (child_constructor, calling_convention) = match preamble { CallHandling::Ready { descriptor, + generic_args: _, calling_convention, } => (descriptor, calling_convention), CallHandling::ApproxAsyncFn => { @@ -1132,7 +1124,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { .into_engine(self.tcx(), &self.body) .iterate_to_fixpoint(); - let mut final_state = PartialGraph::default(); + let mut final_state = PartialGraph::new(Asyncness::No); analysis.visit_reachable_with(&self.body, &mut final_state); @@ -1267,6 +1259,7 @@ impl<'tcx> TransformCallString for PartialGraph<'tcx> { fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { let recurse_node = |n: &DepNode<'tcx>| n.transform_call_string(&f); Self { + asyncness: self.asyncness, nodes: self.nodes.iter().map(recurse_node).collect(), edges: self .edges @@ -1279,6 +1272,11 @@ impl<'tcx> TransformCallString for PartialGraph<'tcx> { ) }) .collect(), + monos: self + .monos + .iter() + .map(|(cs, args)| (f(*cs), *args)) + .collect(), } } } @@ -1305,15 +1303,15 @@ pub(crate) fn push_call_string_root( old.transform_call_string(|c| c.push_front(new_root)) } -#[derive(TyEncodable)] -pub(crate) struct SubgraphDescriptor<'tcx> { - pub(crate) graph: PartialGraph<'tcx>, +#[derive(TyEncodable, TyDecodable, Debug, Clone)] +pub struct 
SubgraphDescriptor<'tcx> { + pub graph: PartialGraph<'tcx>, pub(crate) parentable_srcs: Vec<(DepNode<'tcx>, Option)>, pub(crate) parentable_dsts: Vec<(DepNode<'tcx>, Option)>, } impl<'tcx> SubgraphDescriptor<'tcx> { - pub(crate) fn to_petgraph(&self) -> DepGraph<'tcx> { + pub fn to_petgraph(&self) -> DepGraph<'tcx> { let domain = &self.graph; let mut graph: DiGraph, DepEdge> = DiGraph::new(); let mut nodes = FxHashMap::default(); @@ -1352,6 +1350,7 @@ enum CallHandling<'tcx, 'a> { Ready { calling_convention: CallingConvention<'tcx, 'a>, descriptor: Rc>, + generic_args: GenericArgsRef<'tcx>, }, ApproxAsyncSM(ApproximationHandler<'tcx, 'a>), } diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 922e953c00..1d3b307dfc 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -1,25 +1,42 @@ //! The representation of the PDG. -use std::{fmt, hash::Hash, ops::Deref, path::Path}; +use std::{ + fmt::{self, Display}, + hash::Hash, + path::Path, +}; use flowistry_pdg::CallString; use internment::Intern; use petgraph::{dot, graph::DiGraph}; -use rustc_macros::{Decodable, Encodable}; +use rustc_abi::VariantIdx; +use rustc_hash::{FxHashMap, FxHashSet}; +use rustc_hir::def_id::DefIndex; +use rustc_index::IndexVec; +use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ - mir::{Body, Place}, - ty::{TyCtxt, TyDecoder, TyEncoder}, + mir::{ + BasicBlock, Body, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, Place, + }, + ty::{GenericArgs, GenericArgsRef, Ty, TyCtxt}, +}; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use rustc_span::{ + def_id::{DefId, DefPathHash}, + Span, }; -use rustc_serialize::{Decodable, Encodable}; use rustc_utils::PlaceExt; -pub use flowistry_pdg::{SourceUse, TargetUse}; +pub use flowistry_pdg::{RichLocation, SourceUse, TargetUse}; +use serde::{Deserialize, Serialize}; + 
+use crate::Asyncness; /// A node in the program dependency graph. /// /// Represents a place at a particular call-string. /// The place is in the body of the root of the call-string. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, TyEncodable, TyDecodable)] pub struct DepNode<'tcx> { /// A place in memory in a particular body. pub place: Place<'tcx>, @@ -30,26 +47,10 @@ pub struct DepNode<'tcx> { /// Pretty representation of the place. /// This is cached as an interned string on [`DepNode`] because to compute it later, /// we would have to regenerate the entire monomorphized body for a given place. - pub(crate) place_pretty: Option>, -} - -impl<'tcx, E: TyEncoder>> Encodable for DepNode<'tcx> { - fn encode(&self, e: &mut E) { - self.place.encode(e); - self.at.encode(e); - let str: Option<&String> = self.place_pretty.as_ref().map(Deref::deref); - str.encode(e); - } -} - -impl<'tcx, D: TyDecoder>> Decodable for DepNode<'tcx> { - fn decode(d: &mut D) -> Self { - Self { - place: Decodable::decode(d), - at: Decodable::decode(d), - place_pretty: >::decode(d).map(|s| Intern::new(s)), - } - } + pub(crate) place_pretty: Option, + /// Does the PDG track subplaces of this place? + pub is_split: bool, + pub span: Span, } impl PartialEq for DepNode<'_> { @@ -61,6 +62,8 @@ impl PartialEq for DepNode<'_> { place, at, place_pretty: _, + span: _, + is_split: _, } = *self; (place, at).eq(&(other.place, other.at)) } @@ -77,6 +80,8 @@ impl Hash for DepNode<'_> { place, at, place_pretty: _, + span: _, + is_split: _, } = self; (place, at).hash(state) } @@ -87,11 +92,29 @@ impl<'tcx> DepNode<'tcx> { /// /// The `tcx` and `body` arguments are used to precompute a pretty string /// representation of the [`DepNode`]. 
- pub fn new(place: Place<'tcx>, at: CallString, tcx: TyCtxt<'tcx>, body: &Body<'tcx>) -> Self { + pub fn new( + place: Place<'tcx>, + at: CallString, + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + is_split: bool, + ) -> Self { + let i = at.leaf(); + let span = match i.location { + RichLocation::Location(loc) => { + let expanded_span = body + .stmt_at(loc) + .either(|s| s.source_info.span, |t| t.source_info.span); + tcx.sess.source_map().stmt_span(expanded_span, body.span) + } + RichLocation::Start | RichLocation::End => tcx.def_span(i.function), + }; DepNode { place, at, - place_pretty: place.to_string(tcx, body).map(Intern::new), + place_pretty: place.to_string(tcx, body).map(InternedString::new), + span, + is_split, } } } @@ -99,7 +122,7 @@ impl<'tcx> DepNode<'tcx> { impl DepNode<'_> { /// Returns a pretty string representation of the place, if one exists. pub fn place_pretty(&self) -> Option<&str> { - self.place_pretty.map(|s| s.as_ref().as_str()) + self.place_pretty.as_ref().map(|s| s.as_str()) } } @@ -210,3 +233,74 @@ impl<'tcx> DepGraph<'tcx> { rustc_utils::mir::body::run_dot(path.as_ref(), graph_dot.into_bytes()) } } + +#[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd, Debug, Serialize, Deserialize)] +pub struct InternedString(Intern); + +impl Display for InternedString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl InternedString { + pub fn new(s: String) -> Self { + Self(Intern::new(s)) + } + + pub fn as_str(&self) -> &str { + &**self.0 + } +} + +impl From for InternedString { + fn from(value: String) -> Self { + Self::new(value) + } +} + +impl From<&'_ str> for InternedString { + fn from(value: &'_ str) -> Self { + Self(Intern::from_ref(value)) + } +} + +impl std::ops::Deref for InternedString { + type Target = String; + + fn deref(&self) -> &Self::Target { + &*self.0 + } +} + +impl Encodable for InternedString { + fn encode(&self, e: &mut E) { + let s: &String = &*self.0; + s.encode(e); + } +} + 
+impl Decodable for InternedString { + fn decode(d: &mut D) -> Self { + Self(Intern::new(String::decode(d))) + } +} + +#[derive(Debug, Clone, TyDecodable, TyEncodable)] +pub struct PartialGraph<'tcx> { + pub nodes: FxHashSet>, + pub edges: FxHashSet<(DepNode<'tcx>, DepNode<'tcx>, DepEdge)>, + pub monos: FxHashMap>, + pub asyncness: Asyncness, +} + +impl<'tcx> PartialGraph<'tcx> { + pub fn new(asyncness: Asyncness) -> Self { + Self { + nodes: Default::default(), + edges: Default::default(), + monos: Default::default(), + asyncness, + } + } +} diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index f6603acd3b..abe02df286 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -17,13 +17,14 @@ extern crate rustc_type_ir; pub use utils::FnResolution; -use self::graph::DepGraph; -pub use async_support::{determine_async, is_async_trait_fn, match_async_trait_assign}; +pub use self::graph::DepGraph; +pub use async_support::{determine_async, is_async_trait_fn, match_async_trait_assign, Asyncness}; pub mod callback; pub use crate::construct::MemoPdgConstructor; pub use callback::{ CallChangeCallback, CallChangeCallbackFn, CallChanges, CallInfo, InlineMissReason, SkipCall, }; +pub use construct::SubgraphDescriptor; use rustc_middle::ty::TyCtxt; pub use utils::{is_non_default_trait_method, try_resolve_function}; @@ -31,6 +32,7 @@ mod async_support; mod calling_convention; mod construct; pub mod graph; +pub mod meta; mod mutation; mod utils; diff --git a/crates/flowistry_pdg_construction/src/meta.rs b/crates/flowistry_pdg_construction/src/meta.rs new file mode 100644 index 0000000000..5ca8256523 --- /dev/null +++ b/crates/flowistry_pdg_construction/src/meta.rs @@ -0,0 +1,56 @@ +use flowistry_pdg::{CallString, RichLocation}; +use rustc_hash::FxHashMap; +use rustc_hir::{ + def_id::{CrateNum, DefId, DefIndex, LocalDefId}, + intravisit::{self, FnKind}, + BodyId, +}; +use 
rustc_index::IndexVec; +use rustc_macros::{TyDecodable, TyEncodable}; +use rustc_middle::{ + hir::nested_filter::OnlyBodies, + mir::{ + BasicBlock, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, + TerminatorKind, + }, + ty::{GenericArgsRef, TyCtxt}, +}; +use rustc_span::Span; +use rustc_utils::{cache::Cache, mir::borrowck_facts}; + +use crate::{ + construct::SubgraphDescriptor, Asyncness, CallChangeCallback, DepGraph, MemoPdgConstructor, +}; + +pub struct MetadataCollector { + targets: Vec, +} + +impl MetadataCollector { + pub fn add_target(&mut self, target: LocalDefId) { + self.targets.push(target) + } + + pub fn into_metadata<'tcx>( + self, + tcx: TyCtxt<'tcx>, + ) -> FxHashMap> { + let constructor = MemoPdgConstructor::new(tcx); + self.targets + .into_iter() + .map(|t| { + ( + t.local_def_index, + (*constructor + .construct_for(crate::FnResolution::Partial(t.to_def_id())) + .unwrap()) + .clone(), + ) + }) + .collect::>() + } + + pub fn new() -> Self { + Self { targets: vec![] } + } +} diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index c3eb736ada..be5533277a 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -6,6 +6,7 @@ use itertools::Itertools; use log::{debug, trace}; use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hir::def_id::DefId; +use rustc_macros::{TyDecodable, TyEncodable}; use rustc_middle::{ mir::{ tcx::PlaceTy, Body, HasLocalDecls, Local, Location, Place, ProjectionElem, Statement, @@ -19,7 +20,7 @@ use rustc_span::ErrorGuaranteed; use rustc_type_ir::{fold::TypeFoldable, AliasKind}; use rustc_utils::{BodyExt, PlaceExt}; -#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)] +#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug, TyDecodable, TyEncodable)] pub enum FnResolution<'tcx> { Final(ty::Instance<'tcx>), Partial(DefId), diff --git a/crates/flowistry_pdg_construction/tests/pdg.rs 
b/crates/flowistry_pdg_construction/tests/pdg.rs index 4a0a142c76..61eea77d14 100644 --- a/crates/flowistry_pdg_construction/tests/pdg.rs +++ b/crates/flowistry_pdg_construction/tests/pdg.rs @@ -91,8 +91,10 @@ fn connects<'tcx>( .edge_indices() .filter_map(|edge| { let DepEdge { at, .. } = g.graph[edge]; - let body_with_facts = - borrowck_facts::get_body_with_borrowck_facts(tcx, at.leaf().function); + let body_with_facts = borrowck_facts::get_body_with_borrowck_facts( + tcx, + at.leaf().function.expect_local(), + ); let Either::Right(Terminator { kind: TerminatorKind::Call { func, .. }, .. diff --git a/crates/paralegal-flow/src/ana/encoder.rs b/crates/paralegal-flow/src/ana/encoder.rs new file mode 100644 index 0000000000..29a6cb6cb5 --- /dev/null +++ b/crates/paralegal-flow/src/ana/encoder.rs @@ -0,0 +1,78 @@ +use std::path::Path; + +use rustc_hash::FxHashMap; +use rustc_middle::ty::{self, TyCtxt}; +use rustc_serialize::{opaque::FileEncoder, Encoder}; +use rustc_type_ir::TyEncoder; + +macro_rules! encoder_methods { + ($($name:ident($ty:ty);)*) => { + $(fn $name(&mut self, value: $ty) { + self.file_encoder.$name(value) + })* + } +} + +pub struct ParalegalEncoder<'tcx> { + file_encoder: FileEncoder, + type_shorthands: FxHashMap, usize>, + predicate_shorthands: FxHashMap, usize>, +} + +impl<'tcx> ParalegalEncoder<'tcx> { + pub fn new(path: impl AsRef) -> Self { + Self { + file_encoder: FileEncoder::new(path).unwrap(), + type_shorthands: Default::default(), + predicate_shorthands: Default::default(), + } + } + + pub fn finish(self) { + self.file_encoder.finish().unwrap(); + } +} + +impl<'a, 'tcx> Encoder for ParalegalEncoder<'tcx> { + encoder_methods! 
{ + emit_usize(usize); + emit_u128(u128); + emit_u64(u64); + emit_u32(u32); + emit_u16(u16); + emit_u8(u8); + + emit_isize(isize); + emit_i128(i128); + emit_i64(i64); + emit_i32(i32); + emit_i16(i16); + + emit_raw_bytes(&[u8]); + } +} + +impl<'tcx> TyEncoder for ParalegalEncoder<'tcx> { + type I = TyCtxt<'tcx>; + const CLEAR_CROSS_CRATE: bool = false; + + fn position(&self) -> usize { + self.file_encoder.position() + } + + fn type_shorthands( + &mut self, + ) -> &mut FxHashMap<::Ty, usize> { + &mut self.type_shorthands + } + + fn predicate_shorthands( + &mut self, + ) -> &mut FxHashMap<::PredicateKind, usize> { + &mut self.predicate_shorthands + } + + fn encode_alloc_id(&mut self, alloc_id: &::AllocId) { + unimplemented!() + } +} diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 15796e1a26..9f8871d078 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -1,29 +1,25 @@ use crate::{ - ana::inline_judge::InlineJudge, - ann::MarkerAnnotation, - desc::*, - discover::FnToAnalyze, - rust::{hir::def, *}, - stats::TimedStat, - utils::*, - DefId, HashMap, HashSet, MarkerCtx, + ana::inline_judge::InlineJudge, ann::MarkerAnnotation, desc::*, discover::FnToAnalyze, + stats::TimedStat, utils::*, DefId, HashMap, HashSet, MarkerCtx, }; -use flowistry::mir::placeinfo::PlaceInfo; use flowistry_pdg::SourceUse; use paralegal_spdg::{Node, SPDGStats}; -use rustc_utils::cache::Cache; +use rustc_hir::{def, def_id::LocalDefId}; +use rustc_middle::{ + mir::{self, Location}, + ty::{self, Instance, TyCtxt}, +}; use std::{cell::RefCell, fmt::Display, rc::Rc, time::Instant}; -use self::call_string_resolver::CallStringResolver; - -use super::{default_index, path_for_item, src_loc_for_span, SPDGGenerator}; +use super::{ + default_index, path_for_item, src_loc_for_span, BodyInfo, RustcInstructionKind, SPDGGenerator, +}; use anyhow::{anyhow, bail, Result}; use 
either::Either; use flowistry_pdg_construction::{ graph::{DepEdge, DepEdgeKind, DepGraph, DepNode}, - is_async_trait_fn, match_async_trait_assign, CallChangeCallback, CallChanges, CallInfo, - InlineMissReason, + CallChangeCallback, CallChanges, CallInfo, InlineMissReason, SkipCall::Skip, }; use petgraph::{ @@ -44,7 +40,7 @@ pub struct GraphConverter<'tcx, 'a, C> { /// The flowistry graph we are converting dep_graph: Rc>, /// Same as the ID stored in self.target, but as a local def id - local_def_id: LocalDefId, + local_def_id: DefId, // Mutable fields /// Where we write every [`DefId`] we encounter into. @@ -58,26 +54,18 @@ pub struct GraphConverter<'tcx, 'a, C> { /// The converted graph we are creating spdg: SPDGImpl, marker_assignments: HashMap>, - call_string_resolver: call_string_resolver::CallStringResolver<'tcx>, - place_info_cache: PlaceInfoCache<'tcx>, } -pub type PlaceInfoCache<'tcx> = Rc>>; - impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { /// Initialize a new converter by creating an initial PDG using flowistry. 
pub fn new_with_flowistry( generator: &'a SPDGGenerator<'tcx>, known_def_ids: &'a mut C, target: &'a FnToAnalyze, - place_info_cache: PlaceInfoCache<'tcx>, ) -> Result { - let local_def_id = target.def_id.expect_local(); + let local_def_id = target.def_id; let start = Instant::now(); let dep_graph = Self::create_flowistry_graph(generator, local_def_id)?; - generator - .stats - .record_timed(TimedStat::Flowistry, start.elapsed()); if generator.opts.dbg().dump_flowistry_pdg() { dep_graph.generate_graphviz(format!( @@ -96,8 +84,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { types: Default::default(), spdg: Default::default(), marker_assignments: Default::default(), - call_string_resolver: CallStringResolver::new(generator.tcx, local_def_id), - place_info_cache, }) } @@ -111,7 +97,10 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { /// Is the top-level function (entrypoint) an `async fn` fn entrypoint_is_async(&self) -> bool { - entrypoint_is_async(self.tcx(), self.local_def_id) + self.generator + .flowistry_loader + .get_asyncness(self.local_def_id) + .is_async() } /// Insert this node into the converted graph, return it's auto-assigned id @@ -143,30 +132,24 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { } } - fn place_info(&self, def_id: LocalDefId) -> &PlaceInfo<'tcx> { - self.place_info_cache.get(def_id, |_| { - PlaceInfo::build( - self.tcx(), - def_id.to_def_id(), - self.tcx().body_for_def_id(def_id).unwrap(), - ) - }) - } - /// Find direct annotations on this node and register them in the marker map. 
fn node_annotations(&mut self, old_node: Node, weight: &DepNode<'tcx>) { let leaf_loc = weight.at.leaf(); let node = self.new_node_for(old_node); - let body = &self.tcx().body_for_def_id(leaf_loc.function).unwrap().body; - let graph = self.dep_graph.clone(); + let body = self + .generator + .flowistry_loader + .get_body_info(leaf_loc.function) + .unwrap(); + match leaf_loc.location { RichLocation::Start if matches!(body.local_kind(weight.place.local), mir::LocalKind::Arg) => { - let function_id = leaf_loc.function.to_def_id(); + let function_id = leaf_loc.function; let arg_num = weight.place.local.as_u32() - 1; self.known_def_ids.extend(Some(function_id)); @@ -175,27 +158,16 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { }); } RichLocation::End if weight.place.local == mir::RETURN_PLACE => { - let function_id = leaf_loc.function.to_def_id(); + let function_id = leaf_loc.function; self.known_def_ids.extend(Some(function_id)); self.register_annotations_for_function(node, function_id, |ann| { ann.refinement.on_return() }); } RichLocation::Location(loc) => { - let stmt_at_loc = body.stmt_at(loc); - if let crate::Either::Right( - term @ mir::Terminator { - kind: mir::TerminatorKind::Call { destination, .. }, - .. - }, - ) = stmt_at_loc - { - let res = self.call_string_resolver.resolve(weight.at); - let (fun, ..) = res - .try_monomorphize(self.tcx(), self.tcx().param_env(res.def_id()), term) - .as_instance_and_args(self.tcx()) - .unwrap(); - self.known_def_ids.extend(Some(fun.def_id())); + let instruction = body.instruction_at(loc); + if let RustcInstructionKind::FunctionCall(f) = instruction.kind { + self.known_def_ids.extend(Some(f.id)); // Question: Could a function with no input produce an // output that has aliases? E.g. could some place, where the @@ -203,19 +175,18 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { // this function call be affected/modified by this call? 
If // so, that location would also need to have this marker // attached - let needs_return_markers = weight.place.local == destination.local - || graph - .graph - .edges_directed(old_node, Direction::Incoming) - .any(|e| { - let at = e.weight().at; - #[cfg(debug_assertions)] - assert_edge_location_invariant(self.tcx(), at, body, weight.at); - weight.at == at && e.weight().target_use.is_return() - }); + let needs_return_markers = graph + .graph + .edges_directed(old_node, Direction::Incoming) + .any(|e| { + let at = e.weight().at; + #[cfg(debug_assertions)] + assert_edge_location_invariant(self.tcx(), at, body, weight.at); + weight.at == at && e.weight().target_use.is_return() + }); if needs_return_markers { - self.register_annotations_for_function(node, fun.def_id(), |ann| { + self.register_annotations_for_function(node, f.id, |ann| { ann.refinement.on_return() }); } @@ -224,7 +195,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { let SourceUse::Argument(arg) = e.weight().source_use else { continue; }; - self.register_annotations_for_function(node, fun.def_id(), |ann| { + self.register_annotations_for_function(node, f.id, |ann| { ann.refinement.on_argument().contains(arg as u32).unwrap() }); } @@ -241,24 +212,17 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { place: mir::PlaceRef<'tcx>, ) -> Option> { let tcx = self.tcx(); - let locations = at.iter_from_root().collect::>(); - let (last, mut rest) = locations.split_last().unwrap(); - - if self.entrypoint_is_async() { - let Some((first, tail)) = rest.split_first() else { - panic!("{at:?}.len() < 2"); - }; - // The body of a top-level `async` function binds a closure to the - // return place `_0`. Here we expect are looking at the statement - // that does this binding. 
- assert!(expect_stmt_at(self.tcx(), *first).is_left()); - rest = tail; - } + let body = self + .generator + .flowistry_loader + .get_body_info(at.leaf().function) + .unwrap(); + let generics = self.generator.flowistry_loader.get_mono(at); // So actually we're going to check the base place only, because // Flowistry sometimes tracks subplaces instead but we want the marker // from the base place. - let place = if self.entrypoint_is_async() && place.local.as_u32() == 1 && rest.len() == 1 { + let place = if self.entrypoint_is_async() && place.local.as_u32() == 1 && at.len() == 2 { if place.projection.is_empty() { return None; } @@ -272,12 +236,20 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { place.local.into() }; - let resolution = self.call_string_resolver.resolve(at); - - // Thread through each caller to recover generic arguments - let body = tcx.body_for_def_id(last.function).unwrap(); - let raw_ty = place.ty(&body.body, tcx); - Some(*resolution.try_monomorphize(tcx, ty::ParamEnv::reveal_all(), &raw_ty)) + let raw_ty = place.ty(body, tcx); + Some( + *FnResolution::Final( + Instance::resolve( + tcx, + ty::ParamEnv::reveal_all(), + at.leaf().function, + generics, + ) + .unwrap() + .unwrap(), + ) + .try_monomorphize(tcx, ty::ParamEnv::reveal_all(), &raw_ty), + ) } /// Fetch annotations item identified by this `id`. 
@@ -305,7 +277,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { .flat_map(|parent| marker_ctx.combined_markers(parent)), ) .filter(|ann| filter(ann)) - .map(|ann| ann.marker), + .map(|ann| Identifier::new_intern(ann.marker.as_str())), ); self.known_def_ids.extend(parent); } @@ -317,8 +289,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { let Some(place_ty) = self.determine_place_type(weight.at, weight.place.as_ref()) else { return; }; - let place_info = self.place_info(weight.at.leaf().function); - let deep = !place_info.children(weight.place).is_empty(); + let deep = !weight.is_split; let mut node_types = self.type_is_marked(place_ty, deep).collect::>(); for (p, _) in weight.place.iter_projections() { if let Some(place_ty) = self.determine_place_type(weight.at, p) { @@ -342,29 +313,12 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { /// `local_def_id`. fn create_flowistry_graph( generator: &SPDGGenerator<'tcx>, - local_def_id: LocalDefId, + def_id: DefId, ) -> Result> { let tcx = generator.tcx; let opts = generator.opts; - if opts.dbg().dump_mir() { - let mut file = std::fs::File::create(format!( - "{}.mir", - tcx.def_path_str(local_def_id.to_def_id()) - ))?; - mir::pretty::write_mir_fn( - tcx, - &tcx.body_for_def_id_default_policy(local_def_id) - .ok_or_else(|| anyhow!("Body not found"))? - .body, - &mut |_, _| Ok(()), - &mut file, - )? 
- } - let Ok(pdg) = generator - .flowistry_constructor - .construct_graph(local_def_id) - else { + let Some(pdg) = generator.flowistry_loader.get_pdg(def_id) else { bail!("Failed to construct the graph"); }; @@ -377,11 +331,8 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { self.make_spdg_impl(); let arguments = self.determine_arguments(); let return_ = self.determine_return(); - self.generator - .stats - .record_timed(TimedStat::Conversion, start.elapsed()); SPDG { - path: path_for_item(self.local_def_id.to_def_id(), self.tcx()), + path: path_for_item(self.local_def_id, self.tcx()), graph: self.spdg, id: self.local_def_id, name: Identifier::new(self.target.name()), @@ -409,15 +360,13 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { for (i, weight) in input.node_references() { let at = weight.at.leaf(); - let body = &tcx.body_for_def_id(at.function).unwrap().body; - let node_span = body.local_decls[weight.place.local].source_info.span; self.register_node( i, NodeInfo { at: weight.at, description: format!("{:?}", weight.place), - span: src_loc_for_span(node_span, tcx), + span: src_loc_for_span(weight.span, tcx), }, ); self.node_annotations(i, weight); @@ -519,7 +468,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { fn assert_edge_location_invariant<'tcx>( tcx: TyCtxt<'tcx>, at: CallString, - body: &mir::Body<'tcx>, + body: &BodyInfo<'tcx>, location: CallString, ) { // Normal case. The edge is introduced where the operation happens @@ -530,25 +479,22 @@ fn assert_edge_location_invariant<'tcx>( if let RichLocation::Location(loc) = at.leaf().location { if at.leaf().function == location.leaf().function && matches!( - body.stmt_at(loc), - Either::Right(mir::Terminator { - kind: mir::TerminatorKind::SwitchInt { .. }, - .. 
- }) + body.instruction_at(loc).kind, + RustcInstructionKind::SwitchInt ) { return; } } let mut msg = tcx.sess.struct_span_fatal( - at.leaf().span(tcx), + body.span_of(at.leaf().location), format!( "This operation is performed in a different location: {}", at ), ); msg.span_note( - location.leaf().span(tcx), + body.span_of(location.leaf().location), format!("Expected to originate here: {}", at), ); msg.emit() @@ -649,15 +595,6 @@ fn record_inlining(tracker: &StatStracker, tcx: TyCtxt<'_>, def_id: LocalDefId, } } -/// Find the statement at this location or fail. -fn expect_stmt_at(tcx: TyCtxt, loc: GlobalLocation) -> Either<&mir::Statement, &mir::Terminator> { - let body = &tcx.body_for_def_id(loc.function).unwrap().body; - let RichLocation::Location(loc) = loc.location else { - unreachable!(); - }; - body.stmt_at(loc) -} - /// If `did` is a method of an `impl` of a trait, then return the `DefId` that /// refers to the method on the trait definition. fn get_parent(tcx: TyCtxt, did: DefId) -> Option { @@ -675,97 +612,3 @@ fn get_parent(tcx: TyCtxt, did: DefId) -> Option { .def_id; Some(id) } - -fn entrypoint_is_async(tcx: TyCtxt, local_def_id: LocalDefId) -> bool { - tcx.asyncness(local_def_id).is_async() - || is_async_trait_fn( - tcx, - local_def_id.to_def_id(), - &tcx.body_for_def_id(local_def_id).unwrap().body, - ) -} - -mod call_string_resolver { - //! Resolution of [`CallString`]s to [`FnResolution`]s. - //! - //! This is a separate mod so that we can use encapsulation to preserve the - //! internal invariants of the resolver. - - use flowistry_pdg::{rustc_portable::LocalDefId, CallString}; - use flowistry_pdg_construction::{try_resolve_function, FnResolution}; - use rustc_utils::cache::Cache; - - use crate::{Either, TyCtxt}; - - use super::{map_either, match_async_trait_assign, AsFnAndArgs}; - - /// Cached resolution of [`CallString`]s to [`FnResolution`]s. - /// - /// Only valid for a single controller. Each controller should initialize a - /// new resolver. 
- pub struct CallStringResolver<'tcx> { - cache: Cache>, - tcx: TyCtxt<'tcx>, - entrypoint_is_async: bool, - } - - impl<'tcx> CallStringResolver<'tcx> { - /// Tries to resolve to the monomophized function in which this call - /// site exists. That is to say that `return.def_id() == - /// cs.leaf().function`. - /// - /// Unlike `Self::resolve_internal` this can be called on any valid - /// [`CallString`]. - pub fn resolve(&self, cs: CallString) -> FnResolution<'tcx> { - let (this, opt_prior_loc) = cs.pop(); - if let Some(prior_loc) = opt_prior_loc { - if prior_loc.len() == 1 && self.entrypoint_is_async { - FnResolution::Partial(this.function.to_def_id()) - } else { - self.resolve_internal(prior_loc) - } - } else { - FnResolution::Partial(this.function.to_def_id()) - } - } - - pub fn new(tcx: TyCtxt<'tcx>, entrypoint: LocalDefId) -> Self { - Self { - cache: Default::default(), - tcx, - entrypoint_is_async: super::entrypoint_is_async(tcx, entrypoint), - } - } - - /// This resolves the monomorphized function *being called at* this call - /// site. - /// - /// This function is internal because it panics if `cs.leaf().location` - /// is not either a function call or a statement where an async closure - /// is created and assigned. 
- fn resolve_internal(&self, cs: CallString) -> FnResolution<'tcx> { - *self.cache.get(cs, |_| { - let this = cs.leaf(); - let prior = self.resolve(cs); - - let tcx = self.tcx; - - let base_stmt = super::expect_stmt_at(tcx, this); - let param_env = tcx.param_env_reveal_all_normalized(prior.def_id()); - let normalized = map_either( - base_stmt, - |stmt| prior.try_monomorphize(tcx, param_env, stmt), - |term| prior.try_monomorphize(tcx, param_env, term), - ); - let res = match normalized { - Either::Right(term) => term.as_ref().as_instance_and_args(tcx).unwrap().0, - Either::Left(stmt) => { - let (def_id, generics) = match_async_trait_assign(stmt.as_ref()).unwrap(); - try_resolve_function(tcx, def_id, param_env, generics) - } - }; - res - }) - } - } -} diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index d1ae7aa5ce..9d72821a14 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -5,84 +5,290 @@ //! [`analyze`](SPDGGenerator::analyze). 
use crate::{ - ann::{Annotation, MarkerAnnotation}, + ann::{db::MarkerDatabase, Annotation, MarkerAnnotation}, desc::*, - discover::FnToAnalyze, - rust::{hir::def, *}, + discover::{CollectingVisitor, FnToAnalyze}, stats::{Stats, TimedStat}, utils::*, - DefId, HashMap, HashSet, LogLevelConfig, MarkerCtx, Symbol, + Args, DefId, HashMap, HashSet, LogLevelConfig, MarkerCtx, Symbol, }; -use std::cell::RefCell; use std::rc::Rc; use std::time::{Duration, Instant}; +use std::{cell::RefCell, path::Path}; use anyhow::Result; use either::Either; -use flowistry_pdg_construction::MemoPdgConstructor; +use flowistry_pdg_construction::{ + graph::InternedString, meta::MetadataCollector, Asyncness, DepGraph, MemoPdgConstructor, + SubgraphDescriptor, +}; use itertools::Itertools; use petgraph::visit::GraphBase; + +use rustc_hash::FxHashMap; +use rustc_hir::{ + def, + def_id::{CrateNum, DefIndex, LocalDefId}, + intravisit, +}; +use rustc_index::IndexVec; +use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; +use rustc_middle::{ + hir, + mir::{ + BasicBlock, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, + TerminatorKind, + }, + ty::{self, GenericArgsRef, TyCtxt}, +}; +use rustc_serialize::{opaque::FileEncoder, Decodable, Encodable}; use rustc_span::{FileNameDisplayPreference, Span as RustSpan}; +mod encoder; mod graph_converter; mod inline_judge; use graph_converter::GraphConverter; +use rustc_type_ir::TyEncoder; +use rustc_utils::{cache::Cache, mir::borrowck_facts}; + +use self::{encoder::ParalegalEncoder, inline_judge::InlineJudge}; + +pub struct MetadataLoader<'tcx> { + tcx: TyCtxt<'tcx>, + cache: Cache>>, +} + +pub fn collect_and_emit_metadata<'tcx>( + tcx: TyCtxt<'tcx>, + args: &'static Args, + path: impl AsRef, +) -> (Vec, MarkerCtx<'tcx>) { + let mut collector = CollectingVisitor::new(tcx, args); + collector.run(); + let pdgs = collector.flowistry_collector.into_metadata(tcx); + let meta = Metadata::from_pdgs(tcx, pdgs, &collector.marker_ctx); 
+ meta.write(path); + (collector.functions_to_analyze, collector.marker_ctx.into()) +} + +#[derive(Clone, Debug, TyEncodable, TyDecodable)] +pub struct Metadata<'tcx> { + pub pdgs: FxHashMap>, + pub bodies: FxHashMap>, + pub local_annotations: HashMap>, + pub reachable_markers: HashMap, Box<[InternedString]>>, +} + +impl<'tcx> Metadata<'tcx> { + fn write(&self, path: impl AsRef) { + let mut encoder = ParalegalEncoder::new(path); + self.encode(&mut encoder); + encoder.finish() + } +} + +impl<'tcx> Metadata<'tcx> { + pub fn from_pdgs( + tcx: TyCtxt<'tcx>, + pdgs: FxHashMap>, + markers: &MarkerDatabase<'tcx>, + ) -> Self { + let mut bodies: FxHashMap = Default::default(); + for pdg in pdgs + .values() + .flat_map(|d| d.graph.nodes.iter().flat_map(|n| n.at.iter())) + { + if let Some(local) = pdg.function.as_local() { + let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, local); + let body = &body_with_facts.body; + let body_info = bodies + .entry(local.local_def_index) + .or_insert_with(|| BodyInfo { + arg_count: body.arg_count, + decls: body.local_decls().to_owned(), + instructions: Default::default(), + def_span: tcx.def_span(local), + }); + if let RichLocation::Location(loc) = pdg.location { + let bb = body_info + .instructions + .ensure_contains_elem(loc.block, Default::default); + if bb.len() < loc.statement_index { + bb.resize_with(loc.statement_index, Default::default); + } + bb[loc.statement_index].get_or_insert_with(|| { + body.stmt_at(loc).either( + |s| RustcInstructionInfo { + kind: RustcInstructionKind::Statement, + span: s.source_info.span, + description: InternedString::new(format!("{:?}", s.kind)), + }, + |t| RustcInstructionInfo { + kind: if let Ok((id, ..)) = t.as_fn_and_args(tcx) { + RustcInstructionKind::FunctionCall(FunctionCallInfo { + id, + is_inlined: unimplemented!(), + }) + } else if matches!(t.kind, TerminatorKind::SwitchInt { .. 
}) { + RustcInstructionKind::SwitchInt + } else { + RustcInstructionKind::Terminator + }, + span: t.source_info.span, + description: InternedString::new(format!("{:?}", t.kind)), + }, + ) + }); + } + } + } + let cache_borrow = markers.reachable_markers.borrow(); + Self { + pdgs, + bodies, + local_annotations: markers.local_annotations.clone(), + reachable_markers: (&*cache_borrow) + .iter() + .filter_map(|(k, v)| Some((*k, (**(v.as_ref()?)).clone()))) + .collect(), + } + } +} + +impl<'tcx> MetadataLoader<'tcx> { + pub fn new(tcx: TyCtxt<'tcx>) -> Self { + Self { + tcx, + cache: Default::default(), + } + } + + pub fn get_metadata(&self, key: CrateNum) -> Option<&Metadata<'tcx>> { + self.cache.get(key, |_| unimplemented!()).as_ref() + } + + pub fn get_body_info(&self, key: DefId) -> Option<&BodyInfo<'tcx>> { + let meta = self.get_metadata(key.krate)?; + meta.bodies.get(&key.index) + } + + pub fn get_mono(&self, cs: CallString) -> GenericArgsRef<'tcx> { + let key = cs.root().function; + self.get_metadata(key.krate).unwrap().pdgs[&key.index] + .graph + .monos[&cs] + } + + pub fn get_pdg(&self, key: DefId) -> Option> { + Some( + self.get_metadata(key.krate)? + .pdgs + .get(&key.index)? + .to_petgraph(), + ) + } + + pub fn get_asyncness(&self, key: DefId) -> Asyncness { + (|| { + Some( + self.get_metadata(key.krate)? + .pdgs + .get(&key.index)? 
+ .graph + .asyncness, + ) + })() + .unwrap_or(Asyncness::No) + } +} -use self::{graph_converter::PlaceInfoCache, inline_judge::InlineJudge}; +#[derive(Clone, Debug, TyEncodable, TyDecodable)] +pub struct BodyInfo<'tcx> { + pub arg_count: usize, + pub decls: IndexVec>, + pub instructions: IndexVec>>, + pub def_span: rustc_span::Span, +} + +#[derive(Clone, Copy, Debug, Encodable, Decodable)] +pub struct RustcInstructionInfo { + /// Classification of the instruction + pub kind: RustcInstructionKind, + /// The source code span + pub span: rustc_span::Span, + /// Textual rendering of the MIR + pub description: InternedString, +} + +/// The type of instructions we may encounter +#[derive(Debug, Clone, Copy, Eq, Ord, PartialOrd, PartialEq, Encodable, Decodable)] +pub enum RustcInstructionKind { + /// Some type of statement + Statement, + /// A function call + FunctionCall(FunctionCallInfo), + /// A basic block terminator + Terminator, + /// The switch int terminator + SwitchInt, +} + +impl<'tcx> BodyInfo<'tcx> { + pub fn local_kind(&self, local: Local) -> LocalKind { + let local = local.as_usize(); + assert!(local < self.decls.len()); + if local == 0 { + LocalKind::ReturnPointer + } else if local < self.arg_count + 1 { + LocalKind::Arg + } else { + LocalKind::Temp + } + } + + pub fn instruction_at(&self, location: Location) -> RustcInstructionInfo { + self.instructions[location.block][location.statement_index].unwrap() + } + + pub fn span_of(&self, loc: RichLocation) -> rustc_span::Span { + match loc { + RichLocation::Location(loc) => self.instruction_at(loc).span, + _ => self.def_span, + } + } +} + +impl<'tcx> HasLocalDecls<'tcx> for BodyInfo<'tcx> { + fn local_decls(&self) -> &LocalDecls<'tcx> { + &self.decls + } +} /// Read-only database of information the analysis needs. /// /// [`Self::analyze`] serves as the main entrypoint to SPDG generation. 
pub struct SPDGGenerator<'tcx> { - pub inline_judge: InlineJudge<'tcx>, pub opts: &'static crate::Args, pub tcx: TyCtxt<'tcx>, - stats: Stats, - place_info_cache: PlaceInfoCache<'tcx>, - flowistry_constructor: MemoPdgConstructor<'tcx>, + marker_ctx: MarkerCtx<'tcx>, + flowistry_loader: MetadataLoader<'tcx>, } impl<'tcx> SPDGGenerator<'tcx> { - pub fn new( - marker_ctx: MarkerCtx<'tcx>, - opts: &'static crate::Args, - tcx: TyCtxt<'tcx>, - stats: Stats, - ) -> Self { - let mut flowistry_constructor = MemoPdgConstructor::new(tcx); - let stat_wrap = Rc::new(RefCell::new(( - SPDGStats { - unique_functions: 0, - unique_locs: 0, - analyzed_functions: 0, - analyzed_locs: 0, - inlinings_performed: 0, - construction_time: Duration::ZERO, - conversion_time: Duration::ZERO, - }, - Default::default(), - ))); - flowistry_constructor - .with_call_change_callback(graph_converter::MyCallback { - judge: InlineJudge::new(marker_ctx.clone(), tcx, opts.anactrl()), - stat_wrap, - tcx, - }) - .with_dump_mir(opts.dbg().dump_mir()); + pub fn new(marker_ctx: MarkerCtx<'tcx>, opts: &'static crate::Args, tcx: TyCtxt<'tcx>) -> Self { + let mut flowistry_loader = MetadataLoader::new(tcx); Self { - inline_judge: InlineJudge::new(marker_ctx, tcx, opts.anactrl()), + marker_ctx, opts, tcx, - stats, - place_info_cache: Default::default(), - flowistry_constructor, + flowistry_loader, } } pub fn marker_ctx(&self) -> &MarkerCtx<'tcx> { - self.inline_judge.marker_ctx() + &self.marker_ctx } /// Perform the analysis for one `#[paralegal_flow::analyze]` annotated function and @@ -96,14 +302,9 @@ impl<'tcx> SPDGGenerator<'tcx> { known_def_ids: &mut impl Extend, ) -> Result<(Endpoint, SPDG)> { info!("Handling target {}", self.tcx.def_path_str(target.def_id)); - let local_def_id = target.def_id.expect_local(); - - let converter = GraphConverter::new_with_flowistry( - self, - known_def_ids, - target, - self.place_info_cache.clone(), - )?; + let local_def_id = target.def_id; + + let converter = 
GraphConverter::new_with_flowistry(self, known_def_ids, target)?; let spdg = converter.make_spdg(); Ok((local_def_id, spdg)) @@ -145,8 +346,7 @@ impl<'tcx> SPDGGenerator<'tcx> { .map(|controllers| { let start = Instant::now(); let desc = self.make_program_description(controllers, known_def_ids, &targets); - self.stats - .record_timed(TimedStat::Conversion, start.elapsed()); + desc }) } @@ -166,7 +366,7 @@ impl<'tcx> SPDGGenerator<'tcx> { let inlined_functions = instruction_info .keys() - .filter_map(|l| l.function.to_def_id().as_local()) + .filter_map(|l| l.function.as_local()) .collect::>(); let analyzed_spans = inlined_functions .iter() @@ -237,7 +437,6 @@ impl<'tcx> SPDGGenerator<'tcx> { .all_annotations() .filter_map(|m| m.1.either(Annotation::as_marker, Some)) .count() as u32, - rustc_time: self.stats.get_timed(TimedStat::Rustc), dedup_locs, dedup_functions, seen_functions, @@ -252,58 +451,45 @@ impl<'tcx> SPDGGenerator<'tcx> { &self, controllers: &HashMap, ) -> HashMap { - let all_instructions = controllers - .values() - .flat_map(|v| { - v.graph - .node_weights() - .flat_map(|n| n.at.iter()) - .chain(v.graph.edge_weights().flat_map(|e| e.at.iter())) - }) - .collect::>(); + let all_instructions = controllers.values().flat_map(|v| v.graph.node_weights()); all_instructions .into_iter() - .map(|i| { - let body = &self.tcx.body_for_def_id(i.function).unwrap().body; - - let (kind, description) = match i.location { - RichLocation::End => (InstructionKind::Return, "start".to_owned()), - RichLocation::Start => (InstructionKind::Start, "end".to_owned()), - RichLocation::Location(loc) => match body.stmt_at(loc) { - crate::Either::Right(term) => { - let kind = if let Ok((id, ..)) = term.as_fn_and_args(self.tcx) { - InstructionKind::FunctionCall(FunctionCallInfo { - id, - is_inlined: id.is_local(), - }) - } else { - InstructionKind::Terminator - }; - (kind, format!("{:?}", term.kind)) - } - crate::Either::Left(stmt) => { - (InstructionKind::Statement, format!("{:?}", 
stmt.kind)) - } - }, - }; - let rust_span = match i.location { + .map(|n| { + let body = self + .flowistry_loader + .get_body_info(n.at.leaf().function) + .unwrap(); + let (kind, description, span) = match n.at.leaf().location { + RichLocation::End => { + (InstructionKind::Return, "start".to_owned(), body.def_span) + } + RichLocation::Start => { + (InstructionKind::Start, "end".to_owned(), body.def_span) + } RichLocation::Location(loc) => { - let expanded_span = match body.stmt_at(loc) { - crate::Either::Right(term) => term.source_info.span, - crate::Either::Left(stmt) => stmt.source_info.span, - }; - self.tcx - .sess - .source_map() - .stmt_span(expanded_span, body.span) + let instruction = body.instruction_at(loc); + ( + match instruction.kind { + RustcInstructionKind::SwitchInt => InstructionKind::SwitchInt, + RustcInstructionKind::FunctionCall(c) => { + InstructionKind::FunctionCall(FunctionCallInfo { + is_inlined: c.is_inlined, + id: c.id, + }) + } + RustcInstructionKind::Statement => InstructionKind::Statement, + RustcInstructionKind::Terminator => InstructionKind::Terminator, + }, + (*instruction.description).clone(), + instruction.span, + ) } - RichLocation::Start | RichLocation::End => self.tcx.def_span(i.function), }; ( - i, + n.at.leaf(), InstructionInfo { kind, - span: src_loc_for_span(rust_span, self.tcx), + span: src_loc_for_span(span, self.tcx), description: Identifier::new_intern(&description), }, ) @@ -342,8 +528,11 @@ impl<'tcx> SPDGGenerator<'tcx> { k, TypeDescription { rendering, - otypes: otypes.into(), - markers, + otypes: otypes.into_iter().map(|ot| ot.def_id).collect(), + markers: markers + .into_iter() + .map(|i| Identifier::new_intern(i.as_str())) + .collect(), }, ) }) @@ -419,7 +608,7 @@ fn path_for_item(id: DefId, tcx: TyCtxt) -> Box<[Identifier]> { let def_path = tcx.def_path(id); std::iter::once(Identifier::new(tcx.crate_name(def_path.krate))) .chain(def_path.data.iter().filter_map(|segment| { - use 
hir::definitions::DefPathDataName::*; + use rustc_hir::definitions::DefPathDataName::*; match segment.data.name() { Named(sym) => Some(Identifier::new(sym)), Anon { .. } => None, @@ -440,7 +629,7 @@ fn def_info_for_item(id: DefId, markers: &MarkerCtx, tcx: TyCtxt) -> DefInfo { .combined_markers(id) .cloned() .map(|ann| paralegal_spdg::MarkerAnnotation { - marker: ann.marker, + marker: Identifier::new_intern(ann.marker.as_str()), on_return: ann.refinement.on_return(), on_argument: ann.refinement.on_argument(), }) diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index d11541a6b7..e0325884cc 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -11,20 +11,19 @@ //! All interactions happen through the central database object: [`MarkerCtx`]. use crate::{ - ann::{Annotation, MarkerAnnotation}, + ann::{Annotation, MarkerAnnotation, OType}, args::{Args, MarkerControl}, - ast::Attribute, consts, - hir::def::DefKind, - mir, ty, utils::{ resolve::expect_resolve_string_to_def_id, AsFnAndArgs, FnResolution, FnResolutionExt, IntoDefId, IntoHirId, MetaItemMatch, TyCtxtExt, TyExt, }, DefId, Either, HashMap, HashSet, LocalDefId, TyCtxt, }; -use flowistry_pdg_construction::determine_async; -use paralegal_spdg::Identifier; +use flowistry_pdg_construction::{determine_async, graph::InternedString}; +use rustc_ast::Attribute; +use rustc_hir::def::DefKind; +use rustc_middle::{mir, ty}; use rustc_utils::cache::Cache; use std::rc::Rc; @@ -165,7 +164,7 @@ impl<'tcx> MarkerCtx<'tcx> { !self.get_reachable_markers(res).is_empty() } - pub fn get_reachable_markers(&self, res: FnResolution<'tcx>) -> &[Identifier] { + pub fn get_reachable_markers(&self, res: FnResolution<'tcx>) -> &[InternedString] { self.db() .reachable_markers .get_maybe_recursive(res, |_| self.compute_reachable_markers(res)) @@ -175,7 +174,7 @@ impl<'tcx> MarkerCtx<'tcx> { fn get_reachable_and_self_markers( &self, res: FnResolution<'tcx>, - ) -> impl 
Iterator + '_ { + ) -> impl Iterator + '_ { if res.def_id().is_local() { let mut direct_markers = self .combined_markers(res.def_id()) @@ -199,7 +198,7 @@ impl<'tcx> MarkerCtx<'tcx> { /// If the transitive marker cache did not contain the answer, this is what /// computes it. - fn compute_reachable_markers(&self, res: FnResolution<'tcx>) -> Box<[Identifier]> { + fn compute_reachable_markers(&self, res: FnResolution<'tcx>) -> Box<[InternedString]> { trace!("Computing reachable markers for {res:?}"); let Some(local) = res.def_id().as_local() else { trace!(" Is not local"); @@ -218,7 +217,7 @@ impl<'tcx> MarkerCtx<'tcx> { self.tcx().param_env_reveal_all_normalized(local), &body.body, ); - if let Some((async_fn, _)) = determine_async(self.tcx(), local, &mono_body) { + if let Some((async_fn, ..)) = determine_async(self.tcx(), local, &mono_body) { return self.get_reachable_markers(async_fn).into(); } mono_body @@ -237,7 +236,7 @@ impl<'tcx> MarkerCtx<'tcx> { &self, local_decls: &mir::LocalDecls, terminator: &mir::Terminator<'tcx>, - ) -> impl Iterator + '_ { + ) -> impl Iterator + '_ { trace!( " Finding reachable markers for terminator {:?}", terminator.kind @@ -450,7 +449,7 @@ impl<'tcx> MarkerCtx<'tcx> { } } -pub type TypeMarkerElem = (DefId, Identifier); +pub type TypeMarkerElem = (DefId, InternedString); pub type TypeMarkers = [TypeMarkerElem]; /// The structure inside of [`MarkerCtx`]. @@ -458,10 +457,10 @@ pub struct MarkerDatabase<'tcx> { tcx: TyCtxt<'tcx>, /// Cache for parsed local annotations. They are created with /// [`MarkerCtx::retrieve_local_annotations_for`]. - local_annotations: HashMap>, + pub(crate) local_annotations: HashMap>, external_annotations: ExternalMarkers, /// Cache whether markers are reachable transitively. 
- reachable_markers: Cache, Box<[Identifier]>>, + pub(crate) reachable_markers: Cache, Box<[InternedString]>>, /// Configuration options config: &'static MarkerControl, type_markers: Cache, Box>, @@ -516,7 +515,11 @@ fn try_parse_annotation( warn!("The `paralegal_flow::label` annotation is deprecated, use `paralegal_flow::marker` instead"); one(Annotation::Marker(ann_match_fn(i)?)) } else if let Some(i) = a.match_get_ref(&consts::OTYPE_MARKER) { - Either::Right(otype_ann_match(i, tcx)?.into_iter().map(Annotation::OType)) + Either::Right( + otype_ann_match(i, tcx)? + .into_iter() + .map(|def_id| Annotation::OType(OType { def_id })), + ) } else if let Some(i) = a.match_get_ref(&consts::EXCEPTION_MARKER) { one(Annotation::Exception(match_exception(i)?)) } else { diff --git a/crates/paralegal-flow/src/ann/mod.rs b/crates/paralegal-flow/src/ann/mod.rs index 05b772240d..64096c7b3d 100644 --- a/crates/paralegal-flow/src/ann/mod.rs +++ b/crates/paralegal-flow/src/ann/mod.rs @@ -1,3 +1,6 @@ +use flowistry_pdg_construction::graph::InternedString; +use rustc_macros::{Decodable, Encodable}; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use serde::{Deserialize, Serialize}; use paralegal_spdg::{rustc_proxies, tiny_bitset_pretty, Identifier, TinyBitSet, TypeId}; @@ -11,12 +14,47 @@ pub mod parse; /// For convenience the match methods [`Self::as_marker`], [`Self::as_otype`] /// and [`Self::as_exception`] are provided. These are particularly useful in /// conjunction with e.g. 
[`Iterator::filter_map`] -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Deserialize, Serialize, strum::EnumIs)] +#[derive( + PartialEq, + Eq, + PartialOrd, + Ord, + Debug, + Clone, + Deserialize, + Serialize, + strum::EnumIs, + Encodable, + Decodable, +)] pub enum Annotation { Marker(MarkerAnnotation), - OType(#[serde(with = "rustc_proxies::DefId")] TypeId), + OType(OType), Exception(ExceptionAnnotation), } +#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy, Deserialize, Serialize)] +pub struct OType { + #[serde(with = "rustc_proxies::DefId")] + pub def_id: TypeId, +} + +impl Encodable for OType { + fn encode(&self, s: &mut E) { + rustc_middle::ty::tls::with(|tcx| tcx.def_path_hash(self.def_id)).encode(s) + } +} + +impl Decodable for OType { + fn decode(d: &mut D) -> Self { + Self { + def_id: rustc_middle::ty::tls::with(|tcx| { + tcx.def_path_hash_to_def_id(Decodable::decode(d), &mut || { + panic!("Could not resolve def path") + }) + }), + } + } +} impl Annotation { /// If this is an [`Annotation::Marker`], returns the underlying [`MarkerAnnotation`]. @@ -30,7 +68,7 @@ impl Annotation { /// If this is an [`Annotation::OType`], returns the underlying [`TypeId`]. pub fn as_otype(&self) -> Option { match self { - Annotation::OType(t) => Some(*t), + Annotation::OType(t) => Some(t.def_id), _ => None, } } @@ -46,7 +84,9 @@ impl Annotation { pub type VerificationHash = u128; -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Serialize, Deserialize)] +#[derive( + PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Serialize, Deserialize, Encodable, Decodable, +)] pub struct ExceptionAnnotation { /// The value of the verification hash we found in the annotation. Is `None` /// if there was no verification hash in the annotation. @@ -54,10 +94,12 @@ pub struct ExceptionAnnotation { } /// A marker annotation and its refinements. 
-#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Serialize, Deserialize)] +#[derive( + PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Serialize, Deserialize, Encodable, Decodable, +)] pub struct MarkerAnnotation { /// The (unchanged) name of the marker as provided by the user - pub marker: Identifier, + pub marker: InternedString, #[serde(flatten)] pub refinement: MarkerRefinement, } @@ -69,7 +111,9 @@ fn const_false() -> bool { /// Refinements in the marker targeting. The default (no refinement provided) is /// `on_argument == vec![]` and `on_return == false`, which is also what is /// returned from [`Self::empty`]. -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Deserialize, Serialize)] +#[derive( + PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Deserialize, Serialize, Encodable, Decodable, +)] pub struct MarkerRefinement { #[serde(default, with = "tiny_bitset_pretty")] on_argument: TinyBitSet, diff --git a/crates/paralegal-flow/src/ann/parse.rs b/crates/paralegal-flow/src/ann/parse.rs index 83c04f8d58..c91fb2477b 100644 --- a/crates/paralegal-flow/src/ann/parse.rs +++ b/crates/paralegal-flow/src/ann/parse.rs @@ -12,14 +12,15 @@ use super::{ ExceptionAnnotation, MarkerAnnotation, MarkerRefinement, MarkerRefinementKind, VerificationHash, }; use crate::{ - consts, - rust::*, - utils, + consts, utils, utils::{write_sep, Print, TinyBitSet}, Symbol, }; -use ast::{token, tokenstream}; +use flowistry_pdg_construction::graph::InternedString; use paralegal_spdg::Identifier; +use rustc_ast::{token, tokenstream, AttrArgs}; +use rustc_hir::def_id::DefId; +use rustc_middle::ty::TyCtxt; use token::*; use tokenstream::*; @@ -214,9 +215,9 @@ pub fn tiny_bitset(i: I) -> R { } /// Parser for the payload of the `#[paralegal_flow::output_type(...)]` annotation. 
-pub(crate) fn otype_ann_match(ann: &ast::AttrArgs, tcx: TyCtxt) -> Result, String> { +pub(crate) fn otype_ann_match(ann: &AttrArgs, tcx: TyCtxt) -> Result, String> { match ann { - ast::AttrArgs::Delimited(dargs) => { + AttrArgs::Delimited(dargs) => { let mut p = nom::multi::separated_list0( assert_token(TokenKind::Comma), nom::multi::separated_list0( @@ -315,7 +316,7 @@ pub(crate) fn ann_match_fn(ann: &rustc_ast::AttrArgs) -> Result( diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index f59f350033..4c52d7329f 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -3,16 +3,15 @@ //! //! Essentially this discovers all local `paralegal_flow::*` annotations. -use crate::{ - ana::SPDGGenerator, ann::db::MarkerDatabase, consts, desc::*, rust::*, stats::Stats, utils::*, -}; +use crate::{ana::SPDGGenerator, ann::db::MarkerDatabase, consts, desc::*, stats::Stats, utils::*}; -use hir::{ - def_id::DefId, +use flowistry_pdg_construction::meta::MetadataCollector; +use rustc_hir::{ + def_id::{DefId, LocalDefId}, intravisit::{self, FnKind}, BodyId, }; -use rustc_middle::hir::nested_filter::OnlyBodies; +use rustc_middle::{hir::nested_filter::OnlyBodies, ty::TyCtxt}; use rustc_span::{symbol::Ident, Span, Symbol}; use anyhow::Result; @@ -36,9 +35,9 @@ pub struct CollectingVisitor<'tcx> { /// later perform the analysis pub functions_to_analyze: Vec, - stats: Stats, - pub marker_ctx: MarkerDatabase<'tcx>, + + pub flowistry_collector: MetadataCollector, } /// A function we will be targeting to analyze with @@ -56,7 +55,7 @@ impl FnToAnalyze { } impl<'tcx> CollectingVisitor<'tcx> { - pub(crate) fn new(tcx: TyCtxt<'tcx>, opts: &'static crate::Args, stats: Stats) -> Self { + pub(crate) fn new(tcx: TyCtxt<'tcx>, opts: &'static crate::Args) -> Self { let functions_to_analyze = opts .anactrl() .selected_targets() @@ -78,24 +77,22 @@ impl<'tcx> CollectingVisitor<'tcx> { opts, functions_to_analyze, 
marker_ctx: MarkerDatabase::init(tcx, opts), - stats, + flowistry_collector: MetadataCollector::new(), } } /// After running the discovery with `visit_all_item_likes_in_crate`, create /// the read-only [`SPDGGenerator`] upon which the analysis will run. fn into_generator(self) -> SPDGGenerator<'tcx> { - SPDGGenerator::new(self.marker_ctx.into(), self.opts, self.tcx, self.stats) + SPDGGenerator::new(self.marker_ctx.into(), self.opts, self.tcx) } /// Driver function. Performs the data collection via visit, then calls /// [`Self::analyze`] to construct the Forge friendly description of all /// endpoints. - pub fn run(mut self) -> Result { + pub fn run(&mut self) { let tcx = self.tcx; - tcx.hir().visit_all_item_likes_in_crate(&mut self); - let targets = std::mem::take(&mut self.functions_to_analyze); - self.into_generator().analyze(targets) + tcx.hir().visit_all_item_likes_in_crate(self) } /// Does the function named by this id have the `paralegal_flow::analyze` annotation @@ -133,13 +130,16 @@ impl<'tcx> intravisit::Visitor<'tcx> for CollectingVisitor<'tcx> { id: LocalDefId, ) { match &kind { - FnKind::ItemFn(name, _, _) | FnKind::Method(name, _) - if self.should_analyze_function(id) => - { - self.functions_to_analyze.push(FnToAnalyze { - name: *name, - def_id: id.to_def_id(), - }); + FnKind::ItemFn(name, _, _) | FnKind::Method(name, _) => { + if self.should_analyze_function(id) { + self.functions_to_analyze.push(FnToAnalyze { + name: *name, + def_id: id.to_def_id(), + }); + } + if self.tcx.generics_of(id).count() == 0 { + self.flowistry_collector.add_target(id) + } } _ => (), } diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 078d32a5af..f888b6c737 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -23,48 +23,38 @@ extern crate petgraph; extern crate num_derive; extern crate num_traits; -pub extern crate rustc_index; +extern crate rustc_abi; +extern crate rustc_arena; +extern crate rustc_ast; 
+extern crate rustc_borrowck; +extern crate rustc_data_structures; +extern crate rustc_driver; +extern crate rustc_hash; +extern crate rustc_hir; +extern crate rustc_index; +extern crate rustc_interface; +extern crate rustc_macros; +extern crate rustc_middle; +extern crate rustc_mir_dataflow; +extern crate rustc_query_system; extern crate rustc_serialize; +extern crate rustc_span; +extern crate rustc_target; +extern crate rustc_type_ir; -pub mod rust { - //! Exposes the rustc external crates (this mod is just to tidy things up). - pub extern crate rustc_abi; - pub extern crate rustc_arena; - pub extern crate rustc_ast; - pub extern crate rustc_borrowck; - pub extern crate rustc_data_structures; - pub extern crate rustc_driver; - pub extern crate rustc_hir; - pub extern crate rustc_interface; - pub extern crate rustc_middle; - pub extern crate rustc_mir_dataflow; - pub extern crate rustc_query_system; - pub extern crate rustc_serialize; - pub extern crate rustc_span; - pub extern crate rustc_target; - pub extern crate rustc_type_ir; - pub use super::rustc_index; - pub use rustc_type_ir::sty; - - pub use rustc_ast as ast; - pub mod mir { - pub use super::rustc_abi::FieldIdx as Field; - pub use super::rustc_middle::mir::*; - } - pub use rustc_hir as hir; - pub use rustc_middle::ty; +pub use rustc_type_ir::sty; - pub use rustc_middle::dep_graph::DepGraph; - pub use ty::TyCtxt; +pub use rustc_middle::ty; - pub use hir::def_id::{DefId, LocalDefId}; - pub use hir::BodyId; - pub use mir::Location; -} +pub use rustc_middle::dep_graph::DepGraph; +pub use ty::TyCtxt; + +pub use rustc_hir::def_id::{DefId, LocalDefId}; +pub use rustc_hir::BodyId; +pub use rustc_middle::mir::Location; use args::{ClapArgs, LogLevelConfig}; use desc::utils::write_sep; -use rust::*; use rustc_plugin::CrateFilter; use rustc_utils::mir::borrowck_facts; @@ -93,6 +83,7 @@ pub mod test_utils; pub use paralegal_spdg as desc; +use crate::ana::{collect_and_emit_metadata, SPDGGenerator}; pub use 
crate::ann::db::MarkerCtx; pub use args::{AnalysisCtrl, Args, BuildConfig, DepConfig, DumpArgs, ModelCtrl}; @@ -148,7 +139,7 @@ impl rustc_driver::Callbacks for Callbacks { // that (when retrieving the MIR bodies for instance) fn after_expansion<'tcx>( &mut self, - _compiler: &rustc_interface::interface::Compiler, + compiler: &rustc_interface::interface::Compiler, queries: &'tcx rustc_interface::Queries<'tcx>, ) -> rustc_driver::Compilation { self.stats @@ -158,11 +149,20 @@ impl rustc_driver::Callbacks for Callbacks { .unwrap() .enter(|tcx| { tcx.sess.abort_if_errors(); - let desc = - discover::CollectingVisitor::new(tcx, self.opts, self.stats.clone()).run()?; - info!("All elems walked"); + + let (analysis_targets, mctx) = collect_and_emit_metadata( + tcx, + self.opts, + compiler + .build_output_filenames(tcx.sess, &[]) + .with_extension(".para"), + ); tcx.sess.abort_if_errors(); + let mut gen = SPDGGenerator::new(mctx, self.opts, tcx); + + let desc = gen.analyze(analysis_targets)?; + if self.opts.dbg().dump_spdg() { let out = std::fs::File::create("call-only-flow.gv").unwrap(); paralegal_spdg::dot::dump(&desc, out).unwrap(); diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index 77069981a9..74895005b7 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -15,7 +15,7 @@ use std::process::Command; use paralegal_spdg::{ rustc_portable::DefId, traverse::{generic_flows_to, EdgeSelection}, - DefInfo, EdgeInfo, Node, SPDG, + DefInfo, EdgeInfo, Endpoint, Node, SPDG, }; use flowistry_pdg::rustc_portable::LocalDefId; @@ -212,7 +212,7 @@ pub trait HasGraph<'g>: Sized + Copy { } } - fn ctrl_hashed(self, name: &str) -> LocalDefId { + fn ctrl_hashed(self, name: &str) -> DefId { let candidates = self .graph() .desc @@ -277,7 +277,7 @@ impl PreFrg { #[derive(Clone)] pub struct CtrlRef<'g> { graph: &'g PreFrg, - id: LocalDefId, + id: Endpoint, ctrl: &'g SPDG, } @@ -325,7 +325,7 @@ 
impl<'g> CtrlRef<'g> { } } - pub fn id(&self) -> LocalDefId { + pub fn id(&self) -> Endpoint { self.id } pub fn spdg(&self) -> &'g SPDG { diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index 9ebbf4a8b5..80bb613097 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -2,35 +2,32 @@ extern crate smallvec; use flowistry_pdg::{GlobalLocation, RichLocation}; +use rustc_target::spec::abi::Abi; use thiserror::Error; use smallvec::SmallVec; -use crate::{ - desc::Identifier, - rust::{ - ast, - hir::{ - self, - def::Res, - def_id::{DefId, LocalDefId}, - hir_id::HirId, - BodyId, - }, - mir::{self, Location, Place, ProjectionElem}, - rustc_borrowck::consumers::BodyWithBorrowckFacts, - rustc_data_structures::intern::Interned, - rustc_span::Span as RustSpan, - rustc_span::{symbol::Ident, Span}, - rustc_target::spec::abi::Abi, - ty, - }, - rustc_span::ErrorGuaranteed, - Either, Symbol, TyCtxt, -}; +use crate::{desc::Identifier, rustc_span::ErrorGuaranteed, Either, Symbol, TyCtxt}; + pub use flowistry_pdg_construction::{is_non_default_trait_method, FnResolution}; pub use paralegal_spdg::{ShortHash, TinyBitSet}; +use rustc_ast as ast; +use rustc_borrowck::consumers::BodyWithBorrowckFacts; +use rustc_data_structures::intern::Interned; +use rustc_hir::{ + self as hir, + def::Res, + def_id::{DefId, LocalDefId}, + hir_id::HirId, + BodyId, +}; +use rustc_middle::{ + mir::{self, Location, Place, ProjectionElem}, + ty, +}; +use rustc_span::{symbol::Ident, Span as RustSpan}; + use std::{cmp::Ordering, hash::Hash}; mod print; @@ -844,52 +841,6 @@ impl IntoBodyId for DefId { } } -pub trait Spanned<'tcx> { - fn span(&self, tcx: TyCtxt<'tcx>) -> Span; -} - -impl<'tcx> Spanned<'tcx> for mir::Terminator<'tcx> { - fn span(&self, _tcx: TyCtxt<'tcx>) -> Span { - self.source_info.span - } -} - -impl<'tcx> Spanned<'tcx> for mir::Statement<'tcx> { - fn span(&self, _tcx: TyCtxt<'tcx>) -> Span { - 
self.source_info.span - } -} - -impl<'tcx> Spanned<'tcx> for (&mir::Body<'tcx>, mir::Location) { - fn span(&self, tcx: TyCtxt<'tcx>) -> Span { - self.0 - .stmt_at(self.1) - .either(|e| e.span(tcx), |e| e.span(tcx)) - } -} - -impl<'tcx> Spanned<'tcx> for DefId { - fn span(&self, tcx: TyCtxt<'tcx>) -> Span { - tcx.def_span(*self) - } -} - -impl<'tcx> Spanned<'tcx> for (LocalDefId, mir::Location) { - fn span(&self, tcx: TyCtxt<'tcx>) -> Span { - let body = tcx.body_for_def_id(self.0).unwrap(); - (&body.body, self.1).span(tcx) - } -} - -impl<'tcx> Spanned<'tcx> for GlobalLocation { - fn span(&self, tcx: TyCtxt<'tcx>) -> RustSpan { - match self.location { - RichLocation::Location(loc) => (self.function, loc).span(tcx), - _ => self.function.to_def_id().span(tcx), - } - } -} - pub fn map_either( either: Either, f: impl FnOnce(A) -> C, diff --git a/crates/paralegal-flow/src/utils/resolve.rs b/crates/paralegal-flow/src/utils/resolve.rs index 122e4d078e..b81931c20d 100644 --- a/crates/paralegal-flow/src/utils/resolve.rs +++ b/crates/paralegal-flow/src/utils/resolve.rs @@ -1,12 +1,15 @@ -use crate::{ast, hir, ty, DefId, Symbol, TyCtxt}; use ast::Mutability; -use hir::{ +use rustc_ast as ast; +use rustc_hir::def_id::DefId; +use rustc_hir::{ def::{self, DefKind}, def_id::CrateNum, def_id::LocalDefId, def_id::LOCAL_CRATE, ImplItemRef, ItemKind, Node, PrimTy, TraitItemRef, }; +use rustc_middle::ty::{self, TyCtxt}; +use rustc_span::Symbol; use ty::{fast_reject::SimplifiedType, FloatTy, IntTy, UintTy}; #[derive(Debug, Clone, Copy)] diff --git a/crates/paralegal-policy/src/algo/ahb.rs b/crates/paralegal-policy/src/algo/ahb.rs index b17ea76729..ad8c32f8ff 100644 --- a/crates/paralegal-policy/src/algo/ahb.rs +++ b/crates/paralegal-policy/src/algo/ahb.rs @@ -5,7 +5,7 @@ use std::{collections::HashSet, sync::Arc}; pub use paralegal_spdg::rustc_portable::{DefId, LocalDefId}; -use paralegal_spdg::{GlobalNode, Identifier, Node, SPDGImpl}; +use paralegal_spdg::{Endpoint, GlobalNode, 
Identifier, Node, SPDGImpl}; use anyhow::{ensure, Result}; use itertools::Itertools; @@ -196,7 +196,7 @@ pub enum TraceLevel { struct Tracer<'a> { tree: Box<[Node]>, trace: &'a mut Trace, - ctrl_id: LocalDefId, + ctrl_id: Endpoint, } enum Trace { @@ -279,7 +279,7 @@ impl<'a> Tracer<'a> { trace: &'a mut Trace, node_bound: usize, initials: impl IntoIterator, - ctrl_id: LocalDefId, + ctrl_id: Endpoint, ) -> Self { Self { tree: if matches!(trace, Trace::None(_)) { diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index 24006ab727..18a286f9d6 100644 --- a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -30,8 +30,6 @@ use crate::{assert_warning, diagnostics::DiagnosticsRecorder}; /// User-defined PDG markers. pub type Marker = Identifier; -/// The type identifying a controller -pub type ControllerId = LocalDefId; /// The type identifying a function that is used in call sites. pub type FunctionId = DefId; @@ -39,7 +37,7 @@ pub type FunctionId = DefId; pub type MarkableId = GlobalNode; type MarkerIndex = HashMap; -type FlowsTo = HashMap; +type FlowsTo = HashMap; /// Collection of entities a particular marker has been applied to #[derive(Clone, Debug, Default)] @@ -68,7 +66,7 @@ fn bfs_iter< G: IntoNeighbors + GraphRef + Visitable::Map>, >( g: G, - controller_id: LocalDefId, + controller_id: Endpoint, start: impl IntoIterator, ) -> impl Iterator { let mut discovered = g.visit_map(); @@ -367,7 +365,7 @@ impl Context { /// /// If the controller with this id does not exist *or* the controller has /// fewer than `index` arguments. 
- pub fn controller_argument(&self, ctrl_id: ControllerId, index: u32) -> Option { + pub fn controller_argument(&self, ctrl_id: Endpoint, index: u32) -> Option { let ctrl = self.desc.controllers.get(&ctrl_id)?; let inner = *ctrl.arguments.get(index as usize)?; @@ -430,10 +428,7 @@ impl Context { } /// Returns all DataSources, DataSinks, and CallSites for a Controller as Nodes. - pub fn all_nodes_for_ctrl( - &self, - ctrl_id: ControllerId, - ) -> impl Iterator + '_ { + pub fn all_nodes_for_ctrl(&self, ctrl_id: Endpoint) -> impl Iterator + '_ { let ctrl = &self.desc.controllers[&ctrl_id]; ctrl.graph .node_indices() @@ -443,7 +438,7 @@ impl Context { /// Returns an iterator over the data sources within controller `c` that have type `t`. pub fn srcs_with_type( &self, - ctrl_id: ControllerId, + ctrl_id: Endpoint, t: DefId, ) -> impl Iterator + '_ { self.desc.controllers[&ctrl_id] @@ -459,7 +454,7 @@ impl Context { /// Returns an iterator over all nodes that do not have any influencers of the given edge_type. 
pub fn roots( &self, - ctrl_id: ControllerId, + ctrl_id: Endpoint, edge_type: EdgeSelection, ) -> impl Iterator + '_ { let g = &self.desc.controllers[&ctrl_id].graph; @@ -530,7 +525,7 @@ impl Context { } /// Iterate over all defined controllers - pub fn all_controllers(&self) -> impl Iterator { + pub fn all_controllers(&self) -> impl Iterator { self.desc().controllers.iter().map(|(k, v)| (*k, v)) } diff --git a/crates/paralegal-policy/src/diagnostics.rs b/crates/paralegal-policy/src/diagnostics.rs index e49b0ef965..3ddf132953 100644 --- a/crates/paralegal-policy/src/diagnostics.rs +++ b/crates/paralegal-policy/src/diagnostics.rs @@ -95,9 +95,9 @@ use indexmap::IndexMap; use std::rc::Rc; use std::{io::Write, sync::Arc}; -use paralegal_spdg::{GlobalNode, Identifier, Span, SpanCoord, SPDG}; +use paralegal_spdg::{Endpoint, GlobalNode, Identifier, Span, SpanCoord, SPDG}; -use crate::{Context, ControllerId, NodeExt}; +use crate::{Context, NodeExt}; /// Check the condition and emit a [`Diagnostics::error`] if it fails. #[macro_export] @@ -785,7 +785,7 @@ impl PolicyContext { /// diagnostic context management. pub fn named_controller( self: Arc, - id: ControllerId, + id: Endpoint, policy: impl FnOnce(Arc) -> A, ) -> A { policy(Arc::new(ControllerContext { @@ -820,7 +820,7 @@ impl HasDiagnosticsBase for PolicyContext { /// See the [module level documentation][self] for more information on /// diagnostic context management. pub struct ControllerContext { - id: ControllerId, + id: Endpoint, inner: Arc, } @@ -863,7 +863,7 @@ impl ControllerContext { } /// Access the id for the controller of this context - pub fn id(&self) -> ControllerId { + pub fn id(&self) -> Endpoint { self.id } @@ -974,7 +974,7 @@ impl Context { /// diagnostic context management. 
pub fn named_controller( self: Arc, - id: ControllerId, + id: Endpoint, policy: impl FnOnce(Arc) -> A, ) -> A { policy(Arc::new(ControllerContext { diff --git a/crates/paralegal-policy/src/test_utils.rs b/crates/paralegal-policy/src/test_utils.rs index df07e9d7dd..9f322d6c4a 100644 --- a/crates/paralegal-policy/src/test_utils.rs +++ b/crates/paralegal-policy/src/test_utils.rs @@ -1,6 +1,6 @@ use crate::Context; -use crate::ControllerId; use paralegal_flow::test_utils::PreFrg; +use paralegal_spdg::Endpoint; use paralegal_spdg::IntoIterGlobalNodes; use paralegal_spdg::NodeCluster; use paralegal_spdg::{Identifier, InstructionKind, Node as SPDGNode, SPDG}; @@ -21,7 +21,7 @@ pub fn test_ctx() -> Arc { pub fn get_callsite_or_datasink_node<'a>( ctx: &'a Context, - controller: ControllerId, + controller: Endpoint, name: &'a str, ) -> NodeCluster { get_callsite_node(ctx, controller, name) @@ -29,11 +29,7 @@ pub fn get_callsite_or_datasink_node<'a>( .unwrap() } -pub fn get_callsite_node<'a>( - ctx: &'a Context, - controller: ControllerId, - name: &'a str, -) -> NodeCluster { +pub fn get_callsite_node<'a>(ctx: &'a Context, controller: Endpoint, name: &'a str) -> NodeCluster { let name = Identifier::new_intern(name); let ctrl = &ctx.desc().controllers[&controller]; let inner = ctrl @@ -57,7 +53,7 @@ fn is_at_function_call_with_name( ) } -pub fn get_sink_node<'a>(ctx: &'a Context, controller: ControllerId, name: &'a str) -> NodeCluster { +pub fn get_sink_node<'a>(ctx: &'a Context, controller: Endpoint, name: &'a str) -> NodeCluster { let name = Identifier::new_intern(name); let ctrl = &ctx.desc().controllers[&controller]; let inner = ctrl diff --git a/crates/paralegal-spdg/src/dot.rs b/crates/paralegal-spdg/src/dot.rs index a6d14ca504..7425481d9e 100644 --- a/crates/paralegal-spdg/src/dot.rs +++ b/crates/paralegal-spdg/src/dot.rs @@ -1,6 +1,6 @@ //! 
Display SPDGs as dot graphs -use crate::{GlobalEdge, InstructionKind, Node, ProgramDescription}; +use crate::{Endpoint, GlobalEdge, InstructionKind, Node, ProgramDescription}; use dot::{CompassPoint, Edges, Id, LabelText, Nodes}; use flowistry_pdg::rustc_portable::LocalDefId; use flowistry_pdg::{CallString, RichLocation}; @@ -11,14 +11,14 @@ use std::collections::HashMap; struct DotPrintableProgramDescription<'d> { spdg: &'d ProgramDescription, - call_sites: HashMap)>, - selected_controllers: Vec, + call_sites: HashMap)>, + selected_controllers: Vec, } impl<'d> DotPrintableProgramDescription<'d> { pub fn new_for_selection( spdg: &'d ProgramDescription, - mut selector: impl FnMut(LocalDefId) -> bool, + mut selector: impl FnMut(Endpoint) -> bool, ) -> Self { let selected_controllers: Vec<_> = spdg .controllers @@ -127,6 +127,7 @@ impl<'a, 'd> dot::Labeller<'a, CallString, GlobalEdge> for DotPrintableProgramDe s.push('*'); } InstructionKind::Return => s.push_str("end"), + InstructionKind::SwitchInt => s.push('C'), }; for &n in nodes { @@ -197,7 +198,7 @@ pub fn dump(spdg: &ProgramDescription, out: W) -> std::io::Re pub fn dump_for_controller( spdg: &ProgramDescription, out: impl std::io::Write, - controller_id: LocalDefId, + controller_id: Endpoint, ) -> std::io::Result<()> { let mut found = false; dump_for_selection(spdg, out, |l| { @@ -218,7 +219,7 @@ pub fn dump_for_controller( pub fn dump_for_selection( spdg: &ProgramDescription, mut out: impl std::io::Write, - selector: impl FnMut(LocalDefId) -> bool, + selector: impl FnMut(Endpoint) -> bool, ) -> std::io::Result<()> { let printable = DotPrintableProgramDescription::new_for_selection(spdg, selector); dot::render(&printable, &mut out) diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index adb7fc9120..759da2143d 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -19,6 +19,11 @@ pub(crate) mod rustc { pub use middle::mir; } +#[cfg(feature = 
"rustc")] +extern crate rustc_macros; +#[cfg(feature = "rustc")] +extern crate rustc_serialize; + extern crate strum; pub use flowistry_pdg::*; @@ -48,8 +53,11 @@ use petgraph::visit::IntoNodeIdentifiers; pub use std::collections::{HashMap, HashSet}; use std::fmt::{Display, Formatter}; +#[cfg(feature = "rustc")] +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; + /// The types of identifiers that identify an entrypoint -pub type Endpoint = LocalDefId; +pub type Endpoint = DefId; /// Identifiers for types pub type TypeId = DefId; /// Identifiers for functions @@ -281,14 +289,38 @@ pub enum InstructionKind { Statement, /// A function call FunctionCall(FunctionCallInfo), - /// A basic block terminator, usually switchInt + /// Some other terminator Terminator, + /// A switch int terminator + SwitchInt, /// The beginning of a function Start, /// The merged exit points of a function Return, } +#[cfg(feature = "rustc")] +impl Encodable for FunctionCallInfo { + fn encode(&self, s: &mut E) { + self.is_inlined.encode(s); + rustc::middle::ty::tls::with(|tcx| tcx.def_path_hash(self.id).encode(s)) + } +} + +#[cfg(feature = "rustc")] +impl Decodable for FunctionCallInfo { + fn decode(d: &mut D) -> Self { + Self { + is_inlined: Decodable::decode(d), + id: rustc::middle::ty::tls::with(|tcx| { + tcx.def_path_hash_to_def_id(Decodable::decode(d), &mut || { + panic!("Could not translate def path hash") + }) + }), + } + } +} + impl InstructionKind { /// If this identifies a function call, return the information inside. 
pub fn as_function_call(self) -> Option { @@ -320,7 +352,7 @@ pub type ControllerMap = HashMap; #[derive(Serialize, Deserialize, Debug)] pub struct ProgramDescription { /// Entry points we analyzed and their PDGs - #[cfg_attr(feature = "rustc", serde(with = "ser_localdefid_map"))] + #[cfg_attr(feature = "rustc", serde(with = "ser_defid_map"))] #[cfg_attr(not(feature = "rustc"), serde(with = "serde_map_via_vec"))] pub controllers: ControllerMap, @@ -340,8 +372,8 @@ pub struct ProgramDescription { pub def_info: HashMap, /// How many marker annotations were found pub marker_annotation_count: u32, - /// How long rustc ran before out plugin executed - pub rustc_time: Duration, + // /// How long rustc ran before out plugin executed + //pub rustc_time: Duration, /// The number of functions we produced a PDG for pub dedup_functions: u32, /// The lines of code corresponding to the functions from @@ -499,7 +531,7 @@ pub fn hash_pls(t: T) -> u64 { /// Return type of [`IntoIterGlobalNodes::iter_global_nodes`]. pub struct GlobalNodeIter { - controller_id: LocalDefId, + controller_id: DefId, iter: I::Iter, } @@ -526,7 +558,7 @@ pub trait IntoIterGlobalNodes: Sized + Copy { fn iter_nodes(self) -> Self::Iter; /// The controller id all of these nodes are located in. - fn controller_id(self) -> LocalDefId; + fn controller_id(self) -> DefId; /// Iterate all nodes as globally identified one's. /// @@ -565,13 +597,13 @@ pub type Node = NodeIndex; #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] pub struct GlobalNode { node: Node, - controller_id: LocalDefId, + controller_id: DefId, } impl GlobalNode { /// Create a new node with no guarantee that it exists in the SPDG of the /// controller. - pub fn unsafe_new(ctrl_id: LocalDefId, index: usize) -> Self { + pub fn unsafe_new(ctrl_id: DefId, index: usize) -> Self { GlobalNode { controller_id: ctrl_id, node: crate::Node::new(index), @@ -582,7 +614,7 @@ impl GlobalNode { /// particular SPDG with it's controller id. 
/// /// Meant for internal use only. - pub fn from_local_node(ctrl_id: LocalDefId, node: Node) -> Self { + pub fn from_local_node(ctrl_id: DefId, node: Node) -> Self { GlobalNode { controller_id: ctrl_id, node, @@ -595,7 +627,7 @@ impl GlobalNode { } /// The identifier for the SPDG this node is contained in - pub fn controller_id(self) -> LocalDefId { + pub fn controller_id(self) -> DefId { self.controller_id } } @@ -606,7 +638,7 @@ impl IntoIterGlobalNodes for GlobalNode { std::iter::once(self.local_node()) } - fn controller_id(self) -> LocalDefId { + fn controller_id(self) -> DefId { self.controller_id } } @@ -615,7 +647,7 @@ impl IntoIterGlobalNodes for GlobalNode { pub mod node_cluster { use std::ops::Range; - use flowistry_pdg::rustc_portable::LocalDefId; + use flowistry_pdg::rustc_portable::DefId; use crate::{GlobalNode, IntoIterGlobalNodes, Node}; @@ -626,7 +658,7 @@ pub mod node_cluster { /// individual [`GlobalNode`]s #[derive(Debug, Hash, Clone)] pub struct NodeCluster { - controller_id: LocalDefId, + controller_id: DefId, nodes: Box<[Node]>, } @@ -665,7 +697,7 @@ pub mod node_cluster { self.iter() } - fn controller_id(self) -> LocalDefId { + fn controller_id(self) -> DefId { self.controller_id } } @@ -683,7 +715,7 @@ pub mod node_cluster { impl NodeCluster { /// Create a new cluster. This for internal use. 
- pub fn new(controller_id: LocalDefId, nodes: impl IntoIterator) -> Self { + pub fn new(controller_id: DefId, nodes: impl IntoIterator) -> Self { Self { controller_id, nodes: nodes.into_iter().collect::>().into(), @@ -698,7 +730,7 @@ pub mod node_cluster { } /// Controller that these nodes belong to - pub fn controller_id(&self) -> LocalDefId { + pub fn controller_id(&self) -> DefId { self.controller_id } @@ -731,12 +763,12 @@ pub use node_cluster::NodeCluster; #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] pub struct GlobalEdge { index: EdgeIndex, - controller_id: LocalDefId, + controller_id: Endpoint, } impl GlobalEdge { /// The id of the controller that this edge is located in - pub fn controller_id(self) -> LocalDefId { + pub fn controller_id(self) -> Endpoint { self.controller_id } } @@ -812,8 +844,8 @@ pub struct SPDG { /// The module path to this controller function pub path: Box<[Identifier]>, /// The id - #[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::LocalDefId"))] - pub id: LocalDefId, + #[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::DefId"))] + pub id: DefId, /// The PDG pub graph: SPDGImpl, /// Nodes to which markers are assigned. diff --git a/crates/paralegal-spdg/src/tiny_bitset.rs b/crates/paralegal-spdg/src/tiny_bitset.rs index 8fd3b2eca2..3b1343e403 100644 --- a/crates/paralegal-spdg/src/tiny_bitset.rs +++ b/crates/paralegal-spdg/src/tiny_bitset.rs @@ -1,10 +1,14 @@ use crate::utils::display_list; use std::fmt::{Display, Formatter}; +#[cfg(feature = "rustc")] +use rustc_macros::{Decodable, Encodable}; + /// A bit-set implemented with a [`u16`], supporting domains up to 16 elements. 
#[derive( Clone, Eq, PartialEq, PartialOrd, Ord, Hash, Copy, serde::Serialize, serde::Deserialize, )] +#[cfg_attr(feature = "rustc", derive(Encodable, Decodable))] pub struct TinyBitSet(u16); impl Default for TinyBitSet { From d10230b9ba0f511f5399725e89ebf0cfccfd189f Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 14 May 2024 17:08:17 -0700 Subject: [PATCH 08/95] DefId is actually encodable --- crates/flowistry_pdg/src/pdg.rs | 24 +----------------------- crates/paralegal-flow/src/ana/mod.rs | 2 +- crates/paralegal-flow/src/ann/db.rs | 8 ++------ crates/paralegal-flow/src/ann/mod.rs | 27 ++------------------------- 4 files changed, 6 insertions(+), 55 deletions(-) diff --git a/crates/flowistry_pdg/src/pdg.rs b/crates/flowistry_pdg/src/pdg.rs index 71ec52cd3b..ddf170c566 100644 --- a/crates/flowistry_pdg/src/pdg.rs +++ b/crates/flowistry_pdg/src/pdg.rs @@ -107,6 +107,7 @@ impl From for RichLocation { /// A [`RichLocation`] within a specific point in a codebase. #[derive(PartialEq, Eq, Hash, Clone, Copy, Debug, Serialize, Deserialize)] +#[cfg_attr(feature = "rustc", derive(Encodable, Decodable))] pub struct GlobalLocation { // TODO Change to `DefId` /// The function containing the location. 
@@ -117,29 +118,6 @@ pub struct GlobalLocation { pub location: RichLocation, } -#[cfg(feature = "rustc")] -impl Encodable for GlobalLocation { - fn encode(&self, e: &mut E) { - crate::rustc::middle::ty::tls::with(|tcx| { - tcx.def_path_hash(self.function).encode(e); - self.location.encode(e); - }) - } -} - -#[cfg(feature = "rustc")] -impl Decodable for GlobalLocation { - fn decode(d: &mut D) -> Self { - use crate::rustc::span::def_id::DefPathHash; - crate::rustc::middle::ty::tls::with(|tcx| Self { - function: tcx.def_path_hash_to_def_id(DefPathHash::decode(d), &mut || { - panic!("Could map hash to def id") - }), - location: RichLocation::decode(d), - }) - } -} - #[cfg(not(feature = "rustc"))] impl fmt::Display for GlobalLocation { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 9d72821a14..a14bf8e1dc 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -528,7 +528,7 @@ impl<'tcx> SPDGGenerator<'tcx> { k, TypeDescription { rendering, - otypes: otypes.into_iter().map(|ot| ot.def_id).collect(), + otypes: otypes.into(), markers: markers .into_iter() .map(|i| Identifier::new_intern(i.as_str())) diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index e0325884cc..6aa15e9a8d 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -11,7 +11,7 @@ //! All interactions happen through the central database object: [`MarkerCtx`]. 
use crate::{ - ann::{Annotation, MarkerAnnotation, OType}, + ann::{Annotation, MarkerAnnotation}, args::{Args, MarkerControl}, consts, utils::{ @@ -515,11 +515,7 @@ fn try_parse_annotation( warn!("The `paralegal_flow::label` annotation is deprecated, use `paralegal_flow::marker` instead"); one(Annotation::Marker(ann_match_fn(i)?)) } else if let Some(i) = a.match_get_ref(&consts::OTYPE_MARKER) { - Either::Right( - otype_ann_match(i, tcx)? - .into_iter() - .map(|def_id| Annotation::OType(OType { def_id })), - ) + Either::Right(otype_ann_match(i, tcx)?.into_iter().map(Annotation::OType)) } else if let Some(i) = a.match_get_ref(&consts::EXCEPTION_MARKER) { one(Annotation::Exception(match_exception(i)?)) } else { diff --git a/crates/paralegal-flow/src/ann/mod.rs b/crates/paralegal-flow/src/ann/mod.rs index 64096c7b3d..295822539a 100644 --- a/crates/paralegal-flow/src/ann/mod.rs +++ b/crates/paralegal-flow/src/ann/mod.rs @@ -29,32 +29,9 @@ pub mod parse; )] pub enum Annotation { Marker(MarkerAnnotation), - OType(OType), + OType(#[serde(with = "rustc_proxies::DefId")] TypeId), Exception(ExceptionAnnotation), } -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy, Deserialize, Serialize)] -pub struct OType { - #[serde(with = "rustc_proxies::DefId")] - pub def_id: TypeId, -} - -impl Encodable for OType { - fn encode(&self, s: &mut E) { - rustc_middle::ty::tls::with(|tcx| tcx.def_path_hash(self.def_id)).encode(s) - } -} - -impl Decodable for OType { - fn decode(d: &mut D) -> Self { - Self { - def_id: rustc_middle::ty::tls::with(|tcx| { - tcx.def_path_hash_to_def_id(Decodable::decode(d), &mut || { - panic!("Could not resolve def path") - }) - }), - } - } -} impl Annotation { /// If this is an [`Annotation::Marker`], returns the underlying [`MarkerAnnotation`]. @@ -68,7 +45,7 @@ impl Annotation { /// If this is an [`Annotation::OType`], returns the underlying [`TypeId`]. 
pub fn as_otype(&self) -> Option { match self { - Annotation::OType(t) => Some(t.def_id), + Annotation::OType(t) => Some(*t), _ => None, } } From dee9ee66187828781acb6a6017406e20d282efd9 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 14 May 2024 17:26:13 -0700 Subject: [PATCH 09/95] Decoder --- crates/paralegal-flow/src/ana/encoder.rs | 109 ++++++++++++++++++++++- crates/paralegal-flow/src/ana/mod.rs | 6 +- 2 files changed, 108 insertions(+), 7 deletions(-) diff --git a/crates/paralegal-flow/src/ana/encoder.rs b/crates/paralegal-flow/src/ana/encoder.rs index 29a6cb6cb5..1c029267f6 100644 --- a/crates/paralegal-flow/src/ana/encoder.rs +++ b/crates/paralegal-flow/src/ana/encoder.rs @@ -1,9 +1,13 @@ use std::path::Path; use rustc_hash::FxHashMap; -use rustc_middle::ty::{self, TyCtxt}; -use rustc_serialize::{opaque::FileEncoder, Encoder}; -use rustc_type_ir::TyEncoder; +use rustc_hir::def_id::DefId; +use rustc_middle::ty::{self, Ty, TyCtxt}; +use rustc_serialize::{ + opaque::{FileEncoder, MemDecoder}, + Decodable, Decoder, Encodable, Encoder, +}; +use rustc_type_ir::{TyDecoder, TyEncoder}; macro_rules! encoder_methods { ($($name:ident($ty:ty);)*) => { @@ -14,14 +18,16 @@ macro_rules! 
encoder_methods { } pub struct ParalegalEncoder<'tcx> { + tcx: TyCtxt<'tcx>, file_encoder: FileEncoder, type_shorthands: FxHashMap, usize>, predicate_shorthands: FxHashMap, usize>, } impl<'tcx> ParalegalEncoder<'tcx> { - pub fn new(path: impl AsRef) -> Self { + pub fn new(path: impl AsRef, tcx: TyCtxt<'tcx>) -> Self { Self { + tcx, file_encoder: FileEncoder::new(path).unwrap(), type_shorthands: Default::default(), predicate_shorthands: Default::default(), @@ -76,3 +82,98 @@ impl<'tcx> TyEncoder for ParalegalEncoder<'tcx> { unimplemented!() } } + +impl<'tcx> Encodable> for DefId { + fn encode(&self, s: &mut ParalegalEncoder<'tcx>) { + s.tcx.def_path_hash(*self).encode(s) + } +} + +pub struct ParalegalDecoder<'tcx, 'a> { + tcx: TyCtxt<'tcx>, + mem_decoder: MemDecoder<'a>, + shorthand_map: FxHashMap>, +} + +impl<'tcx, 'a> TyDecoder for ParalegalDecoder<'tcx, 'a> { + const CLEAR_CROSS_CRATE: bool = true; + + type I = TyCtxt<'tcx>; + + fn interner(&self) -> Self::I { + self.tcx + } + + fn cached_ty_for_shorthand( + &mut self, + shorthand: usize, + or_insert_with: F, + ) -> ::Ty + where + F: FnOnce(&mut Self) -> ::Ty, + { + if let Some(ty) = self.shorthand_map.get(&shorthand) { + return *ty; + } + let ty = or_insert_with(self); + self.shorthand_map.insert(shorthand, ty); + ty + } + + fn decode_alloc_id(&mut self) -> ::AllocId { + unimplemented!() + } + + fn with_position(&mut self, pos: usize, f: F) -> R + where + F: FnOnce(&mut Self) -> R, + { + let new_opaque = MemDecoder::new(self.mem_decoder.data(), pos); + let old_opaque = std::mem::replace(&mut self.mem_decoder, new_opaque); + let r = f(self); + self.mem_decoder = old_opaque; + r + } +} + +macro_rules! decoder_methods { + ($($name:ident($ty:ty);)*) => { + $(fn $name(&mut self) -> $ty { + self.mem_decoder.$name() + })* + } +} + +impl<'tcx, 'a> Decoder for ParalegalDecoder<'tcx, 'a> { + decoder_methods! 
{ + read_usize(usize); + read_u128(u128); + read_u64(u64); + read_u32(u32); + read_u16(u16); + read_u8(u8); + read_isize(isize); + read_i128(i128); + read_i64(i64); + read_i32(i32); + read_i16(i16); + } + fn position(&self) -> usize { + self.mem_decoder.position() + } + fn peek_byte(&self) -> u8 { + self.mem_decoder.peek_byte() + } + fn read_raw_bytes(&mut self, len: usize) -> &[u8] { + self.mem_decoder.read_raw_bytes(len) + } +} + +impl<'tcx, 'a> Decodable> for DefId { + fn decode(d: &mut ParalegalDecoder<'tcx, 'a>) -> Self { + d.tcx + .def_path_hash_to_def_id(Decodable::decode(d), &mut || { + panic!("Could not translate hash") + }) + } +} diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index a14bf8e1dc..49cb432f8a 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -69,7 +69,7 @@ pub fn collect_and_emit_metadata<'tcx>( collector.run(); let pdgs = collector.flowistry_collector.into_metadata(tcx); let meta = Metadata::from_pdgs(tcx, pdgs, &collector.marker_ctx); - meta.write(path); + meta.write(path, tcx); (collector.functions_to_analyze, collector.marker_ctx.into()) } @@ -82,8 +82,8 @@ pub struct Metadata<'tcx> { } impl<'tcx> Metadata<'tcx> { - fn write(&self, path: impl AsRef) { - let mut encoder = ParalegalEncoder::new(path); + fn write(&self, path: impl AsRef, tcx: TyCtxt<'tcx>) { + let mut encoder = ParalegalEncoder::new(path, tcx); self.encode(&mut encoder); encoder.finish() } From aba0f6d3d40b0a0af48fcf112f68b4ef7423c779 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 14 May 2024 19:27:04 -0700 Subject: [PATCH 10/95] Wiring for loader --- .../src/async_support.rs | 6 +- .../src/construct.rs | 73 +++++++++++++--- crates/flowistry_pdg_construction/src/lib.rs | 4 +- crates/flowistry_pdg_construction/src/meta.rs | 4 +- .../flowistry_pdg_construction/tests/pdg.rs | 4 +- crates/paralegal-flow/src/ana/mod.rs | 87 ++++++++++++++----- crates/paralegal-flow/src/ann/db.rs | 40
+++++---- crates/paralegal-flow/src/discover.rs | 23 +++-- crates/paralegal-flow/src/lib.rs | 9 +- 9 files changed, 179 insertions(+), 71 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index d5a2cf4b12..e74f228000 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -185,20 +185,20 @@ pub enum AsyncDeterminationResult { } impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { - pub(crate) fn try_handle_as_async(&self) -> Option>> { + pub(crate) fn try_handle_as_async(&self) -> Option> { let (generator_fn, location, asyncness) = determine_async(self.tcx(), self.def_id, &self.body)?; let g = self.memo.construct_for(generator_fn)?; let mut new_g = push_call_string_root( - g.as_ref(), + g, GlobalLocation { function: self.def_id.to_def_id(), location: flowistry_pdg::RichLocation::Location(location), }, ); new_g.graph.asyncness = asyncness; - Some(Rc::new(new_g)) + Some(new_g) } pub(crate) fn try_poll_call_kind<'b>( diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index a1a77df503..c4e775382d 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -41,22 +41,48 @@ use crate::{ Asyncness, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, SkipCall, }; +pub trait PDGLoader<'tcx> { + fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>>; +} + +pub struct NoLoader; + +impl<'tcx> PDGLoader<'tcx> for NoLoader { + fn load(&self, _: DefId) -> Option<&SubgraphDescriptor<'tcx>> { + None + } +} + +impl<'tcx, T: PDGLoader<'tcx>> PDGLoader<'tcx> for Rc { + fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { + (&**self).load(function) + } +} + +impl<'tcx, T: PDGLoader<'tcx>> PDGLoader<'tcx> for Box { + fn load(&self, function: DefId) -> 
Option<&SubgraphDescriptor<'tcx>> { + (&**self).load(function) + } +} + pub struct MemoPdgConstructor<'tcx> { pub(crate) tcx: TyCtxt<'tcx>, pub(crate) call_change_callback: Option + 'tcx>>, pub(crate) dump_mir: bool, pub(crate) async_info: Rc, pub(crate) pdg_cache: PdgCache<'tcx>, + pub(crate) loader: Box + 'tcx>, } impl<'tcx> MemoPdgConstructor<'tcx> { - pub fn new(tcx: TyCtxt<'tcx>) -> Self { + pub fn new(tcx: TyCtxt<'tcx>, loader: impl PDGLoader<'tcx> + 'tcx) -> Self { Self { tcx, call_change_callback: None, dump_mir: false, async_info: AsyncInfo::make(tcx).expect("Async functions are not defined"), pdg_cache: Default::default(), + loader: Box::new(loader), } } @@ -73,17 +99,35 @@ impl<'tcx> MemoPdgConstructor<'tcx> { self } - pub(crate) fn construct_for( - &self, + pub(crate) fn construct_for<'a>( + &'a self, resolution: FnResolution<'tcx>, - ) -> Option>> { - self.pdg_cache - .get_maybe_recursive(resolution, |_| { + ) -> Option<&'a SubgraphDescriptor<'tcx>> { + let (def_id, generics) = match resolution { + FnResolution::Final(instance) => (instance.def_id(), Some(instance.args)), + FnResolution::Partial(def_id) => (def_id, None), + }; + if let Some(local) = def_id.as_local() { + self.pdg_cache.get_maybe_recursive((local, generics), |_| { let g = GraphConstructor::new(self, resolution).construct_partial(); g.check_invariants(); g }) - .map(Rc::clone) + } else { + self.loader.load(def_id) + } + } + + pub fn is_in_cache(&self, resolution: FnResolution<'tcx>) -> bool { + let (def_id, generics) = match resolution { + FnResolution::Final(instance) => (instance.def_id(), Some(instance.args)), + FnResolution::Partial(def_id) => (def_id, None), + }; + if let Some(local) = def_id.as_local() { + self.pdg_cache.is_in_cache(&(local, generics)) + } else { + self.loader.load(def_id).is_some() + } } pub fn construct_graph(&self, function: LocalDefId) -> Result, ErrorGuaranteed> { @@ -497,7 +541,8 @@ impl<'tcx> PartialGraph<'tcx> { } } -type PdgCache<'tcx> = Rc, Rc>>>; +type 
PdgCache<'tcx> = + Rc>), SubgraphDescriptor<'tcx>>>; pub struct GraphConstructor<'tcx, 'a> { pub(crate) memo: &'a MemoPdgConstructor<'tcx>, @@ -877,7 +922,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { } fn determine_call_handling<'b>( - &self, + &'b self, location: Location, func: &Operand<'tcx>, args: &'b [Operand<'tcx>], @@ -945,7 +990,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { let cache_key = resolved_fn; - let is_cached = self.memo.pdg_cache.is_in_cache(&cache_key); + let is_cached = self.memo.is_in_cache(cache_key); let call_changes = self.call_change_callback().map(|callback| { let info = CallInfo { @@ -1115,7 +1160,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { } } - pub(crate) fn construct_partial(&self) -> Rc> { + pub(crate) fn construct_partial(&self) -> SubgraphDescriptor<'tcx> { if let Some(g) = self.try_handle_as_async() { return g; } @@ -1157,7 +1202,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { } } - Rc::new(SubgraphDescriptor { + SubgraphDescriptor { parentable_dsts: final_state .parentable_dsts(self.def_id, &self.body) .collect(), @@ -1165,7 +1210,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { .parentable_srcs(self.def_id, &self.body) .collect(), graph: final_state, - }) + } } /// Determine the type of call-site. 
@@ -1349,7 +1394,7 @@ enum CallHandling<'tcx, 'a> { ApproxAsyncFn, Ready { calling_convention: CallingConvention<'tcx, 'a>, - descriptor: Rc>, + descriptor: &'a SubgraphDescriptor<'tcx>, generic_args: GenericArgsRef<'tcx>, }, ApproxAsyncSM(ApproximationHandler<'tcx, 'a>), diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index abe02df286..ed82073022 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -24,7 +24,7 @@ pub use crate::construct::MemoPdgConstructor; pub use callback::{ CallChangeCallback, CallChangeCallbackFn, CallChanges, CallInfo, InlineMissReason, SkipCall, }; -pub use construct::SubgraphDescriptor; +pub use construct::{NoLoader, PDGLoader, SubgraphDescriptor}; use rustc_middle::ty::TyCtxt; pub use utils::{is_non_default_trait_method, try_resolve_function}; @@ -38,6 +38,6 @@ mod utils; /// Computes a global program dependence graph (PDG) starting from the root function specified by `def_id`. 
pub fn compute_pdg<'tcx>(tcx: TyCtxt<'tcx>, params: FnResolution<'tcx>) -> DepGraph<'tcx> { - let constructor = MemoPdgConstructor::new(tcx); + let constructor = MemoPdgConstructor::new(tcx, NoLoader); constructor.construct_for(params).unwrap().to_petgraph() } diff --git a/crates/flowistry_pdg_construction/src/meta.rs b/crates/flowistry_pdg_construction/src/meta.rs index 5ca8256523..151dc29d51 100644 --- a/crates/flowistry_pdg_construction/src/meta.rs +++ b/crates/flowistry_pdg_construction/src/meta.rs @@ -20,6 +20,7 @@ use rustc_utils::{cache::Cache, mir::borrowck_facts}; use crate::{ construct::SubgraphDescriptor, Asyncness, CallChangeCallback, DepGraph, MemoPdgConstructor, + PDGLoader, }; pub struct MetadataCollector { @@ -34,8 +35,9 @@ impl MetadataCollector { pub fn into_metadata<'tcx>( self, tcx: TyCtxt<'tcx>, + loader: impl PDGLoader<'tcx> + 'tcx, ) -> FxHashMap> { - let constructor = MemoPdgConstructor::new(tcx); + let constructor = MemoPdgConstructor::new(tcx, loader); self.targets .into_iter() .map(|t| { diff --git a/crates/flowistry_pdg_construction/tests/pdg.rs b/crates/flowistry_pdg_construction/tests/pdg.rs index 61eea77d14..36309021fc 100644 --- a/crates/flowistry_pdg_construction/tests/pdg.rs +++ b/crates/flowistry_pdg_construction/tests/pdg.rs @@ -9,7 +9,7 @@ use std::collections::HashSet; use either::Either; use flowistry_pdg_construction::{ graph::{DepEdge, DepGraph}, - CallChangeCallbackFn, CallChanges, MemoPdgConstructor, SkipCall, + CallChangeCallbackFn, CallChanges, MemoPdgConstructor, NoLoader, SkipCall, }; use itertools::Itertools; use rustc_hir::def_id::LocalDefId; @@ -38,7 +38,7 @@ fn pdg( let _ = env_logger::try_init(); rustc_utils::test_utils::compile(input, move |tcx| { let def_id = get_main(tcx); - let mut memo = MemoPdgConstructor::new(tcx); + let mut memo = MemoPdgConstructor::new(tcx, NoLoader); configure(tcx, &mut memo); let pdg = memo.construct_graph(def_id).unwrap(); tests(tcx, pdg) diff --git 
a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 49cb432f8a..0d2cb32337 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -21,7 +21,7 @@ use anyhow::Result; use either::Either; use flowistry_pdg_construction::{ graph::InternedString, meta::MetadataCollector, Asyncness, DepGraph, MemoPdgConstructor, - SubgraphDescriptor, + PDGLoader, SubgraphDescriptor, }; use itertools::Itertools; use petgraph::visit::GraphBase; @@ -40,6 +40,7 @@ use rustc_middle::{ BasicBlock, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, TerminatorKind, }, + query::AsLocalKey, ty::{self, GenericArgsRef, TyCtxt}, }; use rustc_serialize::{opaque::FileEncoder, Decodable, Encodable}; @@ -60,25 +61,48 @@ pub struct MetadataLoader<'tcx> { cache: Cache>>, } -pub fn collect_and_emit_metadata<'tcx>( - tcx: TyCtxt<'tcx>, - args: &'static Args, - path: impl AsRef, -) -> (Vec, MarkerCtx<'tcx>) { - let mut collector = CollectingVisitor::new(tcx, args); - collector.run(); - let pdgs = collector.flowistry_collector.into_metadata(tcx); - let meta = Metadata::from_pdgs(tcx, pdgs, &collector.marker_ctx); - meta.write(path, tcx); - (collector.functions_to_analyze, collector.marker_ctx.into()) +impl<'tcx> PDGLoader<'tcx> for MetadataLoader<'tcx> { + fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { + self.get_metadata(function.krate)?.pdgs.get(&function.index) + } +} + +impl<'tcx> MetadataLoader<'tcx> { + pub fn collect_and_emit_metadata( + self: Rc, + args: &'static Args, + path: impl AsRef, + ) -> (Vec, MarkerCtx<'tcx>) { + let tcx = self.tcx; + let mut collector = CollectingVisitor::new(tcx, args, self.clone()); + collector.run(); + let pdgs = collector + .flowistry_collector + .into_metadata(tcx, self.clone()); + let meta = Metadata::from_pdgs(tcx, pdgs, &collector.marker_ctx); + meta.write(path, tcx); + (collector.functions_to_analyze, collector.marker_ctx.into()) + } + + pub fn 
get_annotations(&self, key: DefId) -> &[Annotation] { + (|| { + Some( + self.get_metadata(key.krate)? + .local_annotations + .get(&key.index)? + .as_slice(), + ) + })() + .unwrap_or(&[]) + } } #[derive(Clone, Debug, TyEncodable, TyDecodable)] pub struct Metadata<'tcx> { pub pdgs: FxHashMap>, pub bodies: FxHashMap>, - pub local_annotations: HashMap>, - pub reachable_markers: HashMap, Box<[InternedString]>>, + pub local_annotations: HashMap>, + pub reachable_markers: HashMap<(DefIndex, Option>), Box<[InternedString]>>, } impl<'tcx> Metadata<'tcx> { @@ -148,21 +172,34 @@ impl<'tcx> Metadata<'tcx> { Self { pdgs, bodies, - local_annotations: markers.local_annotations.clone(), + local_annotations: markers + .local_annotations + .iter() + .map(|(k, v)| (k.local_def_index, v.clone())) + .collect(), reachable_markers: (&*cache_borrow) .iter() - .filter_map(|(k, v)| Some((*k, (**(v.as_ref()?)).clone()))) + .filter_map(|(k, v)| { + let (id, args) = match k { + FnResolution::Partial(d) => (*d, None), + FnResolution::Final(inst) => (inst.def_id(), Some(inst.args)), + }; + Some(( + (id.as_local()?.local_def_index, args), + (**(v.as_ref()?)).clone(), + )) + }) .collect(), } } } impl<'tcx> MetadataLoader<'tcx> { - pub fn new(tcx: TyCtxt<'tcx>) -> Self { - Self { + pub fn new(tcx: TyCtxt<'tcx>) -> Rc { + Rc::new(Self { tcx, cache: Default::default(), - } + }) } pub fn get_metadata(&self, key: CrateNum) -> Option<&Metadata<'tcx>> { @@ -273,17 +310,21 @@ pub struct SPDGGenerator<'tcx> { pub opts: &'static crate::Args, pub tcx: TyCtxt<'tcx>, marker_ctx: MarkerCtx<'tcx>, - flowistry_loader: MetadataLoader<'tcx>, + flowistry_loader: Rc>, } impl<'tcx> SPDGGenerator<'tcx> { - pub fn new(marker_ctx: MarkerCtx<'tcx>, opts: &'static crate::Args, tcx: TyCtxt<'tcx>) -> Self { - let mut flowistry_loader = MetadataLoader::new(tcx); + pub fn new( + marker_ctx: MarkerCtx<'tcx>, + opts: &'static crate::Args, + tcx: TyCtxt<'tcx>, + loader: Rc>, + ) -> Self { Self { marker_ctx, opts, tcx, - 
flowistry_loader, + flowistry_loader: loader, } } diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index 6aa15e9a8d..0a6984bc27 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -11,6 +11,7 @@ //! All interactions happen through the central database object: [`MarkerCtx`]. use crate::{ + ana::MetadataLoader, ann::{Annotation, MarkerAnnotation}, args::{Args, MarkerControl}, consts, @@ -21,7 +22,7 @@ use crate::{ DefId, Either, HashMap, HashSet, LocalDefId, TyCtxt, }; use flowistry_pdg_construction::{determine_async, graph::InternedString}; -use rustc_ast::Attribute; +use rustc_ast::{AnonConst, Attribute}; use rustc_hir::def::DefKind; use rustc_middle::{mir, ty}; use rustc_utils::cache::Cache; @@ -64,18 +65,22 @@ impl<'tcx> MarkerCtx<'tcx> { /// are present an empty slice is returned. /// /// Query is cached. - pub fn local_annotations(&self, def_id: LocalDefId) -> &[Annotation] { - self.db() - .local_annotations - .get(&self.defid_rewrite(def_id.to_def_id()).expect_local()) - .map_or(&[], |o| o.as_slice()) + fn attribute_annotations(&self, key: DefId) -> &[Annotation] { + if let Some(local) = key.as_local() { + self.db() + .local_annotations + .get(&self.defid_rewrite(key).expect_local()) + .map_or(&[], Vec::as_slice) + } else { + self.0.loader.get_annotations(key) + } } /// Retrieves any external markers on this item. If there are not such /// markers an empty slice is returned. /// /// THe external marker database is populated at construction. - pub fn external_markers(&self, did: D) -> &[MarkerAnnotation] { + fn external_markers(&self, did: D) -> &[MarkerAnnotation] { self.db() .external_annotations .get(&self.defid_rewrite(did.into_def_id(self.tcx()))) @@ -86,11 +91,9 @@ impl<'tcx> MarkerCtx<'tcx> { /// /// Queries are cached/precomputed so calling this repeatedly is cheap. 
pub fn combined_markers(&self, def_id: DefId) -> impl Iterator { - def_id - .as_local() - .map(|ldid| self.local_annotations(ldid)) + self.attribute_annotations(def_id) .into_iter() - .flat_map(|anns| anns.iter().flat_map(Annotation::as_marker)) + .filter_map(Annotation::as_marker) .chain(self.external_markers(def_id).iter()) } @@ -111,13 +114,13 @@ impl<'tcx> MarkerCtx<'tcx> { } /// Are there any external markers on this item? - pub fn is_externally_marked(&self, did: D) -> bool { + fn is_externally_marked(&self, did: D) -> bool { !self.external_markers(did).is_empty() } /// Are there any local markers on this item? - pub fn is_locally_marked(&self, def_id: LocalDefId) -> bool { - self.local_annotations(def_id) + fn is_attribute_marked(&self, def_id: DefId) -> bool { + self.attribute_annotations(def_id) .iter() .any(Annotation::is_marker) } @@ -127,8 +130,9 @@ impl<'tcx> MarkerCtx<'tcx> { /// This is in contrast to [`Self::marker_is_reachable`] which also reports /// if markers are reachable from the body of this function (if it is one). pub fn is_marked(&self, did: D) -> bool { - matches!(did.into_def_id(self.tcx()).as_local(), Some(ldid) if self.is_locally_marked(ldid)) - || self.is_externally_marked(did) + let did = did.into_def_id(self.tcx()); + + self.is_attribute_marked(did) || self.is_externally_marked(did) } /// Return a complete set of local annotations that were discovered. @@ -464,11 +468,12 @@ pub struct MarkerDatabase<'tcx> { /// Configuration options config: &'static MarkerControl, type_markers: Cache, Box>, + loader: Rc>, } impl<'tcx> MarkerDatabase<'tcx> { /// Construct a new database, loading external markers. 
- pub fn init(tcx: TyCtxt<'tcx>, args: &'static Args) -> Self { + pub fn init(tcx: TyCtxt<'tcx>, args: &'static Args, loader: Rc>) -> Self { Self { tcx, local_annotations: HashMap::default(), @@ -476,6 +481,7 @@ impl<'tcx> MarkerDatabase<'tcx> { reachable_markers: Default::default(), config: args.marker_control(), type_markers: Default::default(), + loader, } } diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index 4c52d7329f..e16ff9da9f 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -3,7 +3,16 @@ //! //! Essentially this discovers all local `paralegal_flow::*` annotations. -use crate::{ana::SPDGGenerator, ann::db::MarkerDatabase, consts, desc::*, stats::Stats, utils::*}; +use std::rc::Rc; + +use crate::{ + ana::{MetadataLoader, SPDGGenerator}, + ann::db::MarkerDatabase, + consts, + desc::*, + stats::Stats, + utils::*, +}; use flowistry_pdg_construction::meta::MetadataCollector; use rustc_hir::{ @@ -55,7 +64,11 @@ impl FnToAnalyze { } impl<'tcx> CollectingVisitor<'tcx> { - pub(crate) fn new(tcx: TyCtxt<'tcx>, opts: &'static crate::Args) -> Self { + pub(crate) fn new( + tcx: TyCtxt<'tcx>, + opts: &'static crate::Args, + loader: Rc>, + ) -> Self { let functions_to_analyze = opts .anactrl() .selected_targets() @@ -76,15 +89,15 @@ impl<'tcx> CollectingVisitor<'tcx> { tcx, opts, functions_to_analyze, - marker_ctx: MarkerDatabase::init(tcx, opts), + marker_ctx: MarkerDatabase::init(tcx, opts, loader), flowistry_collector: MetadataCollector::new(), } } /// After running the discovery with `visit_all_item_likes_in_crate`, create /// the read-only [`SPDGGenerator`] upon which the analysis will run. - fn into_generator(self) -> SPDGGenerator<'tcx> { - SPDGGenerator::new(self.marker_ctx.into(), self.opts, self.tcx) + fn into_generator(self, loader: Rc>) -> SPDGGenerator<'tcx> { + SPDGGenerator::new(self.marker_ctx.into(), self.opts, self.tcx, loader) } /// Driver function. 
Performs the data collection via visit, then calls diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index f888b6c737..0881cd9d05 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -83,7 +83,7 @@ pub mod test_utils; pub use paralegal_spdg as desc; -use crate::ana::{collect_and_emit_metadata, SPDGGenerator}; +use crate::ana::{MetadataLoader, SPDGGenerator}; pub use crate::ann::db::MarkerCtx; pub use args::{AnalysisCtrl, Args, BuildConfig, DepConfig, DumpArgs, ModelCtrl}; @@ -150,8 +150,9 @@ impl rustc_driver::Callbacks for Callbacks { .enter(|tcx| { tcx.sess.abort_if_errors(); - let (analysis_targets, mctx) = collect_and_emit_metadata( - tcx, + let loader = MetadataLoader::new(tcx); + + let (analysis_targets, mctx) = loader.clone().collect_and_emit_metadata( self.opts, compiler .build_output_filenames(tcx.sess, &[]) @@ -159,7 +160,7 @@ impl rustc_driver::Callbacks for Callbacks { ); tcx.sess.abort_if_errors(); - let mut gen = SPDGGenerator::new(mctx, self.opts, tcx); + let mut gen = SPDGGenerator::new(mctx, self.opts, tcx, loader.clone()); let desc = gen.analyze(analysis_targets)?; From e69046cc803f0dd12de7ba6a25553d3859e37fc6 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 14 May 2024 20:23:43 -0700 Subject: [PATCH 11/95] Reuse memo cosntructor --- .../src/construct.rs | 2 +- .../paralegal-flow/src/ana/graph_converter.rs | 13 +++---- crates/paralegal-flow/src/ana/mod.rs | 34 ++++++++++++++----- crates/paralegal-flow/src/discover.rs | 14 +++----- crates/paralegal-flow/src/lib.rs | 16 +++++---- 5 files changed, 45 insertions(+), 34 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index c4e775382d..3c18e342c9 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -99,7 +99,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { self } - pub(crate) fn 
construct_for<'a>( + pub fn construct_for<'a>( &'a self, resolution: FnResolution<'tcx>, ) -> Option<&'a SubgraphDescriptor<'tcx>> { diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 9f8871d078..d5f8cf627b 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -98,7 +98,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { /// Is the top-level function (entrypoint) an `async fn` fn entrypoint_is_async(&self) -> bool { self.generator - .flowistry_loader + .metadata_loader .get_asyncness(self.local_def_id) .is_async() } @@ -141,7 +141,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { let body = self .generator - .flowistry_loader + .metadata_loader .get_body_info(leaf_loc.function) .unwrap(); @@ -214,10 +214,10 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { let tcx = self.tcx(); let body = self .generator - .flowistry_loader + .metadata_loader .get_body_info(at.leaf().function) .unwrap(); - let generics = self.generator.flowistry_loader.get_mono(at); + let generics = self.generator.metadata_loader.get_mono(at); // So actually we're going to check the base place only, because // Flowistry sometimes tracks subplaces instead but we want the marker @@ -315,10 +315,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { generator: &SPDGGenerator<'tcx>, def_id: DefId, ) -> Result> { - let tcx = generator.tcx; - let opts = generator.opts; - - let Some(pdg) = generator.flowistry_loader.get_pdg(def_id) else { + let Some(pdg) = generator.metadata_loader.get_pdg(def_id) else { bail!("Failed to construct the graph"); }; diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 0d2cb32337..faa4faaed7 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -72,16 +72,31 @@ impl<'tcx> MetadataLoader<'tcx> { self: Rc, args: &'static Args, 
path: impl AsRef, - ) -> (Vec, MarkerCtx<'tcx>) { + ) -> (Vec, MarkerCtx<'tcx>, MemoPdgConstructor<'tcx>) { let tcx = self.tcx; let mut collector = CollectingVisitor::new(tcx, args, self.clone()); collector.run(); - let pdgs = collector - .flowistry_collector - .into_metadata(tcx, self.clone()); + let emit_targets = collector.emit_target_collector; + let constructor = MemoPdgConstructor::new(tcx, self.clone()); + let pdgs = emit_targets + .into_iter() + .map(|t| { + ( + t.local_def_index, + (*constructor + .construct_for(FnResolution::Partial(t.to_def_id())) + .unwrap()) + .clone(), + ) + }) + .collect::>(); let meta = Metadata::from_pdgs(tcx, pdgs, &collector.marker_ctx); meta.write(path, tcx); - (collector.functions_to_analyze, collector.marker_ctx.into()) + ( + collector.functions_to_analyze, + collector.marker_ctx.into(), + constructor, + ) } pub fn get_annotations(&self, key: DefId) -> &[Annotation] { @@ -310,7 +325,8 @@ pub struct SPDGGenerator<'tcx> { pub opts: &'static crate::Args, pub tcx: TyCtxt<'tcx>, marker_ctx: MarkerCtx<'tcx>, - flowistry_loader: Rc>, + flowistry_loader: MemoPdgConstructor<'tcx>, + metadata_loader: Rc>, } impl<'tcx> SPDGGenerator<'tcx> { @@ -318,13 +334,15 @@ impl<'tcx> SPDGGenerator<'tcx> { marker_ctx: MarkerCtx<'tcx>, opts: &'static crate::Args, tcx: TyCtxt<'tcx>, - loader: Rc>, + loader: MemoPdgConstructor<'tcx>, + metadata_loader: Rc>, ) -> Self { Self { marker_ctx, opts, tcx, flowistry_loader: loader, + metadata_loader, } } @@ -497,7 +515,7 @@ impl<'tcx> SPDGGenerator<'tcx> { .into_iter() .map(|n| { let body = self - .flowistry_loader + .metadata_loader .get_body_info(n.at.leaf().function) .unwrap(); let (kind, description, span) = match n.at.leaf().location { diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index e16ff9da9f..d3c19ce45d 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -20,7 +20,7 @@ use rustc_hir::{ intravisit::{self, 
FnKind}, BodyId, }; -use rustc_middle::{hir::nested_filter::OnlyBodies, ty::TyCtxt}; +use rustc_middle::{hir::nested_filter::OnlyBodies, mir::Local, ty::TyCtxt}; use rustc_span::{symbol::Ident, Span, Symbol}; use anyhow::Result; @@ -46,7 +46,7 @@ pub struct CollectingVisitor<'tcx> { pub marker_ctx: MarkerDatabase<'tcx>, - pub flowistry_collector: MetadataCollector, + pub emit_target_collector: Vec, } /// A function we will be targeting to analyze with @@ -90,16 +90,10 @@ impl<'tcx> CollectingVisitor<'tcx> { opts, functions_to_analyze, marker_ctx: MarkerDatabase::init(tcx, opts, loader), - flowistry_collector: MetadataCollector::new(), + emit_target_collector: vec![], } } - /// After running the discovery with `visit_all_item_likes_in_crate`, create - /// the read-only [`SPDGGenerator`] upon which the analysis will run. - fn into_generator(self, loader: Rc>) -> SPDGGenerator<'tcx> { - SPDGGenerator::new(self.marker_ctx.into(), self.opts, self.tcx, loader) - } - /// Driver function. Performs the data collection via visit, then calls /// [`Self::analyze`] to construct the Forge friendly description of all /// endpoints. 
@@ -151,7 +145,7 @@ impl<'tcx> intravisit::Visitor<'tcx> for CollectingVisitor<'tcx> { }); } if self.tcx.generics_of(id).count() == 0 { - self.flowistry_collector.add_target(id) + self.emit_target_collector.push(id) } } _ => (), diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 0881cd9d05..50d1554100 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -152,15 +152,17 @@ impl rustc_driver::Callbacks for Callbacks { let loader = MetadataLoader::new(tcx); - let (analysis_targets, mctx) = loader.clone().collect_and_emit_metadata( - self.opts, - compiler - .build_output_filenames(tcx.sess, &[]) - .with_extension(".para"), - ); + let (analysis_targets, mctx, pdg_constructor) = + loader.clone().collect_and_emit_metadata( + self.opts, + compiler + .build_output_filenames(tcx.sess, &[]) + .with_extension(".para"), + ); tcx.sess.abort_if_errors(); - let mut gen = SPDGGenerator::new(mctx, self.opts, tcx, loader.clone()); + let mut gen = + SPDGGenerator::new(mctx, self.opts, tcx, pdg_constructor, loader.clone()); let desc = gen.analyze(analysis_targets)?; From c540163d5afd022463c6fa25b647454641178265 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 14 May 2024 20:41:11 -0700 Subject: [PATCH 12/95] Pass the judge to PDG construction --- .../paralegal-flow/src/ana/graph_converter.rs | 9 ----- crates/paralegal-flow/src/ana/mod.rs | 27 ++++++------- crates/paralegal-flow/src/ann/db.rs | 2 +- crates/paralegal-flow/src/lib.rs | 39 ++++++++++--------- crates/paralegal-spdg/src/lib.rs | 28 ++----------- 5 files changed, 37 insertions(+), 68 deletions(-) diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index d5f8cf627b..46f3c10b5e 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -324,7 +324,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { /// Consume the generator 
and compile the [`SPDG`]. pub fn make_spdg(mut self) -> SPDG { - let start = Instant::now(); self.make_spdg_impl(); let arguments = self.determine_arguments(); let return_ = self.determine_return(); @@ -499,7 +498,6 @@ fn assert_edge_location_invariant<'tcx>( pub(super) struct MyCallback<'tcx> { pub(super) judge: InlineJudge<'tcx>, - pub(super) stat_wrap: StatStracker, pub(super) tcx: TyCtxt<'tcx>, } @@ -520,13 +518,6 @@ impl<'tcx> CallChangeCallback<'tcx> for MyCallback<'tcx> { if skip { changes = changes.with_skip(Skip); - } else { - record_inlining( - &self.stat_wrap, - self.tcx, - info.callee.def_id().expect_local(), - info.is_cached, - ) } changes } diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index faa4faaed7..5f1ac45b8d 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -54,7 +54,7 @@ use graph_converter::GraphConverter; use rustc_type_ir::TyEncoder; use rustc_utils::{cache::Cache, mir::borrowck_facts}; -use self::{encoder::ParalegalEncoder, inline_judge::InlineJudge}; +use self::{encoder::ParalegalEncoder, graph_converter::MyCallback, inline_judge::InlineJudge}; pub struct MetadataLoader<'tcx> { tcx: TyCtxt<'tcx>, @@ -77,7 +77,12 @@ impl<'tcx> MetadataLoader<'tcx> { let mut collector = CollectingVisitor::new(tcx, args, self.clone()); collector.run(); let emit_targets = collector.emit_target_collector; - let constructor = MemoPdgConstructor::new(tcx, self.clone()); + let marker_ctx: MarkerCtx = collector.marker_ctx.into(); + let mut constructor = MemoPdgConstructor::new(tcx, self.clone()); + constructor.with_call_change_callback(MyCallback { + tcx, + judge: InlineJudge::new(marker_ctx.clone(), tcx, args.anactrl()), + }); let pdgs = emit_targets .into_iter() .map(|t| { @@ -90,13 +95,9 @@ impl<'tcx> MetadataLoader<'tcx> { ) }) .collect::>(); - let meta = Metadata::from_pdgs(tcx, pdgs, &collector.marker_ctx); + let meta = Metadata::from_pdgs(tcx, pdgs, marker_ctx.db()); 
meta.write(path, tcx); - ( - collector.functions_to_analyze, - collector.marker_ctx.into(), - constructor, - ) + (collector.functions_to_analyze, marker_ctx, constructor) } pub fn get_annotations(&self, key: DefId) -> &[Annotation] { @@ -166,10 +167,7 @@ impl<'tcx> Metadata<'tcx> { }, |t| RustcInstructionInfo { kind: if let Ok((id, ..)) = t.as_fn_and_args(tcx) { - RustcInstructionKind::FunctionCall(FunctionCallInfo { - id, - is_inlined: unimplemented!(), - }) + RustcInstructionKind::FunctionCall(FunctionCallInfo { id }) } else if matches!(t.kind, TerminatorKind::SwitchInt { .. }) { RustcInstructionKind::SwitchInt } else { @@ -531,10 +529,7 @@ impl<'tcx> SPDGGenerator<'tcx> { match instruction.kind { RustcInstructionKind::SwitchInt => InstructionKind::SwitchInt, RustcInstructionKind::FunctionCall(c) => { - InstructionKind::FunctionCall(FunctionCallInfo { - is_inlined: c.is_inlined, - id: c.id, - }) + InstructionKind::FunctionCall(c) } RustcInstructionKind::Statement => InstructionKind::Statement, RustcInstructionKind::Terminator => InstructionKind::Terminator, diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index 0a6984bc27..6d4c0eeb5f 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -57,7 +57,7 @@ impl<'tcx> MarkerCtx<'tcx> { } #[inline] - fn db(&self) -> &MarkerDatabase<'tcx> { + pub fn db(&self) -> &MarkerDatabase<'tcx> { &self.0 } diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 50d1554100..b6ba70a6e6 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -164,25 +164,28 @@ impl rustc_driver::Callbacks for Callbacks { let mut gen = SPDGGenerator::new(mctx, self.opts, tcx, pdg_constructor, loader.clone()); - let desc = gen.analyze(analysis_targets)?; - - if self.opts.dbg().dump_spdg() { - let out = std::fs::File::create("call-only-flow.gv").unwrap(); - paralegal_spdg::dot::dump(&desc, out).unwrap(); - } - - let 
ser = Instant::now(); - desc.canonical_write(self.opts.result_path()).unwrap(); - self.stats - .record_timed(TimedStat::Serialization, ser.elapsed()); - - println!("Analysis finished with timing: {}", self.stats); - - anyhow::Ok(if self.opts.abort_after_analysis() { - rustc_driver::Compilation::Stop + if !analysis_targets.is_empty() { + let desc = gen.analyze(analysis_targets)?; + + if self.opts.dbg().dump_spdg() { + let out = std::fs::File::create("call-only-flow.gv").unwrap(); + paralegal_spdg::dot::dump(&desc, out).unwrap(); + } + + let ser = Instant::now(); + desc.canonical_write(self.opts.result_path()).unwrap(); + self.stats + .record_timed(TimedStat::Serialization, ser.elapsed()); + + println!("Analysis finished with timing: {}", self.stats); + anyhow::Ok(if self.opts.abort_after_analysis() { + rustc_driver::Compilation::Stop + } else { + rustc_driver::Compilation::Continue + }) } else { - rustc_driver::Compilation::Continue - }) + Ok(rustc_driver::Compilation::Continue) + } }) .unwrap() } diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index 759da2143d..6c75e2bd79 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -36,6 +36,7 @@ pub mod utils; use internment::Intern; use itertools::Itertools; +use rustc_macros::{Decodable, Encodable}; use rustc_portable::DefId; use serde::{Deserialize, Serialize}; use std::time::Duration; @@ -272,9 +273,10 @@ impl Span { /// Metadata on a function call. #[derive(Debug, Clone, Copy, Serialize, Deserialize, Eq, Ord, PartialOrd, PartialEq)] +#[cfg_attr(feature = "rustc", derive(Encodable, Decodable))] pub struct FunctionCallInfo { - /// Has this call been inlined - pub is_inlined: bool, + // /// Has this call been inlined + // pub is_inlined: bool, /// What is the ID of the item that was called here. 
#[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::DefId"))] pub id: DefId, @@ -299,28 +301,6 @@ pub enum InstructionKind { Return, } -#[cfg(feature = "rustc")] -impl Encodable for FunctionCallInfo { - fn encode(&self, s: &mut E) { - self.is_inlined.encode(s); - rustc::middle::ty::tls::with(|tcx| tcx.def_path_hash(self.id).encode(s)) - } -} - -#[cfg(feature = "rustc")] -impl Decodable for FunctionCallInfo { - fn decode(d: &mut D) -> Self { - Self { - is_inlined: Decodable::decode(d), - id: rustc::middle::ty::tls::with(|tcx| { - tcx.def_path_hash_to_def_id(Decodable::decode(d), &mut || { - panic!("Could not translate def path hash") - }) - }), - } - } -} - impl InstructionKind { /// If this identifies a function call, return the information inside. pub fn as_function_call(self) -> Option { From 431fd2d2dc9635eb1af7a095bafb01989a8bffa5 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 14 May 2024 21:41:49 -0700 Subject: [PATCH 13/95] Loading --- crates/paralegal-flow/src/ana/encoder.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/crates/paralegal-flow/src/ana/encoder.rs b/crates/paralegal-flow/src/ana/encoder.rs index 1c029267f6..4f39e09795 100644 --- a/crates/paralegal-flow/src/ana/encoder.rs +++ b/crates/paralegal-flow/src/ana/encoder.rs @@ -78,7 +78,7 @@ impl<'tcx> TyEncoder for ParalegalEncoder<'tcx> { &mut self.predicate_shorthands } - fn encode_alloc_id(&mut self, alloc_id: &::AllocId) { + fn encode_alloc_id(&mut self, _alloc_id: &::AllocId) { unimplemented!() } } @@ -95,6 +95,16 @@ pub struct ParalegalDecoder<'tcx, 'a> { shorthand_map: FxHashMap>, } +impl<'tcx, 'a> ParalegalDecoder<'tcx, 'a> { + pub fn new(tcx: TyCtxt<'tcx>, buf: &'a [u8]) -> Self { + Self { + tcx, + mem_decoder: MemDecoder::new(buf, 0), + shorthand_map: Default::default(), + } + } +} + impl<'tcx, 'a> TyDecoder for ParalegalDecoder<'tcx, 'a> { const CLEAR_CROSS_CRATE: bool = true; From 4c249a2ee3c2a79dce899153649631f0c6c96b7b Mon Sep 17 00:00:00 
2001 From: Justus Adam Date: Tue, 14 May 2024 21:42:06 -0700 Subject: [PATCH 14/95] Unused imports --- .../src/construct.rs | 11 ++--- .../flowistry_pdg_construction/src/graph.rs | 14 ++---- crates/flowistry_pdg_construction/src/meta.rs | 25 ++-------- .../flowistry_pdg_construction/src/utils.rs | 7 ++- .../flowistry_pdg_construction/tests/pdg.rs | 2 +- .../paralegal-flow/src/ana/graph_converter.rs | 13 +++-- crates/paralegal-flow/src/ana/mod.rs | 48 ++++++++++++------- crates/paralegal-flow/src/ann/db.rs | 4 +- crates/paralegal-flow/src/ann/mod.rs | 3 +- crates/paralegal-flow/src/ann/parse.rs | 2 - crates/paralegal-flow/src/discover.rs | 14 +----- crates/paralegal-flow/src/stats.rs | 1 + crates/paralegal-flow/src/test_utils.rs | 1 - crates/paralegal-flow/src/utils/mod.rs | 1 - crates/paralegal-spdg/src/dot.rs | 1 - crates/paralegal-spdg/src/lib.rs | 3 -- 16 files changed, 59 insertions(+), 91 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 3c18e342c9..aa203eee5b 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -10,14 +10,13 @@ use petgraph::graph::DiGraph; use rustc_abi::VariantIdx; use rustc_borrowck::consumers::{places_conflict, BodyWithBorrowckFacts, PlaceConflictBias}; use rustc_hash::{FxHashMap, FxHashSet}; -use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId}; +use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_index::IndexVec; use rustc_macros::{TyDecodable, TyEncodable}; use rustc_middle::{ mir::{ - visit::Visitor, AggregateKind, BasicBlock, Body, HasLocalDecls, Local, LocalDecl, - LocalDecls, LocalKind, Location, Operand, Place, PlaceElem, Rvalue, Statement, Terminator, - TerminatorEdges, TerminatorKind, RETURN_PLACE, + visit::Visitor, AggregateKind, BasicBlock, Body, Location, Operand, Place, PlaceElem, + Rvalue, Statement, Terminator, TerminatorEdges, TerminatorKind, RETURN_PLACE, 
}, ty::{GenericArg, GenericArgsRef, List, TyCtxt, TyKind}, }; @@ -130,12 +129,12 @@ impl<'tcx> MemoPdgConstructor<'tcx> { } } - pub fn construct_graph(&self, function: LocalDefId) -> Result, ErrorGuaranteed> { + pub fn construct_graph(&self, function: DefId) -> Result, ErrorGuaranteed> { let args = manufacture_substs_for(self.tcx, function)?; let g = self .construct_for(try_resolve_function( self.tcx, - function.to_def_id(), + function, self.tcx.param_env_reveal_all_normalized(function), args, )) diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 1d3b307dfc..92e982b54e 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -9,22 +9,14 @@ use std::{ use flowistry_pdg::CallString; use internment::Intern; use petgraph::{dot, graph::DiGraph}; -use rustc_abi::VariantIdx; use rustc_hash::{FxHashMap, FxHashSet}; -use rustc_hir::def_id::DefIndex; -use rustc_index::IndexVec; use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ - mir::{ - BasicBlock, Body, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, Place, - }, - ty::{GenericArgs, GenericArgsRef, Ty, TyCtxt}, + mir::{Body, Place}, + ty::{GenericArgsRef, TyCtxt}, }; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; -use rustc_span::{ - def_id::{DefId, DefPathHash}, - Span, -}; +use rustc_span::Span; use rustc_utils::PlaceExt; pub use flowistry_pdg::{RichLocation, SourceUse, TargetUse}; diff --git a/crates/flowistry_pdg_construction/src/meta.rs b/crates/flowistry_pdg_construction/src/meta.rs index 151dc29d51..b7a8806367 100644 --- a/crates/flowistry_pdg_construction/src/meta.rs +++ b/crates/flowistry_pdg_construction/src/meta.rs @@ -1,27 +1,8 @@ -use flowistry_pdg::{CallString, RichLocation}; use rustc_hash::FxHashMap; -use rustc_hir::{ - def_id::{CrateNum, DefId, DefIndex, LocalDefId}, - intravisit::{self, FnKind}, - BodyId, -}; -use 
rustc_index::IndexVec; -use rustc_macros::{TyDecodable, TyEncodable}; -use rustc_middle::{ - hir::nested_filter::OnlyBodies, - mir::{ - BasicBlock, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, - TerminatorKind, - }, - ty::{GenericArgsRef, TyCtxt}, -}; -use rustc_span::Span; -use rustc_utils::{cache::Cache, mir::borrowck_facts}; +use rustc_hir::def_id::{DefIndex, LocalDefId}; +use rustc_middle::ty::TyCtxt; -use crate::{ - construct::SubgraphDescriptor, Asyncness, CallChangeCallback, DepGraph, MemoPdgConstructor, - PDGLoader, -}; +use crate::{construct::SubgraphDescriptor, MemoPdgConstructor, PDGLoader}; pub struct MetadataCollector { targets: Vec, diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index be5533277a..33ef11056c 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -1,7 +1,6 @@ use std::{borrow::Cow, collections::hash_map::Entry, hash::Hash}; use either::Either; -use flowistry_pdg::rustc_portable::LocalDefId; use itertools::Itertools; use log::{debug, trace}; use rustc_hash::{FxHashMap, FxHashSet}; @@ -260,7 +259,7 @@ pub fn ty_resolve<'tcx>(ty: Ty<'tcx>, tcx: TyCtxt<'tcx>) -> Ty<'tcx> { pub fn manufacture_substs_for( tcx: TyCtxt<'_>, - function: LocalDefId, + function: DefId, ) -> Result<&List>, ErrorGuaranteed> { use rustc_middle::ty::{ Binder, BoundRegionKind, DynKind, ExistentialPredicate, ExistentialProjection, @@ -283,7 +282,7 @@ pub fn manufacture_substs_for( GenericParamDefKind::Lifetime => { return Ok(GenericArg::from(Region::new_free( tcx, - function.to_def_id(), + function, BoundRegionKind::BrAnon(None), ))) } @@ -331,7 +330,7 @@ pub fn manufacture_substs_for( let ty = Ty::new_dynamic( tcx, tcx.mk_poly_existential_predicates_from_iter(constraints)?, - Region::new_free(tcx, function.to_def_id(), BoundRegionKind::BrAnon(None)), + Region::new_free(tcx, function, BoundRegionKind::BrAnon(None)), 
DynKind::Dyn, ); Ok(GenericArg::from(ty)) diff --git a/crates/flowistry_pdg_construction/tests/pdg.rs b/crates/flowistry_pdg_construction/tests/pdg.rs index 36309021fc..1300314e66 100644 --- a/crates/flowistry_pdg_construction/tests/pdg.rs +++ b/crates/flowistry_pdg_construction/tests/pdg.rs @@ -40,7 +40,7 @@ fn pdg( let def_id = get_main(tcx); let mut memo = MemoPdgConstructor::new(tcx, NoLoader); configure(tcx, &mut memo); - let pdg = memo.construct_graph(def_id).unwrap(); + let pdg = memo.construct_graph(def_id.to_def_id()).unwrap(); tests(tcx, pdg) }) } diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 46f3c10b5e..6a1a76dfd4 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -1,6 +1,6 @@ use crate::{ ana::inline_judge::InlineJudge, ann::MarkerAnnotation, desc::*, discover::FnToAnalyze, - stats::TimedStat, utils::*, DefId, HashMap, HashSet, MarkerCtx, + utils::*, DefId, HashMap, HashSet, MarkerCtx, }; use flowistry_pdg::SourceUse; use paralegal_spdg::{Node, SPDGStats}; @@ -10,12 +10,12 @@ use rustc_middle::{ ty::{self, Instance, TyCtxt}, }; -use std::{cell::RefCell, fmt::Display, rc::Rc, time::Instant}; +use std::{cell::RefCell, fmt::Display, rc::Rc}; use super::{ default_index, path_for_item, src_loc_for_span, BodyInfo, RustcInstructionKind, SPDGGenerator, }; -use anyhow::{anyhow, bail, Result}; +use anyhow::{bail, Result}; use either::Either; use flowistry_pdg_construction::{ graph::{DepEdge, DepEdgeKind, DepGraph, DepNode}, @@ -64,7 +64,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { target: &'a FnToAnalyze, ) -> Result { let local_def_id = target.def_id; - let start = Instant::now(); let dep_graph = Self::create_flowistry_graph(generator, local_def_id)?; if generator.opts.dbg().dump_flowistry_pdg() { @@ -315,7 +314,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { generator: &SPDGGenerator<'tcx>, 
def_id: DefId, ) -> Result> { - let Some(pdg) = generator.metadata_loader.get_pdg(def_id) else { + let Ok(pdg) = generator.flowistry_loader.construct_graph(def_id) else { bail!("Failed to construct the graph"); }; @@ -355,8 +354,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { let tcx = self.tcx(); for (i, weight) in input.node_references() { - let at = weight.at.leaf(); - self.register_node( i, NodeInfo { @@ -557,8 +554,10 @@ impl<'tcx> CallChangeCallback<'tcx> for MyCallback<'tcx> { } } +#[allow(dead_code)] type StatStracker = Rc)>>; +#[allow(dead_code)] fn record_inlining(tracker: &StatStracker, tcx: TyCtxt<'_>, def_id: LocalDefId, is_in_cache: bool) { let mut borrow = tracker.borrow_mut(); let (stats, loc_set) = &mut *borrow; diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 5f1ac45b8d..2c93b9527a 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -8,20 +8,17 @@ use crate::{ ann::{db::MarkerDatabase, Annotation, MarkerAnnotation}, desc::*, discover::{CollectingVisitor, FnToAnalyze}, - stats::{Stats, TimedStat}, utils::*, Args, DefId, HashMap, HashSet, LogLevelConfig, MarkerCtx, Symbol, }; -use std::rc::Rc; -use std::time::{Duration, Instant}; -use std::{cell::RefCell, path::Path}; +use std::path::Path; +use std::{fs::File, io::Read, rc::Rc}; use anyhow::Result; use either::Either; use flowistry_pdg_construction::{ - graph::InternedString, meta::MetadataCollector, Asyncness, DepGraph, MemoPdgConstructor, - PDGLoader, SubgraphDescriptor, + graph::InternedString, Asyncness, DepGraph, MemoPdgConstructor, PDGLoader, SubgraphDescriptor, }; use itertools::Itertools; use petgraph::visit::GraphBase; @@ -29,21 +26,18 @@ use petgraph::visit::GraphBase; use rustc_hash::FxHashMap; use rustc_hir::{ def, - def_id::{CrateNum, DefIndex, LocalDefId}, - intravisit, + def_id::{CrateNum, DefIndex}, }; use rustc_index::IndexVec; use rustc_macros::{Decodable, Encodable, TyDecodable, 
TyEncodable}; use rustc_middle::{ - hir, mir::{ BasicBlock, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, TerminatorKind, }, - query::AsLocalKey, - ty::{self, GenericArgsRef, TyCtxt}, + ty::{GenericArgsRef, TyCtxt}, }; -use rustc_serialize::{opaque::FileEncoder, Decodable, Encodable}; +use rustc_serialize::{Decodable, Encodable}; use rustc_span::{FileNameDisplayPreference, Span as RustSpan}; mod encoder; @@ -51,10 +45,13 @@ mod graph_converter; mod inline_judge; use graph_converter::GraphConverter; -use rustc_type_ir::TyEncoder; use rustc_utils::{cache::Cache, mir::borrowck_facts}; -use self::{encoder::ParalegalEncoder, graph_converter::MyCallback, inline_judge::InlineJudge}; +use self::{ + encoder::{ParalegalDecoder, ParalegalEncoder}, + graph_converter::MyCallback, + inline_judge::InlineJudge, +}; pub struct MetadataLoader<'tcx> { tcx: TyCtxt<'tcx>, @@ -96,6 +93,8 @@ impl<'tcx> MetadataLoader<'tcx> { }) .collect::>(); let meta = Metadata::from_pdgs(tcx, pdgs, marker_ctx.db()); + let path = path.as_ref(); + println!("Writing metadata to {}", path.display()); meta.write(path, tcx); (collector.functions_to_analyze, marker_ctx, constructor) } @@ -216,7 +215,25 @@ impl<'tcx> MetadataLoader<'tcx> { } pub fn get_metadata(&self, key: CrateNum) -> Option<&Metadata<'tcx>> { - self.cache.get(key, |_| unimplemented!()).as_ref() + self.cache + .get(key, |_| { + let paths = self.tcx.crate_extern_paths(key); + for path in paths { + let path = path.with_extension("para"); + println!("Trying to load file {}", path.display()); + let Ok(mut file) = File::open(path) else { + continue; + }; + let mut buf = Vec::new(); + file.read_to_end(&mut buf).unwrap(); + let mut decoder = ParalegalDecoder::new(self.tcx, buf.as_slice()); + let meta = Metadata::decode(&mut decoder); + println!("Successfully loaded"); + return Some(meta); + } + None + }) + .as_ref() } pub fn get_body_info(&self, key: DefId) -> Option<&BodyInfo<'tcx>> { @@ -401,7 +418,6 @@ impl<'tcx> 
SPDGGenerator<'tcx> { }) .collect::>>() .map(|controllers| { - let start = Instant::now(); let desc = self.make_program_description(controllers, known_def_ids, &targets); desc diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index 6d4c0eeb5f..e3e7ce8bd9 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -22,7 +22,7 @@ use crate::{ DefId, Either, HashMap, HashSet, LocalDefId, TyCtxt, }; use flowistry_pdg_construction::{determine_async, graph::InternedString}; -use rustc_ast::{AnonConst, Attribute}; +use rustc_ast::Attribute; use rustc_hir::def::DefKind; use rustc_middle::{mir, ty}; use rustc_utils::cache::Cache; @@ -66,7 +66,7 @@ impl<'tcx> MarkerCtx<'tcx> { /// /// Query is cached. fn attribute_annotations(&self, key: DefId) -> &[Annotation] { - if let Some(local) = key.as_local() { + if key.is_local() { self.db() .local_annotations .get(&self.defid_rewrite(key).expect_local()) diff --git a/crates/paralegal-flow/src/ann/mod.rs b/crates/paralegal-flow/src/ann/mod.rs index 295822539a..011293ca42 100644 --- a/crates/paralegal-flow/src/ann/mod.rs +++ b/crates/paralegal-flow/src/ann/mod.rs @@ -1,9 +1,8 @@ use flowistry_pdg_construction::graph::InternedString; use rustc_macros::{Decodable, Encodable}; -use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use serde::{Deserialize, Serialize}; -use paralegal_spdg::{rustc_proxies, tiny_bitset_pretty, Identifier, TinyBitSet, TypeId}; +use paralegal_spdg::{rustc_proxies, tiny_bitset_pretty, TinyBitSet, TypeId}; pub mod db; pub mod parse; diff --git a/crates/paralegal-flow/src/ann/parse.rs b/crates/paralegal-flow/src/ann/parse.rs index c91fb2477b..3dfb043fc9 100644 --- a/crates/paralegal-flow/src/ann/parse.rs +++ b/crates/paralegal-flow/src/ann/parse.rs @@ -16,8 +16,6 @@ use crate::{ utils::{write_sep, Print, TinyBitSet}, Symbol, }; -use flowistry_pdg_construction::graph::InternedString; -use paralegal_spdg::Identifier; use 
rustc_ast::{token, tokenstream, AttrArgs}; use rustc_hir::def_id::DefId; use rustc_middle::ty::TyCtxt; diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index d3c19ce45d..15e9e20b11 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -5,26 +5,16 @@ use std::rc::Rc; -use crate::{ - ana::{MetadataLoader, SPDGGenerator}, - ann::db::MarkerDatabase, - consts, - desc::*, - stats::Stats, - utils::*, -}; +use crate::{ana::MetadataLoader, ann::db::MarkerDatabase, consts, utils::*}; -use flowistry_pdg_construction::meta::MetadataCollector; use rustc_hir::{ def_id::{DefId, LocalDefId}, intravisit::{self, FnKind}, BodyId, }; -use rustc_middle::{hir::nested_filter::OnlyBodies, mir::Local, ty::TyCtxt}; +use rustc_middle::{hir::nested_filter::OnlyBodies, ty::TyCtxt}; use rustc_span::{symbol::Ident, Span, Symbol}; -use anyhow::Result; - use self::resolve::expect_resolve_string_to_def_id; /// Values of this type can be matched against Rust attributes diff --git a/crates/paralegal-flow/src/stats.rs b/crates/paralegal-flow/src/stats.rs index d88816bc74..4eca2560eb 100644 --- a/crates/paralegal-flow/src/stats.rs +++ b/crates/paralegal-flow/src/stats.rs @@ -45,6 +45,7 @@ impl Stats { self.inner_mut().record_timed(stat, duration) } + #[allow(dead_code)] pub fn get_timed(&self, stat: TimedStat) -> Duration { self.0.lock().unwrap().timed[stat].unwrap_or(Duration::ZERO) } diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index 74895005b7..a2aa119619 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -18,7 +18,6 @@ use paralegal_spdg::{ DefInfo, EdgeInfo, Endpoint, Node, SPDG, }; -use flowistry_pdg::rustc_portable::LocalDefId; use flowistry_pdg::CallString; use itertools::Itertools; use petgraph::visit::{Control, Data, DfsEvent, EdgeRef, FilterEdge, GraphBase, IntoEdges}; diff --git 
a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index 80bb613097..f674ff03df 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -1,7 +1,6 @@ //! Utility functions, general purpose structs and extension traits extern crate smallvec; -use flowistry_pdg::{GlobalLocation, RichLocation}; use rustc_target::spec::abi::Abi; use thiserror::Error; diff --git a/crates/paralegal-spdg/src/dot.rs b/crates/paralegal-spdg/src/dot.rs index 7425481d9e..754c549da5 100644 --- a/crates/paralegal-spdg/src/dot.rs +++ b/crates/paralegal-spdg/src/dot.rs @@ -2,7 +2,6 @@ use crate::{Endpoint, GlobalEdge, InstructionKind, Node, ProgramDescription}; use dot::{CompassPoint, Edges, Id, LabelText, Nodes}; -use flowistry_pdg::rustc_portable::LocalDefId; use flowistry_pdg::{CallString, RichLocation}; use petgraph::prelude::EdgeRef; diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index 6c75e2bd79..fe66687ed7 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -54,9 +54,6 @@ use petgraph::visit::IntoNodeIdentifiers; pub use std::collections::{HashMap, HashSet}; use std::fmt::{Display, Formatter}; -#[cfg(feature = "rustc")] -use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; - /// The types of identifiers that identify an entrypoint pub type Endpoint = DefId; /// Identifiers for types From f8beb0e74d6a6ac6dcea212aded1b08ab31c3741 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 14 May 2024 23:07:38 -0700 Subject: [PATCH 15/95] Create cross crate test case --- crates/paralegal-flow/src/ana/encoder.rs | 14 ++++- crates/paralegal-flow/src/ana/mod.rs | 7 ++- crates/paralegal-flow/tests/cross-crate.rs | 31 ++++++++++ .../tests/cross-crate/Cargo.lock | 57 +++++++++++++++++++ .../tests/cross-crate/Cargo.toml | 2 + .../tests/cross-crate/dependency/Cargo.toml | 6 ++ .../tests/cross-crate/dependency/src/lib.rs | 3 + 
.../tests/cross-crate/entry/Cargo.toml | 7 +++ .../tests/cross-crate/entry/src/main.rs | 21 +++++++ 9 files changed, 144 insertions(+), 4 deletions(-) create mode 100644 crates/paralegal-flow/tests/cross-crate.rs create mode 100644 crates/paralegal-flow/tests/cross-crate/Cargo.lock create mode 100644 crates/paralegal-flow/tests/cross-crate/Cargo.toml create mode 100644 crates/paralegal-flow/tests/cross-crate/dependency/Cargo.toml create mode 100644 crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs create mode 100644 crates/paralegal-flow/tests/cross-crate/entry/Cargo.toml create mode 100644 crates/paralegal-flow/tests/cross-crate/entry/src/main.rs diff --git a/crates/paralegal-flow/src/ana/encoder.rs b/crates/paralegal-flow/src/ana/encoder.rs index 4f39e09795..3c2442143f 100644 --- a/crates/paralegal-flow/src/ana/encoder.rs +++ b/crates/paralegal-flow/src/ana/encoder.rs @@ -1,7 +1,7 @@ use std::path::Path; use rustc_hash::FxHashMap; -use rustc_hir::def_id::DefId; +use rustc_hir::def_id::{DefId, DefIndex}; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_serialize::{ opaque::{FileEncoder, MemDecoder}, @@ -187,3 +187,15 @@ impl<'tcx, 'a> Decodable> for DefId { }) } } + +impl<'tcx> Encodable> for DefIndex { + fn encode(&self, s: &mut ParalegalEncoder<'tcx>) { + self.as_u32().encode(s) + } +} + +impl<'tcx, 'a> Decodable> for DefIndex { + fn decode(d: &mut ParalegalDecoder<'tcx, 'a>) -> Self { + Self::from_u32(u32::decode(d)) + } +} diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 2c93b9527a..43fbed63c0 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -26,7 +26,7 @@ use petgraph::visit::GraphBase; use rustc_hash::FxHashMap; use rustc_hir::{ def, - def_id::{CrateNum, DefIndex}, + def_id::{CrateNum, DefIndex, LOCAL_CRATE}, }; use rustc_index::IndexVec; use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; @@ -96,6 +96,7 @@ impl<'tcx> 
MetadataLoader<'tcx> { let path = path.as_ref(); println!("Writing metadata to {}", path.display()); meta.write(path, tcx); + self.cache.get(LOCAL_CRATE, |_| Some(meta)); (collector.functions_to_analyze, marker_ctx, constructor) } @@ -154,8 +155,8 @@ impl<'tcx> Metadata<'tcx> { let bb = body_info .instructions .ensure_contains_elem(loc.block, Default::default); - if bb.len() < loc.statement_index { - bb.resize_with(loc.statement_index, Default::default); + if bb.len() <= loc.statement_index { + bb.resize_with(loc.statement_index + 1, Default::default); } bb[loc.statement_index].get_or_insert_with(|| { body.stmt_at(loc).either( diff --git a/crates/paralegal-flow/tests/cross-crate.rs b/crates/paralegal-flow/tests/cross-crate.rs new file mode 100644 index 0000000000..4631e43f6e --- /dev/null +++ b/crates/paralegal-flow/tests/cross-crate.rs @@ -0,0 +1,31 @@ +#![feature(rustc_private)] +#[macro_use] +extern crate lazy_static; + +use paralegal_flow::test_utils::*; + +const CRATE_DIR: &str = "tests/cross-crate/entry"; + +lazy_static! { + static ref TEST_CRATE_ANALYZED: bool = run_paralegal_flow_with_flow_graph_dump(CRATE_DIR); +} + +macro_rules! 
define_test { + ($name:ident: $ctrl:ident -> $block:block) => { + define_test!($name: $ctrl, $name -> $block); + }; + ($name:ident: $ctrl:ident, $ctrl_name:ident -> $block:block) => { + paralegal_flow::define_flow_test_template!(TEST_CRATE_ANALYZED, CRATE_DIR, $name: $ctrl, $ctrl_name -> $block); + }; +} + +define_test!(basic : graph -> { + let src_fn = graph.function("src"); + let src = graph.call_site(&src_fn); + let not_src_fn = graph.function("not_src"); + let not_src = graph.call_site(&not_src_fn); + let target_fn = graph.function("target"); + let target = graph.call_site(&target_fn); + assert!(src.output().flows_to_data(&target.input())); + assert!(!not_src.output().flows_to_data(&target.input())); +}); diff --git a/crates/paralegal-flow/tests/cross-crate/Cargo.lock b/crates/paralegal-flow/tests/cross-crate/Cargo.lock new file mode 100644 index 0000000000..6478b31dbc --- /dev/null +++ b/crates/paralegal-flow/tests/cross-crate/Cargo.lock @@ -0,0 +1,57 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing.
+version = 3 + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "dependency" +version = "0.0.1" +dependencies = [ + "paralegal", +] + +[[package]] +name = "entry" +version = "0.0.1" +dependencies = [ + "dependency", + "paralegal", +] + +[[package]] +name = "paralegal" +version = "0.1.0" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro2" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/crates/paralegal-flow/tests/cross-crate/Cargo.toml b/crates/paralegal-flow/tests/cross-crate/Cargo.toml new file mode 100644 index 0000000000..6e5676e925 --- /dev/null +++ b/crates/paralegal-flow/tests/cross-crate/Cargo.toml @@ -0,0 +1,2 @@ +[workspace] +members = ["dependency", "entry"] diff --git a/crates/paralegal-flow/tests/cross-crate/dependency/Cargo.toml b/crates/paralegal-flow/tests/cross-crate/dependency/Cargo.toml new file mode 100644 index 0000000000..9cd274b7d0 --- /dev/null +++ b/crates/paralegal-flow/tests/cross-crate/dependency/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "dependency" +version = "0.0.1" + +[dependencies] +paralegal = { path = "../../../../paralegal" } diff --git a/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs 
b/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs new file mode 100644 index 0000000000..f653ba6da9 --- /dev/null +++ b/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs @@ -0,0 +1,3 @@ +pub fn find_me<T>(a: T, b: T) -> T { + b +} diff --git a/crates/paralegal-flow/tests/cross-crate/entry/Cargo.toml b/crates/paralegal-flow/tests/cross-crate/entry/Cargo.toml new file mode 100644 index 0000000000..52bdc7a83f --- /dev/null +++ b/crates/paralegal-flow/tests/cross-crate/entry/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "entry" +version = "0.0.1" + +[dependencies] +dependency = { path = "../dependency" } +paralegal = { path = "../../../../paralegal" } diff --git a/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs new file mode 100644 index 0000000000..70c6428ef7 --- /dev/null +++ b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs @@ -0,0 +1,21 @@ +extern crate dependency; + +use dependency::find_me; + +#[paralegal::marker(source)] +fn src() -> usize { + 0 +} + +#[paralegal::marker(not_source)] +fn not_source() -> usize { + 1 +} + +#[paralegal::marker(target)] +fn target(u: usize) {} + +#[paralegal::analyze] +fn main() { + target(find_me(src(), not_source())) +} From 8dba09ab83f1fe542e04efc398385fc761778bb9 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 15 May 2024 10:39:04 -0700 Subject: [PATCH 16/95] Local crate fixes --- .../src/construct.rs | 10 ++++- .../flowistry_pdg_construction/src/graph.rs | 4 +- crates/paralegal-flow/src/ana/mod.rs | 45 ++++++++++++------- crates/paralegal-flow/src/consts.rs | 2 + crates/paralegal-flow/src/lib.rs | 3 +- crates/paralegal-flow/src/test_utils.rs | 16 ++++++- crates/paralegal-flow/tests/cross-crate.rs | 2 +- .../tests/cross-crate/entry/src/main.rs | 4 +- 8 files changed, 64 insertions(+), 22 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index
aa203eee5b..87e1c2477f 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -1130,6 +1130,13 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { ) } + fn generic_args(&self) -> GenericArgsRef<'tcx> { + match self.root { + FnResolution::Final(inst) => inst.args, + _ => List::empty(), + } + } + fn handle_terminator( &self, terminator: &Terminator<'tcx>, @@ -1168,7 +1175,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { .into_engine(self.tcx(), &self.body) .iterate_to_fixpoint(); - let mut final_state = PartialGraph::new(Asyncness::No); + let mut final_state = PartialGraph::new(Asyncness::No, self.generic_args()); analysis.visit_reachable_with(&self.body, &mut final_state); @@ -1303,6 +1310,7 @@ impl<'tcx> TransformCallString for PartialGraph<'tcx> { fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { let recurse_node = |n: &DepNode<'tcx>| n.transform_call_string(&f); Self { + generics: self.generics, asyncness: self.asyncness, nodes: self.nodes.iter().map(recurse_node).collect(), edges: self diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 92e982b54e..f39bcc4dc7 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -283,15 +283,17 @@ pub struct PartialGraph<'tcx> { pub nodes: FxHashSet>, pub edges: FxHashSet<(DepNode<'tcx>, DepNode<'tcx>, DepEdge)>, pub monos: FxHashMap>, + pub generics: GenericArgsRef<'tcx>, pub asyncness: Asyncness, } impl<'tcx> PartialGraph<'tcx> { - pub fn new(asyncness: Asyncness) -> Self { + pub fn new(asyncness: Asyncness, generics: GenericArgsRef<'tcx>) -> Self { Self { nodes: Default::default(), edges: Default::default(), monos: Default::default(), + generics, asyncness, } } diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 43fbed63c0..df952008d9 100644 --- 
a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -6,6 +6,7 @@ use crate::{ ann::{db::MarkerDatabase, Annotation, MarkerAnnotation}, + consts::INTERMEDIATE_ARTIFACT_EXT, desc::*, discover::{CollectingVisitor, FnToAnalyze}, utils::*, @@ -35,7 +36,7 @@ use rustc_middle::{ BasicBlock, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, TerminatorKind, }, - ty::{GenericArgsRef, TyCtxt}, + ty::{GenericArgsRef, List, TyCtxt}, }; use rustc_serialize::{Decodable, Encodable}; use rustc_span::{FileNameDisplayPreference, Span as RustSpan}; @@ -136,10 +137,14 @@ impl<'tcx> Metadata<'tcx> { markers: &MarkerDatabase<'tcx>, ) -> Self { let mut bodies: FxHashMap = Default::default(); - for pdg in pdgs - .values() - .flat_map(|d| d.graph.nodes.iter().flat_map(|n| n.at.iter())) - { + for pdg in pdgs.values().flat_map(|d| { + d.graph + .nodes + .iter() + .map(|n| &n.at) + .chain(d.graph.edges.iter().map(|e| &e.2.at)) + .flat_map(|at| at.iter()) + }) { if let Some(local) = pdg.function.as_local() { let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, local); let body = &body_with_facts.body; @@ -220,7 +225,7 @@ impl<'tcx> MetadataLoader<'tcx> { .get(key, |_| { let paths = self.tcx.crate_extern_paths(key); for path in paths { - let path = path.with_extension("para"); + let path = path.with_extension(INTERMEDIATE_ARTIFACT_EXT); println!("Trying to load file {}", path.display()); let Ok(mut file) = File::open(path) else { continue; @@ -243,10 +248,12 @@ impl<'tcx> MetadataLoader<'tcx> { } pub fn get_mono(&self, cs: CallString) -> GenericArgsRef<'tcx> { + let get_graph = |key: DefId| &self.get_metadata(key.krate).unwrap().pdgs[&key.index].graph; + let Some(cs) = cs.caller() else { + return get_graph(cs.leaf().function).generics; + }; let key = cs.root().function; - self.get_metadata(key.krate).unwrap().pdgs[&key.index] - .graph - .monos[&cs] + get_graph(key).monos[&cs] } pub fn get_pdg(&self, key: DefId) -> Option> { @@ 
-525,15 +532,21 @@ impl<'tcx> SPDGGenerator<'tcx> { &self, controllers: &HashMap, ) -> HashMap { - let all_instructions = controllers.values().flat_map(|v| v.graph.node_weights()); + let all_instructions = controllers + .values() + .flat_map(|v| { + v.graph + .node_weights() + .map(|n| &n.at) + .chain(v.graph.edge_weights().map(|e| &e.at)) + }) + .flat_map(|at| at.iter()) + .collect::>(); all_instructions .into_iter() .map(|n| { - let body = self - .metadata_loader - .get_body_info(n.at.leaf().function) - .unwrap(); - let (kind, description, span) = match n.at.leaf().location { + let body = self.metadata_loader.get_body_info(n.function).unwrap(); + let (kind, description, span) = match n.location { RichLocation::End => { (InstructionKind::Return, "start".to_owned(), body.def_span) } @@ -557,7 +570,7 @@ impl<'tcx> SPDGGenerator<'tcx> { } }; ( - n.at.leaf(), + n, InstructionInfo { kind, span: src_loc_for_span(span, self.tcx), diff --git a/crates/paralegal-flow/src/consts.rs b/crates/paralegal-flow/src/consts.rs index f7841bebf5..4ef5da4f4d 100644 --- a/crates/paralegal-flow/src/consts.rs +++ b/crates/paralegal-flow/src/consts.rs @@ -34,3 +34,5 @@ lazy_static! 
{ /// [`MetaItemMatch::match_extract`](crate::utils::MetaItemMatch::match_extract) pub static ref EXCEPTION_MARKER: AttrMatchT = sym_vec!["paralegal_flow", "exception"]; } + +pub const INTERMEDIATE_ARTIFACT_EXT: &str = "para"; diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index b6ba70a6e6..57dd025978 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -42,6 +42,7 @@ extern crate rustc_span; extern crate rustc_target; extern crate rustc_type_ir; +use consts::INTERMEDIATE_ARTIFACT_EXT; pub use rustc_type_ir::sty; pub use rustc_middle::ty; @@ -157,7 +158,7 @@ impl rustc_driver::Callbacks for Callbacks { self.opts, compiler .build_output_filenames(tcx.sess, &[]) - .with_extension(".para"), + .with_extension(INTERMEDIATE_ARTIFACT_EXT), ); tcx.sess.abort_if_errors(); diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index a2aa119619..59e82db5ba 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -6,6 +6,7 @@ extern crate rustc_span; use crate::{ desc::{Identifier, ProgramDescription}, + utils::Print, HashSet, }; use std::fmt::{Debug, Formatter}; @@ -341,7 +342,20 @@ impl<'g> CtrlRef<'g> { .map(|v| v.at) .chain(self.ctrl.graph.node_weights().map(|info| info.at)) .filter(|m| { - instruction_info[&m.leaf()] + instruction_info + .get(&m.leaf()) + .unwrap_or_else(|| { + panic!( + "Could not find instruction {} in\n{}", + m.leaf(), + Print(|fmt| { + for (k, v) in instruction_info.iter() { + writeln!(fmt, " {k}: {v:?}")?; + } + Ok(()) + }) + ) + }) .kind .as_function_call() .map_or(false, |i| i.id == fun.ident) diff --git a/crates/paralegal-flow/tests/cross-crate.rs b/crates/paralegal-flow/tests/cross-crate.rs index 4631e43f6e..4a3de239b9 100644 --- a/crates/paralegal-flow/tests/cross-crate.rs +++ b/crates/paralegal-flow/tests/cross-crate.rs @@ -4,7 +4,7 @@ extern crate lazy_static; use 
paralegal_flow::test_utils::*; -const CRATE_DIR: &str = "tests/cross-crate/entry"; +const CRATE_DIR: &str = "tests/cross-crate"; lazy_static! { static ref TEST_CRATE_ANALYZED: bool = run_paralegal_flow_with_flow_graph_dump(CRATE_DIR); diff --git a/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs index 70c6428ef7..68e577e58c 100644 --- a/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs +++ b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs @@ -16,6 +16,8 @@ fn not_source() -> usize { fn target(u: usize) {} #[paralegal::analyze] -fn main() { +fn basic() { target(find_me(src(), not_source())) } + +fn main() {} From c3337de04ce857955142e273047f860bd3d29dbb Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 15 May 2024 10:47:25 -0700 Subject: [PATCH 17/95] Make the instruction info creation more robust --- crates/paralegal-flow/src/ana/mod.rs | 66 ++++++++++++++-------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index df952008d9..f3ee5de6aa 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -36,7 +36,7 @@ use rustc_middle::{ BasicBlock, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, TerminatorKind, }, - ty::{GenericArgsRef, List, TyCtxt}, + ty::{GenericArgsRef, TyCtxt}, }; use rustc_serialize::{Decodable, Encodable}; use rustc_span::{FileNameDisplayPreference, Span as RustSpan}; @@ -148,42 +148,42 @@ impl<'tcx> Metadata<'tcx> { if let Some(local) = pdg.function.as_local() { let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, local); let body = &body_with_facts.body; - let body_info = bodies + bodies .entry(local.local_def_index) .or_insert_with(|| BodyInfo { arg_count: body.arg_count, decls: body.local_decls().to_owned(), - instructions: Default::default(), + instructions: body + .basic_blocks + 
.iter() + .map(|bb| { + let t = bb.terminator(); + bb.statements + .iter() + .map(|s| RustcInstructionInfo { + kind: RustcInstructionKind::Statement, + span: s.source_info.span, + description: format!("{:?}", s.kind).into(), + }) + .chain([RustcInstructionInfo { + kind: if let Ok((id, ..)) = t.as_fn_and_args(tcx) { + RustcInstructionKind::FunctionCall(FunctionCallInfo { + id, + }) + } else if matches!(t.kind, TerminatorKind::SwitchInt { .. }) + { + RustcInstructionKind::SwitchInt + } else { + RustcInstructionKind::Terminator + }, + span: t.source_info.span, + description: format!("{:?}", t.kind).into(), + }]) + .collect() + }) + .collect(), def_span: tcx.def_span(local), }); - if let RichLocation::Location(loc) = pdg.location { - let bb = body_info - .instructions - .ensure_contains_elem(loc.block, Default::default); - if bb.len() <= loc.statement_index { - bb.resize_with(loc.statement_index + 1, Default::default); - } - bb[loc.statement_index].get_or_insert_with(|| { - body.stmt_at(loc).either( - |s| RustcInstructionInfo { - kind: RustcInstructionKind::Statement, - span: s.source_info.span, - description: InternedString::new(format!("{:?}", s.kind)), - }, - |t| RustcInstructionInfo { - kind: if let Ok((id, ..)) = t.as_fn_and_args(tcx) { - RustcInstructionKind::FunctionCall(FunctionCallInfo { id }) - } else if matches!(t.kind, TerminatorKind::SwitchInt { .. 
}) { - RustcInstructionKind::SwitchInt - } else { - RustcInstructionKind::Terminator - }, - span: t.source_info.span, - description: InternedString::new(format!("{:?}", t.kind)), - }, - ) - }); - } } } let cache_borrow = markers.reachable_markers.borrow(); @@ -283,7 +283,7 @@ impl<'tcx> MetadataLoader<'tcx> { pub struct BodyInfo<'tcx> { pub arg_count: usize, pub decls: IndexVec>, - pub instructions: IndexVec>>, + pub instructions: IndexVec>, pub def_span: rustc_span::Span, } @@ -324,7 +324,7 @@ impl<'tcx> BodyInfo<'tcx> { } pub fn instruction_at(&self, location: Location) -> RustcInstructionInfo { - self.instructions[location.block][location.statement_index].unwrap() + self.instructions[location.block][location.statement_index] } pub fn span_of(&self, loc: RichLocation) -> rustc_span::Span { From c98687e19ac4212d8fcc308ce683d01d1c2940a1 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 15 May 2024 13:24:15 -0700 Subject: [PATCH 18/95] Hack filenames --- crates/paralegal-flow/src/lib.rs | 174 ++++++++++++++++++------------- 1 file changed, 103 insertions(+), 71 deletions(-) diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 57dd025978..3895812d40 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -42,32 +42,43 @@ extern crate rustc_span; extern crate rustc_target; extern crate rustc_type_ir; -use consts::INTERMEDIATE_ARTIFACT_EXT; -pub use rustc_type_ir::sty; - -pub use rustc_middle::ty; +pub extern crate either; -pub use rustc_middle::dep_graph::DepGraph; -pub use ty::TyCtxt; +use std::borrow::Cow; +pub use std::collections::{HashMap, HashSet}; +use std::path::PathBuf; +use std::{fmt::Display, time::Instant}; pub use rustc_hir::def_id::{DefId, LocalDefId}; pub use rustc_hir::BodyId; +pub use rustc_middle::dep_graph::DepGraph; pub use rustc_middle::mir::Location; +pub use rustc_middle::ty; +pub use rustc_span::Symbol; +pub use rustc_type_ir::sty; +pub use ty::TyCtxt; -use args::{ClapArgs, 
LogLevelConfig}; -use desc::utils::write_sep; - +use rustc_driver::Compilation; use rustc_plugin::CrateFilter; use rustc_utils::mir::borrowck_facts; -pub use std::collections::{HashMap, HashSet}; -use std::{fmt::Display, time::Instant}; +pub use paralegal_spdg as desc; + +use crate::{ + ana::{MetadataLoader, SPDGGenerator}, + ann::db::MarkerCtx, + stats::{Stats, TimedStat}, + utils::Print, +}; +pub use args::{AnalysisCtrl, Args, BuildConfig, DepConfig, DumpArgs, ModelCtrl}; +use args::{ClapArgs, LogLevelConfig}; +use consts::INTERMEDIATE_ARTIFACT_EXT; +use desc::utils::write_sep; + +use anyhow::{anyhow, Context as _, Result}; +pub use either::Either; // This import is sort of special because it comes from the private rustc // dependencies and not from our `Cargo.toml`. -pub extern crate either; -pub use either::Either; - -pub use rustc_span::Symbol; pub mod ana; pub mod ann; @@ -82,17 +93,6 @@ pub mod consts; #[cfg(feature = "test")] pub mod test_utils; -pub use paralegal_spdg as desc; - -use crate::ana::{MetadataLoader, SPDGGenerator}; -pub use crate::ann::db::MarkerCtx; -pub use args::{AnalysisCtrl, Args, BuildConfig, DepConfig, DumpArgs, ModelCtrl}; - -use crate::{ - stats::{Stats, TimedStat}, - utils::Print, -}; - /// A struct so we can implement [`rustc_plugin::RustcPlugin`] pub struct DfppPlugin; @@ -121,7 +121,76 @@ struct ArgWrapper { struct Callbacks { opts: &'static Args, stats: Stats, - start: Instant, +} + +/// Create the name of the file in which to store intermediate artifacts. +/// +/// HACK(Justus): `TyCtxt::output_filenames` returns a file stem of +/// `lib-`, whereas `OutputFiles::with_extension` returns a file +/// stem of `-`. I haven't found a clean way to get the same +/// name in both places, so i just assume that these two will always have this +/// relation and prepend the `"lib"` here. 
+fn intermediate_out_file_path(tcx: TyCtxt) -> Result { + let rustc_out_file = tcx + .output_filenames(()) + .with_extension(INTERMEDIATE_ARTIFACT_EXT); + let dir = rustc_out_file + .parent() + .ok_or_else(|| anyhow!("{} has no parent", rustc_out_file.display()))?; + let file = rustc_out_file + .file_name() + .ok_or_else(|| anyhow!("has no file name")) + .and_then(|s| s.to_str().ok_or_else(|| anyhow!("not utf8"))) + .with_context(|| format!("{}", rustc_out_file.display()))?; + + let file = if file.starts_with("lib") { + Cow::Borrowed(file) + } else { + format!("lib{file}").into() + }; + + Ok(dir.join(file.as_ref())) +} + +impl Callbacks { + fn in_context(&mut self, tcx: TyCtxt) -> Result { + tcx.sess.abort_if_errors(); + + let loader = MetadataLoader::new(tcx); + + let intermediate_out_file = intermediate_out_file_path(tcx)?; + + let (analysis_targets, mctx, pdg_constructor) = loader + .clone() + .collect_and_emit_metadata(self.opts, intermediate_out_file); + tcx.sess.abort_if_errors(); + + let mut gen = SPDGGenerator::new(mctx, self.opts, tcx, pdg_constructor, loader.clone()); + + let compilation = if !analysis_targets.is_empty() { + let desc = gen.analyze(analysis_targets)?; + + if self.opts.dbg().dump_spdg() { + let out = std::fs::File::create("call-only-flow.gv").unwrap(); + paralegal_spdg::dot::dump(&desc, out).unwrap(); + } + + let ser = Instant::now(); + desc.canonical_write(self.opts.result_path()).unwrap(); + self.stats + .record_timed(TimedStat::Serialization, ser.elapsed()); + + println!("Analysis finished with timing: {}", self.stats); + if self.opts.abort_after_analysis() { + rustc_driver::Compilation::Stop + } else { + rustc_driver::Compilation::Continue + } + } else { + rustc_driver::Compilation::Continue + }; + Ok(compilation) + } } struct NoopCallbacks {} @@ -140,54 +209,13 @@ impl rustc_driver::Callbacks for Callbacks { // that (when retrieving the MIR bodies for instance) fn after_expansion<'tcx>( &mut self, - compiler: 
&rustc_interface::interface::Compiler, + _compiler: &rustc_interface::interface::Compiler, queries: &'tcx rustc_interface::Queries<'tcx>, ) -> rustc_driver::Compilation { - self.stats - .record_timed(TimedStat::Rustc, self.start.elapsed()); queries .global_ctxt() .unwrap() - .enter(|tcx| { - tcx.sess.abort_if_errors(); - - let loader = MetadataLoader::new(tcx); - - let (analysis_targets, mctx, pdg_constructor) = - loader.clone().collect_and_emit_metadata( - self.opts, - compiler - .build_output_filenames(tcx.sess, &[]) - .with_extension(INTERMEDIATE_ARTIFACT_EXT), - ); - tcx.sess.abort_if_errors(); - - let mut gen = - SPDGGenerator::new(mctx, self.opts, tcx, pdg_constructor, loader.clone()); - - if !analysis_targets.is_empty() { - let desc = gen.analyze(analysis_targets)?; - - if self.opts.dbg().dump_spdg() { - let out = std::fs::File::create("call-only-flow.gv").unwrap(); - paralegal_spdg::dot::dump(&desc, out).unwrap(); - } - - let ser = Instant::now(); - desc.canonical_write(self.opts.result_path()).unwrap(); - self.stats - .record_timed(TimedStat::Serialization, ser.elapsed()); - - println!("Analysis finished with timing: {}", self.stats); - anyhow::Ok(if self.opts.abort_after_analysis() { - rustc_driver::Compilation::Stop - } else { - rustc_driver::Compilation::Continue - }) - } else { - Ok(rustc_driver::Compilation::Continue) - } - }) + .enter(|tcx| self.in_context(tcx)) .unwrap() } } @@ -340,10 +368,15 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { .as_ref() .map_or(false, |n| n == "build_script_build"); + println!("Handling {}", crate_name.unwrap_or("".to_owned())); + if !is_target || is_build_script { + println!("Is not target, skipping"); return rustc_driver::RunCompiler::new(&compiler_args, &mut NoopCallbacks {}).run(); } + println!("Is target, compiling"); + let lvl = plugin_args.verbosity(); // //let lvl = log::LevelFilter::Debug; simple_logger::SimpleLogger::new() @@ -373,7 +406,6 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { &mut Callbacks { 
opts, stats: Default::default(), - start: Instant::now(), }, ) .run() From 1195be4d229f3f048e12f0941ab9b952553fc84b Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 15 May 2024 13:33:45 -0700 Subject: [PATCH 19/95] Misc fixes --- .../src/construct.rs | 19 +++++++++++-------- crates/paralegal-flow/src/ana/inline_judge.rs | 6 +----- crates/paralegal-flow/src/lib.rs | 2 +- .../tests/cross-crate/dependency/src/lib.rs | 4 ++-- .../tests/cross-crate/entry/src/main.rs | 4 ++-- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 87e1c2477f..d284abbae8 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -950,13 +950,13 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { return Some(CallHandling::ApproxAsyncSM(handler)); }; - if !resolved_def_id.is_local() { - trace!( - " Bailing because func is non-local: `{}`", - tcx.def_path_str(resolved_def_id) - ); - return None; - }; + // if !resolved_def_id.is_local() { + // trace!( + // " Bailing because func is non-local: `{}`", + // tcx.def_path_str(resolved_def_id) + // ); + // return None; + // }; let call_kind = match self.classify_call_kind(called_def_id, resolved_def_id, args) { Ok(cc) => cc, @@ -1040,7 +1040,10 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { trace!(" Bailing because user callback said to bail"); return None; } - let descriptor = self.memo.construct_for(cache_key)?; + let Some(descriptor) = self.memo.construct_for(cache_key) else { + trace!(" Bailing because cache lookup {cache_key} failed"); + return None; + }; Some(CallHandling::Ready { descriptor, calling_convention, diff --git a/crates/paralegal-flow/src/ana/inline_judge.rs b/crates/paralegal-flow/src/ana/inline_judge.rs index 564914f01e..0d4414dfef 100644 --- a/crates/paralegal-flow/src/ana/inline_judge.rs +++ b/crates/paralegal-flow/src/ana/inline_judge.rs @@ 
-34,11 +34,7 @@ impl<'tcx> InlineJudge<'tcx> { let marker_target = info.async_parent.unwrap_or(info.callee); let marker_target_def_id = marker_target.def_id(); match self.analysis_control.inlining_depth() { - _ if self.marker_ctx.is_marked(marker_target_def_id) - || !marker_target_def_id.is_local() => - { - false - } + _ if self.marker_ctx.is_marked(marker_target_def_id) => false, InliningDepth::Adaptive => self .marker_ctx .has_transitive_reachable_markers(marker_target), diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 3895812d40..eff0cc24f5 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -381,7 +381,7 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { // //let lvl = log::LevelFilter::Debug; simple_logger::SimpleLogger::new() .with_level(lvl) - //.with_module_level("flowistry", lvl) + .with_module_level("flowistry", lvl) .with_module_level("rustc_utils", log::LevelFilter::Error) .init() .unwrap(); diff --git a/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs b/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs index f653ba6da9..23c99b3c23 100644 --- a/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs +++ b/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs @@ -1,3 +1,3 @@ -pub fn find_me(a: T, b: T) -> T { - b +pub fn find_me(a: usize, _b: usize) -> usize { + a } diff --git a/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs index 68e577e58c..e545d78973 100644 --- a/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs +++ b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs @@ -8,7 +8,7 @@ fn src() -> usize { } #[paralegal::marker(not_source)] -fn not_source() -> usize { +fn not_src() -> usize { 1 } @@ -17,7 +17,7 @@ fn target(u: usize) {} #[paralegal::analyze] fn basic() { - target(find_me(src(), not_source())) + target(find_me(src(), not_src())) } fn main() {} 
From 2ea3965aef94fa2be57e5dc655699bbc3d049208 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 15 May 2024 13:38:48 -0700 Subject: [PATCH 20/95] =?UTF-8?q?Basic=20test=20case=20works=20?= =?UTF-8?q?=F0=9F=8E=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/paralegal-flow/src/ana/encoder.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/paralegal-flow/src/ana/encoder.rs b/crates/paralegal-flow/src/ana/encoder.rs index 3c2442143f..42018e05df 100644 --- a/crates/paralegal-flow/src/ana/encoder.rs +++ b/crates/paralegal-flow/src/ana/encoder.rs @@ -39,6 +39,8 @@ impl<'tcx> ParalegalEncoder<'tcx> { } } +const CLEAR_CROSS_CRATE: bool = false; + impl<'a, 'tcx> Encoder for ParalegalEncoder<'tcx> { encoder_methods! { emit_usize(usize); @@ -60,7 +62,7 @@ impl<'a, 'tcx> Encoder for ParalegalEncoder<'tcx> { impl<'tcx> TyEncoder for ParalegalEncoder<'tcx> { type I = TyCtxt<'tcx>; - const CLEAR_CROSS_CRATE: bool = false; + const CLEAR_CROSS_CRATE: bool = CLEAR_CROSS_CRATE; fn position(&self) -> usize { self.file_encoder.position() @@ -106,7 +108,7 @@ impl<'tcx, 'a> ParalegalDecoder<'tcx, 'a> { } impl<'tcx, 'a> TyDecoder for ParalegalDecoder<'tcx, 'a> { - const CLEAR_CROSS_CRATE: bool = true; + const CLEAR_CROSS_CRATE: bool = CLEAR_CROSS_CRATE; type I = TyCtxt<'tcx>; From fbf01f5fe8defc7f33f609246383ca96350d0eed Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 15 May 2024 17:02:33 -0700 Subject: [PATCH 21/95] Fix generics tracking --- .../src/async_support.rs | 19 +++++---- .../src/construct.rs | 24 ++++------- .../paralegal-flow/src/ana/graph_converter.rs | 2 +- crates/paralegal-flow/src/ana/mod.rs | 41 +++++++++++++++---- 4 files changed, 54 insertions(+), 32 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index e74f228000..ef97e78ba9 100644 --- 
a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -1,7 +1,7 @@ use std::rc::Rc; use either::Either; -use flowistry_pdg::GlobalLocation; +use flowistry_pdg::{CallString, GlobalLocation}; use itertools::Itertools; use rustc_abi::{FieldIdx, VariantIdx}; use rustc_hir::def_id::{DefId, LocalDefId}; @@ -190,14 +190,17 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { determine_async(self.tcx(), self.def_id, &self.body)?; let g = self.memo.construct_for(generator_fn)?; - let mut new_g = push_call_string_root( - g, - GlobalLocation { - function: self.def_id.to_def_id(), - location: flowistry_pdg::RichLocation::Location(location), - }, - ); + let gloc = GlobalLocation { + function: self.def_id.to_def_id(), + location: flowistry_pdg::RichLocation::Location(location), + }; + let mut new_g = push_call_string_root(g, gloc); + let g_generics = std::mem::replace(&mut new_g.graph.generics, self.generic_args()); new_g.graph.asyncness = asyncness; + new_g + .graph + .monos + .insert(CallString::single(gloc), g_generics); Some(new_g) } diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index d284abbae8..1115d5b7c5 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -359,8 +359,6 @@ impl<'tcx> PartialGraph<'tcx> { function: constructor.def_id.to_def_id(), }; - let extend_node = |dep: &DepNode<'tcx>| push_call_string_root(dep, gloc); - let (child_descriptor, calling_convention) = match constructor.determine_call_handling(location, func, args)? { CallHandling::Ready { @@ -400,7 +398,7 @@ impl<'tcx> PartialGraph<'tcx> { graph: child_graph, parentable_srcs, parentable_dsts, - } = &*child_descriptor; + } = push_call_string_root(child_descriptor, gloc); // For each source node CHILD that is parentable to PLACE, // add an edge from PLACE -> CHILD. 
@@ -420,7 +418,7 @@ impl<'tcx> PartialGraph<'tcx> { Inputs::Unresolved { places: vec![(parent_place, None)], }, - Either::Right(extend_node(child_src)), + Either::Right(child_src), location, TargetUse::Assign, ); @@ -446,7 +444,7 @@ impl<'tcx> PartialGraph<'tcx> { results, state, Inputs::Resolved { - node: extend_node(child_dst), + node: child_dst, node_use: SourceUse::Operand, }, Either::Left(parent_place), @@ -455,15 +453,9 @@ impl<'tcx> PartialGraph<'tcx> { ); } } - self.nodes.extend(child_graph.nodes.iter().map(extend_node)); - self.edges - .extend(child_graph.edges.iter().map(|(n1, n2, e)| { - ( - extend_node(n1), - extend_node(n2), - push_call_string_root(e, gloc), - ) - })); + self.nodes.extend(child_graph.nodes); + self.edges.extend(child_graph.edges); + self.monos.extend(child_graph.monos); Some(()) } @@ -545,7 +537,7 @@ type PdgCache<'tcx> = pub struct GraphConstructor<'tcx, 'a> { pub(crate) memo: &'a MemoPdgConstructor<'tcx>, - root: FnResolution<'tcx>, + pub(super) root: FnResolution<'tcx>, body_with_facts: &'tcx BodyWithBorrowckFacts<'tcx>, pub(crate) body: Cow<'tcx, Body<'tcx>>, pub(crate) def_id: LocalDefId, @@ -1133,7 +1125,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { ) } - fn generic_args(&self) -> GenericArgsRef<'tcx> { + pub(super) fn generic_args(&self) -> GenericArgsRef<'tcx> { match self.root { FnResolution::Final(inst) => inst.args, _ => List::empty(), diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 6a1a76dfd4..3235ba19ad 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -216,7 +216,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { .metadata_loader .get_body_info(at.leaf().function) .unwrap(); - let generics = self.generator.metadata_loader.get_mono(at); + let generics = self.generator.metadata_loader.get_mono(at).unwrap(); // So actually we're going to check the base place only, because 
// Flowistry sometimes tracks subplaces instead but we want the marker diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index f3ee5de6aa..5b836eaff3 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -16,7 +16,7 @@ use crate::{ use std::path::Path; use std::{fs::File, io::Read, rc::Rc}; -use anyhow::Result; +use anyhow::{anyhow, Result}; use either::Either; use flowistry_pdg_construction::{ graph::InternedString, Asyncness, DepGraph, MemoPdgConstructor, PDGLoader, SubgraphDescriptor, @@ -247,13 +247,40 @@ impl<'tcx> MetadataLoader<'tcx> { meta.bodies.get(&key.index) } - pub fn get_mono(&self, cs: CallString) -> GenericArgsRef<'tcx> { - let get_graph = |key: DefId| &self.get_metadata(key.krate).unwrap().pdgs[&key.index].graph; - let Some(cs) = cs.caller() else { - return get_graph(cs.leaf().function).generics; + pub fn get_mono(&self, cs: CallString) -> Result> { + let get_graph = |key: DefId| { + anyhow::Ok( + &self + .get_metadata(key.krate) + .ok_or_else(|| { + anyhow!("no metadata for crate {}", self.tcx.crate_name(key.krate)) + })? + .pdgs + .get(&key.index) + .ok_or_else(|| anyhow!("no pdg for item {key:?}"))? + .graph, + ) }; - let key = cs.root().function; - get_graph(key).monos[&cs] + if let Some(caller) = cs.caller() { + let key = caller.root().function; + let monos = &get_graph(key)?.monos; + monos + .get(&caller) + .ok_or_else(|| { + anyhow!( + "no generics known for call site {cs} (caller {caller}). 
Known generics are\n{}", + Print(|fmt| { + for (k, v) in monos { + writeln!(fmt, " {k}: {v:?}")?; + } + Ok(()) + }) + ) + }) + .map(|s| *s) + } else { + Ok(get_graph(cs.leaf().function)?.generics) + } } pub fn get_pdg(&self, key: DefId) -> Option> { From bc90d79add0ff2534308a9e730bcac1992e8fbf0 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 15 May 2024 18:57:04 -0700 Subject: [PATCH 22/95] Found a bug I wasn't looking for --- .../src/construct.rs | 2 +- .../paralegal-flow/src/ana/graph_converter.rs | 5 ++- crates/paralegal-flow/src/ana/mod.rs | 38 ++++++++++++++----- crates/paralegal-flow/src/ann/db.rs | 35 +++++++---------- crates/paralegal-flow/tests/marker_tests.rs | 2 +- 5 files changed, 48 insertions(+), 34 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 1115d5b7c5..683c098b54 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -618,7 +618,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { self.make_call_string(location), self.tcx(), &self.body, - !self.place_info.children(place).is_empty(), + self.place_info.children(place).iter().any(|p| *p != place), ) } diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 3235ba19ad..2355952a1d 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -288,13 +288,16 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { let Some(place_ty) = self.determine_place_type(weight.at, weight.place.as_ref()) else { return; }; - let deep = !weight.is_split; + // Restore after fixing https://github.com/brownsys/paralegal/issues/138 + //let deep = !weight.is_split; + let deep = true; let mut node_types = self.type_is_marked(place_ty, deep).collect::>(); for (p, _) in weight.place.iter_projections() { if let Some(place_ty) = 
self.determine_place_type(weight.at, p) { node_types.extend(self.type_is_marked(place_ty, false)); } } + println!(" found marked types {node_types:?}"); self.known_def_ids.extend(node_types.iter().copied()); let tcx = self.tcx(); if !node_types.is_empty() { diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 5b836eaff3..05f039845d 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -17,7 +17,6 @@ use std::path::Path; use std::{fs::File, io::Read, rc::Rc}; use anyhow::{anyhow, Result}; -use either::Either; use flowistry_pdg_construction::{ graph::InternedString, Asyncness, DepGraph, MemoPdgConstructor, PDGLoader, SubgraphDescriptor, }; @@ -112,6 +111,31 @@ impl<'tcx> MetadataLoader<'tcx> { })() .unwrap_or(&[]) } + + pub fn all_annotations<'a>(&'a self) -> impl Iterator { + let b = self.cache.borrow(); + + // Safety: While we're keeping references to the borrow above, we only + // keep references to values behind `Pin>` which are guaranteed + // not to move. So even if the borrow is modified, these references are + // still valid. + // + // In terms of race conditions: this is a cache which never overwrites values. 
+ let metadatas = unsafe { + std::mem::transmute::< + Vec<(CrateNum, &_)>, + Vec<(CrateNum, &'a HashMap>)>, + >( + b.iter() + .filter_map(|(k, v)| Some((*k, &(**(v.as_ref()?)).as_ref()?.local_annotations))) + .collect::>(), + ) + }; + metadatas.into_iter().flat_map(|(krate, m)| { + m.iter() + .flat_map(move |(&index, v)| v.iter().map(move |v| (DefId { krate, index }, v))) + }) + } } #[derive(Clone, Debug, TyEncodable, TyDecodable)] @@ -543,7 +567,7 @@ impl<'tcx> SPDGGenerator<'tcx> { marker_annotation_count: self .marker_ctx() .all_annotations() - .filter_map(|m| m.1.either(Annotation::as_marker, Some)) + .filter(|m| m.1.as_marker().is_some()) .count() as u32, dedup_locs, dedup_functions, @@ -618,16 +642,12 @@ impl<'tcx> SPDGGenerator<'tcx> { .fold_with( |id, _| (format!("{id:?}"), vec![], vec![]), |mut desc, _, ann| { - match ann { - Either::Right(MarkerAnnotation { refinement, marker }) - | Either::Left(Annotation::Marker(MarkerAnnotation { - refinement, - marker, - })) => { + match ann.as_ref() { + Annotation::Marker(MarkerAnnotation { refinement, marker }) => { assert!(refinement.on_self()); desc.2.push(*marker) } - Either::Left(Annotation::OType(id)) => desc.1.push(*id), + Annotation::OType(id) => desc.1.push(*id), _ => panic!("Unexpected type of annotation {ann:?}"), } desc diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index e3e7ce8bd9..b1c5c4ee7a 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -27,7 +27,7 @@ use rustc_hir::def::DefKind; use rustc_middle::{mir, ty}; use rustc_utils::cache::Cache; -use std::rc::Rc; +use std::{borrow::Cow, rc::Rc}; type ExternalMarkers = HashMap>; @@ -66,10 +66,11 @@ impl<'tcx> MarkerCtx<'tcx> { /// /// Query is cached. 
fn attribute_annotations(&self, key: DefId) -> &[Annotation] { - if key.is_local() { + let key = self.defid_rewrite(key); + if let Some(local) = key.as_local() { self.db() .local_annotations - .get(&self.defid_rewrite(key).expect_local()) + .get(&local) .map_or(&[], Vec::as_slice) } else { self.0.loader.get_annotations(key) @@ -135,18 +136,6 @@ impl<'tcx> MarkerCtx<'tcx> { self.is_attribute_marked(did) || self.is_externally_marked(did) } - /// Return a complete set of local annotations that were discovered. - /// - /// Crucially this is a "readout" from the marker cache, which means only - /// items reachable from the `paralegal_flow::analyze` will end up in this collection. - pub fn local_annotations_found(&self) -> Vec<(LocalDefId, &[Annotation])> { - self.db() - .local_annotations - .iter() - .map(|(k, v)| (*k, (v.as_slice()))) - .collect() - } - /// Direct access to the loaded database of external markers. #[inline] pub fn external_annotations(&self) -> &ExternalMarkers { @@ -429,22 +418,24 @@ impl<'tcx> MarkerCtx<'tcx> { } /// Iterate over all discovered annotations, whether local or external - pub fn all_annotations( - &self, - ) -> impl Iterator)> { + pub fn all_annotations(&self) -> impl Iterator)> { self.0 .local_annotations .iter() .flat_map(|(&id, anns)| { anns.iter() - .map(move |ann| (id.to_def_id(), Either::Left(ann))) + .map(move |ann| (id.to_def_id(), Cow::Borrowed(ann))) }) .chain( self.0 - .external_annotations - .iter() - .flat_map(|(&id, anns)| anns.iter().map(move |ann| (id, Either::Right(ann)))), + .loader + .all_annotations() + .map(|(it, ann)| (it, Cow::Borrowed(ann))), ) + .chain(self.0.external_annotations.iter().flat_map(|(&id, anns)| { + anns.iter() + .map(move |ann| (id, Cow::Owned(Annotation::Marker(ann.clone())))) + })) } pub fn functions_seen(&self) -> Vec> { diff --git a/crates/paralegal-flow/tests/marker_tests.rs b/crates/paralegal-flow/tests/marker_tests.rs index 8ae77b08f9..cf88b55af1 100644 --- 
a/crates/paralegal-flow/tests/marker_tests.rs +++ b/crates/paralegal-flow/tests/marker_tests.rs @@ -26,7 +26,7 @@ define_test!(use_wrapper: ctrl -> { let cs = ctrl.call_site(&uwf); println!("{:?}", &ctrl.graph().desc.type_info); let tp = cs.output().as_singles().any(|n| - dbg!(ctrl.types_for(n.node())).iter().any(|t| + dbg!(ctrl.types_for(dbg!(n.node()))).iter().any(|t| ctrl.graph().desc.type_info[t].rendering.contains("::Wrapper") ) ); From 1671c93dcfad6254adf754072c69225b3f798868 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 16 May 2024 11:49:58 -0700 Subject: [PATCH 23/95] WIP fixing generics recording --- .../src/async_support.rs | 4 +- .../src/construct.rs | 52 +++++++++---------- .../flowistry_pdg_construction/src/graph.rs | 2 +- .../paralegal-flow/src/ana/graph_converter.rs | 29 ++++++----- crates/paralegal-flow/src/ana/mod.rs | 4 ++ 5 files changed, 47 insertions(+), 44 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index ef97e78ba9..b5bf9db8bc 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -195,12 +195,12 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { location: flowistry_pdg::RichLocation::Location(location), }; let mut new_g = push_call_string_root(g, gloc); - let g_generics = std::mem::replace(&mut new_g.graph.generics, self.generic_args()); + //let g_generics = std::mem::replace(&mut new_g.graph.generics, self.generic_args()); new_g.graph.asyncness = asyncness; new_g .graph .monos - .insert(CallString::single(gloc), g_generics); + .insert(CallString::single(gloc), new_g.graph.generics); Some(new_g) } diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 683c098b54..d5f6665d45 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -364,11 
+364,8 @@ impl<'tcx> PartialGraph<'tcx> { CallHandling::Ready { calling_convention, descriptor, - generic_args, - } => { - self.monos.insert(CallString::single(gloc), generic_args); - (descriptor, calling_convention) - } + generic_args: _, + } => (descriptor, calling_convention), CallHandling::ApproxAsyncFn => { // Register a synthetic assignment of `future = (arg0, arg1, ...)`. let rvalue = Rvalue::Aggregate( @@ -456,6 +453,8 @@ impl<'tcx> PartialGraph<'tcx> { self.nodes.extend(child_graph.nodes); self.edges.extend(child_graph.edges); self.monos.extend(child_graph.monos); + self.monos + .insert(CallString::single(gloc), child_descriptor.graph.generics); Some(()) } @@ -1175,30 +1174,27 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { analysis.visit_reachable_with(&self.body, &mut final_state); let all_returns = self.body.all_returns().map(|ret| ret.block).collect_vec(); - let has_return = !all_returns.is_empty(); let mut analysis = analysis.into_results_cursor(&self.body); - if has_return { - for block in all_returns { - analysis.seek_to_block_end(block); - let return_state = analysis.get(); - for (place, locations) in &return_state.last_mutation { - let ret_kind = if place.local == RETURN_PLACE { - TargetUse::Return - } else if let Some(num) = other_as_arg(*place, &self.body) { - TargetUse::MutArg(num) - } else { - continue; - }; - for location in locations { - let src = self.make_dep_node(*place, *location); - let dst = self.make_dep_node(*place, RichLocation::End); - let edge = DepEdge::data( - self.make_call_string(self.body.terminator_loc(block)), - SourceUse::Operand, - ret_kind, - ); - final_state.edges.insert((src, dst, edge)); - } + for block in all_returns { + analysis.seek_to_block_end(block); + let return_state = analysis.get(); + for (place, locations) in &return_state.last_mutation { + let ret_kind = if place.local == RETURN_PLACE { + TargetUse::Return + } else if let Some(num) = other_as_arg(*place, &self.body) { + TargetUse::MutArg(num) + } else { + 
continue; + }; + for location in locations { + let src = self.make_dep_node(*place, *location); + let dst = self.make_dep_node(*place, RichLocation::End); + let edge = DepEdge::data( + self.make_call_string(self.body.terminator_loc(block)), + SourceUse::Operand, + ret_kind, + ); + final_state.edges.insert((src, dst, edge)); } } } diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index f39bcc4dc7..195177f791 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -13,7 +13,7 @@ use rustc_hash::{FxHashMap, FxHashSet}; use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ mir::{Body, Place}, - ty::{GenericArgsRef, TyCtxt}, + ty::{GenericArgsRef, Ty, TyCtxt}, }; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use rustc_span::Span; diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 2355952a1d..df3d5f13ec 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -6,7 +6,7 @@ use flowistry_pdg::SourceUse; use paralegal_spdg::{Node, SPDGStats}; use rustc_hir::{def, def_id::LocalDefId}; use rustc_middle::{ - mir::{self, Location}, + mir::{self, tcx::PlaceTy, Location}, ty::{self, Instance, TyCtxt}, }; @@ -236,19 +236,23 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { }; let raw_ty = place.ty(body, tcx); - Some( - *FnResolution::Final( - Instance::resolve( - tcx, - ty::ParamEnv::reveal_all(), - at.leaf().function, - generics, - ) - .unwrap() - .unwrap(), + let function = at.leaf().function; + // println!( + // "Resolving {raw_ty:?} for place {place:?} with generics {generics:?} in {function:?}", + // ); + let resolution = *FnResolution::Final( + Instance::resolve( + tcx, + tcx.param_env_reveal_all_normalized(function), + function, + generics, ) - .try_monomorphize(tcx, 
ty::ParamEnv::reveal_all(), &raw_ty), + .unwrap() + .unwrap(), ) + .try_monomorphize(tcx, ty::ParamEnv::reveal_all(), &raw_ty); + //println!("Resolved to {resolution:?}"); + Some(resolution) } /// Fetch annotations item identified by this `id`. @@ -297,7 +301,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { node_types.extend(self.type_is_marked(place_ty, false)); } } - println!(" found marked types {node_types:?}"); self.known_def_ids.extend(node_types.iter().copied()); let tcx = self.tcx(); if !node_types.is_empty() { diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 05f039845d..a68b0ad224 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -288,6 +288,10 @@ impl<'tcx> MetadataLoader<'tcx> { if let Some(caller) = cs.caller() { let key = caller.root().function; let monos = &get_graph(key)?.monos; + // println!("Known monos for {key:?} are"); + // for (k, v) in monos { + // println!(" {k}: {v:?}"); + // } monos .get(&caller) .ok_or_else(|| { From 721d5febe882ac78e11fe761ccb23cc69238d743 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 16 May 2024 13:00:00 -0700 Subject: [PATCH 24/95] Fix return place detection --- crates/paralegal-flow/src/ana/graph_converter.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index df3d5f13ec..3c76abfbbd 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -174,10 +174,12 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { // this function call be affected/modified by this call? If // so, that location would also need to have this marker // attached - let needs_return_markers = graph - .graph - .edges_directed(old_node, Direction::Incoming) - .any(|e| { + // + // Also yikes. 
This should have better detection of whether + // a place is (part of) a function return + let mut in_edges = graph.graph.edges_directed(old_node, Direction::Incoming); + let needs_return_markers = in_edges.clone().next().is_none() + || in_edges.any(|e| { let at = e.weight().at; #[cfg(debug_assertions)] assert_edge_location_invariant(self.tcx(), at, body, weight.at); From ce5d30bc268f4d260944bb6c29efbc0af04f2bc8 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 16 May 2024 13:00:36 -0700 Subject: [PATCH 25/95] Schedule analysis targets for construction, use manufactured type arguments --- .../src/construct.rs | 16 ++- crates/paralegal-flow/src/ana/mod.rs | 111 +++++++++++------- crates/paralegal-flow/src/discover.rs | 4 +- 3 files changed, 83 insertions(+), 48 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index d5f6665d45..566524386b 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -98,7 +98,21 @@ impl<'tcx> MemoPdgConstructor<'tcx> { self } - pub fn construct_for<'a>( + pub fn construct_root<'a>( + &'a self, + function: LocalDefId, + ) -> Option<&'a SubgraphDescriptor<'tcx>> { + let generics = manufacture_substs_for(self.tcx, function.to_def_id()).unwrap(); + let resolution = try_resolve_function( + self.tcx, + function.to_def_id(), + self.tcx.param_env_reveal_all_normalized(function), + generics, + ); + self.construct_for(resolution) + } + + pub(crate) fn construct_for<'a>( &'a self, resolution: FnResolution<'tcx>, ) -> Option<&'a SubgraphDescriptor<'tcx>> { diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index a68b0ad224..c774d5274e 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -16,7 +16,6 @@ use crate::{ use std::path::Path; use std::{fs::File, io::Read, rc::Rc}; -use anyhow::{anyhow, Result}; use 
flowistry_pdg_construction::{ graph::InternedString, Asyncness, DepGraph, MemoPdgConstructor, PDGLoader, SubgraphDescriptor, }; @@ -35,7 +34,7 @@ use rustc_middle::{ BasicBlock, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, TerminatorKind, }, - ty::{GenericArgsRef, TyCtxt}, + ty::{tls, GenericArgsRef, TyCtxt}, }; use rustc_serialize::{Decodable, Encodable}; use rustc_span::{FileNameDisplayPreference, Span as RustSpan}; @@ -44,8 +43,10 @@ mod encoder; mod graph_converter; mod inline_judge; +use anyhow::Result; use graph_converter::GraphConverter; use rustc_utils::{cache::Cache, mir::borrowck_facts}; +use thiserror::Error; use self::{ encoder::{ParalegalDecoder, ParalegalEncoder}, @@ -58,9 +59,26 @@ pub struct MetadataLoader<'tcx> { cache: Cache>>, } +#[derive(Debug, Error)] +pub enum MetadataLoaderError { + #[error("no pdg for item {:?}", .0)] + NoPdgForItem(DefId), + #[error("no metadata for crate {}", tls::with(|tcx| tcx.crate_name(*.0)))] + NoMetadataForCrate(CrateNum), + #[error("no generics known for call site {0}")] + NoGenericsKnownForCallSite(CallString), + #[error("no metadata for item {:?} in crate {}", .0, tls::with(|tcx| tcx.crate_name(.0.krate)))] + NoSuchItemInCate(DefId), +} + +use MetadataLoaderError::*; + impl<'tcx> PDGLoader<'tcx> for MetadataLoader<'tcx> { fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { - self.get_metadata(function.krate)?.pdgs.get(&function.index) + self.get_metadata(function.krate) + .ok()? + .pdgs + .get(&function.index) } } @@ -85,10 +103,7 @@ impl<'tcx> MetadataLoader<'tcx> { .map(|t| { ( t.local_def_index, - (*constructor - .construct_for(FnResolution::Partial(t.to_def_id())) - .unwrap()) - .clone(), + (*constructor.construct_root(t).unwrap()).clone(), ) }) .collect::>(); @@ -103,7 +118,8 @@ impl<'tcx> MetadataLoader<'tcx> { pub fn get_annotations(&self, key: DefId) -> &[Annotation] { (|| { Some( - self.get_metadata(key.krate)? + self.get_metadata(key.krate) + .ok()? 
.local_annotations .get(&key.index)? .as_slice(), @@ -244,8 +260,9 @@ impl<'tcx> MetadataLoader<'tcx> { }) } - pub fn get_metadata(&self, key: CrateNum) -> Option<&Metadata<'tcx>> { - self.cache + pub fn get_metadata(&self, key: CrateNum) -> Result<&Metadata<'tcx>> { + let meta = self + .cache .get(key, |_| { let paths = self.tcx.crate_extern_paths(key); for path in paths { @@ -264,26 +281,42 @@ impl<'tcx> MetadataLoader<'tcx> { None }) .as_ref() + .ok_or(NoMetadataForCrate(key))?; + Ok(meta) } - pub fn get_body_info(&self, key: DefId) -> Option<&BodyInfo<'tcx>> { + pub fn get_body_info(&self, key: DefId) -> Result<&BodyInfo<'tcx>> { let meta = self.get_metadata(key.krate)?; - meta.bodies.get(&key.index) + let res = meta.bodies.get(&key.index).ok_or(NoSuchItemInCate(key)); + if res.is_err() { + println!("Known items are"); + for &index in meta.bodies.keys() { + println!( + " {:?}", + DefId { + krate: key.krate, + index + } + ); + } + } + Ok(res?) } pub fn get_mono(&self, cs: CallString) -> Result> { let get_graph = |key: DefId| { - anyhow::Ok( - &self - .get_metadata(key.krate) - .ok_or_else(|| { - anyhow!("no metadata for crate {}", self.tcx.crate_name(key.krate)) - })? - .pdgs - .get(&key.index) - .ok_or_else(|| anyhow!("no pdg for item {key:?}"))? - .graph, - ) + let meta = self.get_metadata(key.krate)?; + println!("Pdgs are known for"); + for &index in meta.pdgs.keys() { + println!( + " {:?}", + DefId { + krate: key.krate, + index + } + ); + } + anyhow::Ok(&meta.pdgs.get(&key.index).ok_or(NoPdgForItem(key))?.graph) }; if let Some(caller) = cs.caller() { let key = caller.root().function; @@ -292,38 +325,26 @@ impl<'tcx> MetadataLoader<'tcx> { // for (k, v) in monos { // println!(" {k}: {v:?}"); // } - monos - .get(&caller) - .ok_or_else(|| { - anyhow!( - "no generics known for call site {cs} (caller {caller}). 
Known generics are\n{}", - Print(|fmt| { - for (k, v) in monos { - writeln!(fmt, " {k}: {v:?}")?; - } - Ok(()) - }) - ) - }) - .map(|s| *s) + Ok(*monos.get(&caller).ok_or(NoGenericsKnownForCallSite(cs))?) } else { Ok(get_graph(cs.leaf().function)?.generics) } } - pub fn get_pdg(&self, key: DefId) -> Option> { - Some( - self.get_metadata(key.krate)? - .pdgs - .get(&key.index)? - .to_petgraph(), - ) + pub fn get_pdg(&self, key: DefId) -> Result> { + Ok(self + .get_metadata(key.krate)? + .pdgs + .get(&key.index) + .ok_or(NoPdgForItem(key))? + .to_petgraph()) } pub fn get_asyncness(&self, key: DefId) -> Asyncness { (|| { Some( - self.get_metadata(key.krate)? + self.get_metadata(key.krate) + .ok()? .pdgs .get(&key.index)? .graph diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index 15e9e20b11..f0a2173d65 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -133,8 +133,8 @@ impl<'tcx> intravisit::Visitor<'tcx> for CollectingVisitor<'tcx> { name: *name, def_id: id.to_def_id(), }); - } - if self.tcx.generics_of(id).count() == 0 { + self.emit_target_collector.push(id); + } else if self.tcx.generics_of(id).count() == 0 { self.emit_target_collector.push(id) } } From 4144f716d4f7353f330049903072323e53a9a7d4 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 16 May 2024 15:34:15 -0700 Subject: [PATCH 26/95] Cross crate marker test case --- crates/paralegal-flow/tests/cross-crate.rs | 7 +++++++ .../paralegal-flow/tests/cross-crate/dependency/src/lib.rs | 5 +++++ crates/paralegal-flow/tests/cross-crate/entry/src/main.rs | 7 ++++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/crates/paralegal-flow/tests/cross-crate.rs b/crates/paralegal-flow/tests/cross-crate.rs index 4a3de239b9..cfce68ff82 100644 --- a/crates/paralegal-flow/tests/cross-crate.rs +++ b/crates/paralegal-flow/tests/cross-crate.rs @@ -3,6 +3,7 @@ extern crate lazy_static; use paralegal_flow::test_utils::*; +use 
paralegal_spdg::Identifier; const CRATE_DIR: &str = "tests/cross-crate"; @@ -29,3 +30,9 @@ define_test!(basic : graph -> { assert!(src.output().flows_to_data(&target.input())); assert!(!not_src.output().flows_to_data(&target.input())); }); + +define_test!(basic_marker: graph -> { + + let marker = Identifier::new_intern("mark"); + assert!(dbg!(&graph.spdg().markers).iter().any(|(_, markers)| markers.contains(&marker))) +}); diff --git a/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs b/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs index 23c99b3c23..12c67474c8 100644 --- a/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs +++ b/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs @@ -1,3 +1,8 @@ pub fn find_me(a: usize, _b: usize) -> usize { a } + +#[paralegal::marker(mark, return)] +pub fn source() -> usize { + 0 +} diff --git a/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs index e545d78973..0d9d42a726 100644 --- a/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs +++ b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs @@ -1,6 +1,6 @@ extern crate dependency; -use dependency::find_me; +use dependency::{find_me, source}; #[paralegal::marker(source)] fn src() -> usize { @@ -20,4 +20,9 @@ fn basic() { target(find_me(src(), not_src())) } +#[paralegal::analyze] +fn basic_marker() { + target(source()); +} + fn main() {} From 143ff0f743ac470455cccf3b46fd3cca861e8ff3 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 16 May 2024 15:49:12 -0700 Subject: [PATCH 27/95] WIP default method test case --- .../paralegal-flow/src/ana/graph_converter.rs | 3 +- crates/paralegal-flow/src/ana/mod.rs | 30 +++---------------- 2 files changed, 6 insertions(+), 27 deletions(-) diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 3c76abfbbd..eca800a2f4 100644 --- 
a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -218,7 +218,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { .metadata_loader .get_body_info(at.leaf().function) .unwrap(); - let generics = self.generator.metadata_loader.get_mono(at).unwrap(); // So actually we're going to check the base place only, because // Flowistry sometimes tracks subplaces instead but we want the marker @@ -242,6 +241,8 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { // println!( // "Resolving {raw_ty:?} for place {place:?} with generics {generics:?} in {function:?}", // ); + let generics = self.generator.metadata_loader.get_mono(at).unwrap(); + println!("Determining type fpr place {place:?} at {at} with raw type {raw_ty:?} and generics {generics:?}"); let resolution = *FnResolution::Final( Instance::resolve( tcx, diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index c774d5274e..5189a0cc5d 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -288,43 +288,21 @@ impl<'tcx> MetadataLoader<'tcx> { pub fn get_body_info(&self, key: DefId) -> Result<&BodyInfo<'tcx>> { let meta = self.get_metadata(key.krate)?; let res = meta.bodies.get(&key.index).ok_or(NoSuchItemInCate(key)); - if res.is_err() { - println!("Known items are"); - for &index in meta.bodies.keys() { - println!( - " {:?}", - DefId { - krate: key.krate, - index - } - ); - } - } Ok(res?) 
} pub fn get_mono(&self, cs: CallString) -> Result> { let get_graph = |key: DefId| { let meta = self.get_metadata(key.krate)?; - println!("Pdgs are known for"); - for &index in meta.pdgs.keys() { - println!( - " {:?}", - DefId { - krate: key.krate, - index - } - ); - } anyhow::Ok(&meta.pdgs.get(&key.index).ok_or(NoPdgForItem(key))?.graph) }; if let Some(caller) = cs.caller() { let key = caller.root().function; let monos = &get_graph(key)?.monos; - // println!("Known monos for {key:?} are"); - // for (k, v) in monos { - // println!(" {k}: {v:?}"); - // } + println!("Known monos for {key:?} are"); + for (k, v) in monos { + println!(" {k}: {v:?}"); + } Ok(*monos.get(&caller).ok_or(NoGenericsKnownForCallSite(cs))?) } else { Ok(get_graph(cs.leaf().function)?.generics) From f6889fb67f8bde2cfc43d3a1c69677a427f3b42e Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 16 May 2024 16:14:26 -0700 Subject: [PATCH 28/95] Trialing elimination of FnResolutions --- .../src/async_support.rs | 13 +-- .../src/callback.rs | 11 +-- .../src/construct.rs | 72 +++++++------- crates/flowistry_pdg_construction/src/lib.rs | 8 +- crates/flowistry_pdg_construction/src/meta.rs | 5 +- .../flowistry_pdg_construction/src/utils.rs | 94 ++++--------------- .../paralegal-flow/src/ana/graph_converter.rs | 24 +++-- crates/paralegal-flow/src/ana/mod.rs | 10 +- crates/paralegal-flow/src/ann/db.rs | 32 ++++--- crates/paralegal-flow/src/utils/mod.rs | 76 +++------------ 10 files changed, 117 insertions(+), 228 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index b5bf9db8bc..09334fbac6 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -11,13 +11,13 @@ use rustc_middle::{ AggregateKind, BasicBlock, Body, Location, Operand, Place, Rvalue, Statement, StatementKind, Terminator, TerminatorKind, }, - ty::{GenericArgsRef, TyCtxt}, + 
ty::{GenericArgsRef, Instance, TyCtxt}, }; use crate::construct::{push_call_string_root, CallKind, SubgraphDescriptor}; use super::construct::GraphConstructor; -use super::utils::{self, FnResolution}; +use super::utils::{self}; #[derive(Debug, Clone, Copy, Decodable, Encodable)] pub enum Asyncness { @@ -162,7 +162,7 @@ pub fn determine_async<'tcx>( tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>, -) -> Option<(FnResolution<'tcx>, Location, Asyncness)> { +) -> Option<(Instance<'tcx>, Location, Asyncness)> { let ((generator_def_id, args, loc), asyncness) = if tcx.asyncness(def_id).is_async() { (get_async_generator(body), Asyncness::AsyncFn) } else { @@ -173,7 +173,7 @@ pub fn determine_async<'tcx>( }; let param_env = tcx.param_env_reveal_all_normalized(def_id); let generator_fn = - utils::try_resolve_function(tcx, generator_def_id.to_def_id(), param_env, args); + utils::try_resolve_function(tcx, generator_def_id.to_def_id(), param_env, args)?; Some((generator_fn, loc, asyncness)) } @@ -226,7 +226,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { fn find_async_args<'b>( &'b self, args: &'b [Operand<'tcx>], - ) -> Result<(FnResolution<'tcx>, Location, Place<'tcx>), String> { + ) -> Result<(Instance<'tcx>, Location, Place<'tcx>), String> { macro_rules! let_assert { ($p:pat = $e:expr, $($arg:tt)*) => { let $p = $e else { @@ -333,7 +333,8 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { op, self.tcx().param_env_reveal_all_normalized(self.def_id), generics, - ); + ) + .ok_or_else(|| "Resolving function failed")?; Ok((resolution, async_fn_call_loc, calling_convention)) } diff --git a/crates/flowistry_pdg_construction/src/callback.rs b/crates/flowistry_pdg_construction/src/callback.rs index 3008f8f6f6..816a43b853 100644 --- a/crates/flowistry_pdg_construction/src/callback.rs +++ b/crates/flowistry_pdg_construction/src/callback.rs @@ -1,17 +1,16 @@ //! CAllbacks to influence graph construction and their supporting types. 
use flowistry_pdg::{rustc_portable::Location, CallString}; - -use crate::FnResolution; +use rustc_middle::ty::Instance; pub trait CallChangeCallback<'tcx> { fn on_inline(&self, info: CallInfo<'tcx>) -> CallChanges; fn on_inline_miss( &self, - _resolution: FnResolution<'tcx>, + _resolution: Instance<'tcx>, _loc: Location, - _under_analysis: FnResolution<'tcx>, + _under_analysis: Instance<'tcx>, _call_string: Option, _reason: InlineMissReason, ) { @@ -50,11 +49,11 @@ impl Default for CallChanges { /// Information about the function being called. pub struct CallInfo<'tcx> { /// The potentially-monomorphized resolution of the callee. - pub callee: FnResolution<'tcx>, + pub callee: Instance<'tcx>, /// If the callee is an async closure created by an `async fn`, this is the /// `async fn` item. - pub async_parent: Option>, + pub async_parent: Option>, /// The call-stack up to the current call site. pub call_string: CallString, diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 566524386b..b595a18425 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -18,7 +18,7 @@ use rustc_middle::{ visit::Visitor, AggregateKind, BasicBlock, Body, Location, Operand, Place, PlaceElem, Rvalue, Statement, Terminator, TerminatorEdges, TerminatorKind, RETURN_PLACE, }, - ty::{GenericArg, GenericArgsRef, List, TyCtxt, TyKind}, + ty::{GenericArg, GenericArgsRef, Instance, List, TyCtxt, TyKind}, }; use rustc_mir_dataflow::{ self as df, fmt::DebugWithContext, Analysis, AnalysisDomain, Results, ResultsVisitor, @@ -36,7 +36,7 @@ use crate::{ graph::{DepEdge, DepGraph, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, try_resolve_function, - utils::{self, is_non_default_trait_method, manufacture_substs_for, FnResolution}, + utils::{self, is_non_default_trait_method, manufacture_substs_for, try_monomorphize}, 
Asyncness, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, SkipCall, }; @@ -108,18 +108,16 @@ impl<'tcx> MemoPdgConstructor<'tcx> { function.to_def_id(), self.tcx.param_env_reveal_all_normalized(function), generics, - ); + )?; self.construct_for(resolution) } pub(crate) fn construct_for<'a>( &'a self, - resolution: FnResolution<'tcx>, + resolution: Instance<'tcx>, ) -> Option<&'a SubgraphDescriptor<'tcx>> { - let (def_id, generics) = match resolution { - FnResolution::Final(instance) => (instance.def_id(), Some(instance.args)), - FnResolution::Partial(def_id) => (def_id, None), - }; + let def_id = resolution.def_id(); + let generics = resolution.args; if let Some(local) = def_id.as_local() { self.pdg_cache.get_maybe_recursive((local, generics), |_| { let g = GraphConstructor::new(self, resolution).construct_partial(); @@ -131,27 +129,31 @@ impl<'tcx> MemoPdgConstructor<'tcx> { } } - pub fn is_in_cache(&self, resolution: FnResolution<'tcx>) -> bool { - let (def_id, generics) = match resolution { - FnResolution::Final(instance) => (instance.def_id(), Some(instance.args)), - FnResolution::Partial(def_id) => (def_id, None), - }; - if let Some(local) = def_id.as_local() { - self.pdg_cache.is_in_cache(&(local, generics)) + pub fn is_in_cache(&self, resolution: Instance<'tcx>) -> bool { + if let Some(local) = resolution.def_id().as_local() { + self.pdg_cache.is_in_cache(&(local, resolution.args)) } else { - self.loader.load(def_id).is_some() + self.loader.load(resolution.def_id()).is_some() } } pub fn construct_graph(&self, function: DefId) -> Result, ErrorGuaranteed> { let args = manufacture_substs_for(self.tcx, function)?; let g = self - .construct_for(try_resolve_function( - self.tcx, - function, - self.tcx.param_env_reveal_all_normalized(function), - args, - )) + .construct_for( + try_resolve_function( + self.tcx, + function, + self.tcx.param_env_reveal_all_normalized(function), + args, + ) + .ok_or_else(|| { + self.tcx.sess.span_err( + 
self.tcx.def_span(function), + "Could not construct graph for this function", + ) + })?, + ) .unwrap() .to_petgraph(); Ok(g) @@ -545,14 +547,13 @@ impl<'tcx> PartialGraph<'tcx> { } } -type PdgCache<'tcx> = - Rc>), SubgraphDescriptor<'tcx>>>; +type PdgCache<'tcx> = Rc), SubgraphDescriptor<'tcx>>>; pub struct GraphConstructor<'tcx, 'a> { pub(crate) memo: &'a MemoPdgConstructor<'tcx>, - pub(super) root: FnResolution<'tcx>, + pub(super) root: Instance<'tcx>, body_with_facts: &'tcx BodyWithBorrowckFacts<'tcx>, - pub(crate) body: Cow<'tcx, Body<'tcx>>, + pub(crate) body: Body<'tcx>, pub(crate) def_id: LocalDefId, place_info: PlaceInfo<'tcx>, control_dependencies: ControlDependencies, @@ -580,7 +581,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { /// Creates [`GraphConstructor`] for a function resolved as `fn_resolution` in a given `calling_context`. pub(crate) fn new( memo: &'a MemoPdgConstructor<'tcx>, - root: FnResolution<'tcx>, + root: Instance<'tcx>, ) -> GraphConstructor<'tcx, 'a> { let tcx = memo.tcx; let def_id = root.def_id().expect_local(); @@ -590,7 +591,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { // Some(cx) => cx.param_env, // None => ParamEnv::reveal_all(), // }; - let body = root.try_monomorphize(tcx, param_env, &body_with_facts.body); + let body = try_monomorphize(root, tcx, param_env, &body_with_facts.body); if memo.dump_mir { use std::io::Write; @@ -752,7 +753,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { if let Some((PlaceElem::Deref, rest)) = place.projection.split_last() { let mut new_place = place; new_place.projection = self.tcx().mk_place_elems(rest); - if new_place.ty(self.body.as_ref(), self.tcx()).ty.is_box() { + if new_place.ty(&self.body, self.tcx()).ty.is_box() { if new_place.is_indirect() { // TODO might be unsound: We assume that if // there are other indirections in here, @@ -912,9 +913,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { }; let mut operands = IndexVec::new(); operands.push(op.clone()); - let TyKind::Adt(adt_id, 
generics) = - destination.ty(self.body.as_ref(), self.tcx()).ty.kind() - else { + let TyKind::Adt(adt_id, generics) = destination.ty(&self.body, self.tcx()).ty.kind() else { unreachable!() }; assert_eq!(adt_id.did(), lang_items.pin_type().unwrap()); @@ -939,7 +938,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { // Monomorphize the called function with the known generic_args. let param_env = tcx.param_env_reveal_all_normalized(self.def_id); let resolved_fn = - utils::try_resolve_function(self.tcx(), called_def_id, param_env, generic_args); + utils::try_resolve_function(self.tcx(), called_def_id, param_env, generic_args)?; let resolved_def_id = resolved_fn.def_id(); if log_enabled!(Level::Trace) && called_def_id != resolved_def_id { let (called, resolved) = (self.fmt_fn(called_def_id), self.fmt_fn(resolved_def_id)); @@ -1139,10 +1138,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { } pub(super) fn generic_args(&self) -> GenericArgsRef<'tcx> { - match self.root { - FnResolution::Final(inst) => inst.args, - _ => List::empty(), - } + self.root.args } fn handle_terminator( @@ -1272,7 +1268,7 @@ pub enum CallKind<'tcx> { /// A call to a function variable, like `fn foo(f: impl Fn()) { f() }` Indirect, /// A poll to an async function, like `f.await`. 
- AsyncPoll(FnResolution<'tcx>, Location, Place<'tcx>), + AsyncPoll(Instance<'tcx>, Location, Place<'tcx>), } type ApproximationHandler<'tcx, 'a> = fn( diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index ed82073022..65cdcd5bcf 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -15,8 +15,6 @@ extern crate rustc_span; extern crate rustc_target; extern crate rustc_type_ir; -pub use utils::FnResolution; - pub use self::graph::DepGraph; pub use async_support::{determine_async, is_async_trait_fn, match_async_trait_assign, Asyncness}; pub mod callback; @@ -25,8 +23,8 @@ pub use callback::{ CallChangeCallback, CallChangeCallbackFn, CallChanges, CallInfo, InlineMissReason, SkipCall, }; pub use construct::{NoLoader, PDGLoader, SubgraphDescriptor}; -use rustc_middle::ty::TyCtxt; -pub use utils::{is_non_default_trait_method, try_resolve_function}; +use rustc_middle::ty::{Instance, TyCtxt}; +pub use utils::{is_non_default_trait_method, try_monomorphize, try_resolve_function}; mod async_support; mod calling_convention; @@ -37,7 +35,7 @@ mod mutation; mod utils; /// Computes a global program dependence graph (PDG) starting from the root function specified by `def_id`. 
-pub fn compute_pdg<'tcx>(tcx: TyCtxt<'tcx>, params: FnResolution<'tcx>) -> DepGraph<'tcx> { +pub fn compute_pdg<'tcx>(tcx: TyCtxt<'tcx>, params: Instance<'tcx>) -> DepGraph<'tcx> { let constructor = MemoPdgConstructor::new(tcx, NoLoader); constructor.construct_for(params).unwrap().to_petgraph() } diff --git a/crates/flowistry_pdg_construction/src/meta.rs b/crates/flowistry_pdg_construction/src/meta.rs index b7a8806367..43018b3224 100644 --- a/crates/flowistry_pdg_construction/src/meta.rs +++ b/crates/flowistry_pdg_construction/src/meta.rs @@ -24,10 +24,7 @@ impl MetadataCollector { .map(|t| { ( t.local_def_index, - (*constructor - .construct_for(crate::FnResolution::Partial(t.to_def_id())) - .unwrap()) - .clone(), + (*constructor.construct_root(t).unwrap()).clone(), ) }) .collect::>() diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 33ef11056c..7a3ab1f844 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -19,51 +19,6 @@ use rustc_span::ErrorGuaranteed; use rustc_type_ir::{fold::TypeFoldable, AliasKind}; use rustc_utils::{BodyExt, PlaceExt}; -#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug, TyDecodable, TyEncodable)] -pub enum FnResolution<'tcx> { - Final(ty::Instance<'tcx>), - Partial(DefId), -} - -impl<'tcx> PartialOrd for FnResolution<'tcx> { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl<'tcx> Ord for FnResolution<'tcx> { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - use FnResolution::*; - match (self, other) { - (Final(_), Partial(_)) => std::cmp::Ordering::Greater, - (Partial(_), Final(_)) => std::cmp::Ordering::Less, - (Partial(slf), Partial(otr)) => slf.cmp(otr), - (Final(slf), Final(otr)) => match slf.def.cmp(&otr.def) { - std::cmp::Ordering::Equal => slf.args.cmp(otr.args), - result => result, - }, - } - } -} - -impl<'tcx> FnResolution<'tcx> { - pub fn def_id(self) -> DefId 
{ - match self { - FnResolution::Final(f) => f.def_id(), - FnResolution::Partial(p) => p, - } - } -} - -impl<'tcx> std::fmt::Display for FnResolution<'tcx> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - FnResolution::Final(sub) => std::fmt::Debug::fmt(sub, f), - FnResolution::Partial(p) => std::fmt::Debug::fmt(p, f), - } - } -} - /// Try and normalize the provided generics. /// /// The purpose of this function is to test whether resolving these generics @@ -86,20 +41,14 @@ pub fn try_resolve_function<'tcx>( def_id: DefId, param_env: ParamEnv<'tcx>, args: GenericArgsRef<'tcx>, -) -> FnResolution<'tcx> { +) -> Option> { let param_env = param_env.with_reveal_all_normalized(tcx); - let make_opt = || { - if let Err(e) = test_generics_normalization(tcx, param_env, args) { - debug!("Normalization failed: {e:?}"); - return None; - } - Instance::resolve(tcx, param_env, def_id, args).unwrap() - }; - match make_opt() { - Some(inst) => FnResolution::Final(inst), - None => FnResolution::Partial(def_id), + if let Err(e) = test_generics_normalization(tcx, param_env, args) { + panic!("Normalization failed: {e:?}"); + return None; } + Instance::resolve(tcx, param_env, def_id, args).unwrap() } pub fn is_non_default_trait_method(tcx: TyCtxt, function: DefId) -> Option { @@ -112,25 +61,20 @@ pub fn is_non_default_trait_method(tcx: TyCtxt, function: DefId) -> Option FnResolution<'tcx> { - pub fn try_monomorphize<'a, T>( - self, - tcx: TyCtxt<'tcx>, - param_env: ParamEnv<'tcx>, - t: &'a T, - ) -> Cow<'a, T> - where - T: TypeFoldable> + Clone, - { - match self { - FnResolution::Partial(_) => Cow::Borrowed(t), - FnResolution::Final(inst) => Cow::Owned(inst.subst_mir_and_normalize_erasing_regions( - tcx, - param_env, - EarlyBinder::bind(tcx.erase_regions(t.clone())), - )), - } - } +pub fn try_monomorphize<'tcx, 'a, T>( + inst: Instance<'tcx>, + tcx: TyCtxt<'tcx>, + param_env: ParamEnv<'tcx>, + t: &'a T, +) -> T +where + T: TypeFoldable> + Clone, 
+{ + inst.subst_mir_and_normalize_erasing_regions( + tcx, + param_env, + EarlyBinder::bind(tcx.erase_regions(t.clone())), + ) } pub fn retype_place<'tcx>( diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index eca800a2f4..ca36c9fe7b 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -19,7 +19,7 @@ use anyhow::{bail, Result}; use either::Either; use flowistry_pdg_construction::{ graph::{DepEdge, DepEdgeKind, DepGraph, DepNode}, - CallChangeCallback, CallChanges, CallInfo, InlineMissReason, + try_monomorphize, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, SkipCall::Skip, }; use petgraph::{ @@ -243,17 +243,15 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { // ); let generics = self.generator.metadata_loader.get_mono(at).unwrap(); println!("Determining type fpr place {place:?} at {at} with raw type {raw_ty:?} and generics {generics:?}"); - let resolution = *FnResolution::Final( - Instance::resolve( - tcx, - tcx.param_env_reveal_all_normalized(function), - function, - generics, - ) - .unwrap() - .unwrap(), + let instance = Instance::resolve( + tcx, + tcx.param_env_reveal_all_normalized(function), + function, + generics, ) - .try_monomorphize(tcx, ty::ParamEnv::reveal_all(), &raw_ty); + .unwrap() + .unwrap(); + let resolution = try_monomorphize(instance, tcx, ty::ParamEnv::reveal_all(), &raw_ty); //println!("Resolved to {resolution:?}"); Some(resolution) } @@ -530,9 +528,9 @@ impl<'tcx> CallChangeCallback<'tcx> for MyCallback<'tcx> { fn on_inline_miss( &self, - resolution: FnResolution<'tcx>, + resolution: Instance<'tcx>, loc: Location, - parent: FnResolution<'tcx>, + parent: Instance<'tcx>, call_string: Option, reason: InlineMissReason, ) { diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 5189a0cc5d..318bbb657c 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ 
b/crates/paralegal-flow/src/ana/mod.rs @@ -159,7 +159,7 @@ pub struct Metadata<'tcx> { pub pdgs: FxHashMap>, pub bodies: FxHashMap>, pub local_annotations: HashMap>, - pub reachable_markers: HashMap<(DefIndex, Option>), Box<[InternedString]>>, + pub reachable_markers: HashMap<(DefIndex, GenericArgsRef<'tcx>), Box<[InternedString]>>, } impl<'tcx> Metadata<'tcx> { @@ -237,11 +237,9 @@ impl<'tcx> Metadata<'tcx> { .collect(), reachable_markers: (&*cache_borrow) .iter() - .filter_map(|(k, v)| { - let (id, args) = match k { - FnResolution::Partial(d) => (*d, None), - FnResolution::Final(inst) => (inst.def_id(), Some(inst.args)), - }; + .filter_map(|(inst, v)| { + let id = inst.def_id(); + let args = inst.args; Some(( (id.as_local()?.local_def_index, args), (**(v.as_ref()?)).clone(), diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index b1c5c4ee7a..6204c697b3 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -16,15 +16,18 @@ use crate::{ args::{Args, MarkerControl}, consts, utils::{ - resolve::expect_resolve_string_to_def_id, AsFnAndArgs, FnResolution, FnResolutionExt, - IntoDefId, IntoHirId, MetaItemMatch, TyCtxtExt, TyExt, + resolve::expect_resolve_string_to_def_id, AsFnAndArgs, InstanceExt, IntoDefId, IntoHirId, + MetaItemMatch, TyCtxtExt, TyExt, }, DefId, Either, HashMap, HashSet, LocalDefId, TyCtxt, }; -use flowistry_pdg_construction::{determine_async, graph::InternedString}; +use flowistry_pdg_construction::{determine_async, graph::InternedString, try_monomorphize}; use rustc_ast::Attribute; use rustc_hir::def::DefKind; -use rustc_middle::{mir, ty}; +use rustc_middle::{ + mir, + ty::{self, Instance}, +}; use rustc_utils::cache::Cache; use std::{borrow::Cow, rc::Rc}; @@ -148,16 +151,16 @@ impl<'tcx> MarkerCtx<'tcx> { /// functions called in its body are marked. 
/// /// XXX Does not take into account reachable type markers - pub fn marker_is_reachable(&self, res: FnResolution<'tcx>) -> bool { + pub fn marker_is_reachable(&self, res: Instance<'tcx>) -> bool { self.is_marked(res.def_id()) || self.has_transitive_reachable_markers(res) } /// Queries the transitive marker cache. - pub fn has_transitive_reachable_markers(&self, res: FnResolution<'tcx>) -> bool { + pub fn has_transitive_reachable_markers(&self, res: Instance<'tcx>) -> bool { !self.get_reachable_markers(res).is_empty() } - pub fn get_reachable_markers(&self, res: FnResolution<'tcx>) -> &[InternedString] { + pub fn get_reachable_markers(&self, res: Instance<'tcx>) -> &[InternedString] { self.db() .reachable_markers .get_maybe_recursive(res, |_| self.compute_reachable_markers(res)) @@ -166,7 +169,7 @@ impl<'tcx> MarkerCtx<'tcx> { fn get_reachable_and_self_markers( &self, - res: FnResolution<'tcx>, + res: Instance<'tcx>, ) -> impl Iterator + '_ { if res.def_id().is_local() { let mut direct_markers = self @@ -191,7 +194,7 @@ impl<'tcx> MarkerCtx<'tcx> { /// If the transitive marker cache did not contain the answer, this is what /// computes it. 
- fn compute_reachable_markers(&self, res: FnResolution<'tcx>) -> Box<[InternedString]> { + fn compute_reachable_markers(&self, res: Instance<'tcx>) -> Box<[InternedString]> { trace!("Computing reachable markers for {res:?}"); let Some(local) = res.def_id().as_local() else { trace!(" Is not local"); @@ -205,7 +208,8 @@ impl<'tcx> MarkerCtx<'tcx> { trace!(" Cannot find body"); return Box::new([]); }; - let mono_body = res.try_monomorphize( + let mono_body = try_monomorphize( + res, self.tcx(), self.tcx().param_env_reveal_all_normalized(local), &body.body, @@ -252,7 +256,7 @@ impl<'tcx> MarkerCtx<'tcx> { && let ty::TyKind::Generator(closure_fn, substs, _) = self.tcx().type_of(alias.def_id).skip_binder().kind() { trace!(" fits opaque type"); Either::Left(self.get_reachable_and_self_markers( - FnResolution::Final(ty::Instance::expect_resolve(self.tcx(), ty::ParamEnv::reveal_all(), *closure_fn, substs)) + ty::Instance::expect_resolve(self.tcx(), ty::ParamEnv::reveal_all(), *closure_fn, substs) )) } else { Either::Right(std::iter::empty()) @@ -383,7 +387,7 @@ impl<'tcx> MarkerCtx<'tcx> { /// the type that was marked (if any). pub fn all_function_markers<'a>( &'a self, - function: FnResolution<'tcx>, + function: Instance<'tcx>, ) -> impl Iterator, DefId)>)> { // Markers not coming from types, hence the "None" let direct_markers = self @@ -438,7 +442,7 @@ impl<'tcx> MarkerCtx<'tcx> { })) } - pub fn functions_seen(&self) -> Vec> { + pub fn functions_seen(&self) -> Vec> { let cache = self.0.reachable_markers.borrow(); cache.keys().copied().collect::>() } @@ -455,7 +459,7 @@ pub struct MarkerDatabase<'tcx> { pub(crate) local_annotations: HashMap>, external_annotations: ExternalMarkers, /// Cache whether markers are reachable transitively. 
- pub(crate) reachable_markers: Cache, Box<[InternedString]>>, + pub(crate) reachable_markers: Cache, Box<[InternedString]>>, /// Configuration options config: &'static MarkerControl, type_markers: Cache, Box>, diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index f674ff03df..47130b56c2 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -8,7 +8,7 @@ use smallvec::SmallVec; use crate::{desc::Identifier, rustc_span::ErrorGuaranteed, Either, Symbol, TyCtxt}; -pub use flowistry_pdg_construction::{is_non_default_trait_method, FnResolution}; +pub use flowistry_pdg_construction::is_non_default_trait_method; pub use paralegal_spdg::{ShortHash, TinyBitSet}; use rustc_ast as ast; @@ -23,7 +23,7 @@ use rustc_hir::{ }; use rustc_middle::{ mir::{self, Location, Place, ProjectionElem}, - ty, + ty::{self, Instance}, }; use rustc_span::{symbol::Ident, Span as RustSpan}; @@ -205,7 +205,7 @@ impl<'tcx> DfppBodyExt<'tcx> for mir::Body<'tcx> { } } -pub trait FnResolutionExt<'tcx> { +pub trait InstanceExt<'tcx> { /// Get the most precise type signature we can for this function, erase any /// regions and discharge binders. 
/// @@ -216,15 +216,15 @@ pub trait FnResolutionExt<'tcx> { fn sig(self, tcx: TyCtxt<'tcx>) -> Result, ErrorGuaranteed>; } -impl<'tcx> FnResolutionExt<'tcx> for FnResolution<'tcx> { +impl<'tcx> InstanceExt<'tcx> for Instance<'tcx> { fn sig(self, tcx: TyCtxt<'tcx>) -> Result, ErrorGuaranteed> { let sess = tcx.sess; let def_id = self.def_id(); let def_span = tcx.def_span(def_id); let fn_kind = FunctionKind::for_def_id(tcx, def_id)?; - let late_bound_sig = match (self, fn_kind) { - (FnResolution::Final(sub), FunctionKind::Generator) => { - let gen = sub.args.as_generator(); + let late_bound_sig = match fn_kind { + FunctionKind::Generator => { + let gen = self.args.as_generator(); ty::Binder::dummy(ty::FnSig { inputs_and_output: tcx.mk_type_list(&[gen.resume_ty(), gen.return_ty()]), c_variadic: false, @@ -232,41 +232,8 @@ impl<'tcx> FnResolutionExt<'tcx> for FnResolution<'tcx> { abi: Abi::Rust, }) } - (FnResolution::Final(sub), FunctionKind::Closure) => sub.args.as_closure().sig(), - (FnResolution::Final(sub), FunctionKind::Plain) => { - sub.ty(tcx, ty::ParamEnv::reveal_all()).fn_sig(tcx) - } - (FnResolution::Partial(_), FunctionKind::Closure) => { - if let Some(local) = def_id.as_local() { - sess.span_warn( - def_span, - "Precise variable instantiation for \ - closure not known, using user type annotation.", - ); - let sig = tcx.closure_user_provided_sig(local); - Ok(sig.value) - } else { - Err(sess.span_err( - def_span, - format!( - "Could not determine type signature for external closure {def_id:?}" - ), - )) - }? 
- } - (FnResolution::Partial(_), FunctionKind::Generator) => Err(sess.span_err( - def_span, - format!( - "Cannot determine signature of generator {def_id:?} without monomorphization" - ), - ))?, - (FnResolution::Partial(_), FunctionKind::Plain) => { - let sig = tcx.fn_sig(def_id); - sig.no_bound_vars().unwrap_or_else(|| { - sess.span_warn(def_span, format!("Cannot discharge bound variables for {sig:?}, they will not be considered by the analysis")); - sig.skip_binder() - }) - } + FunctionKind::Closure => self.args.as_closure().sig(), + FunctionKind::Plain => self.ty(tcx, ty::ParamEnv::reveal_all()).fn_sig(tcx), }; Ok(tcx .try_normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), late_bound_sig) @@ -345,14 +312,7 @@ pub trait AsFnAndArgs<'tcx> { fn as_instance_and_args( &self, tcx: TyCtxt<'tcx>, - ) -> Result< - ( - FnResolution<'tcx>, - SimplifiedArguments<'tcx>, - mir::Place<'tcx>, - ), - AsFnAndArgsErr<'tcx>, - >; + ) -> Result<(Instance<'tcx>, SimplifiedArguments<'tcx>, mir::Place<'tcx>), AsFnAndArgsErr<'tcx>>; } #[derive(Debug, Error)] @@ -373,14 +333,8 @@ impl<'tcx> AsFnAndArgs<'tcx> for mir::Terminator<'tcx> { fn as_instance_and_args( &self, tcx: TyCtxt<'tcx>, - ) -> Result< - ( - FnResolution<'tcx>, - SimplifiedArguments<'tcx>, - mir::Place<'tcx>, - ), - AsFnAndArgsErr<'tcx>, - > { + ) -> Result<(Instance<'tcx>, SimplifiedArguments<'tcx>, mir::Place<'tcx>), AsFnAndArgsErr<'tcx>> + { let mir::TerminatorKind::Call { func, args, @@ -407,12 +361,12 @@ impl<'tcx> AsFnAndArgs<'tcx> for mir::Terminator<'tcx> { using partial resolution." ), ); - FnResolution::Partial(*defid) + return Err(AsFnAndArgsErr::InstanceResolutionErr); } Ok(_) => ty::Instance::resolve(tcx, ty::ParamEnv::reveal_all(), *defid, gargs) .map_err(|_| AsFnAndArgsErr::InstanceResolutionErr)? 
- .map_or(FnResolution::Partial(*defid), FnResolution::Final), - }; + .ok_or(AsFnAndArgsErr::InstanceResolutionErr), + }?; Ok(( instance, args.iter().map(|a| a.place()).collect(), From 5fbd6d6e9019bd04956e95b26c11c25d1dfee6d9 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 16 May 2024 23:29:51 -0700 Subject: [PATCH 29/95] Simplifying in search of a bug --- .../src/construct.rs | 24 ++++------ .../flowistry_pdg_construction/tests/pdg.rs | 2 +- .../paralegal-flow/src/ana/graph_converter.rs | 16 +++---- crates/paralegal-flow/src/ana/mod.rs | 2 +- crates/paralegal-flow/src/discover.rs | 21 +++++---- crates/paralegal-flow/src/utils/resolve.rs | 47 +++++++++++++------ 6 files changed, 60 insertions(+), 52 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index b595a18425..b54d81afc9 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -137,24 +137,16 @@ impl<'tcx> MemoPdgConstructor<'tcx> { } } - pub fn construct_graph(&self, function: DefId) -> Result, ErrorGuaranteed> { - let args = manufacture_substs_for(self.tcx, function)?; + pub fn construct_graph(&self, function: LocalDefId) -> Result, ErrorGuaranteed> { + let args = manufacture_substs_for(self.tcx, function.to_def_id())?; let g = self - .construct_for( - try_resolve_function( - self.tcx, - function, - self.tcx.param_env_reveal_all_normalized(function), - args, + .construct_root(function) + .ok_or_else(|| { + self.tcx.sess.span_err( + self.tcx.def_span(function), + "Could not construct graph for this function", ) - .ok_or_else(|| { - self.tcx.sess.span_err( - self.tcx.def_span(function), - "Could not construct graph for this function", - ) - })?, - ) - .unwrap() + })? 
.to_petgraph(); Ok(g) } diff --git a/crates/flowistry_pdg_construction/tests/pdg.rs b/crates/flowistry_pdg_construction/tests/pdg.rs index 1300314e66..36309021fc 100644 --- a/crates/flowistry_pdg_construction/tests/pdg.rs +++ b/crates/flowistry_pdg_construction/tests/pdg.rs @@ -40,7 +40,7 @@ fn pdg( let def_id = get_main(tcx); let mut memo = MemoPdgConstructor::new(tcx, NoLoader); configure(tcx, &mut memo); - let pdg = memo.construct_graph(def_id.to_def_id()).unwrap(); + let pdg = memo.construct_graph(def_id).unwrap(); tests(tcx, pdg) }) } diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index ca36c9fe7b..83fab6f6a6 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -40,7 +40,7 @@ pub struct GraphConverter<'tcx, 'a, C> { /// The flowistry graph we are converting dep_graph: Rc>, /// Same as the ID stored in self.target, but as a local def id - local_def_id: DefId, + local_def_id: LocalDefId, // Mutable fields /// Where we write every [`DefId`] we encounter into. @@ -98,7 +98,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { fn entrypoint_is_async(&self) -> bool { self.generator .metadata_loader - .get_asyncness(self.local_def_id) + .get_asyncness(self.local_def_id.to_def_id()) .is_async() } @@ -319,13 +319,9 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { /// `local_def_id`. fn create_flowistry_graph( generator: &SPDGGenerator<'tcx>, - def_id: DefId, + def_id: LocalDefId, ) -> Result> { - let Ok(pdg) = generator.flowistry_loader.construct_graph(def_id) else { - bail!("Failed to construct the graph"); - }; - - Ok(pdg) + generator.metadata_loader.get_pdg(def_id.to_def_id()) } /// Consume the generator and compile the [`SPDG`]. 
@@ -334,9 +330,9 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { let arguments = self.determine_arguments(); let return_ = self.determine_return(); SPDG { - path: path_for_item(self.local_def_id, self.tcx()), + path: path_for_item(self.local_def_id.to_def_id(), self.tcx()), graph: self.spdg, - id: self.local_def_id, + id: self.local_def_id.to_def_id(), name: Identifier::new(self.target.name()), arguments, markers: self diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 318bbb657c..064a8baae8 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -441,7 +441,7 @@ impl<'tcx> SPDGGenerator<'tcx> { let converter = GraphConverter::new_with_flowistry(self, known_def_ids, target)?; let spdg = converter.make_spdg(); - Ok((local_def_id, spdg)) + Ok((local_def_id.to_def_id(), spdg)) } /// Main analysis driver. Essentially just calls [`Self::handle_target`] diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index f0a2173d65..27a3ee4c4d 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -8,14 +8,14 @@ use std::rc::Rc; use crate::{ana::MetadataLoader, ann::db::MarkerDatabase, consts, utils::*}; use rustc_hir::{ - def_id::{DefId, LocalDefId}, + def_id::LocalDefId, intravisit::{self, FnKind}, BodyId, }; use rustc_middle::{hir::nested_filter::OnlyBodies, ty::TyCtxt}; use rustc_span::{symbol::Ident, Span, Symbol}; -use self::resolve::expect_resolve_string_to_def_id; +use self::resolve::resolve_string_to_def_id; /// Values of this type can be matched against Rust attributes pub type AttrMatchT = Vec; @@ -43,7 +43,7 @@ pub struct CollectingVisitor<'tcx> { /// [`CollectingVisitor::handle_target`]. 
pub struct FnToAnalyze { pub name: Ident, - pub def_id: DefId, + pub def_id: LocalDefId, } impl FnToAnalyze { @@ -64,15 +64,16 @@ impl<'tcx> CollectingVisitor<'tcx> { .selected_targets() .iter() .filter_map(|path| { - let def_id = expect_resolve_string_to_def_id(tcx, path, opts.relaxed())?; - if !def_id.is_local() { + let def_id = resolve_string_to_def_id(tcx, path).ok()?; + if let Some(local) = def_id.as_local() { + Some(FnToAnalyze { + def_id: local, + name: tcx.opt_item_ident(def_id).unwrap(), + }) + } else { tcx.sess.span_err(tcx.def_span(def_id), "found an external function as analysis target. Analysis targets are required to be local."); return None; } - Some(FnToAnalyze { - def_id, - name: tcx.opt_item_ident(def_id).unwrap(), - }) }) .collect(); Self { @@ -131,7 +132,7 @@ impl<'tcx> intravisit::Visitor<'tcx> for CollectingVisitor<'tcx> { if self.should_analyze_function(id) { self.functions_to_analyze.push(FnToAnalyze { name: *name, - def_id: id.to_def_id(), + def_id: id, }); self.emit_target_collector.push(id); } else if self.tcx.generics_of(id).count() == 0 { diff --git a/crates/paralegal-flow/src/utils/resolve.rs b/crates/paralegal-flow/src/utils/resolve.rs index b81931c20d..78ba5669de 100644 --- a/crates/paralegal-flow/src/utils/resolve.rs +++ b/crates/paralegal-flow/src/utils/resolve.rs @@ -10,6 +10,7 @@ use rustc_hir::{ }; use rustc_middle::ty::{self, TyCtxt}; use rustc_span::Symbol; +use thiserror::Error; use ty::{fast_reject::SimplifiedType, FloatTy, IntTy, UintTy}; #[derive(Debug, Clone, Copy)] @@ -18,18 +19,24 @@ pub enum Res { PrimTy(PrimTy), } -#[derive(Clone, Debug)] -pub enum ResolutionError<'a> { +#[derive(Clone, Debug, Error)] +pub enum ResolutionError { + #[error("cannot resolve primitive type {}", .0)] CannotResolvePrimitiveType(Symbol), + #[error("path is empty")] PathIsEmpty, + #[error("could not find child {segment} in {item:?} (which is a {search_space:?})")] CouldNotFindChild { item: DefId, - segment: &'a str, + segment: String, 
search_space: SearchSpace, }, + #[error("empty start segments")] EmptyStarts, + #[error("non-convertible resolution {:?}", .0)] UnconvertibleRes(def::Res), - CouldNotResolveCrate(&'a str), + #[error("could not resolve crate {}", .0)] + CouldNotResolveCrate(String), } #[derive(Clone, Debug)] @@ -39,7 +46,7 @@ pub enum SearchSpace { } impl Res { - fn from_def_res<'a>(res: def::Res) -> Result> { + fn from_def_res(res: def::Res) -> Result { match res { def::Res::Def(k, i) => Ok(Res::Def(k, i)), def::Res::PrimTy(t) => Ok(Res::PrimTy(t)), @@ -116,13 +123,25 @@ pub fn expect_resolve_string_to_def_id(tcx: TyCtxt, path: &str, relaxed: bool) - } } +pub fn resolve_string_to_def_id(tcx: TyCtxt, path: &str) -> anyhow::Result { + let segment_vec = path.split("::").collect::>(); + + let res = def_path_res(tcx, &segment_vec)?; + match res { + Res::Def(_, did) => Ok(did), + other => { + anyhow::bail!("expected {path} to resolve to an item, got {other:?}") + } + } +} + /// Lifted from `clippy_utils` -pub fn def_path_res<'a>(tcx: TyCtxt, path: &[&'a str]) -> Result> { - fn item_child_by_name<'a>( +pub fn def_path_res(tcx: TyCtxt, path: &[&str]) -> Result { + fn item_child_by_name( tcx: TyCtxt<'_>, def_id: DefId, name: &str, - ) -> Option>> { + ) -> Option> { if let Some(local_id) = def_id.as_local() { local_item_children_by_name(tcx, local_id, name) } else { @@ -130,11 +149,11 @@ pub fn def_path_res<'a>(tcx: TyCtxt, path: &[&'a str]) -> Result( + fn non_local_item_children_by_name( tcx: TyCtxt<'_>, def_id: DefId, name: &str, - ) -> Option>> { + ) -> Option> { match tcx.def_kind(def_id) { DefKind::Mod | DefKind::Enum | DefKind::Trait => tcx .module_children(def_id) @@ -151,11 +170,11 @@ pub fn def_path_res<'a>(tcx: TyCtxt, path: &[&'a str]) -> Result( + fn local_item_children_by_name( tcx: TyCtxt<'_>, local_id: LocalDefId, name: &str, - ) -> Option>> { + ) -> Option> { let hir = tcx.hir(); let root_mod; @@ -235,13 +254,13 @@ pub fn def_path_res<'a>(tcx: TyCtxt, path: &[&'a str]) -> 
Result Date: Wed, 22 May 2024 15:11:32 -0700 Subject: [PATCH 30/95] Deal with auto traits and `Sized` --- .../flowistry_pdg_construction/src/utils.rs | 102 ++++++++++++------ 1 file changed, 71 insertions(+), 31 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 7a3ab1f844..c06200a093 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -201,6 +201,13 @@ pub fn ty_resolve<'tcx>(ty: Ty<'tcx>, tcx: TyCtxt<'tcx>) -> Ty<'tcx> { } } +/// This function creates dynamic types that satisfy the constraints on the +/// given function. It returns a list of generic arguments that are suitable for +/// calling `Instance::resolve` for this function, guaranteeing that the resolve +/// call does not fail. +/// +/// This is achieved by constructing `dyn` types which assume the constraints of +/// the `where` clause for this function (and any parents). pub fn manufacture_substs_for( tcx: TyCtxt<'_>, function: DefId, @@ -216,8 +223,10 @@ pub fn manufacture_substs_for( trace!("Found generics {generics:?}"); let predicates = tcx.predicates_of(function).instantiate_identity(tcx); trace!("Found predicates {predicates:?}"); + let lang_items = tcx.lang_items(); let types = (0..generics.count()).map(|gidx| { let param = generics.param_at(gidx, tcx); + trace!("Trying param {param:?}"); if let Some(default_val) = param.default_value(tcx) { return Ok(default_val.instantiate_identity()); } @@ -240,46 +249,77 @@ pub fn manufacture_substs_for( }; let param_as_ty = ParamTy::for_def(param); - let constraints = predicates.predicates.iter().filter_map(|clause| { - let pred = if let Some(trait_ref) = clause.as_trait_clause() { - if trait_ref.polarity() != ImplPolarity::Positive { - return None; - }; - let Some(TraitPredicate { trait_ref, .. 
}) = trait_ref.no_bound_vars() else { - return Some(Err(tcx.sess.span_err( - tcx.def_span(param.def_id), - format!("Trait ref had binder {trait_ref:?}"), - ))); - }; - if !matches!(trait_ref.self_ty().kind(), TyKind::Param(p) if *p == param_as_ty) { - return None; - }; - Some(ExistentialPredicate::Trait( - ExistentialTraitRef::erase_self_ty(tcx, trait_ref), - )) - } else if let Some(pred) = clause.as_projection_clause() { - let pred = pred.no_bound_vars()?; - if !matches!(pred.self_ty().kind(), TyKind::Param(p) if *p == param_as_ty) { + let constraints = predicates.predicates.iter().enumerate().rev().filter_map( + |(pidx, clause)| { + trace!(" Trying clause {clause:?}"); + let pred = if let Some(trait_ref) = clause.as_trait_clause() { + trace!(" is trait clause"); + if trait_ref.polarity() != ImplPolarity::Positive { + trace!(" Bailing because it is negative"); + return None; + }; + let Some(TraitPredicate { trait_ref, .. }) = trait_ref.no_bound_vars() else { + return Some(Err(tcx.sess.span_err( + tcx.def_span(param.def_id), + format!("Trait ref had binder {trait_ref:?}"), + ))); + }; + if !matches!(trait_ref.self_ty().kind(), TyKind::Param(p) if *p == param_as_ty) + { + trace!(" Bailing because self type is not param type"); + return None; + }; + if Some(trait_ref.def_id) == lang_items.sized_trait() + || tcx.trait_is_auto(trait_ref.def_id) + { + trace!(" bailing because trait is auto trait"); + return None; + } + ExistentialPredicate::Trait(ExistentialTraitRef::erase_self_ty(tcx, trait_ref)) + } else if let Some(pred) = clause.as_projection_clause() { + trace!(" is projection clause"); + let Some(pred) = pred.no_bound_vars() else { + return Some(Err(tcx + .sess + .span_err(predicates.spans[pidx], "Bound vars in predicate"))); + }; + if !matches!(pred.self_ty().kind(), TyKind::Param(p) if *p == param_as_ty) { + trace!(" Bailing because self type is not param type"); + return None; + }; + ExistentialPredicate::Projection(ExistentialProjection::erase_self_ty( + tcx, 
pred, + )) + } else { + trace!(" is other clause: ignoring"); return None; }; - Some(ExistentialPredicate::Projection( - ExistentialProjection::erase_self_ty(tcx, pred), - )) - } else { - None - }?; - Some(Ok(Binder::dummy(pred))) - }); + trace!(" Created predicate {pred:?}"); + + Some(Ok(Binder::dummy(pred))) + }, + ); + let mut predicates = constraints.collect::, _>>()?; + trace!(" collected predicates {predicates:?}"); + match predicates.len() { + 0 => predicates.push(Binder::dummy(ExistentialPredicate::Trait(ExistentialTraitRef { def_id: tcx.get_diagnostic_item(rustc_span::sym::Any).expect("The `Any` item is not defined."), args: List::empty() }))), + 1 => (), + _ => + return Err(tcx.sess.span_err(tcx.def_span(function), format!("Could not create dynamic arguments for this function because more than one predicate were required: {predicates:?}"))), + }; + let poly_predicate = tcx.mk_poly_existential_predicates_from_iter(predicates.into_iter()); + trace!(" poly predicate {poly_predicate:?}"); let ty = Ty::new_dynamic( tcx, - tcx.mk_poly_existential_predicates_from_iter(constraints)?, + poly_predicate, Region::new_free(tcx, function, BoundRegionKind::BrAnon(None)), DynKind::Dyn, ); + trace!(" Created a dyn {ty:?}"); Ok(GenericArg::from(ty)) }); - let args = tcx.mk_args_from_iter(types); + let args = tcx.mk_args_from_iter(types)?; trace!("Created args {args:?}"); - args + Ok(args) } From 9ba8a7a4074ae0f92b39af18b0d8b547e66a87be Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 23 May 2024 14:59:41 -0700 Subject: [PATCH 31/95] Refactoring --- .../paralegal-flow/src/ana/graph_converter.rs | 29 ++--- crates/paralegal-flow/src/ana/mod.rs | 108 ++++++++++-------- 2 files changed, 78 insertions(+), 59 deletions(-) diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 83fab6f6a6..e3afcefabd 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ 
b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -6,7 +6,7 @@ use flowistry_pdg::SourceUse; use paralegal_spdg::{Node, SPDGStats}; use rustc_hir::{def, def_id::LocalDefId}; use rustc_middle::{ - mir::{self, tcx::PlaceTy, Location}, + mir::{self, Location}, ty::{self, Instance, TyCtxt}, }; @@ -15,7 +15,7 @@ use std::{cell::RefCell, fmt::Display, rc::Rc}; use super::{ default_index, path_for_item, src_loc_for_span, BodyInfo, RustcInstructionKind, SPDGGenerator, }; -use anyhow::{bail, Result}; +use anyhow::Result; use either::Either; use flowistry_pdg_construction::{ graph::{DepEdge, DepEdgeKind, DepGraph, DepNode}, @@ -269,20 +269,21 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { function: DefId, mut filter: impl FnMut(&MarkerAnnotation) -> bool, ) { + trace!("Checking annotations for {node:?} on function {function:?}"); let parent = get_parent(self.tcx(), function); let marker_ctx = self.marker_ctx().clone(); - self.register_markers( - node, - marker_ctx - .combined_markers(function) - .chain( - parent - .into_iter() - .flat_map(|parent| marker_ctx.combined_markers(parent)), - ) - .filter(|ann| filter(ann)) - .map(|ann| Identifier::new_intern(ann.marker.as_str())), - ); + let markers = marker_ctx + .combined_markers(function) + .chain( + parent + .into_iter() + .flat_map(|parent| marker_ctx.combined_markers(parent)), + ) + .filter(|ann| filter(ann)) + .map(|ann| Identifier::new_intern(ann.marker.as_str())) + .collect::>(); + trace!("Found markers {markers:?}"); + self.register_markers(node, markers.into_iter()); self.known_def_ids.extend(parent); } diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 064a8baae8..f83e234e6f 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -25,14 +25,14 @@ use petgraph::visit::GraphBase; use rustc_hash::FxHashMap; use rustc_hir::{ def, - def_id::{CrateNum, DefIndex, LOCAL_CRATE}, + def_id::{CrateNum, DefIndex, LocalDefId, 
LOCAL_CRATE}, }; use rustc_index::IndexVec; use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ mir::{ - BasicBlock, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, - TerminatorKind, + BasicBlock, BasicBlockData, Body, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, + Location, Statement, Terminator, TerminatorKind, }, ty::{tls, GenericArgsRef, TyCtxt}, }; @@ -177,53 +177,21 @@ impl<'tcx> Metadata<'tcx> { markers: &MarkerDatabase<'tcx>, ) -> Self { let mut bodies: FxHashMap = Default::default(); - for pdg in pdgs.values().flat_map(|d| { - d.graph + for location in pdgs.values().flat_map(|subgraph| { + subgraph + .graph .nodes .iter() .map(|n| &n.at) - .chain(d.graph.edges.iter().map(|e| &e.2.at)) + .chain(subgraph.graph.edges.iter().map(|e| &e.2.at)) .flat_map(|at| at.iter()) }) { - if let Some(local) = pdg.function.as_local() { - let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, local); - let body = &body_with_facts.body; - bodies - .entry(local.local_def_index) - .or_insert_with(|| BodyInfo { - arg_count: body.arg_count, - decls: body.local_decls().to_owned(), - instructions: body - .basic_blocks - .iter() - .map(|bb| { - let t = bb.terminator(); - bb.statements - .iter() - .map(|s| RustcInstructionInfo { - kind: RustcInstructionKind::Statement, - span: s.source_info.span, - description: format!("{:?}", s.kind).into(), - }) - .chain([RustcInstructionInfo { - kind: if let Ok((id, ..)) = t.as_fn_and_args(tcx) { - RustcInstructionKind::FunctionCall(FunctionCallInfo { - id, - }) - } else if matches!(t.kind, TerminatorKind::SwitchInt { .. 
}) - { - RustcInstructionKind::SwitchInt - } else { - RustcInstructionKind::Terminator - }, - span: t.source_info.span, - description: format!("{:?}", t.kind).into(), - }]) - .collect() - }) - .collect(), - def_span: tcx.def_span(local), - }); + if let Some(local) = location.function.as_local() { + bodies.entry(local.local_def_index).or_insert_with(|| { + let info = BodyInfo::from_body(tcx, local); + trace!("Created info for body {local:?}\n{info:?}"); + info + }); } } let cache_borrow = markers.reachable_markers.borrow(); @@ -339,6 +307,23 @@ pub struct BodyInfo<'tcx> { pub def_span: rustc_span::Span, } +impl<'tcx> BodyInfo<'tcx> { + pub fn from_body(tcx: TyCtxt<'tcx>, function_id: LocalDefId) -> Self { + let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, function_id); + let body = &body_with_facts.body; + Self { + arg_count: body.arg_count, + decls: body.local_decls().to_owned(), + instructions: body + .basic_blocks + .iter() + .map(|bb| RustcInstructionInfo::from_basic_block(tcx, bb)) + .collect(), + def_span: tcx.def_span(function_id), + } + } +} + #[derive(Clone, Copy, Debug, Encodable, Decodable)] pub struct RustcInstructionInfo { /// Classification of the instruction @@ -349,6 +334,39 @@ pub struct RustcInstructionInfo { pub description: InternedString, } +impl RustcInstructionInfo { + pub fn from_statement(stmt: &Statement) -> Self { + Self { + kind: RustcInstructionKind::Statement, + span: stmt.source_info.span, + description: format!("{:?}", stmt.kind).into(), + } + } + + pub fn from_terminator<'tcx>(tcx: TyCtxt<'tcx>, term: &Terminator<'tcx>) -> Self { + Self { + kind: if let Ok((id, ..)) = term.as_fn_and_args(tcx) { + RustcInstructionKind::FunctionCall(FunctionCallInfo { id }) + } else if matches!(term.kind, TerminatorKind::SwitchInt { .. 
}) { + RustcInstructionKind::SwitchInt + } else { + RustcInstructionKind::Terminator + }, + span: term.source_info.span, + description: format!("{:?}", term.kind).into(), + } + } + + pub fn from_basic_block<'tcx>(tcx: TyCtxt<'tcx>, bb: &BasicBlockData<'tcx>) -> Vec { + let t = bb.terminator(); + bb.statements + .iter() + .map(Self::from_statement) + .chain([Self::from_terminator(tcx, t)]) + .collect() + } +} + /// The type of instructions we may encounter #[derive(Debug, Clone, Copy, Eq, Ord, PartialOrd, PartialEq, Encodable, Decodable)] pub enum RustcInstructionKind { From 9da9167de632eac0ee98d2e73bec431f3e948ae2 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 23 May 2024 15:37:27 -0700 Subject: [PATCH 32/95] Fix entrypoint generics by resolving functions late --- .../src/construct.rs | 10 +- crates/flowistry_pdg_construction/src/lib.rs | 2 +- .../flowistry_pdg_construction/src/utils.rs | 13 +- .../paralegal-flow/src/ana/graph_converter.rs | 13 +- crates/paralegal-flow/src/ana/mod.rs | 146 +++++++----------- crates/paralegal-flow/src/test_utils.rs | 2 +- crates/paralegal-flow/tests/marker_tests.rs | 2 +- crates/paralegal-policy/src/context.rs | 104 ++++++------- crates/paralegal-policy/src/test_utils.rs | 2 +- crates/paralegal-spdg/src/dot.rs | 2 +- crates/paralegal-spdg/src/lib.rs | 21 +-- 11 files changed, 134 insertions(+), 183 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index b54d81afc9..772bb78a7b 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -843,15 +843,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { place.ty(&self.body.local_decls, self.tcx()).ty } }; - let ty = utils::ty_resolve(ty, self.tcx()); - match ty.kind() { - TyKind::FnDef(def_id, generic_args) => Some((*def_id, generic_args)), - TyKind::Generator(def_id, generic_args, _) => Some((*def_id, generic_args)), - ty => { - 
trace!("Bailing from handle_call because func is literal with type: {ty:?}"); - None - } - } + utils::type_as_fn(self.tcx(), ty) } fn fmt_fn(&self, def_id: DefId) -> String { diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index 65cdcd5bcf..0b4ad6bcbd 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -32,7 +32,7 @@ mod construct; pub mod graph; pub mod meta; mod mutation; -mod utils; +pub mod utils; /// Computes a global program dependence graph (PDG) starting from the root function specified by `def_id`. pub fn compute_pdg<'tcx>(tcx: TyCtxt<'tcx>, params: Instance<'tcx>) -> DepGraph<'tcx> { diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index c06200a093..553d7dbf9b 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -46,7 +46,6 @@ pub fn try_resolve_function<'tcx>( if let Err(e) = test_generics_normalization(tcx, param_env, args) { panic!("Normalization failed: {e:?}"); - return None; } Instance::resolve(tcx, param_env, def_id, args).unwrap() } @@ -77,6 +76,18 @@ where ) } +pub fn type_as_fn<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<(DefId, GenericArgsRef<'tcx>)> { + let ty = ty_resolve(ty, tcx); + match ty.kind() { + TyKind::FnDef(def_id, generic_args) => Some((*def_id, generic_args)), + TyKind::Generator(def_id, generic_args, _) => Some((*def_id, generic_args)), + ty => { + trace!("Bailing from handle_call because func is literal with type: {ty:?}"); + None + } + } +} + pub fn retype_place<'tcx>( orig: Place<'tcx>, tcx: TyCtxt<'tcx>, diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index e3afcefabd..27d2ce4b3f 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -143,6 +143,7 @@ 
impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { .metadata_loader .get_body_info(leaf_loc.function) .unwrap(); + let monos = self.generator.metadata_loader.get_mono(weight.at).unwrap(); match leaf_loc.location { RichLocation::Start @@ -166,7 +167,13 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { RichLocation::Location(loc) => { let instruction = body.instruction_at(loc); if let RustcInstructionKind::FunctionCall(f) = instruction.kind { - self.known_def_ids.extend(Some(f.id)); + let f = flowistry_pdg_construction::utils::type_as_fn( + self.tcx(), + f.instantiate(self.tcx(), monos), + ) + .unwrap() + .0; + self.known_def_ids.extend(Some(f)); // Question: Could a function with no input produce an // output that has aliases? E.g. could some place, where the @@ -187,7 +194,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { }); if needs_return_markers { - self.register_annotations_for_function(node, f.id, |ann| { + self.register_annotations_for_function(node, f, |ann| { ann.refinement.on_return() }); } @@ -196,7 +203,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { let SourceUse::Argument(arg) = e.weight().source_use else { continue; }; - self.register_annotations_for_function(node, f.id, |ann| { + self.register_annotations_for_function(node, f, |ann| { ann.refinement.on_argument().contains(arg as u32).unwrap() }); } diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index f83e234e6f..08743195dc 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -34,7 +34,7 @@ use rustc_middle::{ BasicBlock, BasicBlockData, Body, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, Statement, Terminator, TerminatorKind, }, - ty::{tls, GenericArgsRef, TyCtxt}, + ty::{tls, EarlyBinder, GenericArgsRef, Ty, TyCtxt}, }; use rustc_serialize::{Decodable, Encodable}; use rustc_span::{FileNameDisplayPreference, Span as RustSpan}; @@ -303,7 +303,7 @@ impl<'tcx> 
MetadataLoader<'tcx> { pub struct BodyInfo<'tcx> { pub arg_count: usize, pub decls: IndexVec>, - pub instructions: IndexVec>, + pub instructions: IndexVec>>, pub def_span: rustc_span::Span, } @@ -317,24 +317,24 @@ impl<'tcx> BodyInfo<'tcx> { instructions: body .basic_blocks .iter() - .map(|bb| RustcInstructionInfo::from_basic_block(tcx, bb)) + .map(|bb| RustcInstructionInfo::from_basic_block(tcx, body, bb)) .collect(), def_span: tcx.def_span(function_id), } } } -#[derive(Clone, Copy, Debug, Encodable, Decodable)] -pub struct RustcInstructionInfo { +#[derive(Clone, Copy, Debug, TyEncodable, TyDecodable)] +pub struct RustcInstructionInfo<'tcx> { /// Classification of the instruction - pub kind: RustcInstructionKind, + pub kind: RustcInstructionKind<'tcx>, /// The source code span pub span: rustc_span::Span, /// Textual rendering of the MIR pub description: InternedString, } -impl RustcInstructionInfo { +impl<'tcx> RustcInstructionInfo<'tcx> { pub fn from_statement(stmt: &Statement) -> Self { Self { kind: RustcInstructionKind::Statement, @@ -343,37 +343,54 @@ impl RustcInstructionInfo { } } - pub fn from_terminator<'tcx>(tcx: TyCtxt<'tcx>, term: &Terminator<'tcx>) -> Self { + pub fn from_terminator( + tcx: TyCtxt<'tcx>, + local_decls: &impl HasLocalDecls<'tcx>, + term: &Terminator<'tcx>, + ) -> Self { Self { - kind: if let Ok((id, ..)) = term.as_fn_and_args(tcx) { - RustcInstructionKind::FunctionCall(FunctionCallInfo { id }) - } else if matches!(term.kind, TerminatorKind::SwitchInt { .. }) { - RustcInstructionKind::SwitchInt - } else { - RustcInstructionKind::Terminator + kind: match &term.kind { + TerminatorKind::Call { + func, + args, + destination, + target, + unwind, + call_source, + fn_span, + } => { + let op_ty = func.ty(local_decls, tcx); + RustcInstructionKind::FunctionCall(EarlyBinder::bind(op_ty)) + } + TerminatorKind::SwitchInt { .. 
} => RustcInstructionKind::SwitchInt, + _ => RustcInstructionKind::Terminator, }, span: term.source_info.span, description: format!("{:?}", term.kind).into(), } } - pub fn from_basic_block<'tcx>(tcx: TyCtxt<'tcx>, bb: &BasicBlockData<'tcx>) -> Vec { + pub fn from_basic_block( + tcx: TyCtxt<'tcx>, + local_decls: &impl HasLocalDecls<'tcx>, + bb: &BasicBlockData<'tcx>, + ) -> Vec { let t = bb.terminator(); bb.statements .iter() .map(Self::from_statement) - .chain([Self::from_terminator(tcx, t)]) + .chain([Self::from_terminator(tcx, local_decls, t)]) .collect() } } /// The type of instructions we may encounter -#[derive(Debug, Clone, Copy, Eq, Ord, PartialOrd, PartialEq, Encodable, Decodable)] -pub enum RustcInstructionKind { +#[derive(Debug, Clone, Copy, Eq, Ord, PartialOrd, PartialEq, TyEncodable, TyDecodable)] +pub enum RustcInstructionKind<'tcx> { /// Some type of statement Statement, - /// A function call - FunctionCall(FunctionCallInfo), + /// A function call. The type is guaranteed to be of function type + FunctionCall(EarlyBinder>), /// A basic block terminator Terminator, /// The switch int terminator @@ -393,7 +410,7 @@ impl<'tcx> BodyInfo<'tcx> { } } - pub fn instruction_at(&self, location: Location) -> RustcInstructionInfo { + pub fn instruction_at(&self, location: Location) -> RustcInstructionInfo<'tcx> { self.instructions[location.block][location.statement_index] } @@ -515,32 +532,6 @@ impl<'tcx> SPDGGenerator<'tcx> { let instruction_info = self.collect_instruction_info(&controllers); - let inlined_functions = instruction_info - .keys() - .filter_map(|l| l.function.as_local()) - .collect::>(); - let analyzed_spans = inlined_functions - .iter() - .copied() - // Because we now take the functions seen from the marker context - // this includes functions where the body is not present (e.g. `dyn`) - // so if we fail to retrieve the body in that case it is allowed. - // - // Prefereably in future we would filter what we get from the marker - // context better. 
- .filter_map(|f| { - let body = match tcx.body_for_def_id(f) { - Ok(b) => Some(b), - Err(BodyResolutionError::IsTraitAssocFn(_)) => None, - Err(e) => panic!("{e:?}"), - }?; - let span = body_span(&body.body); - Some((f, src_loc_for_span(span, tcx))) - }) - .collect::>(); - - known_def_ids.extend(inlined_functions.iter().map(|f| f.to_def_id())); - let type_info = self.collect_type_info(); known_def_ids.extend(type_info.keys()); let def_info = known_def_ids @@ -548,51 +539,12 @@ impl<'tcx> SPDGGenerator<'tcx> { .map(|id| (*id, def_info_for_item(*id, self.marker_ctx(), tcx))) .collect(); - let dedup_locs = analyzed_spans.values().map(Span::line_len).sum(); - let dedup_functions = analyzed_spans.len() as u32; - - let (seen_locs, seen_functions) = if self.opts.anactrl().inlining_depth().is_adaptive() { - let mut total_functions = inlined_functions; - let mctx = self.marker_ctx(); - total_functions.extend( - mctx.functions_seen() - .into_iter() - .map(|f| f.def_id()) - .filter(|f| !mctx.is_marked(f)) - .filter_map(|f| f.as_local()), - ); - let mut seen_functions = 0; - let locs = total_functions - .into_iter() - .filter_map(|f| Some(body_span(&tcx.body_for_def_id(f).ok()?.body))) - .map(|span| { - seen_functions += 1; - let (_, start_line, _, end_line, _) = - tcx.sess.source_map().span_to_location_info(span); - end_line - start_line + 1 - }) - .sum::() as u32; - (locs, seen_functions) - } else { - (dedup_locs, dedup_functions) - }; - type_info_sanity_check(&controllers, &type_info); ProgramDescription { type_info, instruction_info, controllers, def_info, - marker_annotation_count: self - .marker_ctx() - .all_annotations() - .filter(|m| m.1.as_marker().is_some()) - .count() as u32, - dedup_locs, - dedup_functions, - seen_functions, - seen_locs, - analyzed_spans, } } @@ -601,7 +553,7 @@ impl<'tcx> SPDGGenerator<'tcx> { fn collect_instruction_info( &self, controllers: &HashMap, - ) -> HashMap { + ) -> HashMap { let all_instructions = controllers .values() .flat_map(|v| { 
@@ -610,13 +562,16 @@ impl<'tcx> SPDGGenerator<'tcx> { .map(|n| &n.at) .chain(v.graph.edge_weights().map(|e| &e.at)) }) - .flat_map(|at| at.iter()) .collect::>(); all_instructions .into_iter() - .map(|n| { - let body = self.metadata_loader.get_body_info(n.function).unwrap(); - let (kind, description, span) = match n.location { + .map(|&n| { + let monos = self.metadata_loader.get_mono(n).unwrap(); + let body = self + .metadata_loader + .get_body_info(n.leaf().function) + .unwrap(); + let (kind, description, span) = match n.leaf().location { RichLocation::End => { (InstructionKind::Return, "start".to_owned(), body.def_span) } @@ -629,7 +584,14 @@ impl<'tcx> SPDGGenerator<'tcx> { match instruction.kind { RustcInstructionKind::SwitchInt => InstructionKind::SwitchInt, RustcInstructionKind::FunctionCall(c) => { - InstructionKind::FunctionCall(c) + InstructionKind::FunctionCall(FunctionCallInfo { + id: flowistry_pdg_construction::utils::type_as_fn( + self.tcx, + c.instantiate(self.tcx, monos), + ) + .unwrap() + .0, + }) } RustcInstructionKind::Statement => InstructionKind::Statement, RustcInstructionKind::Terminator => InstructionKind::Terminator, diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index 59e82db5ba..a75a9b5d9d 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -343,7 +343,7 @@ impl<'g> CtrlRef<'g> { .chain(self.ctrl.graph.node_weights().map(|info| info.at)) .filter(|m| { instruction_info - .get(&m.leaf()) + .get(&m) .unwrap_or_else(|| { panic!( "Could not find instruction {} in\n{}", diff --git a/crates/paralegal-flow/tests/marker_tests.rs b/crates/paralegal-flow/tests/marker_tests.rs index cf88b55af1..83f6480e2c 100644 --- a/crates/paralegal-flow/tests/marker_tests.rs +++ b/crates/paralegal-flow/tests/marker_tests.rs @@ -41,7 +41,7 @@ define_test!(trait_method_marker: ctrl -> { .iter() .any(|(node, markers)| { let weight = spdg.graph.node_weight(*node).unwrap(); 
- !matches!(ctrl.graph().desc.instruction_info[&weight.at.leaf()].kind, + !matches!(ctrl.graph().desc.instruction_info[&weight.at].kind, InstructionKind::FunctionCall(fun) if fun.id == method.ident) || markers.contains(&marker) })); diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index 18a286f9d6..0082911c3b 100644 --- a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -473,9 +473,7 @@ impl Context { continue; } let w = g.node_weight(n).unwrap(); - if self.desc.instruction_info[&w.at.leaf()] - .kind - .is_function_call() + if self.desc.instruction_info[&w.at].kind.is_function_call() || w.at.leaf().location.is_start() { roots.push(GlobalNode::from_local_node(ctrl_id, n)); @@ -611,55 +609,55 @@ impl Context { node.get_location(self) } - #[doc(hidden)] - pub fn write_analyzed_code( - &self, - mut out: impl Write, - include_signatures: bool, - ) -> std::io::Result<()> { - let ordered_span_set = self - .desc - .analyzed_spans - .values() - .zip(std::iter::repeat(true)) - .chain( - include_signatures - .then(|| { - self.desc - .def_info - .iter() - .filter(|(did, _)| { - !matches!(defid_as_local(**did), Some(local) - if self.desc.analyzed_spans.contains_key(&local) - ) - }) - .map(|(_, i)| (&i.src_info, matches!(i.kind, DefKind::Type))) - }) - .into_iter() - .flatten(), - ) - .collect::>(); - let mut current_file = None; - for (s, is_complete) in ordered_span_set { - if Some(&s.source_file.file_path) != current_file { - writeln!(out, "// {}", s.source_file.file_path)?; - current_file = Some(&s.source_file.file_path); - } - let file = BufReader::new(File::open(&s.source_file.abs_file_path).unwrap()); - for l in file - .lines() - .skip(s.start.line as usize - 1) - .take((s.end.line - s.start.line + 1) as usize) - { - writeln!(out, "{}", l.unwrap()).unwrap() - } - if !is_complete { - writeln!(out, "unreachable!() }}")?; - } - } - - Ok(()) - } + // #[doc(hidden)] + // pub fn 
write_analyzed_code( + // &self, + // mut out: impl Write, + // include_signatures: bool, + // ) -> std::io::Result<()> { + // let ordered_span_set = self + // .desc + // .analyzed_spans + // .values() + // .zip(std::iter::repeat(true)) + // .chain( + // include_signatures + // .then(|| { + // self.desc + // .def_info + // .iter() + // .filter(|(did, _)| { + // !matches!(defid_as_local(**did), Some(local) + // if self.desc.analyzed_spans.contains_key(&local) + // ) + // }) + // .map(|(_, i)| (&i.src_info, matches!(i.kind, DefKind::Type))) + // }) + // .into_iter() + // .flatten(), + // ) + // .collect::>(); + // let mut current_file = None; + // for (s, is_complete) in ordered_span_set { + // if Some(&s.source_file.file_path) != current_file { + // writeln!(out, "// {}", s.source_file.file_path)?; + // current_file = Some(&s.source_file.file_path); + // } + // let file = BufReader::new(File::open(&s.source_file.abs_file_path).unwrap()); + // for l in file + // .lines() + // .skip(s.start.line as usize - 1) + // .take((s.end.line - s.start.line + 1) as usize) + // { + // writeln!(out, "{}", l.unwrap()).unwrap() + // } + // if !is_complete { + // writeln!(out, "unreachable!() }}")?; + // } + // } + + // Ok(()) + // } } /// Context queries conveniently accessible on nodes @@ -874,7 +872,7 @@ impl NodeExt for GlobalNode { } fn instruction(self, ctx: &Context) -> &InstructionInfo { - &ctx.desc.instruction_info[&self.info(ctx).at.leaf()] + &ctx.desc.instruction_info[&self.info(ctx).at] } fn successors(self, ctx: &Context) -> Box + '_> { diff --git a/crates/paralegal-policy/src/test_utils.rs b/crates/paralegal-policy/src/test_utils.rs index 9f322d6c4a..5797d037aa 100644 --- a/crates/paralegal-policy/src/test_utils.rs +++ b/crates/paralegal-policy/src/test_utils.rs @@ -45,7 +45,7 @@ fn is_at_function_call_with_name( node: SPDGNode, ) -> bool { let weight = ctrl.graph.node_weight(node).unwrap().at; - let instruction = &ctx.desc().instruction_info[&weight.leaf()]; + let 
instruction = &ctx.desc().instruction_info[&weight]; matches!( instruction.kind, InstructionKind::FunctionCall(call) if diff --git a/crates/paralegal-spdg/src/dot.rs b/crates/paralegal-spdg/src/dot.rs index 754c549da5..d93d0f1770 100644 --- a/crates/paralegal-spdg/src/dot.rs +++ b/crates/paralegal-spdg/src/dot.rs @@ -107,7 +107,7 @@ impl<'a, 'd> dot::Labeller<'a, CallString, GlobalEdge> for DotPrintableProgramDe fn node_label(&'a self, n: &CallString) -> LabelText<'a> { let (ctrl_id, nodes) = &self.call_sites[n]; let ctrl = &self.spdg.controllers[ctrl_id]; - let instruction = &self.spdg.instruction_info[&n.leaf()]; + let instruction = &self.spdg.instruction_info[&n]; let write_label = || { use std::fmt::Write; diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index fe66687ed7..817a64b9a6 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -341,31 +341,12 @@ pub struct ProgramDescription { /// Metadata about the instructions that are executed at all program /// locations we know about. #[serde(with = "serde_map_via_vec")] - pub instruction_info: HashMap, + pub instruction_info: HashMap, #[cfg_attr(not(feature = "rustc"), serde(with = "serde_map_via_vec"))] #[cfg_attr(feature = "rustc", serde(with = "ser_defid_map"))] /// Metadata about the `DefId`s pub def_info: HashMap, - /// How many marker annotations were found - pub marker_annotation_count: u32, - // /// How long rustc ran before out plugin executed - //pub rustc_time: Duration, - /// The number of functions we produced a PDG for - pub dedup_functions: u32, - /// The lines of code corresponding to the functions from - /// [`Self::dedup_functions`]. - pub dedup_locs: u32, - /// The number of functions we produced PDGs for or we inspected to check - /// for markers. - pub seen_functions: u32, - /// The lines of code corresponding to the functions from - /// [`Self::seen_functions`]. 
This is the sum of all - /// `analyzed_locs` of the controllers but deduplicated. - pub seen_locs: u32, - #[doc(hidden)] - #[serde(with = "ser_localdefid_map")] - pub analyzed_spans: HashMap, } /// Metadata about a type From d7677660d2fa58b95662de5e8f40a31e1cecdfd8 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 23 May 2024 15:41:32 -0700 Subject: [PATCH 33/95] Fixing warnings --- crates/flowistry_pdg/src/pdg.rs | 4 +-- .../src/async_support.rs | 2 +- .../src/construct.rs | 20 +++++------ .../flowistry_pdg_construction/src/graph.rs | 8 ++--- .../flowistry_pdg_construction/src/utils.rs | 6 ++-- crates/paralegal-flow/src/ana/encoder.rs | 2 +- .../paralegal-flow/src/ana/graph_converter.rs | 2 +- crates/paralegal-flow/src/ana/mod.rs | 33 ++++++++----------- crates/paralegal-flow/src/ann/db.rs | 2 +- crates/paralegal-flow/src/discover.rs | 2 +- crates/paralegal-flow/src/lib.rs | 4 +-- crates/paralegal-flow/src/test_utils.rs | 2 +- crates/paralegal-policy/src/context.rs | 10 ++---- crates/paralegal-spdg/src/lib.rs | 1 + 14 files changed, 42 insertions(+), 56 deletions(-) diff --git a/crates/flowistry_pdg/src/pdg.rs b/crates/flowistry_pdg/src/pdg.rs index ddf170c566..92d568a59c 100644 --- a/crates/flowistry_pdg/src/pdg.rs +++ b/crates/flowistry_pdg/src/pdg.rs @@ -50,7 +50,7 @@ impl Decodable for RichLocation { match d.read_usize() { 0 => Self::Location(Location { block: d.read_u32().into(), - statement_index: d.read_usize().into(), + statement_index: d.read_usize(), }), 1 => Self::Start, 2 => Self::End, @@ -140,7 +140,7 @@ pub struct CallString(Intern); #[cfg(feature = "rustc")] impl Encodable for CallString { fn encode(&self, s: &mut S) { - let inner: &CallStringInner = &*self.0; + let inner: &CallStringInner = &self.0; inner.encode(s); } } diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index 09334fbac6..805d9d1efe 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ 
b/crates/flowistry_pdg_construction/src/async_support.rs @@ -334,7 +334,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { self.tcx().param_env_reveal_all_normalized(self.def_id), generics, ) - .ok_or_else(|| "Resolving function failed")?; + .ok_or("Resolving function failed")?; Ok((resolution, async_fn_call_loc, calling_convention)) } diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 772bb78a7b..5f58ffd5b6 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -1,4 +1,4 @@ -use std::{borrow::Cow, collections::HashSet, iter, rc::Rc}; +use std::{collections::HashSet, iter, rc::Rc}; use either::Either; use flowistry::mir::placeinfo::PlaceInfo; @@ -54,13 +54,13 @@ impl<'tcx> PDGLoader<'tcx> for NoLoader { impl<'tcx, T: PDGLoader<'tcx>> PDGLoader<'tcx> for Rc { fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { - (&**self).load(function) + (**self).load(function) } } impl<'tcx, T: PDGLoader<'tcx>> PDGLoader<'tcx> for Box { fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { - (&**self).load(function) + (**self).load(function) } } @@ -138,7 +138,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { } pub fn construct_graph(&self, function: LocalDefId) -> Result, ErrorGuaranteed> { - let args = manufacture_substs_for(self.tcx, function.to_def_id())?; + let _args = manufacture_substs_for(self.tcx, function.to_def_id())?; let g = self .construct_root(function) .ok_or_else(|| { @@ -372,7 +372,6 @@ impl<'tcx> PartialGraph<'tcx> { CallHandling::Ready { calling_convention, descriptor, - generic_args: _, } => (descriptor, calling_convention), CallHandling::ApproxAsyncFn => { // Register a synthetic assignment of `future = (arg0, arg1, ...)`. 
@@ -411,7 +410,7 @@ impl<'tcx> PartialGraph<'tcx> { for (child_src, _kind) in parentable_srcs { if let Some(parent_place) = calling_convention.translate_to_parent( child_src.place, - &constructor.async_info(), + constructor.async_info(), constructor.tcx(), &constructor.body, constructor.def_id.to_def_id(), @@ -439,7 +438,7 @@ impl<'tcx> PartialGraph<'tcx> { for (child_dst, kind) in parentable_dsts { if let Some(parent_place) = calling_convention.translate_to_parent( child_dst.place, - &constructor.async_info(), + constructor.async_info(), constructor.tcx(), &constructor.body, constructor.def_id.to_def_id(), @@ -666,7 +665,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { } fn async_info(&self) -> &AsyncInfo { - &*self.memo.async_info + &self.memo.async_info } fn make_call_string(&self, location: impl Into) -> CallString { @@ -1035,7 +1034,6 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { Some(CallHandling::Ready { descriptor, calling_convention, - generic_args, }) } @@ -1056,7 +1054,6 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { let (child_constructor, calling_convention) = match preamble { CallHandling::Ready { descriptor, - generic_args: _, calling_convention, } => (descriptor, calling_convention), CallHandling::ApproxAsyncFn => { @@ -1086,7 +1083,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { let translate_to_parent = |child: Place<'tcx>| -> Option> { calling_convention.translate_to_parent( child, - &self.async_info(), + self.async_info(), self.tcx(), parent_body, self.def_id.to_def_id(), @@ -1387,7 +1384,6 @@ enum CallHandling<'tcx, 'a> { Ready { calling_convention: CallingConvention<'tcx, 'a>, descriptor: &'a SubgraphDescriptor<'tcx>, - generic_args: GenericArgsRef<'tcx>, }, ApproxAsyncSM(ApproximationHandler<'tcx, 'a>), } diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 195177f791..9d48ac5df2 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ 
b/crates/flowistry_pdg_construction/src/graph.rs @@ -13,7 +13,7 @@ use rustc_hash::{FxHashMap, FxHashSet}; use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ mir::{Body, Place}, - ty::{GenericArgsRef, Ty, TyCtxt}, + ty::{GenericArgsRef, TyCtxt}, }; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use rustc_span::Span; @@ -241,7 +241,7 @@ impl InternedString { } pub fn as_str(&self) -> &str { - &**self.0 + &self.0 } } @@ -261,13 +261,13 @@ impl std::ops::Deref for InternedString { type Target = String; fn deref(&self) -> &Self::Target { - &*self.0 + &self.0 } } impl Encodable for InternedString { fn encode(&self, e: &mut E) { - let s: &String = &*self.0; + let s: &String = &self.0; s.encode(e); } } diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 553d7dbf9b..901f39e9e6 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -1,11 +1,11 @@ -use std::{borrow::Cow, collections::hash_map::Entry, hash::Hash}; +use std::{collections::hash_map::Entry, hash::Hash}; use either::Either; use itertools::Itertools; -use log::{debug, trace}; +use log::trace; use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hir::def_id::DefId; -use rustc_macros::{TyDecodable, TyEncodable}; + use rustc_middle::{ mir::{ tcx::PlaceTy, Body, HasLocalDecls, Local, Location, Place, ProjectionElem, Statement, diff --git a/crates/paralegal-flow/src/ana/encoder.rs b/crates/paralegal-flow/src/ana/encoder.rs index 42018e05df..e82f8fa0a3 100644 --- a/crates/paralegal-flow/src/ana/encoder.rs +++ b/crates/paralegal-flow/src/ana/encoder.rs @@ -41,7 +41,7 @@ impl<'tcx> ParalegalEncoder<'tcx> { const CLEAR_CROSS_CRATE: bool = false; -impl<'a, 'tcx> Encoder for ParalegalEncoder<'tcx> { +impl<'tcx> Encoder for ParalegalEncoder<'tcx> { encoder_methods! 
{ emit_usize(usize); emit_u128(u128); diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 27d2ce4b3f..7925b92c90 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -290,7 +290,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { .map(|ann| Identifier::new_intern(ann.marker.as_str())) .collect::>(); trace!("Found markers {markers:?}"); - self.register_markers(node, markers.into_iter()); + self.register_markers(node, markers); self.known_def_ids.extend(parent); } diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 08743195dc..f9f3f0ee3f 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -28,10 +28,10 @@ use rustc_hir::{ def_id::{CrateNum, DefIndex, LocalDefId, LOCAL_CRATE}, }; use rustc_index::IndexVec; -use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; +use rustc_macros::{TyDecodable, TyEncodable}; use rustc_middle::{ mir::{ - BasicBlock, BasicBlockData, Body, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, + BasicBlock, BasicBlockData, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, Statement, Terminator, TerminatorKind, }, ty::{tls, EarlyBinder, GenericArgsRef, Ty, TyCtxt}, @@ -87,7 +87,7 @@ impl<'tcx> MetadataLoader<'tcx> { self: Rc, args: &'static Args, path: impl AsRef, - ) -> (Vec, MarkerCtx<'tcx>, MemoPdgConstructor<'tcx>) { + ) -> (Vec, MarkerCtx<'tcx>) { let tcx = self.tcx; let mut collector = CollectingVisitor::new(tcx, args, self.clone()); collector.run(); @@ -109,10 +109,10 @@ impl<'tcx> MetadataLoader<'tcx> { .collect::>(); let meta = Metadata::from_pdgs(tcx, pdgs, marker_ctx.db()); let path = path.as_ref(); - println!("Writing metadata to {}", path.display()); + debug!("Writing metadata to {}", path.display()); meta.write(path, tcx); self.cache.get(LOCAL_CRATE, |_| Some(meta)); - 
(collector.functions_to_analyze, marker_ctx, constructor) + (collector.functions_to_analyze, marker_ctx) } pub fn get_annotations(&self, key: DefId) -> &[Annotation] { @@ -203,7 +203,7 @@ impl<'tcx> Metadata<'tcx> { .iter() .map(|(k, v)| (k.local_def_index, v.clone())) .collect(), - reachable_markers: (&*cache_borrow) + reachable_markers: (*cache_borrow) .iter() .filter_map(|(inst, v)| { let id = inst.def_id(); @@ -352,12 +352,12 @@ impl<'tcx> RustcInstructionInfo<'tcx> { kind: match &term.kind { TerminatorKind::Call { func, - args, - destination, - target, - unwind, - call_source, - fn_span, + args: _, + destination: _, + target: _, + unwind: _, + call_source: _, + fn_span: _, } => { let op_ty = func.ty(local_decls, tcx); RustcInstructionKind::FunctionCall(EarlyBinder::bind(op_ty)) @@ -435,7 +435,6 @@ pub struct SPDGGenerator<'tcx> { pub opts: &'static crate::Args, pub tcx: TyCtxt<'tcx>, marker_ctx: MarkerCtx<'tcx>, - flowistry_loader: MemoPdgConstructor<'tcx>, metadata_loader: Rc>, } @@ -444,14 +443,12 @@ impl<'tcx> SPDGGenerator<'tcx> { marker_ctx: MarkerCtx<'tcx>, opts: &'static crate::Args, tcx: TyCtxt<'tcx>, - loader: MemoPdgConstructor<'tcx>, metadata_loader: Rc>, ) -> Self { Self { marker_ctx, opts, tcx, - flowistry_loader: loader, metadata_loader, } } @@ -512,11 +509,7 @@ impl<'tcx> SPDGGenerator<'tcx> { }) }) .collect::>>() - .map(|controllers| { - let desc = self.make_program_description(controllers, known_def_ids, &targets); - - desc - }) + .map(|controllers| self.make_program_description(controllers, known_def_ids, &targets)) } /// Given the PDGs and a record of all [`DefId`]s we've seen, compile diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index 6204c697b3..05f4016602 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -96,7 +96,7 @@ impl<'tcx> MarkerCtx<'tcx> { /// Queries are cached/precomputed so calling this repeatedly is cheap. 
pub fn combined_markers(&self, def_id: DefId) -> impl Iterator { self.attribute_annotations(def_id) - .into_iter() + .iter() .filter_map(Annotation::as_marker) .chain(self.external_markers(def_id).iter()) } diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index 27a3ee4c4d..e9f7d7f6b1 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -72,7 +72,7 @@ impl<'tcx> CollectingVisitor<'tcx> { }) } else { tcx.sess.span_err(tcx.def_span(def_id), "found an external function as analysis target. Analysis targets are required to be local."); - return None; + None } }) .collect(); diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index eff0cc24f5..6c642954ab 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -160,12 +160,12 @@ impl Callbacks { let intermediate_out_file = intermediate_out_file_path(tcx)?; - let (analysis_targets, mctx, pdg_constructor) = loader + let (analysis_targets, mctx) = loader .clone() .collect_and_emit_metadata(self.opts, intermediate_out_file); tcx.sess.abort_if_errors(); - let mut gen = SPDGGenerator::new(mctx, self.opts, tcx, pdg_constructor, loader.clone()); + let mut gen = SPDGGenerator::new(mctx, self.opts, tcx, loader); let compilation = if !analysis_targets.is_empty() { let desc = gen.analyze(analysis_targets)?; diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index a75a9b5d9d..a1d2ccd1ad 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -343,7 +343,7 @@ impl<'g> CtrlRef<'g> { .chain(self.ctrl.graph.node_weights().map(|info| info.at)) .filter(|m| { instruction_info - .get(&m) + .get(m) .unwrap_or_else(|| { panic!( "Could not find instruction {} in\n{}", diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index 0082911c3b..2b074c004c 100644 --- 
a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -1,17 +1,13 @@ -use std::collections::BTreeMap; -use std::fs::File; -use std::io::{BufRead, BufReader}; use std::time::{Duration, Instant}; use std::vec; use std::{io::Write, process::exit, sync::Arc}; -use paralegal_spdg::rustc_portable::defid_as_local; pub use paralegal_spdg::rustc_portable::{DefId, LocalDefId}; use paralegal_spdg::traverse::{generic_flows_to, EdgeSelection}; use paralegal_spdg::{ - CallString, DefKind, DisplayNode, Endpoint, GlobalNode, HashMap, HashSet, Identifier, - InstructionInfo, IntoIterGlobalNodes, Node as SPDGNode, NodeCluster, NodeInfo, - ProgramDescription, SPDGImpl, Span, TypeId, SPDG, + CallString, DisplayNode, Endpoint, GlobalNode, HashMap, HashSet, Identifier, InstructionInfo, + IntoIterGlobalNodes, Node as SPDGNode, NodeCluster, NodeInfo, ProgramDescription, SPDGImpl, + Span, TypeId, SPDG, }; use anyhow::{anyhow, bail, Result}; diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index 817a64b9a6..32df9a1d6d 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -36,6 +36,7 @@ pub mod utils; use internment::Intern; use itertools::Itertools; +#[cfg(feature = "rustc")] use rustc_macros::{Decodable, Encodable}; use rustc_portable::DefId; use serde::{Deserialize, Serialize}; From 859c499e2ad396f585642a28f794263c80a9d45d Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 23 May 2024 19:56:58 -0700 Subject: [PATCH 34/95] Export mIssing definitions --- crates/paralegal-flow/src/ana/mod.rs | 26 ++++++++++++++++++------- crates/paralegal-flow/src/test_utils.rs | 5 ++++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index f9f3f0ee3f..112c0b1962 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -34,7 +34,7 @@ use rustc_middle::{ BasicBlock, 
BasicBlockData, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, Statement, Terminator, TerminatorKind, }, - ty::{tls, EarlyBinder, GenericArgsRef, Ty, TyCtxt}, + ty::{tls, EarlyBinder, GenericArgsRef, Instance, ParamEnv, Ty, TyCtxt}, }; use rustc_serialize::{Decodable, Encodable}; use rustc_span::{FileNameDisplayPreference, Span as RustSpan}; @@ -359,7 +359,7 @@ impl<'tcx> RustcInstructionInfo<'tcx> { call_source: _, fn_span: _, } => { - let op_ty = func.ty(local_decls, tcx); + let op_ty = tcx.erase_regions(func.ty(local_decls, tcx)); RustcInstructionKind::FunctionCall(EarlyBinder::bind(op_ty)) } TerminatorKind::SwitchInt { .. } => RustcInstructionKind::SwitchInt, @@ -523,7 +523,7 @@ impl<'tcx> SPDGGenerator<'tcx> { ) -> ProgramDescription { let tcx = self.tcx; - let instruction_info = self.collect_instruction_info(&controllers); + let instruction_info = self.collect_instruction_info(&controllers, &mut known_def_ids); let type_info = self.collect_type_info(); known_def_ids.extend(type_info.keys()); @@ -546,6 +546,7 @@ impl<'tcx> SPDGGenerator<'tcx> { fn collect_instruction_info( &self, controllers: &HashMap, + known_def_ids: &mut impl Extend, ) -> HashMap { let all_instructions = controllers .values() @@ -577,13 +578,24 @@ impl<'tcx> SPDGGenerator<'tcx> { match instruction.kind { RustcInstructionKind::SwitchInt => InstructionKind::SwitchInt, RustcInstructionKind::FunctionCall(c) => { - InstructionKind::FunctionCall(FunctionCallInfo { - id: flowistry_pdg_construction::utils::type_as_fn( + InstructionKind::FunctionCall({ + let (id, generics) = + flowistry_pdg_construction::utils::type_as_fn( + self.tcx, + c.instantiate(self.tcx, monos), + ) + .unwrap(); + let instance_id = Instance::resolve( self.tcx, - c.instantiate(self.tcx, monos), + ParamEnv::reveal_all(), + id, + generics, ) .unwrap() - .0, + .unwrap() + .def_id(); + known_def_ids.extend(Some(instance_id)); + FunctionCallInfo { id: instance_id } }) } RustcInstructionKind::Statement => 
InstructionKind::Statement, diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index a1d2ccd1ad..6c0bd76380 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -186,7 +186,10 @@ pub trait HasGraph<'g>: Sized + Copy { let name = Identifier::new_intern(name.as_ref()); let id = match self.graph().name_map.get(&name).map(Vec::as_slice) { Some([one]) => *one, - Some([]) | None => panic!("Did not find name {name}"), + Some([]) | None => panic!( + "Did not find name {name}. Known names:\n{:?}", + self.graph().name_map.keys().collect::>() + ), _ => panic!("Found too many function matching name {name}"), }; FnRef { From 1d0d6a5b980d36b77f08d23ab833aaec65d20fe4 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 23 May 2024 20:23:55 -0700 Subject: [PATCH 35/95] Factor out metadata --- .../paralegal-flow/src/ana/graph_converter.rs | 5 +- crates/paralegal-flow/src/ana/metadata.rs | 409 +++++++++++++++++ crates/paralegal-flow/src/ana/mod.rs | 420 +----------------- crates/paralegal-flow/src/lib.rs | 6 +- 4 files changed, 427 insertions(+), 413 deletions(-) create mode 100644 crates/paralegal-flow/src/ana/metadata.rs diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 7925b92c90..5b48386f77 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -13,7 +13,8 @@ use rustc_middle::{ use std::{cell::RefCell, fmt::Display, rc::Rc}; use super::{ - default_index, path_for_item, src_loc_for_span, BodyInfo, RustcInstructionKind, SPDGGenerator, + default_index, metadata::BodyInfo, path_for_item, src_loc_for_span, RustcInstructionKind, + SPDGGenerator, }; use anyhow::Result; use either::Either; @@ -249,7 +250,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { // "Resolving {raw_ty:?} for place {place:?} with generics {generics:?} in {function:?}", // ); let 
generics = self.generator.metadata_loader.get_mono(at).unwrap(); - println!("Determining type fpr place {place:?} at {at} with raw type {raw_ty:?} and generics {generics:?}"); + trace!("Determining type for place {place:?} at {at} with raw type {raw_ty:?} and generics {generics:?}"); let instance = Instance::resolve( tcx, tcx.param_env_reveal_all_normalized(function), diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs new file mode 100644 index 0000000000..c036a07cc1 --- /dev/null +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -0,0 +1,409 @@ +use crate::{ + ann::{db::MarkerDatabase, Annotation}, + consts::INTERMEDIATE_ARTIFACT_EXT, + desc::*, + discover::{CollectingVisitor, FnToAnalyze}, + Args, DefId, HashMap, MarkerCtx, +}; + +use std::path::Path; +use std::{fs::File, io::Read, rc::Rc}; + +use flowistry_pdg_construction::{ + graph::InternedString, Asyncness, DepGraph, MemoPdgConstructor, PDGLoader, SubgraphDescriptor, +}; + +use rustc_hash::FxHashMap; +use rustc_hir::def_id::{CrateNum, DefIndex, LocalDefId, LOCAL_CRATE}; +use rustc_index::IndexVec; +use rustc_macros::{TyDecodable, TyEncodable}; +use rustc_middle::{ + mir::{ + BasicBlock, BasicBlockData, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, + Location, Statement, Terminator, TerminatorKind, + }, + ty::{tls, EarlyBinder, GenericArgsRef, Ty, TyCtxt}, +}; +use rustc_serialize::{Decodable, Encodable}; + +use anyhow::Result; +use rustc_utils::{cache::Cache, mir::borrowck_facts}; +use thiserror::Error; + +use super::{ + encoder::{ParalegalDecoder, ParalegalEncoder}, + graph_converter::MyCallback, + inline_judge::InlineJudge, +}; +pub struct MetadataLoader<'tcx> { + tcx: TyCtxt<'tcx>, + cache: Cache>>, +} + +#[derive(Debug, Error)] +pub enum MetadataLoaderError { + #[error("no pdg for item {:?}", .0)] + NoPdgForItem(DefId), + #[error("no metadata for crate {}", tls::with(|tcx| tcx.crate_name(*.0)))] + NoMetadataForCrate(CrateNum), + #[error("no 
generics known for call site {0}")] + NoGenericsKnownForCallSite(CallString), + #[error("no metadata for item {:?} in crate {}", .0, tls::with(|tcx| tcx.crate_name(.0.krate)))] + NoSuchItemInCate(DefId), +} + +use MetadataLoaderError::*; + +impl<'tcx> PDGLoader<'tcx> for MetadataLoader<'tcx> { + fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { + self.get_metadata(function.krate) + .ok()? + .pdgs + .get(&function.index) + } +} + +impl<'tcx> MetadataLoader<'tcx> { + pub fn collect_and_emit_metadata( + self: Rc, + args: &'static Args, + path: impl AsRef, + ) -> (Vec, MarkerCtx<'tcx>) { + let tcx = self.tcx; + let mut collector = CollectingVisitor::new(tcx, args, self.clone()); + collector.run(); + let emit_targets = collector.emit_target_collector; + let marker_ctx: MarkerCtx = collector.marker_ctx.into(); + let mut constructor = MemoPdgConstructor::new(tcx, self.clone()); + constructor.with_call_change_callback(MyCallback { + tcx, + judge: InlineJudge::new(marker_ctx.clone(), tcx, args.anactrl()), + }); + let pdgs = emit_targets + .into_iter() + .map(|t| { + ( + t.local_def_index, + (*constructor.construct_root(t).unwrap()).clone(), + ) + }) + .collect::>(); + let meta = Metadata::from_pdgs(tcx, pdgs, marker_ctx.db()); + let path = path.as_ref(); + debug!("Writing metadata to {}", path.display()); + meta.write(path, tcx); + self.cache.get(LOCAL_CRATE, |_| Some(meta)); + (collector.functions_to_analyze, marker_ctx) + } + + pub fn get_annotations(&self, key: DefId) -> &[Annotation] { + (|| { + Some( + self.get_metadata(key.krate) + .ok()? + .local_annotations + .get(&key.index)? + .as_slice(), + ) + })() + .unwrap_or(&[]) + } + + pub fn all_annotations<'a>(&'a self) -> impl Iterator { + let b = self.cache.borrow(); + + // Safety: While we're keeping references to the borrow above, we only + // keep references to values behind `Pin>` which are guaranteed + // not to move. So even if the borrow is modified, these references are + // still valid. 
+ // + // In terms of race conditions: this is a cache which never overwrites values. + let metadatas = unsafe { + std::mem::transmute::< + Vec<(CrateNum, &_)>, + Vec<(CrateNum, &'a HashMap>)>, + >( + b.iter() + .filter_map(|(k, v)| Some((*k, &(**(v.as_ref()?)).as_ref()?.local_annotations))) + .collect::>(), + ) + }; + metadatas.into_iter().flat_map(|(krate, m)| { + m.iter() + .flat_map(move |(&index, v)| v.iter().map(move |v| (DefId { krate, index }, v))) + }) + } +} +#[derive(Clone, Debug, TyEncodable, TyDecodable)] +pub struct Metadata<'tcx> { + pub pdgs: FxHashMap>, + pub bodies: FxHashMap>, + pub local_annotations: HashMap>, + pub reachable_markers: HashMap<(DefIndex, GenericArgsRef<'tcx>), Box<[InternedString]>>, +} + +impl<'tcx> Metadata<'tcx> { + fn write(&self, path: impl AsRef, tcx: TyCtxt<'tcx>) { + let mut encoder = ParalegalEncoder::new(path, tcx); + self.encode(&mut encoder); + encoder.finish() + } +} + +impl<'tcx> Metadata<'tcx> { + pub fn from_pdgs( + tcx: TyCtxt<'tcx>, + pdgs: FxHashMap>, + markers: &MarkerDatabase<'tcx>, + ) -> Self { + let mut bodies: FxHashMap = Default::default(); + for location in pdgs.values().flat_map(|subgraph| { + subgraph + .graph + .nodes + .iter() + .map(|n| &n.at) + .chain(subgraph.graph.edges.iter().map(|e| &e.2.at)) + .flat_map(|at| at.iter()) + }) { + if let Some(local) = location.function.as_local() { + bodies.entry(local.local_def_index).or_insert_with(|| { + let info = BodyInfo::from_body(tcx, local); + trace!("Created info for body {local:?}\n{info:?}"); + info + }); + } + } + let cache_borrow = markers.reachable_markers.borrow(); + Self { + pdgs, + bodies, + local_annotations: markers + .local_annotations + .iter() + .map(|(k, v)| (k.local_def_index, v.clone())) + .collect(), + reachable_markers: (*cache_borrow) + .iter() + .filter_map(|(inst, v)| { + let id = inst.def_id(); + let args = inst.args; + Some(( + (id.as_local()?.local_def_index, args), + (**(v.as_ref()?)).clone(), + )) + }) + .collect(), + } + } +} 
+ +impl<'tcx> MetadataLoader<'tcx> { + pub fn new(tcx: TyCtxt<'tcx>) -> Rc { + Rc::new(Self { + tcx, + cache: Default::default(), + }) + } + + pub fn get_metadata(&self, key: CrateNum) -> Result<&Metadata<'tcx>> { + let meta = self + .cache + .get(key, |_| { + let paths = self.tcx.crate_extern_paths(key); + for path in paths { + let path = path.with_extension(INTERMEDIATE_ARTIFACT_EXT); + println!("Trying to load file {}", path.display()); + let Ok(mut file) = File::open(path) else { + continue; + }; + let mut buf = Vec::new(); + file.read_to_end(&mut buf).unwrap(); + let mut decoder = ParalegalDecoder::new(self.tcx, buf.as_slice()); + let meta = Metadata::decode(&mut decoder); + println!("Successfully loaded"); + return Some(meta); + } + None + }) + .as_ref() + .ok_or(NoMetadataForCrate(key))?; + Ok(meta) + } + + pub fn get_body_info(&self, key: DefId) -> Result<&BodyInfo<'tcx>> { + let meta = self.get_metadata(key.krate)?; + let res = meta.bodies.get(&key.index).ok_or(NoSuchItemInCate(key)); + Ok(res?) + } + + pub fn get_mono(&self, cs: CallString) -> Result> { + let get_graph = |key: DefId| { + let meta = self.get_metadata(key.krate)?; + anyhow::Ok(&meta.pdgs.get(&key.index).ok_or(NoPdgForItem(key))?.graph) + }; + if let Some(caller) = cs.caller() { + let key = caller.root().function; + let monos = &get_graph(key)?.monos; + trace!("Known monos for {key:?} are"); + for (k, v) in monos { + trace!(" {k}: {v:?}"); + } + Ok(*monos.get(&caller).ok_or(NoGenericsKnownForCallSite(cs))?) + } else { + Ok(get_graph(cs.leaf().function)?.generics) + } + } + + pub fn get_pdg(&self, key: DefId) -> Result> { + Ok(self + .get_metadata(key.krate)? + .pdgs + .get(&key.index) + .ok_or(NoPdgForItem(key))? + .to_petgraph()) + } + + pub fn get_asyncness(&self, key: DefId) -> Asyncness { + (|| { + Some( + self.get_metadata(key.krate) + .ok()? + .pdgs + .get(&key.index)? 
+ .graph + .asyncness, + ) + })() + .unwrap_or(Asyncness::No) + } +} + +#[derive(Clone, Debug, TyEncodable, TyDecodable)] +pub struct BodyInfo<'tcx> { + pub arg_count: usize, + pub decls: IndexVec>, + pub instructions: IndexVec>>, + pub def_span: rustc_span::Span, +} + +impl<'tcx> BodyInfo<'tcx> { + pub fn from_body(tcx: TyCtxt<'tcx>, function_id: LocalDefId) -> Self { + let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, function_id); + let body = &body_with_facts.body; + Self { + arg_count: body.arg_count, + decls: body.local_decls().to_owned(), + instructions: body + .basic_blocks + .iter() + .map(|bb| RustcInstructionInfo::from_basic_block(tcx, body, bb)) + .collect(), + def_span: tcx.def_span(function_id), + } + } +} + +#[derive(Clone, Copy, Debug, TyEncodable, TyDecodable)] +pub struct RustcInstructionInfo<'tcx> { + /// Classification of the instruction + pub kind: RustcInstructionKind<'tcx>, + /// The source code span + pub span: rustc_span::Span, + /// Textual rendering of the MIR + pub description: InternedString, +} + +impl<'tcx> RustcInstructionInfo<'tcx> { + pub fn from_statement(stmt: &Statement) -> Self { + Self { + kind: RustcInstructionKind::Statement, + span: stmt.source_info.span, + description: format!("{:?}", stmt.kind).into(), + } + } + + pub fn from_terminator( + tcx: TyCtxt<'tcx>, + local_decls: &impl HasLocalDecls<'tcx>, + term: &Terminator<'tcx>, + ) -> Self { + Self { + kind: match &term.kind { + TerminatorKind::Call { + func, + args: _, + destination: _, + target: _, + unwind: _, + call_source: _, + fn_span: _, + } => { + let op_ty = tcx.erase_regions(func.ty(local_decls, tcx)); + RustcInstructionKind::FunctionCall(EarlyBinder::bind(op_ty)) + } + TerminatorKind::SwitchInt { .. 
} => RustcInstructionKind::SwitchInt, + _ => RustcInstructionKind::Terminator, + }, + span: term.source_info.span, + description: format!("{:?}", term.kind).into(), + } + } + + pub fn from_basic_block( + tcx: TyCtxt<'tcx>, + local_decls: &impl HasLocalDecls<'tcx>, + bb: &BasicBlockData<'tcx>, + ) -> Vec { + let t = bb.terminator(); + bb.statements + .iter() + .map(Self::from_statement) + .chain([Self::from_terminator(tcx, local_decls, t)]) + .collect() + } +} + +/// The type of instructions we may encounter +#[derive(Debug, Clone, Copy, Eq, Ord, PartialOrd, PartialEq, TyEncodable, TyDecodable)] +pub enum RustcInstructionKind<'tcx> { + /// Some type of statement + Statement, + /// A function call. The type is guaranteed to be of function type + FunctionCall(EarlyBinder>), + /// A basic block terminator + Terminator, + /// The switch int terminator + SwitchInt, +} + +impl<'tcx> BodyInfo<'tcx> { + pub fn local_kind(&self, local: Local) -> LocalKind { + let local = local.as_usize(); + assert!(local < self.decls.len()); + if local == 0 { + LocalKind::ReturnPointer + } else if local < self.arg_count + 1 { + LocalKind::Arg + } else { + LocalKind::Temp + } + } + + pub fn instruction_at(&self, location: Location) -> RustcInstructionInfo<'tcx> { + self.instructions[location.block][location.statement_index] + } + + pub fn span_of(&self, loc: RichLocation) -> rustc_span::Span { + match loc { + RichLocation::Location(loc) => self.instruction_at(loc).span, + _ => self.def_span, + } + } +} + +impl<'tcx> HasLocalDecls<'tcx> for BodyInfo<'tcx> { + fn local_decls(&self) -> &LocalDecls<'tcx> { + &self.decls + } +} diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 112c0b1962..08bb829d18 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -5,428 +5,32 @@ //! [`analyze`](SPDGGenerator::analyze). 
use crate::{ - ann::{db::MarkerDatabase, Annotation, MarkerAnnotation}, - consts::INTERMEDIATE_ARTIFACT_EXT, + ann::{Annotation, MarkerAnnotation}, desc::*, - discover::{CollectingVisitor, FnToAnalyze}, + discover::FnToAnalyze, utils::*, - Args, DefId, HashMap, HashSet, LogLevelConfig, MarkerCtx, Symbol, + DefId, HashMap, HashSet, LogLevelConfig, MarkerCtx, Symbol, }; -use std::path::Path; -use std::{fs::File, io::Read, rc::Rc}; +use std::rc::Rc; -use flowistry_pdg_construction::{ - graph::InternedString, Asyncness, DepGraph, MemoPdgConstructor, PDGLoader, SubgraphDescriptor, -}; +use anyhow::Result; use itertools::Itertools; use petgraph::visit::GraphBase; -use rustc_hash::FxHashMap; -use rustc_hir::{ - def, - def_id::{CrateNum, DefIndex, LocalDefId, LOCAL_CRATE}, -}; -use rustc_index::IndexVec; -use rustc_macros::{TyDecodable, TyEncodable}; -use rustc_middle::{ - mir::{ - BasicBlock, BasicBlockData, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, - Location, Statement, Terminator, TerminatorKind, - }, - ty::{tls, EarlyBinder, GenericArgsRef, Instance, ParamEnv, Ty, TyCtxt}, -}; -use rustc_serialize::{Decodable, Encodable}; +use rustc_hir::def; +use rustc_middle::ty::{Instance, ParamEnv, TyCtxt}; use rustc_span::{FileNameDisplayPreference, Span as RustSpan}; mod encoder; mod graph_converter; mod inline_judge; +mod metadata; -use anyhow::Result; use graph_converter::GraphConverter; -use rustc_utils::{cache::Cache, mir::borrowck_facts}; -use thiserror::Error; - -use self::{ - encoder::{ParalegalDecoder, ParalegalEncoder}, - graph_converter::MyCallback, - inline_judge::InlineJudge, -}; - -pub struct MetadataLoader<'tcx> { - tcx: TyCtxt<'tcx>, - cache: Cache>>, -} - -#[derive(Debug, Error)] -pub enum MetadataLoaderError { - #[error("no pdg for item {:?}", .0)] - NoPdgForItem(DefId), - #[error("no metadata for crate {}", tls::with(|tcx| tcx.crate_name(*.0)))] - NoMetadataForCrate(CrateNum), - #[error("no generics known for call site {0}")] - 
NoGenericsKnownForCallSite(CallString), - #[error("no metadata for item {:?} in crate {}", .0, tls::with(|tcx| tcx.crate_name(.0.krate)))] - NoSuchItemInCate(DefId), -} - -use MetadataLoaderError::*; - -impl<'tcx> PDGLoader<'tcx> for MetadataLoader<'tcx> { - fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { - self.get_metadata(function.krate) - .ok()? - .pdgs - .get(&function.index) - } -} - -impl<'tcx> MetadataLoader<'tcx> { - pub fn collect_and_emit_metadata( - self: Rc, - args: &'static Args, - path: impl AsRef, - ) -> (Vec, MarkerCtx<'tcx>) { - let tcx = self.tcx; - let mut collector = CollectingVisitor::new(tcx, args, self.clone()); - collector.run(); - let emit_targets = collector.emit_target_collector; - let marker_ctx: MarkerCtx = collector.marker_ctx.into(); - let mut constructor = MemoPdgConstructor::new(tcx, self.clone()); - constructor.with_call_change_callback(MyCallback { - tcx, - judge: InlineJudge::new(marker_ctx.clone(), tcx, args.anactrl()), - }); - let pdgs = emit_targets - .into_iter() - .map(|t| { - ( - t.local_def_index, - (*constructor.construct_root(t).unwrap()).clone(), - ) - }) - .collect::>(); - let meta = Metadata::from_pdgs(tcx, pdgs, marker_ctx.db()); - let path = path.as_ref(); - debug!("Writing metadata to {}", path.display()); - meta.write(path, tcx); - self.cache.get(LOCAL_CRATE, |_| Some(meta)); - (collector.functions_to_analyze, marker_ctx) - } - - pub fn get_annotations(&self, key: DefId) -> &[Annotation] { - (|| { - Some( - self.get_metadata(key.krate) - .ok()? - .local_annotations - .get(&key.index)? - .as_slice(), - ) - })() - .unwrap_or(&[]) - } - - pub fn all_annotations<'a>(&'a self) -> impl Iterator { - let b = self.cache.borrow(); +use metadata::RustcInstructionKind; - // Safety: While we're keeping references to the borrow above, we only - // keep references to values behind `Pin>` which are guaranteed - // not to move. So even if the borrow is modified, these references are - // still valid. 
- // - // In terms of race conditions: this is a cache which never overwrites values. - let metadatas = unsafe { - std::mem::transmute::< - Vec<(CrateNum, &_)>, - Vec<(CrateNum, &'a HashMap>)>, - >( - b.iter() - .filter_map(|(k, v)| Some((*k, &(**(v.as_ref()?)).as_ref()?.local_annotations))) - .collect::>(), - ) - }; - metadatas.into_iter().flat_map(|(krate, m)| { - m.iter() - .flat_map(move |(&index, v)| v.iter().map(move |v| (DefId { krate, index }, v))) - }) - } -} - -#[derive(Clone, Debug, TyEncodable, TyDecodable)] -pub struct Metadata<'tcx> { - pub pdgs: FxHashMap>, - pub bodies: FxHashMap>, - pub local_annotations: HashMap>, - pub reachable_markers: HashMap<(DefIndex, GenericArgsRef<'tcx>), Box<[InternedString]>>, -} - -impl<'tcx> Metadata<'tcx> { - fn write(&self, path: impl AsRef, tcx: TyCtxt<'tcx>) { - let mut encoder = ParalegalEncoder::new(path, tcx); - self.encode(&mut encoder); - encoder.finish() - } -} - -impl<'tcx> Metadata<'tcx> { - pub fn from_pdgs( - tcx: TyCtxt<'tcx>, - pdgs: FxHashMap>, - markers: &MarkerDatabase<'tcx>, - ) -> Self { - let mut bodies: FxHashMap = Default::default(); - for location in pdgs.values().flat_map(|subgraph| { - subgraph - .graph - .nodes - .iter() - .map(|n| &n.at) - .chain(subgraph.graph.edges.iter().map(|e| &e.2.at)) - .flat_map(|at| at.iter()) - }) { - if let Some(local) = location.function.as_local() { - bodies.entry(local.local_def_index).or_insert_with(|| { - let info = BodyInfo::from_body(tcx, local); - trace!("Created info for body {local:?}\n{info:?}"); - info - }); - } - } - let cache_borrow = markers.reachable_markers.borrow(); - Self { - pdgs, - bodies, - local_annotations: markers - .local_annotations - .iter() - .map(|(k, v)| (k.local_def_index, v.clone())) - .collect(), - reachable_markers: (*cache_borrow) - .iter() - .filter_map(|(inst, v)| { - let id = inst.def_id(); - let args = inst.args; - Some(( - (id.as_local()?.local_def_index, args), - (**(v.as_ref()?)).clone(), - )) - }) - .collect(), - } - } 
-} - -impl<'tcx> MetadataLoader<'tcx> { - pub fn new(tcx: TyCtxt<'tcx>) -> Rc { - Rc::new(Self { - tcx, - cache: Default::default(), - }) - } - - pub fn get_metadata(&self, key: CrateNum) -> Result<&Metadata<'tcx>> { - let meta = self - .cache - .get(key, |_| { - let paths = self.tcx.crate_extern_paths(key); - for path in paths { - let path = path.with_extension(INTERMEDIATE_ARTIFACT_EXT); - println!("Trying to load file {}", path.display()); - let Ok(mut file) = File::open(path) else { - continue; - }; - let mut buf = Vec::new(); - file.read_to_end(&mut buf).unwrap(); - let mut decoder = ParalegalDecoder::new(self.tcx, buf.as_slice()); - let meta = Metadata::decode(&mut decoder); - println!("Successfully loaded"); - return Some(meta); - } - None - }) - .as_ref() - .ok_or(NoMetadataForCrate(key))?; - Ok(meta) - } - - pub fn get_body_info(&self, key: DefId) -> Result<&BodyInfo<'tcx>> { - let meta = self.get_metadata(key.krate)?; - let res = meta.bodies.get(&key.index).ok_or(NoSuchItemInCate(key)); - Ok(res?) - } - - pub fn get_mono(&self, cs: CallString) -> Result> { - let get_graph = |key: DefId| { - let meta = self.get_metadata(key.krate)?; - anyhow::Ok(&meta.pdgs.get(&key.index).ok_or(NoPdgForItem(key))?.graph) - }; - if let Some(caller) = cs.caller() { - let key = caller.root().function; - let monos = &get_graph(key)?.monos; - println!("Known monos for {key:?} are"); - for (k, v) in monos { - println!(" {k}: {v:?}"); - } - Ok(*monos.get(&caller).ok_or(NoGenericsKnownForCallSite(cs))?) - } else { - Ok(get_graph(cs.leaf().function)?.generics) - } - } - - pub fn get_pdg(&self, key: DefId) -> Result> { - Ok(self - .get_metadata(key.krate)? - .pdgs - .get(&key.index) - .ok_or(NoPdgForItem(key))? - .to_petgraph()) - } - - pub fn get_asyncness(&self, key: DefId) -> Asyncness { - (|| { - Some( - self.get_metadata(key.krate) - .ok()? - .pdgs - .get(&key.index)? 
- .graph - .asyncness, - ) - })() - .unwrap_or(Asyncness::No) - } -} - -#[derive(Clone, Debug, TyEncodable, TyDecodable)] -pub struct BodyInfo<'tcx> { - pub arg_count: usize, - pub decls: IndexVec>, - pub instructions: IndexVec>>, - pub def_span: rustc_span::Span, -} - -impl<'tcx> BodyInfo<'tcx> { - pub fn from_body(tcx: TyCtxt<'tcx>, function_id: LocalDefId) -> Self { - let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, function_id); - let body = &body_with_facts.body; - Self { - arg_count: body.arg_count, - decls: body.local_decls().to_owned(), - instructions: body - .basic_blocks - .iter() - .map(|bb| RustcInstructionInfo::from_basic_block(tcx, body, bb)) - .collect(), - def_span: tcx.def_span(function_id), - } - } -} - -#[derive(Clone, Copy, Debug, TyEncodable, TyDecodable)] -pub struct RustcInstructionInfo<'tcx> { - /// Classification of the instruction - pub kind: RustcInstructionKind<'tcx>, - /// The source code span - pub span: rustc_span::Span, - /// Textual rendering of the MIR - pub description: InternedString, -} - -impl<'tcx> RustcInstructionInfo<'tcx> { - pub fn from_statement(stmt: &Statement) -> Self { - Self { - kind: RustcInstructionKind::Statement, - span: stmt.source_info.span, - description: format!("{:?}", stmt.kind).into(), - } - } - - pub fn from_terminator( - tcx: TyCtxt<'tcx>, - local_decls: &impl HasLocalDecls<'tcx>, - term: &Terminator<'tcx>, - ) -> Self { - Self { - kind: match &term.kind { - TerminatorKind::Call { - func, - args: _, - destination: _, - target: _, - unwind: _, - call_source: _, - fn_span: _, - } => { - let op_ty = tcx.erase_regions(func.ty(local_decls, tcx)); - RustcInstructionKind::FunctionCall(EarlyBinder::bind(op_ty)) - } - TerminatorKind::SwitchInt { .. 
} => RustcInstructionKind::SwitchInt, - _ => RustcInstructionKind::Terminator, - }, - span: term.source_info.span, - description: format!("{:?}", term.kind).into(), - } - } - - pub fn from_basic_block( - tcx: TyCtxt<'tcx>, - local_decls: &impl HasLocalDecls<'tcx>, - bb: &BasicBlockData<'tcx>, - ) -> Vec { - let t = bb.terminator(); - bb.statements - .iter() - .map(Self::from_statement) - .chain([Self::from_terminator(tcx, local_decls, t)]) - .collect() - } -} - -/// The type of instructions we may encounter -#[derive(Debug, Clone, Copy, Eq, Ord, PartialOrd, PartialEq, TyEncodable, TyDecodable)] -pub enum RustcInstructionKind<'tcx> { - /// Some type of statement - Statement, - /// A function call. The type is guaranteed to be of function type - FunctionCall(EarlyBinder>), - /// A basic block terminator - Terminator, - /// The switch int terminator - SwitchInt, -} - -impl<'tcx> BodyInfo<'tcx> { - pub fn local_kind(&self, local: Local) -> LocalKind { - let local = local.as_usize(); - assert!(local < self.decls.len()); - if local == 0 { - LocalKind::ReturnPointer - } else if local < self.arg_count + 1 { - LocalKind::Arg - } else { - LocalKind::Temp - } - } - - pub fn instruction_at(&self, location: Location) -> RustcInstructionInfo<'tcx> { - self.instructions[location.block][location.statement_index] - } - - pub fn span_of(&self, loc: RichLocation) -> rustc_span::Span { - match loc { - RichLocation::Location(loc) => self.instruction_at(loc).span, - _ => self.def_span, - } - } -} - -impl<'tcx> HasLocalDecls<'tcx> for BodyInfo<'tcx> { - fn local_decls(&self) -> &LocalDecls<'tcx> { - &self.decls - } -} +pub use metadata::MetadataLoader; /// Read-only database of information the analysis needs. 
/// @@ -592,8 +196,8 @@ impl<'tcx> SPDGGenerator<'tcx> { generics, ) .unwrap() - .unwrap() - .def_id(); + .map(|i| i.def_id()) + .unwrap_or(id); known_def_ids.extend(Some(instance_id)); FunctionCallInfo { id: instance_id } }) diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 6c642954ab..1d98dbf260 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -368,14 +368,14 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { .as_ref() .map_or(false, |n| n == "build_script_build"); - println!("Handling {}", crate_name.unwrap_or("".to_owned())); + debug!("Handling {}", crate_name.unwrap_or("".to_owned())); if !is_target || is_build_script { - println!("Is not target, skipping"); + debug!("Is not target, skipping"); return rustc_driver::RunCompiler::new(&compiler_args, &mut NoopCallbacks {}).run(); } - println!("Is target, compiling"); + debug!("Is target, compiling"); let lvl = plugin_args.verbosity(); // //let lvl = log::LevelFilter::Debug; From 3bbd8437166909a4f3e3bafdd6b158fe63562d75 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Fri, 24 May 2024 13:49:32 -0700 Subject: [PATCH 36/95] WIP debugging virtual instance resolution --- Cargo.lock | 1 + crates/flowistry_pdg_construction/Cargo.toml | 1 + .../src/construct.rs | 11 +++--- .../flowistry_pdg_construction/src/utils.rs | 9 +++++ crates/paralegal-flow/src/lib.rs | 1 + .../tests/entrypoint-generics.rs | 37 +++++++++++++++++++ 6 files changed, 54 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c1d3b183e6..521d977739 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -493,6 +493,7 @@ dependencies = [ "petgraph", "rustc_utils 0.7.4-nightly-2023-08-25 (git+https://github.com/JustusAdam/rustc_plugin?rev=e990ded60afc928f76293fb9ad265c58405da1a7)", "serde", + "strum", ] [[package]] diff --git a/crates/flowistry_pdg_construction/Cargo.toml b/crates/flowistry_pdg_construction/Cargo.toml index acc872a72a..a5320feb31 100644 --- 
a/crates/flowistry_pdg_construction/Cargo.toml +++ b/crates/flowistry_pdg_construction/Cargo.toml @@ -24,6 +24,7 @@ flowistry_pdg = { version = "0.1.0", path = "../flowistry_pdg", features = [ #flowistry = { path = "../../../flowistry/crates/flowistry", default-features = false } flowistry = { workspace = true } serde = { workspace = true, features = ["derive"] } +strum = { workspace = true } [dev-dependencies] rustc_utils = { workspace = true, features = ["indexical", "test"] } diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 5f58ffd5b6..7cfe0104a1 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -836,12 +836,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { &self, func: &Operand<'tcx>, ) -> Option<(DefId, &'tcx List>)> { - let ty = match func { - Operand::Constant(func) => func.literal.ty(), - Operand::Copy(place) | Operand::Move(place) => { - place.ty(&self.body.local_decls, self.tcx()).ty - } - }; + let ty = func.ty(&self.body, self.tcx()); utils::type_as_fn(self.tcx(), ty) } @@ -922,6 +917,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { let param_env = tcx.param_env_reveal_all_normalized(self.def_id); let resolved_fn = utils::try_resolve_function(self.tcx(), called_def_id, param_env, generic_args)?; + trace!("resolved to instance {resolved_fn:?}"); let resolved_def_id = resolved_fn.def_id(); if log_enabled!(Level::Trace) && called_def_id != resolved_def_id { let (called, resolved) = (self.fmt_fn(called_def_id), self.fmt_fn(resolved_def_id)); @@ -1051,6 +1047,8 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { let preamble = self.determine_call_handling(location, func, args)?; + trace!("Call handling is {}", preamble.as_ref()); + let (child_constructor, calling_convention) = match preamble { CallHandling::Ready { descriptor, @@ -1379,6 +1377,7 @@ impl<'tcx> SubgraphDescriptor<'tcx> { } } +#[derive(strum::AsRefStr)] 
enum CallHandling<'tcx, 'a> { ApproxAsyncFn, Ready { diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 901f39e9e6..c60b7bbd70 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -43,6 +43,15 @@ pub fn try_resolve_function<'tcx>( args: GenericArgsRef<'tcx>, ) -> Option> { let param_env = param_env.with_reveal_all_normalized(tcx); + trace!( + "resolving {def_id:?} with arguments {args:?} substituted for {:?}", + { + let g = tcx.generics_of(def_id); + (0..g.count()) + .map(|i| g.param_at(i, tcx)) + .collect::>() + } + ); if let Err(e) = test_generics_normalization(tcx, param_env, args) { panic!("Normalization failed: {e:?}"); diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 1d98dbf260..40b435ed77 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -383,6 +383,7 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { .with_level(lvl) .with_module_level("flowistry", lvl) .with_module_level("rustc_utils", log::LevelFilter::Error) + .without_timestamps() .init() .unwrap(); if matches!(*plugin_args.direct_debug(), LogLevelConfig::Targeted(..)) { diff --git a/crates/paralegal-policy/tests/entrypoint-generics.rs b/crates/paralegal-policy/tests/entrypoint-generics.rs index 3949b07e13..f61a442691 100644 --- a/crates/paralegal-policy/tests/entrypoint-generics.rs +++ b/crates/paralegal-policy/tests/entrypoint-generics.rs @@ -52,6 +52,43 @@ fn simple_parent() -> Result<()> { #[test] fn default_method() -> Result<()> { + let test = Test::new(stringify!( + #[paralegal::marker(source, return)] + fn actual_source() -> usize { + 0 + } + + trait Src { + fn source(&self) -> usize { + actual_source() + } + } + + #[paralegal::marker(sink, arguments = [0])] + fn actual_sink(t: T) {} + + trait Snk { + fn sink(&self, t: usize) { + actual_sink(t) + } + } + + struct Wrap(T); + + impl Wrap { + 
#[paralegal::analyze] + fn main(&self, s: &S) { + s.sink(self.0.source()) + } + } + ))?; + + test.run(simple_policy) +} + +#[test] +#[ignore = "Default methods with generics don't resolve properly. See https://github.com/brownsys/paralegal/issues/152"] +fn default_method_with_generic() -> Result<()> { let test = Test::new(stringify!( #[paralegal::marker(source, return)] fn actual_source() -> usize { From 7c5c8b4b7f2ba0682a51d1d3c48e49ee37b9184f Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sat, 25 May 2024 13:33:06 -0700 Subject: [PATCH 37/95] Fix freedit test case --- crates/paralegal-flow/src/ana/graph_converter.rs | 10 ++++++---- crates/paralegal-policy/tests/freedit.rs | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 5b48386f77..6349f2e25b 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -7,7 +7,7 @@ use paralegal_spdg::{Node, SPDGStats}; use rustc_hir::{def, def_id::LocalDefId}; use rustc_middle::{ mir::{self, Location}, - ty::{self, Instance, TyCtxt}, + ty::{self, Instance, ParamEnv, TyCtxt}, }; use std::{cell::RefCell, fmt::Display, rc::Rc}; @@ -168,12 +168,14 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { RichLocation::Location(loc) => { let instruction = body.instruction_at(loc); if let RustcInstructionKind::FunctionCall(f) = instruction.kind { - let f = flowistry_pdg_construction::utils::type_as_fn( + let (def_id, args) = flowistry_pdg_construction::utils::type_as_fn( self.tcx(), f.instantiate(self.tcx(), monos), ) - .unwrap() - .0; + .unwrap(); + let f = Instance::resolve(self.tcx(), ParamEnv::reveal_all(), def_id, args) + .unwrap() + .map_or(def_id, |i| i.def_id()); self.known_def_ids.extend(Some(f)); // Question: Could a function with no input produce an diff --git a/crates/paralegal-policy/tests/freedit.rs 
b/crates/paralegal-policy/tests/freedit.rs index 26e2b79dfb..480c371ab2 100644 --- a/crates/paralegal-policy/tests/freedit.rs +++ b/crates/paralegal-policy/tests/freedit.rs @@ -132,7 +132,7 @@ fn simple_monomorphization() -> Result<()> { #[paralegal::analyze] fn unconnected() { - Receiver.target(Donator.source()) + Donator.target(Receiver.source()) } ))?; test.run(|ctx| { @@ -146,7 +146,7 @@ fn simple_monomorphization() -> Result<()> { .filter(|n| n.controller_id() == ctx.id()) .collect(); - let expect_connect = ctx.current().name.as_str() != "connected"; + let expect_connect = ctx.current().name.as_str() == "connected"; assert_error!( ctx, From 30e9c0b784c8d04d0e7b2d28d6f9089b1bfba487 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sat, 25 May 2024 13:41:12 -0700 Subject: [PATCH 38/95] Make sure not to check asyncness for closures --- crates/flowistry_pdg_construction/src/construct.rs | 6 ++++-- crates/flowistry_pdg_construction/src/utils.rs | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 7cfe0104a1..b52bc98a48 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -36,7 +36,9 @@ use crate::{ graph::{DepEdge, DepGraph, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, try_resolve_function, - utils::{self, is_non_default_trait_method, manufacture_substs_for, try_monomorphize}, + utils::{ + self, is_async, is_non_default_trait_method, manufacture_substs_for, try_monomorphize, + }, Asyncness, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, SkipCall, }; @@ -997,7 +999,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { }); // Handle async functions at the time of polling, not when the future is created. 
- if tcx.asyncness(resolved_def_id).is_async() { + if is_async(tcx, resolved_def_id) { trace!(" Bailing because func is async"); // If a skip was requested then "poll" will not be inlined later so we diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index c60b7bbd70..dda6774250 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -36,6 +36,10 @@ fn test_generics_normalization<'tcx>( .map(|_| ()) } +pub fn is_async<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> bool { + !tcx.is_closure(def_id) && tcx.asyncness(def_id).is_async() +} + pub fn try_resolve_function<'tcx>( tcx: TyCtxt<'tcx>, def_id: DefId, From b486f2cc4ca7478ed255fafb81072511870e52c4 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sat, 25 May 2024 13:42:20 -0700 Subject: [PATCH 39/95] All test cases fixed or reported --- crates/paralegal-flow/src/ann/db.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index 05f4016602..75d6e95c63 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -21,7 +21,9 @@ use crate::{ }, DefId, Either, HashMap, HashSet, LocalDefId, TyCtxt, }; -use flowistry_pdg_construction::{determine_async, graph::InternedString, try_monomorphize}; +use flowistry_pdg_construction::{ + determine_async, graph::InternedString, try_monomorphize, utils::is_async, +}; use rustc_ast::Attribute; use rustc_hir::def::DefKind; use rustc_middle::{ @@ -108,7 +110,7 @@ impl<'tcx> MarkerCtx<'tcx> { if matches!(def_kind, DefKind::Generator) { if let Some(parent) = self.tcx().opt_parent(def_id) { if matches!(self.tcx().def_kind(parent), DefKind::AssocFn | DefKind::Fn) - && self.tcx().asyncness(parent).is_async() + && is_async(self.tcx(), parent) { return parent; } From 0d94bd980a2bcc458b2bfa9a38de1c4f377d6cba Mon Sep 17 00:00:00 2001 From: Justus Adam 
Date: Sat, 25 May 2024 16:39:48 -0700 Subject: [PATCH 40/95] Documentation and renaming --- .../src/async_support.rs | 19 ++- .../src/construct.rs | 147 ++++++++---------- .../flowistry_pdg_construction/src/graph.rs | 39 +++++ crates/flowistry_pdg_construction/src/lib.rs | 7 +- crates/flowistry_pdg_construction/src/meta.rs | 36 ----- .../flowistry_pdg_construction/src/utils.rs | 47 ++---- .../paralegal-flow/src/ana/graph_converter.rs | 3 +- crates/paralegal-flow/src/ana/metadata.rs | 4 +- crates/paralegal-flow/src/ann/db.rs | 4 +- crates/paralegal-flow/src/utils/mod.rs | 2 +- 10 files changed, 139 insertions(+), 169 deletions(-) delete mode 100644 crates/flowistry_pdg_construction/src/meta.rs diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index 805d9d1efe..8ad3c07a42 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -14,11 +14,15 @@ use rustc_middle::{ ty::{GenericArgsRef, Instance, TyCtxt}, }; -use crate::construct::{push_call_string_root, CallKind, SubgraphDescriptor}; - -use super::construct::GraphConstructor; -use super::utils::{self}; +use crate::{ + construct::{push_call_string_root, CallKind, LocalAnalysis}, + utils, SubgraphDescriptor, +}; +/// Describe in which way a function is `async`. +/// +/// Critically distinguishes between a normal `async fn` and an +/// `#[async_trait]`. #[derive(Debug, Clone, Copy, Decodable, Encodable)] pub enum Asyncness { No, @@ -158,6 +162,11 @@ fn get_async_generator<'tcx>(body: &Body<'tcx>) -> (LocalDefId, GenericArgsRef<' (def_id.expect_local(), generic_args, location) } +/// Try to interpret this function as an async function. +/// +/// If this is an async function it returns the [`Instance`] of the generator, +/// the location where the generator is bound and the type of [`Asyncness`] +/// which in this case is guaranteed to satisfy [`Asyncness::is_async`]. 
pub fn determine_async<'tcx>( tcx: TyCtxt<'tcx>, def_id: LocalDefId, @@ -184,7 +193,7 @@ pub enum AsyncDeterminationResult { NotAsync, } -impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { +impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { pub(crate) fn try_handle_as_async(&self) -> Option> { let (generator_fn, location, asyncness) = determine_async(self.tcx(), self.def_id, &self.body)?; diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index b52bc98a48..72cde7ddbe 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -1,3 +1,13 @@ +//! Constructing PDGs. +//! +//! The construction is split into two steps. A local analysis and a +//! cross-procedure PDG merging. +//! +//! 1. [`GraphConstructor`] is responsible for the local analysis. It performs a +//! procedure-local fixpoint analysis to determine a pre- and post effect +//! [`InstructionState`] at each instruction in the procedure. +//! 2. 
[`PartialGraph`] implements [`ResultsVisitor`] over the analysis result + use std::{collections::HashSet, iter, rc::Rc}; use either::Either; @@ -12,7 +22,6 @@ use rustc_borrowck::consumers::{places_conflict, BodyWithBorrowckFacts, PlaceCon use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_index::IndexVec; -use rustc_macros::{TyDecodable, TyEncodable}; use rustc_middle::{ mir::{ visit::Visitor, AggregateKind, BasicBlock, Body, Location, Operand, Place, PlaceElem, @@ -35,48 +44,31 @@ use crate::{ calling_convention::*, graph::{DepEdge, DepGraph, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, - try_resolve_function, utils::{ self, is_async, is_non_default_trait_method, manufacture_substs_for, try_monomorphize, + try_resolve_function, }, - Asyncness, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, SkipCall, + ArtifactLoader, Asyncness, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, + SkipCall, SubgraphDescriptor, }; -pub trait PDGLoader<'tcx> { - fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>>; -} - -pub struct NoLoader; - -impl<'tcx> PDGLoader<'tcx> for NoLoader { - fn load(&self, _: DefId) -> Option<&SubgraphDescriptor<'tcx>> { - None - } -} - -impl<'tcx, T: PDGLoader<'tcx>> PDGLoader<'tcx> for Rc { - fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { - (**self).load(function) - } -} - -impl<'tcx, T: PDGLoader<'tcx>> PDGLoader<'tcx> for Box { - fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { - (**self).load(function) - } -} - +/// A memoizing constructor of PDGs. +/// +/// Each `(LocalDefId, GenericArgs)` pair is guaranteed to be constructed only +/// once. 
pub struct MemoPdgConstructor<'tcx> { pub(crate) tcx: TyCtxt<'tcx>, pub(crate) call_change_callback: Option + 'tcx>>, pub(crate) dump_mir: bool, pub(crate) async_info: Rc, pub(crate) pdg_cache: PdgCache<'tcx>, - pub(crate) loader: Box + 'tcx>, + pub(crate) loader: Box + 'tcx>, } impl<'tcx> MemoPdgConstructor<'tcx> { - pub fn new(tcx: TyCtxt<'tcx>, loader: impl PDGLoader<'tcx> + 'tcx) -> Self { + /// Initialize the constructor, parameterized over an [`ArtifactLoader`] for + /// retrieving PDGs of functions from dependencies. + pub fn new(tcx: TyCtxt<'tcx>, loader: impl ArtifactLoader<'tcx> + 'tcx) -> Self { Self { tcx, call_change_callback: None, @@ -87,11 +79,14 @@ impl<'tcx> MemoPdgConstructor<'tcx> { } } + /// Dump the MIR of any function that is visited. pub fn with_dump_mir(&mut self, dump_mir: bool) -> &mut Self { self.dump_mir = dump_mir; self } + /// Register a callback to determine how to deal with function calls seen. + /// Overwrites any previously registered callback with no warning. pub fn with_call_change_callback( &mut self, callback: impl CallChangeCallback<'tcx> + 'tcx, @@ -100,6 +95,8 @@ impl<'tcx> MemoPdgConstructor<'tcx> { self } + /// Construct the intermediate PDG for this function. Instantiates any + /// generic arguments as `dyn `. pub fn construct_root<'a>( &'a self, function: LocalDefId, @@ -122,7 +119,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { let generics = resolution.args; if let Some(local) = def_id.as_local() { self.pdg_cache.get_maybe_recursive((local, generics), |_| { - let g = GraphConstructor::new(self, resolution).construct_partial(); + let g = LocalAnalysis::new(self, resolution).construct_partial(); g.check_invariants(); g }) @@ -131,6 +128,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { } } + /// Has a PDG been constructed for this instance before? 
pub fn is_in_cache(&self, resolution: Instance<'tcx>) -> bool { if let Some(local) = resolution.def_id().as_local() { self.pdg_cache.is_in_cache(&(local, resolution.args)) @@ -139,6 +137,8 @@ impl<'tcx> MemoPdgConstructor<'tcx> { } } + /// Construct a final PDG for this function. Same as + /// [`Self::construct_root`] this instantiates all generics as `dyn`. pub fn construct_graph(&self, function: LocalDefId) -> Result, ErrorGuaranteed> { let _args = manufacture_substs_for(self.tcx, function.to_def_id())?; let g = self @@ -171,14 +171,14 @@ impl<'tcx> df::JoinSemiLattice for InstructionState<'tcx> { } } -impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx>>> +impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>> for PartialGraph<'tcx> { - type FlowState = as AnalysisDomain<'tcx>>::Domain; + type FlowState = <&'mir LocalAnalysis<'tcx, 'mir> as AnalysisDomain<'tcx>>::Domain; fn visit_statement_before_primary_effect( &mut self, - results: &Results<'tcx, DfAnalysis<'mir, 'tcx>>, + results: &Results<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>, state: &Self::FlowState, statement: &'mir rustc_middle::mir::Statement<'tcx>, location: Location, @@ -206,7 +206,7 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> /// call site. 
fn visit_terminator_before_primary_effect( &mut self, - results: &Results<'tcx, DfAnalysis<'mir, 'tcx>>, + results: &Results<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>, state: &Self::FlowState, terminator: &'mir rustc_middle::mir::Terminator<'tcx>, location: Location, @@ -233,7 +233,7 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> { trace!("Handling terminator {:?} as not inlined", terminator.kind); let mut arg_vis = ModularMutationVisitor::new( - &results.analysis.0.place_info, + &results.analysis.place_info, move |location, mutation| { self.register_mutation( results, @@ -254,13 +254,13 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> fn visit_terminator_after_primary_effect( &mut self, - results: &Results<'tcx, DfAnalysis<'mir, 'tcx>>, - state: &Self::FlowState, + results: &Results<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>, + state: &<&'mir LocalAnalysis<'tcx, 'mir> as AnalysisDomain<'tcx>>::Domain, terminator: &'mir rustc_middle::mir::Terminator<'tcx>, location: Location, ) { if let TerminatorKind::Call { func, args, .. 
} = &terminator.kind { - let constructor = results.analysis.0; + let constructor = results.analysis; if matches!( constructor.determine_call_handling(location, func, args), @@ -271,9 +271,8 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> } trace!("Handling terminator {:?} as not inlined", terminator.kind); - let mut arg_vis = ModularMutationVisitor::new( - &results.analysis.0.place_info, - move |location, mutation| { + let mut arg_vis = + ModularMutationVisitor::new(&results.analysis.place_info, move |location, mutation| { self.register_mutation( results, state, @@ -284,8 +283,7 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, DfAnalysis<'mir, 'tcx> location, mutation.mutation_reason, ) - }, - ); + }); arg_vis.set_time(Time::After); arg_vis.visit_terminator(terminator, location); } @@ -305,12 +303,12 @@ fn as_arg<'tcx>(node: &DepNode<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> } impl<'tcx> PartialGraph<'tcx> { - fn modular_mutation_visitor<'a>( + fn modular_mutation_visitor<'a, 'mir>( &'a mut self, - results: &'a Results<'tcx, DfAnalysis<'a, 'tcx>>, + results: &'a Results<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>, state: &'a InstructionState<'tcx>, ) -> ModularMutationVisitor<'a, 'tcx, impl FnMut(Location, Mutation<'tcx>) + 'a> { - ModularMutationVisitor::new(&results.analysis.0.place_info, move |location, mutation| { + ModularMutationVisitor::new(&results.analysis.place_info, move |location, mutation| { self.register_mutation( results, state, @@ -347,10 +345,10 @@ impl<'tcx> PartialGraph<'tcx> { .filter(|node| node.0.at.leaf().location.is_end()) } - fn handle_as_inline( + fn handle_as_inline<'a>( &mut self, - results: &Results<'tcx, DfAnalysis<'_, 'tcx>>, - state: & as AnalysisDomain<'tcx>>::Domain, + results: &Results<'tcx, &'a LocalAnalysis<'tcx, 'a>>, + state: &<&'a LocalAnalysis<'tcx, 'a> as AnalysisDomain<'tcx>>::Domain, terminator: &Terminator<'tcx>, location: Location, ) -> Option<()> { @@ -363,7 +361,7 @@ 
impl<'tcx> PartialGraph<'tcx> { else { return None; }; - let constructor = results.analysis.0; + let constructor = results.analysis; let gloc = GlobalLocation { location: location.into(), function: constructor.def_id.to_def_id(), @@ -390,7 +388,7 @@ impl<'tcx> PartialGraph<'tcx> { } CallHandling::ApproxAsyncSM(how) => { how( - constructor, + &constructor, &mut self.modular_mutation_visitor(results, state), args, *destination, @@ -467,9 +465,9 @@ impl<'tcx> PartialGraph<'tcx> { Some(()) } - fn register_mutation( + fn register_mutation<'a>( &mut self, - results: &Results<'tcx, DfAnalysis<'_, 'tcx>>, + results: &Results<'tcx, &'a LocalAnalysis<'tcx, 'a>>, state: &InstructionState<'tcx>, inputs: Inputs<'tcx>, mutated: Either, DepNode<'tcx>>, @@ -477,7 +475,7 @@ impl<'tcx> PartialGraph<'tcx> { target_use: TargetUse, ) { trace!("Registering mutation to {mutated:?} with inputs {inputs:?} at {location:?}"); - let constructor = results.analysis.0; + let constructor = results.analysis; let ctrl_inputs = constructor.find_control_inputs(location); trace!(" Found control inputs {ctrl_inputs:?}"); @@ -505,7 +503,6 @@ impl<'tcx> PartialGraph<'tcx> { Either::Right(node) => vec![node], Either::Left(place) => results .analysis - .0 .find_outputs(state, place, location) .into_iter() .map(|t| t.1) @@ -542,7 +539,7 @@ impl<'tcx> PartialGraph<'tcx> { type PdgCache<'tcx> = Rc), SubgraphDescriptor<'tcx>>>; -pub struct GraphConstructor<'tcx, 'a> { +pub struct LocalAnalysis<'tcx, 'a> { pub(crate) memo: &'a MemoPdgConstructor<'tcx>, pub(super) root: Instance<'tcx>, body_with_facts: &'tcx BodyWithBorrowckFacts<'tcx>, @@ -570,12 +567,12 @@ enum Inputs<'tcx> { }, } -impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { +impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { /// Creates [`GraphConstructor`] for a function resolved as `fn_resolution` in a given `calling_context`. 
pub(crate) fn new( memo: &'a MemoPdgConstructor<'tcx>, root: Instance<'tcx>, - ) -> GraphConstructor<'tcx, 'a> { + ) -> LocalAnalysis<'tcx, 'a> { let tcx = memo.tcx; let def_id = root.def_id().expect_local(); let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, def_id); @@ -602,7 +599,7 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { let body_assignments = utils::find_body_assignments(&body); - GraphConstructor { + LocalAnalysis { memo, root, body_with_facts, @@ -1151,12 +1148,12 @@ impl<'tcx, 'a> GraphConstructor<'tcx, 'a> { } } - pub(crate) fn construct_partial(&self) -> SubgraphDescriptor<'tcx> { + pub(crate) fn construct_partial(&'a self) -> SubgraphDescriptor<'tcx> { if let Some(g) = self.try_handle_as_async() { return g; } - let mut analysis = DfAnalysis(self) + let mut analysis = self .into_engine(self.tcx(), &self.body) .iterate_to_fixpoint(); @@ -1252,13 +1249,8 @@ pub enum CallKind<'tcx> { AsyncPoll(Instance<'tcx>, Location, Place<'tcx>), } -type ApproximationHandler<'tcx, 'a> = fn( - &GraphConstructor<'tcx, 'a>, - &mut dyn Visitor<'tcx>, - &[Operand<'tcx>], - Place<'tcx>, - Location, -); +type ApproximationHandler<'tcx, 'a> = + fn(&LocalAnalysis<'tcx, 'a>, &mut dyn Visitor<'tcx>, &[Operand<'tcx>], Place<'tcx>, Location); pub(crate) trait TransformCallString { fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self; @@ -1337,13 +1329,6 @@ pub(crate) fn push_call_string_root( old.transform_call_string(|c| c.push_front(new_root)) } -#[derive(TyEncodable, TyDecodable, Debug, Clone)] -pub struct SubgraphDescriptor<'tcx> { - pub graph: PartialGraph<'tcx>, - pub(crate) parentable_srcs: Vec<(DepNode<'tcx>, Option)>, - pub(crate) parentable_dsts: Vec<(DepNode<'tcx>, Option)>, -} - impl<'tcx> SubgraphDescriptor<'tcx> { pub fn to_petgraph(&self) -> DepGraph<'tcx> { let domain = &self.graph; @@ -1389,9 +1374,7 @@ enum CallHandling<'tcx, 'a> { ApproxAsyncSM(ApproximationHandler<'tcx, 'a>), } -struct DfAnalysis<'a, 'tcx>(&'a 
GraphConstructor<'tcx, 'a>); - -impl<'tcx> df::AnalysisDomain<'tcx> for DfAnalysis<'_, 'tcx> { +impl<'tcx, 'a> df::AnalysisDomain<'tcx> for &'a LocalAnalysis<'tcx, 'a> { type Domain = InstructionState<'tcx>; const NAME: &'static str = "GraphConstructor"; @@ -1403,15 +1386,14 @@ impl<'tcx> df::AnalysisDomain<'tcx> for DfAnalysis<'_, 'tcx> { fn initialize_start_block(&self, _body: &Body<'tcx>, _state: &mut Self::Domain) {} } -impl<'tcx> df::Analysis<'tcx> for DfAnalysis<'_, 'tcx> { +impl<'a, 'tcx> df::Analysis<'tcx> for &'a LocalAnalysis<'tcx, 'a> { fn apply_statement_effect( &mut self, state: &mut Self::Domain, statement: &Statement<'tcx>, location: Location, ) { - self.0 - .modular_mutation_visitor(state) + self.modular_mutation_visitor(state) .visit_statement(statement, location) } @@ -1421,8 +1403,7 @@ impl<'tcx> df::Analysis<'tcx> for DfAnalysis<'_, 'tcx> { terminator: &'mir Terminator<'tcx>, location: Location, ) -> TerminatorEdges<'mir, 'tcx> { - self.0 - .handle_terminator(terminator, state, location, Time::Unspecified); + self.handle_terminator(terminator, state, location, Time::Unspecified); terminator.edges() } diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 9d48ac5df2..10c4619232 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -4,12 +4,14 @@ use std::{ fmt::{self, Display}, hash::Hash, path::Path, + rc::Rc, }; use flowistry_pdg::CallString; use internment::Intern; use petgraph::{dot, graph::DiGraph}; use rustc_hash::{FxHashMap, FxHashSet}; +use rustc_hir::def_id::{DefId, DefIndex}; use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ mir::{Body, Place}, @@ -298,3 +300,40 @@ impl<'tcx> PartialGraph<'tcx> { } } } + +/// Abstracts over how previously written [`Artifact`]s are retrieved, allowing +/// the user of this module to chose where to store them. 
+pub trait ArtifactLoader<'tcx> { + fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>>; +} + +/// Intermediate data that gets stored for each crate. +pub type Artifact<'tcx> = FxHashMap>; + +/// An [`ArtifactLoader`] that always returns `None`. +pub struct NoLoader; + +impl<'tcx> ArtifactLoader<'tcx> for NoLoader { + fn load(&self, _: DefId) -> Option<&SubgraphDescriptor<'tcx>> { + None + } +} + +impl<'tcx, T: ArtifactLoader<'tcx>> ArtifactLoader<'tcx> for Rc { + fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { + (**self).load(function) + } +} + +impl<'tcx, T: ArtifactLoader<'tcx>> ArtifactLoader<'tcx> for Box { + fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { + (**self).load(function) + } +} + +#[derive(TyEncodable, TyDecodable, Debug, Clone)] +pub struct SubgraphDescriptor<'tcx> { + pub graph: PartialGraph<'tcx>, + pub(crate) parentable_srcs: Vec<(DepNode<'tcx>, Option)>, + pub(crate) parentable_dsts: Vec<(DepNode<'tcx>, Option)>, +} diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index 0b4ad6bcbd..195a667baf 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -15,22 +15,19 @@ extern crate rustc_span; extern crate rustc_target; extern crate rustc_type_ir; -pub use self::graph::DepGraph; -pub use async_support::{determine_async, is_async_trait_fn, match_async_trait_assign, Asyncness}; +pub use async_support::{determine_async, is_async_trait_fn, Asyncness}; +pub use graph::{Artifact, ArtifactLoader, DepGraph, NoLoader, SubgraphDescriptor}; pub mod callback; pub use crate::construct::MemoPdgConstructor; pub use callback::{ CallChangeCallback, CallChangeCallbackFn, CallChanges, CallInfo, InlineMissReason, SkipCall, }; -pub use construct::{NoLoader, PDGLoader, SubgraphDescriptor}; use rustc_middle::ty::{Instance, TyCtxt}; -pub use utils::{is_non_default_trait_method, try_monomorphize, 
try_resolve_function}; mod async_support; mod calling_convention; mod construct; pub mod graph; -pub mod meta; mod mutation; pub mod utils; diff --git a/crates/flowistry_pdg_construction/src/meta.rs b/crates/flowistry_pdg_construction/src/meta.rs deleted file mode 100644 index 43018b3224..0000000000 --- a/crates/flowistry_pdg_construction/src/meta.rs +++ /dev/null @@ -1,36 +0,0 @@ -use rustc_hash::FxHashMap; -use rustc_hir::def_id::{DefIndex, LocalDefId}; -use rustc_middle::ty::TyCtxt; - -use crate::{construct::SubgraphDescriptor, MemoPdgConstructor, PDGLoader}; - -pub struct MetadataCollector { - targets: Vec, -} - -impl MetadataCollector { - pub fn add_target(&mut self, target: LocalDefId) { - self.targets.push(target) - } - - pub fn into_metadata<'tcx>( - self, - tcx: TyCtxt<'tcx>, - loader: impl PDGLoader<'tcx> + 'tcx, - ) -> FxHashMap> { - let constructor = MemoPdgConstructor::new(tcx, loader); - self.targets - .into_iter() - .map(|t| { - ( - t.local_def_index, - (*constructor.construct_root(t).unwrap()).clone(), - ) - }) - .collect::>() - } - - pub fn new() -> Self { - Self { targets: vec![] } - } -} diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index dda6774250..33f81a9027 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -19,27 +19,12 @@ use rustc_span::ErrorGuaranteed; use rustc_type_ir::{fold::TypeFoldable, AliasKind}; use rustc_utils::{BodyExt, PlaceExt}; -/// Try and normalize the provided generics. -/// -/// The purpose of this function is to test whether resolving these generics -/// will return an error. We need this because [`ty::Instance::resolve`] fails -/// with a hard error when this normalization fails (even though it returns -/// [`Result`]). 
However legitimate situations can arise in the code where this -/// normalization fails for which we want to report warnings but carry on with -/// the analysis which a hard error doesn't allow us to do. -fn test_generics_normalization<'tcx>( - tcx: TyCtxt<'tcx>, - param_env: ParamEnv<'tcx>, - args: &'tcx ty::List>, -) -> Result<(), ty::normalize_erasing_regions::NormalizationError<'tcx>> { - tcx.try_normalize_erasing_regions(param_env, args) - .map(|_| ()) -} - +/// An async check that does not crash if called on closures. pub fn is_async<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> bool { !tcx.is_closure(def_id) && tcx.asyncness(def_id).is_async() } +/// Resolve the `def_id` item to an instance. pub fn try_resolve_function<'tcx>( tcx: TyCtxt<'tcx>, def_id: DefId, @@ -47,22 +32,10 @@ pub fn try_resolve_function<'tcx>( args: GenericArgsRef<'tcx>, ) -> Option> { let param_env = param_env.with_reveal_all_normalized(tcx); - trace!( - "resolving {def_id:?} with arguments {args:?} substituted for {:?}", - { - let g = tcx.generics_of(def_id); - (0..g.count()) - .map(|i| g.param_at(i, tcx)) - .collect::>() - } - ); - - if let Err(e) = test_generics_normalization(tcx, param_env, args) { - panic!("Normalization failed: {e:?}"); - } Instance::resolve(tcx, param_env, def_id, args).unwrap() } +/// Returns the default implementation of this method if it is a trait method. pub fn is_non_default_trait_method(tcx: TyCtxt, function: DefId) -> Option { let assoc_item = tcx.opt_associated_item(function)?; if assoc_item.container != ty::AssocItemContainer::TraitContainer @@ -73,6 +46,7 @@ pub fn is_non_default_trait_method(tcx: TyCtxt, function: DefId) -> Option( inst: Instance<'tcx>, tcx: TyCtxt<'tcx>, @@ -89,6 +63,8 @@ where ) } +/// Attempt to interpret this type as a statically determinable function and its +/// generic arguments. 
pub fn type_as_fn<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<(DefId, GenericArgsRef<'tcx>)> { let ty = ty_resolve(ty, tcx); match ty.kind() { @@ -101,7 +77,7 @@ pub fn type_as_fn<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<(DefId, Gener } } -pub fn retype_place<'tcx>( +pub(crate) fn retype_place<'tcx>( orig: Place<'tcx>, tcx: TyCtxt<'tcx>, body: &Body<'tcx>, @@ -169,7 +145,7 @@ pub fn retype_place<'tcx>( p } -pub fn hashset_join( +pub(crate) fn hashset_join( hs1: &mut FxHashSet, hs2: &FxHashSet, ) -> bool { @@ -178,7 +154,7 @@ pub fn hashset_join( hs1.len() != orig_len } -pub fn hashmap_join( +pub(crate) fn hashmap_join( hm1: &mut FxHashMap, hm2: &FxHashMap, join: impl Fn(&mut V, &V) -> bool, @@ -198,9 +174,9 @@ pub fn hashmap_join( changed } -pub type BodyAssignments = FxHashMap>; +pub(crate) type BodyAssignments = FxHashMap>; -pub fn find_body_assignments(body: &Body<'_>) -> BodyAssignments { +pub(crate) fn find_body_assignments(body: &Body<'_>) -> BodyAssignments { body.all_locations() .filter_map(|location| match body.stmt_at(location) { Either::Left(Statement { @@ -218,6 +194,7 @@ pub fn find_body_assignments(body: &Body<'_>) -> BodyAssignments { .collect() } +/// Resolve through type aliases pub fn ty_resolve<'tcx>(ty: Ty<'tcx>, tcx: TyCtxt<'tcx>) -> Ty<'tcx> { match ty.kind() { TyKind::Alias(AliasKind::Opaque, alias_ty) => tcx.type_of(alias_ty.def_id).skip_binder(), diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 6349f2e25b..c0cd63baa2 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -20,7 +20,8 @@ use anyhow::Result; use either::Either; use flowistry_pdg_construction::{ graph::{DepEdge, DepEdgeKind, DepGraph, DepNode}, - try_monomorphize, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, + utils::try_monomorphize, + CallChangeCallback, CallChanges, CallInfo, InlineMissReason, SkipCall::Skip, }; 
use petgraph::{ diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index c036a07cc1..244ca61be1 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -10,7 +10,7 @@ use std::path::Path; use std::{fs::File, io::Read, rc::Rc}; use flowistry_pdg_construction::{ - graph::InternedString, Asyncness, DepGraph, MemoPdgConstructor, PDGLoader, SubgraphDescriptor, + graph::InternedString, Asyncness, DepGraph, MemoPdgConstructor, ArtifactLoader, SubgraphDescriptor, }; use rustc_hash::FxHashMap; @@ -54,7 +54,7 @@ pub enum MetadataLoaderError { use MetadataLoaderError::*; -impl<'tcx> PDGLoader<'tcx> for MetadataLoader<'tcx> { +impl<'tcx> ArtifactLoader<'tcx> for MetadataLoader<'tcx> { fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { self.get_metadata(function.krate) .ok()? diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index 75d6e95c63..39b0c3be47 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -22,7 +22,9 @@ use crate::{ DefId, Either, HashMap, HashSet, LocalDefId, TyCtxt, }; use flowistry_pdg_construction::{ - determine_async, graph::InternedString, try_monomorphize, utils::is_async, + determine_async, + graph::InternedString, + utils::{is_async, try_monomorphize}, }; use rustc_ast::Attribute; use rustc_hir::def::DefKind; diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index 47130b56c2..4f8798f5a6 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -8,7 +8,7 @@ use smallvec::SmallVec; use crate::{desc::Identifier, rustc_span::ErrorGuaranteed, Either, Symbol, TyCtxt}; -pub use flowistry_pdg_construction::is_non_default_trait_method; +pub use flowistry_pdg_construction::utils::is_non_default_trait_method; pub use paralegal_spdg::{ShortHash, TinyBitSet}; use rustc_ast as ast; From 
71e876f3404f43207675ef8e5553ea98cf72fca1 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sat, 25 May 2024 17:16:00 -0700 Subject: [PATCH 41/95] Remove SubgraphDescriptor type --- .../src/async_support.rs | 11 +- .../src/construct.rs | 125 ++++-------------- .../flowistry_pdg_construction/src/graph.rs | 93 +++++++++++-- crates/flowistry_pdg_construction/src/lib.rs | 2 +- crates/paralegal-flow/src/ana/metadata.rs | 14 +- 5 files changed, 115 insertions(+), 130 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index 8ad3c07a42..53a1f2d79f 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -16,7 +16,7 @@ use rustc_middle::{ use crate::{ construct::{push_call_string_root, CallKind, LocalAnalysis}, - utils, SubgraphDescriptor, + utils, PartialGraph, }; /// Describe in which way a function is `async`. @@ -194,7 +194,7 @@ pub enum AsyncDeterminationResult { } impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { - pub(crate) fn try_handle_as_async(&self) -> Option> { + pub(crate) fn try_handle_as_async(&self) -> Option> { let (generator_fn, location, asyncness) = determine_async(self.tcx(), self.def_id, &self.body)?; @@ -205,11 +205,8 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { }; let mut new_g = push_call_string_root(g, gloc); //let g_generics = std::mem::replace(&mut new_g.graph.generics, self.generic_args()); - new_g.graph.asyncness = asyncness; - new_g - .graph - .monos - .insert(CallString::single(gloc), new_g.graph.generics); + new_g.asyncness = asyncness; + new_g.monos.insert(CallString::single(gloc), new_g.generics); Some(new_g) } diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 72cde7ddbe..371a8dab18 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -49,7 +49,7 @@ 
use crate::{ try_resolve_function, }, ArtifactLoader, Asyncness, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, - SkipCall, SubgraphDescriptor, + SkipCall, }; /// A memoizing constructor of PDGs. @@ -97,10 +97,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { /// Construct the intermediate PDG for this function. Instantiates any /// generic arguments as `dyn `. - pub fn construct_root<'a>( - &'a self, - function: LocalDefId, - ) -> Option<&'a SubgraphDescriptor<'tcx>> { + pub fn construct_root<'a>(&'a self, function: LocalDefId) -> Option<&'a PartialGraph<'tcx>> { let generics = manufacture_substs_for(self.tcx, function.to_def_id()).unwrap(); let resolution = try_resolve_function( self.tcx, @@ -114,7 +111,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { pub(crate) fn construct_for<'a>( &'a self, resolution: Instance<'tcx>, - ) -> Option<&'a SubgraphDescriptor<'tcx>> { + ) -> Option<&'a PartialGraph<'tcx>> { let def_id = resolution.def_id(); let generics = resolution.args; if let Some(local) = def_id.as_local() { @@ -155,7 +152,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { } #[derive(PartialEq, Eq, Default, Clone, Debug)] -pub struct InstructionState<'tcx> { +pub(crate) struct InstructionState<'tcx> { last_mutation: FxHashMap, FxHashSet>, } @@ -321,29 +318,6 @@ impl<'tcx> PartialGraph<'tcx> { ) }) } - fn parentable_srcs<'a>( - &'a self, - def_id: LocalDefId, - body: &'a Body<'tcx>, - ) -> impl Iterator, Option)> + 'a { - self.edges - .iter() - .map(|(src, _, _)| *src) - .filter_map(move |a| Some((a, as_arg(&a, def_id, body)?))) - .filter(|(node, _)| node.at.leaf().location.is_start()) - } - - fn parentable_dsts<'a>( - &'a self, - def_id: LocalDefId, - body: &'a Body<'tcx>, - ) -> impl Iterator, Option)> + 'a { - self.edges - .iter() - .map(|(_, dst, _)| *dst) - .filter_map(move |a| Some((a, as_arg(&a, def_id, body)?))) - .filter(|node| node.0.at.leaf().location.is_end()) - } fn handle_as_inline<'a>( &mut self, @@ -398,16 +372,12 @@ impl<'tcx> PartialGraph<'tcx> { } }; 
- let SubgraphDescriptor { - graph: child_graph, - parentable_srcs, - parentable_dsts, - } = push_call_string_root(child_descriptor, gloc); + let child_graph = push_call_string_root(child_descriptor, gloc); // For each source node CHILD that is parentable to PLACE, // add an edge from PLACE -> CHILD. trace!("PARENT -> CHILD EDGES:"); - for (child_src, _kind) in parentable_srcs { + for (child_src, _kind) in child_graph.parentable_srcs() { if let Some(parent_place) = calling_convention.translate_to_parent( child_src.place, constructor.async_info(), @@ -435,7 +405,7 @@ impl<'tcx> PartialGraph<'tcx> { // PRECISION TODO: for a given child place, we only want to connect // the *last* nodes in the child function to the parent, not *all* of them. trace!("CHILD -> PARENT EDGES:"); - for (child_dst, kind) in parentable_dsts { + for (child_dst, kind) in child_graph.parentable_dsts() { if let Some(parent_place) = calling_convention.translate_to_parent( child_dst.place, constructor.async_info(), @@ -461,7 +431,7 @@ impl<'tcx> PartialGraph<'tcx> { self.edges.extend(child_graph.edges); self.monos.extend(child_graph.monos); self.monos - .insert(CallString::single(gloc), child_descriptor.graph.generics); + .insert(CallString::single(gloc), child_descriptor.generics); Some(()) } @@ -537,9 +507,9 @@ impl<'tcx> PartialGraph<'tcx> { } } -type PdgCache<'tcx> = Rc), SubgraphDescriptor<'tcx>>>; +type PdgCache<'tcx> = Rc), PartialGraph<'tcx>>>; -pub struct LocalAnalysis<'tcx, 'a> { +pub(crate) struct LocalAnalysis<'tcx, 'a> { pub(crate) memo: &'a MemoPdgConstructor<'tcx>, pub(super) root: Instance<'tcx>, body_with_facts: &'tcx BodyWithBorrowckFacts<'tcx>, @@ -1075,7 +1045,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { } }; - let parentable_dsts = &child_constructor.parentable_dsts; + let parentable_dsts = child_constructor.parentable_dsts(); let parent_body = &self.body; let translate_to_parent = |child: Place<'tcx>| -> Option> { calling_convention.translate_to_parent( @@ -1148,7 +1118,7 @@ 
impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { } } - pub(crate) fn construct_partial(&'a self) -> SubgraphDescriptor<'tcx> { + pub(crate) fn construct_partial(&'a self) -> PartialGraph<'tcx> { if let Some(g) = self.try_handle_as_async() { return g; } @@ -1157,7 +1127,12 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { .into_engine(self.tcx(), &self.body) .iterate_to_fixpoint(); - let mut final_state = PartialGraph::new(Asyncness::No, self.generic_args()); + let mut final_state = PartialGraph::new( + Asyncness::No, + self.generic_args(), + self.def_id.to_def_id(), + self.body.arg_count, + ); analysis.visit_reachable_with(&self.body, &mut final_state); @@ -1187,15 +1162,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { } } - SubgraphDescriptor { - parentable_dsts: final_state - .parentable_dsts(self.def_id, &self.body) - .collect(), - parentable_srcs: final_state - .parentable_srcs(self.def_id, &self.body) - .collect(), - graph: final_state, - } + final_state } /// Determine the type of call-site. @@ -1280,48 +1247,6 @@ impl TransformCallString for DepEdge { } } -impl<'tcx> TransformCallString for PartialGraph<'tcx> { - fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { - let recurse_node = |n: &DepNode<'tcx>| n.transform_call_string(&f); - Self { - generics: self.generics, - asyncness: self.asyncness, - nodes: self.nodes.iter().map(recurse_node).collect(), - edges: self - .edges - .iter() - .map(|(from, to, e)| { - ( - recurse_node(from), - recurse_node(to), - e.transform_call_string(&f), - ) - }) - .collect(), - monos: self - .monos - .iter() - .map(|(cs, args)| (f(*cs), *args)) - .collect(), - } - } -} - -impl<'tcx> TransformCallString for SubgraphDescriptor<'tcx> { - fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { - let transform_vec = |v: &Vec<(DepNode<'tcx>, Option)>| { - v.iter() - .map(|(n, idx)| (n.transform_call_string(&f), *idx)) - .collect::>() - }; - Self { - graph: self.graph.transform_call_string(&f), - 
parentable_dsts: transform_vec(&self.parentable_dsts), - parentable_srcs: transform_vec(&self.parentable_srcs), - } - } -} - pub(crate) fn push_call_string_root( old: &T, new_root: GlobalLocation, @@ -1329,9 +1254,9 @@ pub(crate) fn push_call_string_root( old.transform_call_string(|c| c.push_front(new_root)) } -impl<'tcx> SubgraphDescriptor<'tcx> { +impl<'tcx> PartialGraph<'tcx> { pub fn to_petgraph(&self) -> DepGraph<'tcx> { - let domain = &self.graph; + let domain = self; let mut graph: DiGraph, DepEdge> = DiGraph::new(); let mut nodes = FxHashMap::default(); macro_rules! add_node { @@ -1354,11 +1279,11 @@ impl<'tcx> SubgraphDescriptor<'tcx> { } fn check_invariants(&self) { - let root_function = self.graph.nodes.iter().next().unwrap().at.root().function; - for n in &self.graph.nodes { + let root_function = self.nodes.iter().next().unwrap().at.root().function; + for n in &self.nodes { assert_eq!(n.at.root().function, root_function); } - for (_, _, e) in &self.graph.edges { + for (_, _, e) in &self.edges { assert_eq!(e.at.root().function, root_function); } } @@ -1369,7 +1294,7 @@ enum CallHandling<'tcx, 'a> { ApproxAsyncFn, Ready { calling_convention: CallingConvention<'tcx, 'a>, - descriptor: &'a SubgraphDescriptor<'tcx>, + descriptor: &'a PartialGraph<'tcx>, }, ApproxAsyncSM(ApproximationHandler<'tcx, 'a>), } diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 10c4619232..73f1bdc075 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -24,7 +24,7 @@ use rustc_utils::PlaceExt; pub use flowistry_pdg::{RichLocation, SourceUse, TargetUse}; use serde::{Deserialize, Serialize}; -use crate::Asyncness; +use crate::{construct::TransformCallString, Asyncness}; /// A node in the program dependency graph. 
/// @@ -287,16 +287,88 @@ pub struct PartialGraph<'tcx> { pub monos: FxHashMap>, pub generics: GenericArgsRef<'tcx>, pub asyncness: Asyncness, + def_id: DefId, + arg_count: usize, } impl<'tcx> PartialGraph<'tcx> { - pub fn new(asyncness: Asyncness, generics: GenericArgsRef<'tcx>) -> Self { + pub fn new( + asyncness: Asyncness, + generics: GenericArgsRef<'tcx>, + def_id: DefId, + arg_count: usize, + ) -> Self { Self { nodes: Default::default(), edges: Default::default(), monos: Default::default(), generics, asyncness, + def_id, + arg_count, + } + } + + pub(crate) fn parentable_srcs<'a>( + &'a self, + ) -> impl Iterator, Option)> + 'a { + self.edges + .iter() + .map(|(src, _, _)| *src) + .filter_map(move |a| Some((a, as_arg(&a, self.def_id, self.arg_count)?))) + .filter(|(node, _)| node.at.leaf().location.is_start()) + } + + pub(crate) fn parentable_dsts<'a>( + &'a self, + ) -> impl Iterator, Option)> + 'a { + self.edges + .iter() + .map(|(_, dst, _)| *dst) + .filter_map(move |a| Some((a, as_arg(&a, self.def_id, self.arg_count)?))) + .filter(|node| node.0.at.leaf().location.is_end()) + } +} + +fn as_arg<'tcx>(node: &DepNode<'tcx>, def_id: DefId, arg_count: usize) -> Option> { + if node.at.leaf().function != def_id { + return None; + } + let local = node.place.local.as_usize(); + if node.place.local == rustc_middle::mir::RETURN_PLACE { + Some(None) + } else if local > 0 && (local - 1) < arg_count { + Some(Some(node.place.local.as_u32() as u8 - 1)) + } else { + None + } +} + +impl<'tcx> TransformCallString for PartialGraph<'tcx> { + fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { + let recurse_node = |n: &DepNode<'tcx>| n.transform_call_string(&f); + Self { + generics: self.generics, + asyncness: self.asyncness, + nodes: self.nodes.iter().map(recurse_node).collect(), + edges: self + .edges + .iter() + .map(|(from, to, e)| { + ( + recurse_node(from), + recurse_node(to), + e.transform_call_string(&f), + ) + }) + .collect(), + monos: self + 
.monos + .iter() + .map(|(cs, args)| (f(*cs), *args)) + .collect(), + def_id: self.def_id, + arg_count: self.arg_count, } } } @@ -304,36 +376,29 @@ impl<'tcx> PartialGraph<'tcx> { /// Abstracts over how previously written [`Artifact`]s are retrieved, allowing /// the user of this module to chose where to store them. pub trait ArtifactLoader<'tcx> { - fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>>; + fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>>; } /// Intermediate data that gets stored for each crate. -pub type Artifact<'tcx> = FxHashMap>; +pub type Artifact<'tcx> = FxHashMap>; /// An [`ArtifactLoader`] that always returns `None`. pub struct NoLoader; impl<'tcx> ArtifactLoader<'tcx> for NoLoader { - fn load(&self, _: DefId) -> Option<&SubgraphDescriptor<'tcx>> { + fn load(&self, _: DefId) -> Option<&PartialGraph<'tcx>> { None } } impl<'tcx, T: ArtifactLoader<'tcx>> ArtifactLoader<'tcx> for Rc { - fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { + fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>> { (**self).load(function) } } impl<'tcx, T: ArtifactLoader<'tcx>> ArtifactLoader<'tcx> for Box { - fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { + fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>> { (**self).load(function) } } - -#[derive(TyEncodable, TyDecodable, Debug, Clone)] -pub struct SubgraphDescriptor<'tcx> { - pub graph: PartialGraph<'tcx>, - pub(crate) parentable_srcs: Vec<(DepNode<'tcx>, Option)>, - pub(crate) parentable_dsts: Vec<(DepNode<'tcx>, Option)>, -} diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index 195a667baf..999fb80fd6 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -16,7 +16,7 @@ extern crate rustc_target; extern crate rustc_type_ir; pub use async_support::{determine_async, is_async_trait_fn, Asyncness}; -pub use 
graph::{Artifact, ArtifactLoader, DepGraph, NoLoader, SubgraphDescriptor}; +pub use graph::{Artifact, ArtifactLoader, DepGraph, NoLoader, PartialGraph}; pub mod callback; pub use crate::construct::MemoPdgConstructor; pub use callback::{ diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 244ca61be1..5896398ebf 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -10,7 +10,7 @@ use std::path::Path; use std::{fs::File, io::Read, rc::Rc}; use flowistry_pdg_construction::{ - graph::InternedString, Asyncness, DepGraph, MemoPdgConstructor, ArtifactLoader, SubgraphDescriptor, + graph::InternedString, ArtifactLoader, Asyncness, DepGraph, MemoPdgConstructor, PartialGraph, }; use rustc_hash::FxHashMap; @@ -55,7 +55,7 @@ pub enum MetadataLoaderError { use MetadataLoaderError::*; impl<'tcx> ArtifactLoader<'tcx> for MetadataLoader<'tcx> { - fn load(&self, function: DefId) -> Option<&SubgraphDescriptor<'tcx>> { + fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>> { self.get_metadata(function.krate) .ok()? 
.pdgs @@ -136,7 +136,7 @@ impl<'tcx> MetadataLoader<'tcx> { } #[derive(Clone, Debug, TyEncodable, TyDecodable)] pub struct Metadata<'tcx> { - pub pdgs: FxHashMap>, + pub pdgs: FxHashMap>, pub bodies: FxHashMap>, pub local_annotations: HashMap>, pub reachable_markers: HashMap<(DefIndex, GenericArgsRef<'tcx>), Box<[InternedString]>>, @@ -153,17 +153,16 @@ impl<'tcx> Metadata<'tcx> { impl<'tcx> Metadata<'tcx> { pub fn from_pdgs( tcx: TyCtxt<'tcx>, - pdgs: FxHashMap>, + pdgs: FxHashMap>, markers: &MarkerDatabase<'tcx>, ) -> Self { let mut bodies: FxHashMap = Default::default(); for location in pdgs.values().flat_map(|subgraph| { subgraph - .graph .nodes .iter() .map(|n| &n.at) - .chain(subgraph.graph.edges.iter().map(|e| &e.2.at)) + .chain(subgraph.edges.iter().map(|e| &e.2.at)) .flat_map(|at| at.iter()) }) { if let Some(local) = location.function.as_local() { @@ -240,7 +239,7 @@ impl<'tcx> MetadataLoader<'tcx> { pub fn get_mono(&self, cs: CallString) -> Result> { let get_graph = |key: DefId| { let meta = self.get_metadata(key.krate)?; - anyhow::Ok(&meta.pdgs.get(&key.index).ok_or(NoPdgForItem(key))?.graph) + anyhow::Ok(meta.pdgs.get(&key.index).ok_or(NoPdgForItem(key))?) }; if let Some(caller) = cs.caller() { let key = caller.root().function; @@ -271,7 +270,6 @@ impl<'tcx> MetadataLoader<'tcx> { .ok()? .pdgs .get(&key.index)? 
- .graph .asyncness, ) })() From 88a9d0074fb1cb627596e8233fade6b6a144bc7f Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sat, 25 May 2024 17:36:31 -0700 Subject: [PATCH 42/95] Encapsulation for PartialGraph --- .../src/construct.rs | 13 ----- .../flowistry_pdg_construction/src/graph.rs | 35 ++++++++++--- .../flowistry_pdg_construction/src/utils.rs | 3 ++ crates/paralegal-flow/src/ana/metadata.rs | 51 ++++++++----------- 4 files changed, 53 insertions(+), 49 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 371a8dab18..6c0fdb17bb 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -286,19 +286,6 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, &'mir LocalAnalysis<'t } } -fn as_arg<'tcx>(node: &DepNode<'tcx>, def_id: LocalDefId, body: &Body<'tcx>) -> Option> { - if node.at.leaf().function != def_id.to_def_id() { - return None; - } - if node.place.local == RETURN_PLACE { - Some(None) - } else if node.place.is_arg(body) { - Some(Some(node.place.local.as_u32() as u8 - 1)) - } else { - None - } -} - impl<'tcx> PartialGraph<'tcx> { fn modular_mutation_visitor<'a, 'mir>( &'a mut self, diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 73f1bdc075..9ccedfea50 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -24,7 +24,7 @@ use rustc_utils::PlaceExt; pub use flowistry_pdg::{RichLocation, SourceUse, TargetUse}; use serde::{Deserialize, Serialize}; -use crate::{construct::TransformCallString, Asyncness}; +use crate::{construct::TransformCallString, utils::Captures, Asyncness}; /// A node in the program dependency graph. 
/// @@ -280,18 +280,41 @@ impl Decodable for InternedString { } } +/// A PDG that is fit for combining with other PDGs #[derive(Debug, Clone, TyDecodable, TyEncodable)] pub struct PartialGraph<'tcx> { - pub nodes: FxHashSet>, - pub edges: FxHashSet<(DepNode<'tcx>, DepNode<'tcx>, DepEdge)>, - pub monos: FxHashMap>, - pub generics: GenericArgsRef<'tcx>, - pub asyncness: Asyncness, + pub(crate) nodes: FxHashSet>, + pub(crate) edges: FxHashSet<(DepNode<'tcx>, DepNode<'tcx>, DepEdge)>, + pub(crate) monos: FxHashMap>, + pub(crate) generics: GenericArgsRef<'tcx>, + pub(crate) asyncness: Asyncness, def_id: DefId, arg_count: usize, } impl<'tcx> PartialGraph<'tcx> { + pub fn mentioned_call_string<'a>( + &'a self, + ) -> impl Iterator + Captures<'tcx> + 'a { + self.nodes + .iter() + .map(|n| &n.at) + .chain(self.edges.iter().map(|e| &e.2.at)) + .copied() + } + + pub fn get_mono(&self, cs: CallString) -> Option> { + if let Some(caller) = cs.caller() { + self.monos.get(&caller).copied() + } else { + Some(self.generics) + } + } + + pub fn asyncness(&self) -> Asyncness { + self.asyncness + } + pub fn new( asyncness: Asyncness, generics: GenericArgsRef<'tcx>, diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 33f81a9027..60ff001ca8 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -19,6 +19,9 @@ use rustc_span::ErrorGuaranteed; use rustc_type_ir::{fold::TypeFoldable, AliasKind}; use rustc_utils::{BodyExt, PlaceExt}; +pub trait Captures<'a> {} +impl<'a, T: ?Sized> Captures<'a> for T {} + /// An async check that does not crash if called on closures. 
pub fn is_async<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> bool { !tcx.is_closure(def_id) && tcx.asyncness(def_id).is_async() diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 5896398ebf..afd63f49b3 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -157,20 +157,18 @@ impl<'tcx> Metadata<'tcx> { markers: &MarkerDatabase<'tcx>, ) -> Self { let mut bodies: FxHashMap = Default::default(); - for location in pdgs.values().flat_map(|subgraph| { - subgraph - .nodes - .iter() - .map(|n| &n.at) - .chain(subgraph.edges.iter().map(|e| &e.2.at)) - .flat_map(|at| at.iter()) - }) { - if let Some(local) = location.function.as_local() { - bodies.entry(local.local_def_index).or_insert_with(|| { - let info = BodyInfo::from_body(tcx, local); - trace!("Created info for body {local:?}\n{info:?}"); - info - }); + for call_string in pdgs + .values() + .flat_map(|subgraph| subgraph.mentioned_call_string()) + { + for location in call_string.iter() { + if let Some(local) = location.function.as_local() { + bodies.entry(local.local_def_index).or_insert_with(|| { + let info = BodyInfo::from_body(tcx, local); + trace!("Created info for body {local:?}\n{info:?}"); + info + }); + } } } let cache_borrow = markers.reachable_markers.borrow(); @@ -237,21 +235,14 @@ impl<'tcx> MetadataLoader<'tcx> { } pub fn get_mono(&self, cs: CallString) -> Result> { - let get_graph = |key: DefId| { - let meta = self.get_metadata(key.krate)?; - anyhow::Ok(meta.pdgs.get(&key.index).ok_or(NoPdgForItem(key))?) - }; - if let Some(caller) = cs.caller() { - let key = caller.root().function; - let monos = &get_graph(key)?.monos; - trace!("Known monos for {key:?} are"); - for (k, v) in monos { - trace!(" {k}: {v:?}"); - } - Ok(*monos.get(&caller).ok_or(NoGenericsKnownForCallSite(cs))?) 
- } else { - Ok(get_graph(cs.leaf().function)?.generics) - } + let key = cs.root().function; + let meta = self.get_metadata(key.krate)?; + Ok(meta + .pdgs + .get(&key.index) + .ok_or(NoPdgForItem(key))? + .get_mono(cs) + .ok_or(NoGenericsKnownForCallSite(cs))?) } pub fn get_pdg(&self, key: DefId) -> Result> { @@ -270,7 +261,7 @@ impl<'tcx> MetadataLoader<'tcx> { .ok()? .pdgs .get(&key.index)? - .asyncness, + .asyncness(), ) })() .unwrap_or(Asyncness::No) From b99aaf5cb295dd82922b199a7b3079bb17c35572 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sat, 25 May 2024 17:46:10 -0700 Subject: [PATCH 43/95] Unexport --- crates/paralegal-flow/src/lib.rs | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 40b435ed77..a0760fbe5e 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -42,27 +42,23 @@ extern crate rustc_span; extern crate rustc_target; extern crate rustc_type_ir; -pub extern crate either; +extern crate either; use std::borrow::Cow; -pub use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::path::PathBuf; use std::{fmt::Display, time::Instant}; -pub use rustc_hir::def_id::{DefId, LocalDefId}; -pub use rustc_hir::BodyId; -pub use rustc_middle::dep_graph::DepGraph; -pub use rustc_middle::mir::Location; -pub use rustc_middle::ty; -pub use rustc_span::Symbol; -pub use rustc_type_ir::sty; -pub use ty::TyCtxt; +use rustc_hir::def_id::{DefId, LocalDefId}; +use rustc_middle::ty; +use rustc_span::Symbol; +use ty::TyCtxt; use rustc_driver::Compilation; use rustc_plugin::CrateFilter; use rustc_utils::mir::borrowck_facts; -pub use paralegal_spdg as desc; +use paralegal_spdg as desc; use crate::{ ana::{MetadataLoader, SPDGGenerator}, @@ -70,26 +66,26 @@ use crate::{ stats::{Stats, TimedStat}, utils::Print, }; -pub use args::{AnalysisCtrl, Args, BuildConfig, DepConfig, DumpArgs, 
ModelCtrl}; +use args::{AnalysisCtrl, Args}; use args::{ClapArgs, LogLevelConfig}; use consts::INTERMEDIATE_ARTIFACT_EXT; use desc::utils::write_sep; use anyhow::{anyhow, Context as _, Result}; -pub use either::Either; +use either::Either; // This import is sort of special because it comes from the private rustc // dependencies and not from our `Cargo.toml`. -pub mod ana; -pub mod ann; +mod ana; +mod ann; mod args; -pub mod dbg; +mod dbg; mod discover; mod stats; //mod sah; #[macro_use] -pub mod utils; -pub mod consts; +mod utils; +mod consts; #[cfg(feature = "test")] pub mod test_utils; From f4341600c8b4d04bdf2185958969b54eaa723db5 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sun, 26 May 2024 09:40:42 -0700 Subject: [PATCH 44/95] Misc --- crates/paralegal-flow/src/args.rs | 5 ++++- crates/paralegal-flow/src/lib.rs | 7 +++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/crates/paralegal-flow/src/args.rs b/crates/paralegal-flow/src/args.rs index 4606b1b511..0788b87cbf 100644 --- a/crates/paralegal-flow/src/args.rs +++ b/crates/paralegal-flow/src/args.rs @@ -106,6 +106,7 @@ impl TryFrom for Args { } } +/// Post-processed command line and environment arguments. #[derive(serde::Serialize, serde::Deserialize)] pub struct Args { /// Print additional logging output (up to the "info" level) @@ -184,7 +185,7 @@ pub struct ClapArgs { } #[derive(Clone, clap::Args)] -pub struct ParseableDumpArgs { +struct ParseableDumpArgs { /// Generate intermediate of various formats and at various stages of /// compilation. 
A short description of each value is provided here, for a /// more comprehensive explanation refer to the [notion page on @@ -302,6 +303,7 @@ impl std::fmt::Display for LogLevelConfig { } impl Args { + /// Are we targeting a specific crate pub fn target(&self) -> Option<&str> { self.target.as_deref() } @@ -318,6 +320,7 @@ impl Args { &self.anactrl } + /// Configuration related to modeling pub fn modelctrl(&self) -> &ModelCtrl { &self.modelctrl } diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index a0760fbe5e..38e5977c35 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -63,13 +63,12 @@ use paralegal_spdg as desc; use crate::{ ana::{MetadataLoader, SPDGGenerator}, ann::db::MarkerCtx, + args::{AnalysisCtrl, Args, ClapArgs, LogLevelConfig}, + consts::INTERMEDIATE_ARTIFACT_EXT, + desc::utils::write_sep, stats::{Stats, TimedStat}, utils::Print, }; -use args::{AnalysisCtrl, Args}; -use args::{ClapArgs, LogLevelConfig}; -use consts::INTERMEDIATE_ARTIFACT_EXT; -use desc::utils::write_sep; use anyhow::{anyhow, Context as _, Result}; use either::Either; From 7a7df79f3ac9bc0191706e12b1075d0e19bffbb1 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 28 May 2024 13:43:50 -0700 Subject: [PATCH 45/95] Additional test case for cross crate --- crates/paralegal-flow/tests/cross-crate.rs | 11 +++++++++++ .../tests/cross-crate/dependency/src/lib.rs | 18 ++++++++++++++++++ .../tests/cross-crate/entry/src/main.rs | 11 ++++++++--- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/crates/paralegal-flow/tests/cross-crate.rs b/crates/paralegal-flow/tests/cross-crate.rs index cfce68ff82..764b3db0fb 100644 --- a/crates/paralegal-flow/tests/cross-crate.rs +++ b/crates/paralegal-flow/tests/cross-crate.rs @@ -36,3 +36,14 @@ define_test!(basic_marker: graph -> { let marker = Identifier::new_intern("mark"); assert!(dbg!(&graph.spdg().markers).iter().any(|(_, markers)| markers.contains(&marker))) }); + 
+define_test!(assigns_marker: graph -> { + let sources = graph.marked(Identifier::new_intern("source")); + let mark = graph.marked(Identifier::new_intern("mark")); + let target = graph.marked(Identifier::new_intern("target")); + assert!(!sources.is_empty()); + assert!(!mark.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_data(&mark)); + assert!(mark.flows_to_data(&target)); +}); diff --git a/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs b/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs index 12c67474c8..db92f1ea42 100644 --- a/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs +++ b/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs @@ -6,3 +6,21 @@ pub fn find_me(a: usize, _b: usize) -> usize { pub fn source() -> usize { 0 } + +#[paralegal::marker(mark, return)] +fn taint_it(_: usize) -> usize { + 9 +} + +pub fn assign_marker(a: usize) -> usize { + taint_it(a) +} + +pub fn find_me_generic(a: A, _b: A) -> A { + a +} + +#[paralegal::marker(mark, return)] +pub fn generic_source() -> A { + unimplemented!() +} diff --git a/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs index 0d9d42a726..f047d05163 100644 --- a/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs +++ b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs @@ -1,8 +1,8 @@ extern crate dependency; -use dependency::{find_me, source}; +use dependency::{assign_marker, find_me, source}; -#[paralegal::marker(source)] +#[paralegal::marker(source, return)] fn src() -> usize { 0 } @@ -12,7 +12,7 @@ fn not_src() -> usize { 1 } -#[paralegal::marker(target)] +#[paralegal::marker(target, arguments = [0])] fn target(u: usize) {} #[paralegal::analyze] @@ -25,4 +25,9 @@ fn basic_marker() { target(source()); } +#[paralegal::analyze] +fn assigns_marker() { + target(assign_marker(src())); +} + fn main() {} From cb685088655f9d0b1ca378b7dbbe8940f470757a Mon Sep 17 00:00:00 
2001 From: Justus Adam Date: Tue, 28 May 2024 13:46:55 -0700 Subject: [PATCH 46/95] Test case for generic cross crate --- crates/paralegal-flow/tests/cross-crate.rs | 22 +++++++++++++++++++ .../tests/cross-crate/dependency/src/lib.rs | 8 +++++-- .../tests/cross-crate/entry/src/main.rs | 12 +++++++++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/crates/paralegal-flow/tests/cross-crate.rs b/crates/paralegal-flow/tests/cross-crate.rs index 764b3db0fb..2c60cd3dde 100644 --- a/crates/paralegal-flow/tests/cross-crate.rs +++ b/crates/paralegal-flow/tests/cross-crate.rs @@ -47,3 +47,25 @@ define_test!(assigns_marker: graph -> { assert!(sources.flows_to_data(&mark)); assert!(mark.flows_to_data(&target)); }); + +define_test!(basic_generic : graph -> { + let src_fn = graph.function("src"); + let src = graph.call_site(&src_fn); + let not_src_fn = graph.function("not_src"); + let not_src = graph.call_site(&not_src_fn); + let target_fn = graph.function("target"); + let target = graph.call_site(&target_fn); + assert!(src.output().flows_to_data(&target.input())); + assert!(!not_src.output().flows_to_data(&target.input())); +}); + +define_test!(assigns_marker_generic: graph -> { + let sources = graph.marked(Identifier::new_intern("source")); + let mark = graph.marked(Identifier::new_intern("mark")); + let target = graph.marked(Identifier::new_intern("target")); + assert!(!sources.is_empty()); + assert!(!mark.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_data(&mark)); + assert!(mark.flows_to_data(&target)); +}); diff --git a/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs b/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs index db92f1ea42..194dd768be 100644 --- a/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs +++ b/crates/paralegal-flow/tests/cross-crate/dependency/src/lib.rs @@ -8,8 +8,8 @@ pub fn source() -> usize { } #[paralegal::marker(mark, return)] -fn taint_it(_: usize) -> usize { - 9 +fn 
taint_it<A>(_: A) -> A { + unimplemented!() } pub fn assign_marker(a: usize) -> usize { @@ -24,3 +24,7 @@ pub fn find_me_generic<A>(a: A, _b: A) -> A { pub fn generic_source<A>() -> A { unimplemented!() } + +pub fn assign_marker_generic<A>(a: A) -> A { + taint_it(a) +} diff --git a/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs index f047d05163..66f47a24c4 100644 --- a/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs +++ b/crates/paralegal-flow/tests/cross-crate/entry/src/main.rs @@ -1,6 +1,6 @@ extern crate dependency; -use dependency::{assign_marker, find_me, source}; +use dependency::{assign_marker, assign_marker_generic, find_me, find_me_generic, source}; #[paralegal::marker(source, return)] fn src() -> usize { @@ -30,4 +30,14 @@ fn assigns_marker() { target(assign_marker(src())); } +#[paralegal::analyze] +fn basic_generic() { + target(find_me_generic(src(), not_src())) +} + +#[paralegal::analyze] +fn assigns_marker_generic() { + target(assign_marker_generic(src())); +} + fn main() {} From 89f00835733104f6fbd5f7fee842b495ff13dfba Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 28 May 2024 13:54:18 -0700 Subject: [PATCH 47/95] Simple generics suppport --- crates/paralegal-flow/src/ana/metadata.rs | 8 ++++---- crates/paralegal-flow/src/discover.rs | 4 +--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index afd63f49b3..f8b8e73c5d 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -56,10 +56,12 @@ use MetadataLoaderError::*; impl<'tcx> ArtifactLoader<'tcx> for MetadataLoader<'tcx> { fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>> { - self.get_metadata(function.krate) + let res = self + .get_metadata(function.krate) .ok()? 
.pdgs - .get(&function.index) + .get(&function.index); + res } } @@ -210,7 +212,6 @@ impl<'tcx> MetadataLoader<'tcx> { let paths = self.tcx.crate_extern_paths(key); for path in paths { let path = path.with_extension(INTERMEDIATE_ARTIFACT_EXT); - println!("Trying to load file {}", path.display()); let Ok(mut file) = File::open(path) else { continue; }; @@ -218,7 +219,6 @@ impl<'tcx> MetadataLoader<'tcx> { file.read_to_end(&mut buf).unwrap(); let mut decoder = ParalegalDecoder::new(self.tcx, buf.as_slice()); let meta = Metadata::decode(&mut decoder); - println!("Successfully loaded"); return Some(meta); } None diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index e9f7d7f6b1..c58683fa54 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -134,10 +134,8 @@ impl<'tcx> intravisit::Visitor<'tcx> for CollectingVisitor<'tcx> { name: *name, def_id: id, }); - self.emit_target_collector.push(id); - } else if self.tcx.generics_of(id).count() == 0 { - self.emit_target_collector.push(id) } + self.emit_target_collector.push(id) } _ => (), } From 7085558f5794b6bd4a0fc7caddddf1ece72e6a70 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 28 May 2024 15:23:29 -0700 Subject: [PATCH 48/95] Enable cross crate tests in CI --- Makefile.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.toml b/Makefile.toml index 04afef64e2..7532b0986c 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -65,6 +65,8 @@ args = [ "--test", "async_tests", "--no-fail-fast", + "--test", + "cross-crate", ] [tasks.policy-framework-tests] From c5a03fc940581a8ec5e6db05fb6d324414fd9487 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 28 May 2024 15:24:13 -0700 Subject: [PATCH 49/95] Clippy fixes --- crates/flowistry_pdg_construction/src/construct.rs | 2 +- crates/flowistry_pdg_construction/src/graph.rs | 2 +- crates/flowistry_pdg_construction/src/utils.rs | 2 +- 3 files changed, 3 insertions(+), 3 
deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 6c0fdb17bb..dc94e4c038 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -349,7 +349,7 @@ impl<'tcx> PartialGraph<'tcx> { } CallHandling::ApproxAsyncSM(how) => { how( - &constructor, + constructor, &mut self.modular_mutation_visitor(results, state), args, *destination, diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 9ccedfea50..7302d5f491 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -353,7 +353,7 @@ impl<'tcx> PartialGraph<'tcx> { } } -fn as_arg<'tcx>(node: &DepNode<'tcx>, def_id: DefId, arg_count: usize) -> Option> { +fn as_arg(node: &DepNode<'_>, def_id: DefId, arg_count: usize) -> Option> { if node.at.leaf().function != def_id { return None; } diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 60ff001ca8..d0f96cbe33 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -23,7 +23,7 @@ pub trait Captures<'a> {} impl<'a, T: ?Sized> Captures<'a> for T {} /// An async check that does not crash if called on closures. 
-pub fn is_async<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> bool { +pub fn is_async(tcx: TyCtxt<'_>, def_id: DefId) -> bool { !tcx.is_closure(def_id) && tcx.asyncness(def_id).is_async() } From 6c1c167473e8f5952fc205e89dd0e781b53b9ce0 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 28 May 2024 15:57:22 -0700 Subject: [PATCH 50/95] Fix warnings --- crates/paralegal-flow/src/ann/db.rs | 29 ---------------------- crates/paralegal-flow/src/ann/mod.rs | 2 ++ crates/paralegal-flow/src/ann/parse.rs | 1 + crates/paralegal-flow/src/dbg.rs | 25 ------------------- crates/paralegal-flow/src/lib.rs | 1 - crates/paralegal-flow/src/utils/mod.rs | 10 +++----- crates/paralegal-flow/src/utils/resolve.rs | 2 -- 7 files changed, 6 insertions(+), 64 deletions(-) delete mode 100644 crates/paralegal-flow/src/dbg.rs diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index 39b0c3be47..ff26823e6a 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -143,22 +143,6 @@ impl<'tcx> MarkerCtx<'tcx> { self.is_attribute_marked(did) || self.is_externally_marked(did) } - /// Direct access to the loaded database of external markers. - #[inline] - pub fn external_annotations(&self) -> &ExternalMarkers { - &self.db().external_annotations - } - - /// Are there markers reachable from this (function)? - /// - /// Returns true if the item itself carries a marker *or* if one of the - /// functions called in its body are marked. - /// - /// XXX Does not take into account reachable type markers - pub fn marker_is_reachable(&self, res: Instance<'tcx>) -> bool { - self.is_marked(res.def_id()) || self.has_transitive_reachable_markers(res) - } - /// Queries the transitive marker cache. 
pub fn has_transitive_reachable_markers(&self, res: Instance<'tcx>) -> bool { !self.get_reachable_markers(res).is_empty() @@ -379,14 +363,6 @@ impl<'tcx> MarkerCtx<'tcx> { .into_iter() } - pub fn type_has_surface_markers(&self, ty: ty::Ty) -> Option { - let def_id = ty.defid()?; - self.combined_markers(def_id) - .next() - .is_some() - .then_some(def_id) - } - /// All markers placed on this function, directly or through the type plus /// the type that was marked (if any). pub fn all_function_markers<'a>( @@ -445,11 +421,6 @@ impl<'tcx> MarkerCtx<'tcx> { .map(move |ann| (id, Cow::Owned(Annotation::Marker(ann.clone())))) })) } - - pub fn functions_seen(&self) -> Vec> { - let cache = self.0.reachable_markers.borrow(); - cache.keys().copied().collect::>() - } } pub type TypeMarkerElem = (DefId, InternedString); diff --git a/crates/paralegal-flow/src/ann/mod.rs b/crates/paralegal-flow/src/ann/mod.rs index 011293ca42..26ed90bab8 100644 --- a/crates/paralegal-flow/src/ann/mod.rs +++ b/crates/paralegal-flow/src/ann/mod.rs @@ -41,6 +41,7 @@ impl Annotation { } } + #[allow(dead_code)] /// If this is an [`Annotation::OType`], returns the underlying [`TypeId`]. pub fn as_otype(&self) -> Option { match self { @@ -49,6 +50,7 @@ impl Annotation { } } + #[allow(dead_code)] /// If this is an [`Annotation::Exception`], returns the underlying [`ExceptionAnnotation`]. pub fn as_exception(&self) -> Option<&ExceptionAnnotation> { match self { diff --git a/crates/paralegal-flow/src/ann/parse.rs b/crates/paralegal-flow/src/ann/parse.rs index 3dfb043fc9..55532d7f5f 100644 --- a/crates/paralegal-flow/src/ann/parse.rs +++ b/crates/paralegal-flow/src/ann/parse.rs @@ -176,6 +176,7 @@ pub fn assert_token<'a>(k: TokenKind) -> impl FnMut(I<'a>) -> R<'a, ()> { ) } +#[allow(dead_code)] /// Parse something dictionnary-like. 
/// /// Expects the next token to be a braces delimited subtree containing pairs of diff --git a/crates/paralegal-flow/src/dbg.rs b/crates/paralegal-flow/src/dbg.rs deleted file mode 100644 index a11336176d..0000000000 --- a/crates/paralegal-flow/src/dbg.rs +++ /dev/null @@ -1,25 +0,0 @@ -//! Helpers for debugging -//! -//! Defines pretty printers and dot graph output. -//! -//! Often times the pretty printers wrappers around references to graph structs, -//! like [PrintableMatrix]. These wrappers have -//! `Debug` and/or `Display` implementations so that you can flexibly print them -//! to stdout, a file or a log statement. Some take additional information (such -//! as [TyCtxt]) to get contextual information that is used to make the output -//! more useful. -use rustc_middle::mir; - -/// All locations that a body has (helper) -pub fn locations_of_body<'a: 'tcx, 'tcx>( - body: &'a mir::Body<'tcx>, -) -> impl Iterator + 'a + 'tcx { - body.basic_blocks - .iter_enumerated() - .flat_map(|(block, dat)| { - (0..=dat.statements.len()).map(move |statement_index| mir::Location { - block, - statement_index, - }) - }) -} diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 38e5977c35..5a1795ddf0 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -78,7 +78,6 @@ use either::Either; mod ana; mod ann; mod args; -mod dbg; mod discover; mod stats; //mod sah; diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index 4f8798f5a6..7b9ea2a42b 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -321,8 +321,6 @@ pub enum AsFnAndArgsErr<'tcx> { NotAConstant, #[error("is not a function type: {0:?}")] NotFunctionType(ty::TyKind<'tcx>), - #[error("is not a `Val` constant: {0}")] - NotValueLevelConstant(ty::Const<'tcx>), #[error("terminator is not a `Call`")] NotAFunctionCall, #[error("function instance could not be resolved")] @@ 
-415,6 +413,7 @@ pub enum Overlap<'tcx> { } impl<'tcx> Overlap<'tcx> { + #[allow(dead_code)] pub fn contains_other(self) -> bool { matches!(self, Overlap::Equal | Overlap::Parent(_)) } @@ -652,10 +651,6 @@ pub enum BodyResolutionError { #[error("not a function-like object")] /// The provided id did not refer to a function-like object. NotAFunction, - #[error("body not available")] - /// The provided id refers to an external entity and we have no access to - /// its body - External, /// The function refers to a trait item (not an `impl` item or raw `fn`) #[error("is associated function of trait {0:?}")] IsTraitAssocFn(DefId), @@ -717,7 +712,6 @@ impl<'tcx> TyCtxtExt<'tcx> for TyCtxt<'tcx> { Err(e) => { let sess = self.sess; match e { - BodyResolutionError::External => (), BodyResolutionError::IsTraitAssocFn(r#trait) => { sess.struct_span_warn( self.def_span(local_def_id.to_def_id()), @@ -758,6 +752,7 @@ pub fn with_temporary_logging_level R>(filter: log::LevelFilte r } +#[allow(dead_code)] pub fn time R>(msg: &str, f: F) -> R { info!("Starting {msg}"); let time = std::time::Instant::now(); @@ -794,6 +789,7 @@ impl IntoBodyId for DefId { } } +#[allow(dead_code)] pub fn map_either( either: Either, f: impl FnOnce(A) -> C, diff --git a/crates/paralegal-flow/src/utils/resolve.rs b/crates/paralegal-flow/src/utils/resolve.rs index 78ba5669de..a6ba09907f 100644 --- a/crates/paralegal-flow/src/utils/resolve.rs +++ b/crates/paralegal-flow/src/utils/resolve.rs @@ -35,8 +35,6 @@ pub enum ResolutionError { EmptyStarts, #[error("non-convertible resolution {:?}", .0)] UnconvertibleRes(def::Res), - #[error("could not resolve crate {}", .0)] - CouldNotResolveCrate(String), } #[derive(Clone, Debug)] From 5d46a1de1dc9f3a8c543aebae5c4d01cd9c650d8 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 28 May 2024 16:19:57 -0700 Subject: [PATCH 51/95] Clippy wants me to rename this --- crates/paralegal-flow/src/ana/metadata.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 
deletions(-) diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index f8b8e73c5d..567358ef8a 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -43,9 +43,9 @@ pub struct MetadataLoader<'tcx> { #[derive(Debug, Error)] pub enum MetadataLoaderError { #[error("no pdg for item {:?}", .0)] - NoPdgForItem(DefId), + PdgForItemMissing(DefId), #[error("no metadata for crate {}", tls::with(|tcx| tcx.crate_name(*.0)))] - NoMetadataForCrate(CrateNum), + MetadataForCrateMissing(CrateNum), #[error("no generics known for call site {0}")] NoGenericsKnownForCallSite(CallString), #[error("no metadata for item {:?} in crate {}", .0, tls::with(|tcx| tcx.crate_name(.0.krate)))] @@ -224,7 +224,7 @@ impl<'tcx> MetadataLoader<'tcx> { None }) .as_ref() - .ok_or(NoMetadataForCrate(key))?; + .ok_or(MetadataForCrateMissing(key))?; Ok(meta) } @@ -240,7 +240,7 @@ impl<'tcx> MetadataLoader<'tcx> { Ok(meta .pdgs .get(&key.index) - .ok_or(NoPdgForItem(key))? + .ok_or(PdgForItemMissing(key))? .get_mono(cs) .ok_or(NoGenericsKnownForCallSite(cs))?) } @@ -250,7 +250,7 @@ impl<'tcx> MetadataLoader<'tcx> { .get_metadata(key.krate)? .pdgs .get(&key.index) - .ok_or(NoPdgForItem(key))? + .ok_or(PdgForItemMissing(key))? 
.to_petgraph()) } From 88e6db02efcd58c175ff94e4a8d059c1d5f6ac82 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 28 May 2024 16:50:08 -0700 Subject: [PATCH 52/95] Extra docs and dead code removal --- .../src/callback.rs | 1 - .../src/construct.rs | 9 -------- .../flowistry_pdg_construction/src/graph.rs | 11 ++++++--- crates/paralegal-flow/src/ana/encoder.rs | 9 ++++++++ .../paralegal-flow/src/ana/graph_converter.rs | 4 +--- crates/paralegal-flow/src/ana/metadata.rs | 23 +++++++++++++++++++ 6 files changed, 41 insertions(+), 16 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/callback.rs b/crates/flowistry_pdg_construction/src/callback.rs index 816a43b853..68f21e6e28 100644 --- a/crates/flowistry_pdg_construction/src/callback.rs +++ b/crates/flowistry_pdg_construction/src/callback.rs @@ -11,7 +11,6 @@ pub trait CallChangeCallback<'tcx> { _resolution: Instance<'tcx>, _loc: Location, _under_analysis: Instance<'tcx>, - _call_string: Option, _reason: InlineMissReason, ) { } diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index dc94e4c038..a44ded3a7d 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -889,14 +889,6 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { return Some(CallHandling::ApproxAsyncSM(handler)); }; - // if !resolved_def_id.is_local() { - // trace!( - // " Bailing because func is non-local: `{}`", - // tcx.def_path_str(resolved_def_id) - // ); - // return None; - // }; - let call_kind = match self.classify_call_kind(called_def_id, resolved_def_id, args) { Ok(cc) => cc, Err(async_err) => { @@ -905,7 +897,6 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { resolved_fn, location, self.root, - None, InlineMissReason::Async(async_err), ) } diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 7302d5f491..68ec404aef 100644 --- 
a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -56,10 +56,15 @@ impl PartialEq for DepNode<'_> { place, at, place_pretty: _, - span: _, - is_split: _, + span, + is_split, } = *self; - (place, at).eq(&(other.place, other.at)) + let eq = (place, at).eq(&(other.place, other.at)); + if eq { + debug_assert_eq!(span, other.span); + debug_assert_eq!(is_split, other.is_split); + } + eq } } diff --git a/crates/paralegal-flow/src/ana/encoder.rs b/crates/paralegal-flow/src/ana/encoder.rs index e82f8fa0a3..18e65a4c00 100644 --- a/crates/paralegal-flow/src/ana/encoder.rs +++ b/crates/paralegal-flow/src/ana/encoder.rs @@ -1,3 +1,12 @@ +//! Readers and writers for the intermediate artifacts we store per crate. +//! +//! Most of this code is adapted/copied from `EncodeContext` and `DecodeContext` in +//! `rustc_metadata`. +//! +//! Note that the methods pertaining to allocations of `AllocId`'s are +//! unimplemented and will cause a crash if you try to stick an `AllocId` into +//! the Paralegal artifact. + use std::path::Path; use rustc_hash::FxHashMap; diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index c0cd63baa2..aa8528fc2e 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -539,7 +539,6 @@ impl<'tcx> CallChangeCallback<'tcx> for MyCallback<'tcx> { resolution: Instance<'tcx>, loc: Location, parent: Instance<'tcx>, - call_string: Option, reason: InlineMissReason, ) { let body = self @@ -554,9 +553,8 @@ impl<'tcx> CallChangeCallback<'tcx> for MyCallback<'tcx> { self.tcx.sess.span_err( span, format!( - "Could not inline this function call in {:?}, at {} because {reason:?}. {}", + "Could not inline this function call in {:?}, because {reason:?}. 
{}", parent.def_id(), - call_string.map_or("root".to_owned(), |c| c.to_string()), Print(|f| if markers_reachable.is_empty() { f.write_str("No markers are reachable") } else { diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 567358ef8a..91a5ce035f 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -1,3 +1,8 @@ +//! Per-crate intermediate data we store. +//! +//! [`Metadata`] is what gets stored, whereas a [`MetadataLoader`] is +//! responsible for reading/writing this data. + use crate::{ ann::{db::MarkerDatabase, Annotation}, consts::INTERMEDIATE_ARTIFACT_EXT, @@ -35,11 +40,14 @@ use super::{ graph_converter::MyCallback, inline_judge::InlineJudge, }; + +/// Manager responsible for reading and writing [`Metadata`] artifacts. pub struct MetadataLoader<'tcx> { tcx: TyCtxt<'tcx>, cache: Cache>>, } +/// The types of errors that can arise from interacting with the [`MetadataLoader`]. #[derive(Debug, Error)] pub enum MetadataLoaderError { #[error("no pdg for item {:?}", .0)] @@ -66,6 +74,13 @@ impl<'tcx> ArtifactLoader<'tcx> for MetadataLoader<'tcx> { } impl<'tcx> MetadataLoader<'tcx> { + /// Traverse the items of this crate, create PDGs and collect other relevant + /// information about them. Write the metadata to disk, but also register + /// them with the loader itself for downstream analyses. + /// + /// Returns which functions should be emitted for policy enforcement (e.g. + /// analysis targets) and a context of discovered markers suitable for query + /// during that analysis. pub fn collect_and_emit_metadata( self: Rc, args: &'static Args, @@ -136,6 +151,11 @@ impl<'tcx> MetadataLoader<'tcx> { }) } } + +/// Intermediate artifacts stored on disc for every crate. +/// +/// Contains PDGs and reduced information about the source code that is needed +/// downstream. 
#[derive(Clone, Debug, TyEncodable, TyDecodable)] pub struct Metadata<'tcx> { pub pdgs: FxHashMap>, @@ -153,6 +173,8 @@ impl<'tcx> Metadata<'tcx> { } impl<'tcx> Metadata<'tcx> { + /// Given a set of PDGs created, query additional information we need to + /// record from rustc and return a serializable metadata artifact. pub fn from_pdgs( tcx: TyCtxt<'tcx>, pdgs: FxHashMap>, @@ -268,6 +290,7 @@ impl<'tcx> MetadataLoader<'tcx> { } } +/// Effectively a reduced MIR [`Body`](rustc_middle::mir::Body). #[derive(Clone, Debug, TyEncodable, TyDecodable)] pub struct BodyInfo<'tcx> { pub arg_count: usize, From 2c31d83f81807b481ecc3a00b0e661f62e52ae80 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Fri, 31 May 2024 14:59:03 -0700 Subject: [PATCH 53/95] Splitting `construct` --- .../src/approximation.rs | 75 ++ .../src/async_support.rs | 3 +- .../src/calling_convention.rs | 2 +- .../src/construct.rs | 847 +----------------- .../flowistry_pdg_construction/src/graph.rs | 47 +- crates/flowistry_pdg_construction/src/lib.rs | 4 +- .../src/local_analysis.rs | 725 +++++++++++++++ .../flowistry_pdg_construction/src/utils.rs | 1 + crates/paralegal-flow/src/ana/metadata.rs | 4 +- 9 files changed, 870 insertions(+), 838 deletions(-) create mode 100644 crates/flowistry_pdg_construction/src/approximation.rs create mode 100644 crates/flowistry_pdg_construction/src/local_analysis.rs diff --git a/crates/flowistry_pdg_construction/src/approximation.rs b/crates/flowistry_pdg_construction/src/approximation.rs new file mode 100644 index 0000000000..623a96f792 --- /dev/null +++ b/crates/flowistry_pdg_construction/src/approximation.rs @@ -0,0 +1,75 @@ +use log::trace; + +use rustc_abi::VariantIdx; + +use rustc_hir::def_id::DefId; +use rustc_index::IndexVec; +use rustc_middle::{ + mir::{visit::Visitor, AggregateKind, Location, Operand, Place, Rvalue}, + ty::TyKind, +}; + +use crate::local_analysis::LocalAnalysis; + +pub(crate) type ApproximationHandler<'tcx, 'a> = + fn(&LocalAnalysis<'tcx, 'a>, &mut 
dyn Visitor<'tcx>, &[Operand<'tcx>], Place<'tcx>, Location); + +impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { + /// Special case behavior for calls to functions used in desugaring async functions. + /// + /// Ensures that functions like `Pin::new_unchecked` are not modularly-approximated. + pub(crate) fn can_approximate_async_functions( + &self, + def_id: DefId, + ) -> Option> { + let lang_items = self.tcx().lang_items(); + if Some(def_id) == lang_items.new_unchecked_fn() { + Some(Self::approximate_new_unchecked) + } else if Some(def_id) == lang_items.into_future_fn() + // FIXME: better way to get retrieve this stdlib DefId? + || self.tcx().def_path_str(def_id) == "::into_future" + { + Some(Self::approximate_into_future) + } else { + None + } + } + + fn approximate_into_future( + &self, + vis: &mut dyn Visitor<'tcx>, + args: &[Operand<'tcx>], + destination: Place<'tcx>, + location: Location, + ) { + trace!("Handling into_future as assign for {destination:?}"); + let [op] = args else { + unreachable!(); + }; + vis.visit_assign(&destination, &Rvalue::Use(op.clone()), location); + } + + fn approximate_new_unchecked( + &self, + vis: &mut dyn Visitor<'tcx>, + args: &[Operand<'tcx>], + destination: Place<'tcx>, + location: Location, + ) { + let lang_items = self.tcx().lang_items(); + let [op] = args else { + unreachable!(); + }; + let mut operands = IndexVec::new(); + operands.push(op.clone()); + let TyKind::Adt(adt_id, generics) = destination.ty(&self.body, self.tcx()).ty.kind() else { + unreachable!() + }; + assert_eq!(adt_id.did(), lang_items.pin_type().unwrap()); + let aggregate_kind = + AggregateKind::Adt(adt_id.did(), VariantIdx::from_u32(0), generics, None, None); + let rvalue = Rvalue::Aggregate(Box::new(aggregate_kind), operands); + trace!("Handling new_unchecked as assign for {destination:?}"); + vis.visit_assign(&destination, &rvalue, location); + } +} diff --git a/crates/flowistry_pdg_construction/src/async_support.rs 
b/crates/flowistry_pdg_construction/src/async_support.rs index 53a1f2d79f..7f426b7465 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -15,7 +15,8 @@ use rustc_middle::{ }; use crate::{ - construct::{push_call_string_root, CallKind, LocalAnalysis}, + graph::push_call_string_root, + local_analysis::{CallKind, LocalAnalysis}, utils, PartialGraph, }; diff --git a/crates/flowistry_pdg_construction/src/calling_convention.rs b/crates/flowistry_pdg_construction/src/calling_convention.rs index 76b1ce91a5..7e151a226b 100644 --- a/crates/flowistry_pdg_construction/src/calling_convention.rs +++ b/crates/flowistry_pdg_construction/src/calling_convention.rs @@ -7,7 +7,7 @@ use rustc_middle::{ ty::TyCtxt, }; -use crate::{async_support::AsyncInfo, construct::CallKind, utils}; +use crate::{async_support::AsyncInfo, local_analysis::CallKind, utils}; pub enum CallingConvention<'tcx, 'a> { Direct(&'a [Operand<'tcx>]), diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index a44ded3a7d..d2fd3724f4 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -8,48 +8,35 @@ //! [`InstructionState`] at each instruction in the procedure. //! 2. 
[`PartialGraph`] implements [`ResultsVisitor`] over the analysis result -use std::{collections::HashSet, iter, rc::Rc}; +use std::rc::Rc; use either::Either; -use flowistry::mir::placeinfo::PlaceInfo; -use flowistry_pdg::{CallString, GlobalLocation, RichLocation}; -use itertools::Itertools; -use log::{debug, log_enabled, trace, Level}; + +use flowistry_pdg::{CallString, GlobalLocation}; + +use log::trace; use petgraph::graph::DiGraph; -use rustc_abi::VariantIdx; -use rustc_borrowck::consumers::{places_conflict, BodyWithBorrowckFacts, PlaceConflictBias}; -use rustc_hash::{FxHashMap, FxHashSet}; -use rustc_hir::def_id::{DefId, LocalDefId}; +use rustc_hash::FxHashMap; +use rustc_hir::def_id::LocalDefId; use rustc_index::IndexVec; use rustc_middle::{ - mir::{ - visit::Visitor, AggregateKind, BasicBlock, Body, Location, Operand, Place, PlaceElem, - Rvalue, Statement, Terminator, TerminatorEdges, TerminatorKind, RETURN_PLACE, - }, - ty::{GenericArg, GenericArgsRef, Instance, List, TyCtxt, TyKind}, -}; -use rustc_mir_dataflow::{ - self as df, fmt::DebugWithContext, Analysis, AnalysisDomain, Results, ResultsVisitor, + mir::{visit::Visitor, AggregateKind, Location, Place, Rvalue, Terminator, TerminatorKind}, + ty::{GenericArgsRef, Instance, TyCtxt}, }; +use rustc_mir_dataflow::{AnalysisDomain, Results, ResultsVisitor}; use rustc_span::ErrorGuaranteed; -use rustc_utils::{ - cache::Cache, - mir::{borrowck_facts, control_dependencies::ControlDependencies}, - BodyExt, PlaceExt, -}; +use rustc_utils::{cache::Cache, PlaceExt}; use crate::{ async_support::*, - calling_convention::*, - graph::{DepEdge, DepGraph, DepNode, PartialGraph, SourceUse, TargetUse}, - mutation::{ModularMutationVisitor, Mutation, Time}, - utils::{ - self, is_async, is_non_default_trait_method, manufacture_substs_for, try_monomorphize, - try_resolve_function, + graph::{ + push_call_string_root, DepEdge, DepGraph, DepNode, PartialGraph, SourceUse, TargetUse, }, - ArtifactLoader, Asyncness, CallChangeCallback, 
CallChanges, CallInfo, InlineMissReason, - SkipCall, + local_analysis::{CallHandling, InstructionState, LocalAnalysis}, + mutation::{ModularMutationVisitor, Mutation, Time}, + utils::{manufacture_substs_for, try_resolve_function}, + CallChangeCallback, GraphLoader, }; /// A memoizing constructor of PDGs. @@ -62,13 +49,13 @@ pub struct MemoPdgConstructor<'tcx> { pub(crate) dump_mir: bool, pub(crate) async_info: Rc, pub(crate) pdg_cache: PdgCache<'tcx>, - pub(crate) loader: Box + 'tcx>, + pub(crate) loader: Box + 'tcx>, } impl<'tcx> MemoPdgConstructor<'tcx> { /// Initialize the constructor, parameterized over an [`ArtifactLoader`] for /// retrieving PDGs of functions from dependencies. - pub fn new(tcx: TyCtxt<'tcx>, loader: impl ArtifactLoader<'tcx> + 'tcx) -> Self { + pub fn new(tcx: TyCtxt<'tcx>, loader: impl GraphLoader<'tcx> + 'tcx) -> Self { Self { tcx, call_change_callback: None, @@ -151,23 +138,6 @@ impl<'tcx> MemoPdgConstructor<'tcx> { } } -#[derive(PartialEq, Eq, Default, Clone, Debug)] -pub(crate) struct InstructionState<'tcx> { - last_mutation: FxHashMap, FxHashSet>, -} - -impl DebugWithContext for InstructionState<'_> {} - -impl<'tcx> df::JoinSemiLattice for InstructionState<'tcx> { - fn join(&mut self, other: &Self) -> bool { - utils::hashmap_join( - &mut self.last_mutation, - &other.last_mutation, - utils::hashset_join, - ) - } -} - impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>> for PartialGraph<'tcx> { @@ -460,7 +430,7 @@ impl<'tcx> PartialGraph<'tcx> { Either::Right(node) => vec![node], Either::Left(place) => results .analysis - .find_outputs(state, place, location) + .find_outputs(place, location) .into_iter() .map(|t| t.1) .collect(), @@ -496,23 +466,6 @@ impl<'tcx> PartialGraph<'tcx> { type PdgCache<'tcx> = Rc), PartialGraph<'tcx>>>; -pub(crate) struct LocalAnalysis<'tcx, 'a> { - pub(crate) memo: &'a MemoPdgConstructor<'tcx>, - pub(super) root: Instance<'tcx>, - body_with_facts: &'tcx 
BodyWithBorrowckFacts<'tcx>, - pub(crate) body: Body<'tcx>, - pub(crate) def_id: LocalDefId, - place_info: PlaceInfo<'tcx>, - control_dependencies: ControlDependencies, - pub(crate) body_assignments: utils::BodyAssignments, - start_loc: FxHashSet, -} - -fn other_as_arg<'tcx>(place: Place<'tcx>, body: &Body<'tcx>) -> Option { - (body.local_kind(place.local) == rustc_middle::mir::LocalKind::Arg) - .then(|| place.local.as_u32() as u8 - 1) -} - #[derive(Debug)] enum Inputs<'tcx> { Unresolved { @@ -524,714 +477,6 @@ enum Inputs<'tcx> { }, } -impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { - /// Creates [`GraphConstructor`] for a function resolved as `fn_resolution` in a given `calling_context`. - pub(crate) fn new( - memo: &'a MemoPdgConstructor<'tcx>, - root: Instance<'tcx>, - ) -> LocalAnalysis<'tcx, 'a> { - let tcx = memo.tcx; - let def_id = root.def_id().expect_local(); - let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, def_id); - let param_env = tcx.param_env_reveal_all_normalized(def_id); - // let param_env = match &calling_context { - // Some(cx) => cx.param_env, - // None => ParamEnv::reveal_all(), - // }; - let body = try_monomorphize(root, tcx, param_env, &body_with_facts.body); - - if memo.dump_mir { - use std::io::Write; - let path = tcx.def_path_str(def_id) + ".mir"; - let mut f = std::fs::File::create(path.as_str()).unwrap(); - write!(f, "{}", body.to_string(tcx).unwrap()).unwrap(); - debug!("Dumped debug MIR {path}"); - } - - let place_info = PlaceInfo::build(tcx, def_id.to_def_id(), body_with_facts); - let control_dependencies = body.control_dependencies(); - - let mut start_loc = FxHashSet::default(); - start_loc.insert(RichLocation::Start); - - let body_assignments = utils::find_body_assignments(&body); - - LocalAnalysis { - memo, - root, - body_with_facts, - body, - place_info, - control_dependencies, - start_loc, - def_id, - body_assignments, - } - } - - fn make_dep_node( - &self, - place: Place<'tcx>, - location: impl Into, - ) -> 
DepNode<'tcx> { - DepNode::new( - place, - self.make_call_string(location), - self.tcx(), - &self.body, - self.place_info.children(place).iter().any(|p| *p != place), - ) - } - - /// Returns all pairs of `(src, edge)`` such that the given `location` is control-dependent on `edge` - /// with input `src`. - fn find_control_inputs(&self, location: Location) -> Vec<(DepNode<'tcx>, DepEdge)> { - let mut blocks_seen = HashSet::::from_iter(Some(location.block)); - let mut block_queue = vec![location.block]; - let mut out = vec![]; - while let Some(block) = block_queue.pop() { - if let Some(ctrl_deps) = self.control_dependencies.dependent_on(block) { - for dep in ctrl_deps.iter() { - let ctrl_loc = self.body.terminator_loc(dep); - let Terminator { - kind: TerminatorKind::SwitchInt { discr, .. }, - .. - } = self.body.basic_blocks[dep].terminator() - else { - if blocks_seen.insert(dep) { - block_queue.push(dep); - } - continue; - }; - let Some(ctrl_place) = discr.place() else { - continue; - }; - let at = self.make_call_string(ctrl_loc); - let src = self.make_dep_node(ctrl_place, ctrl_loc); - let edge = DepEdge::control(at, SourceUse::Operand, TargetUse::Assign); - out.push((src, edge)); - } - } - } - out - } - - fn call_change_callback(&self) -> Option<&dyn CallChangeCallback<'tcx>> { - self.memo.call_change_callback.as_ref().map(Rc::as_ref) - } - - fn async_info(&self) -> &AsyncInfo { - &self.memo.async_info - } - - fn make_call_string(&self, location: impl Into) -> CallString { - CallString::single(GlobalLocation { - function: self.root.def_id(), - location: location.into(), - }) - } - - /// Returns the aliases of `place`. See [`PlaceInfo::aliases`] for details. - pub(crate) fn aliases(&'a self, place: Place<'tcx>) -> impl Iterator> + 'a { - // MASSIVE HACK ALERT: - // The issue is that monomorphization erases regions, due to how it's implemented in rustc. - // However, Flowistry's alias analysis uses regions to figure out aliases. 
- // To workaround this incompatibility, when we receive a monomorphized place, we try to - // recompute its type in the context of the original region-containing body as far as possible. - // - // For example, say _2: (&'0 impl Foo,) in the original body and _2: (&(i32, i32),) in the monomorphized body. - // Say we ask for aliases (*(_2.0)).0. Then we will retype ((*_2.0).0).0 and receive back (*_2.0: &'0 impl Foo). - // We can ask for the aliases in the context of the original body, receiving e.g. {_1}. - // Then we reproject the aliases with the remaining projection, to create {_1.0}. - // - // This is a massive hack bc it's inefficient and I'm not certain that it's sound. - let place_retyped = utils::retype_place( - place, - self.tcx(), - &self.body_with_facts.body, - self.def_id.to_def_id(), - ); - self.place_info.aliases(place_retyped).iter().map(|alias| { - let mut projection = alias.projection.to_vec(); - projection.extend(&place.projection[place_retyped.projection.len()..]); - Place::make(alias.local, &projection, self.tcx()) - }) - } - - pub(crate) fn tcx(&self) -> TyCtxt<'tcx> { - self.memo.tcx - } - - /// Returns all nodes `src` such that `src` is: - /// 1. Part of the value of `input` - /// 2. The most-recently modified location for `src` - fn find_data_inputs( - &self, - state: &InstructionState<'tcx>, - input: Place<'tcx>, - ) -> Vec> { - // Include all sources of indirection (each reference in the chain) as relevant places. - let provenance = input - .refs_in_projection() - .map(|(place_ref, _)| Place::from_ref(place_ref, self.tcx())); - let inputs = iter::once(input).chain(provenance); - - inputs - // **POINTER-SENSITIVITY:** - // If `input` involves indirection via dereferences, then resolve it to the direct places it could point to. 
- .flat_map(|place| self.aliases(place)) - .flat_map(|alias| { - // **FIELD-SENSITIVITY:** - // Find all places that have been mutated which conflict with `alias.` - let conflicts = state - .last_mutation - .iter() - .map(|(k, locs)| (*k, locs)) - .filter(move |(place, _)| { - if place.is_indirect() && place.is_arg(&self.body) { - // HACK: `places_conflict` seems to consider it a bug is `borrow_place` - // includes a dereference, which should only happen if `borrow_place` - // is an argument. So we special case that condition and just compare for local equality. - // - // TODO: this is not field-sensitive! - place.local == alias.local - } else { - let mut place = *place; - if let Some((PlaceElem::Deref, rest)) = place.projection.split_last() { - let mut new_place = place; - new_place.projection = self.tcx().mk_place_elems(rest); - if new_place.ty(&self.body, self.tcx()).ty.is_box() { - if new_place.is_indirect() { - // TODO might be unsound: We assume that if - // there are other indirections in here, - // there is an alias that does not have - // indirections in it. - return false; - } - place = new_place; - } - } - places_conflict( - self.tcx(), - &self.body, - place, - alias, - PlaceConflictBias::Overlap, - ) - } - }); - - // Special case: if the `alias` is an un-mutated argument, then include it as a conflict - // coming from the special start location. - let alias_last_mut = if alias.is_arg(&self.body) { - Some((alias, &self.start_loc)) - } else { - None - }; - - // For each `conflict`` last mutated at the locations `last_mut`: - conflicts - .chain(alias_last_mut) - .flat_map(|(conflict, last_mut_locs)| { - // For each last mutated location: - last_mut_locs.iter().map(move |last_mut_loc| { - // Return @ as an input node. 
- self.make_dep_node(conflict, *last_mut_loc) - }) - }) - }) - .collect() - } - - fn find_outputs( - &self, - _state: &InstructionState<'tcx>, - mutated: Place<'tcx>, - location: Location, - ) -> Vec<(Place<'tcx>, DepNode<'tcx>)> { - // **POINTER-SENSITIVITY:** - // If `mutated` involves indirection via dereferences, then resolve it to the direct places it could point to. - let aliases = self.aliases(mutated).collect_vec(); - - // **FIELD-SENSITIVITY:** we do NOT deal with fields on *writes* (in this function), - // only on *reads* (in `add_input_to_op`). - - // For each mutated `dst`: - aliases - .iter() - .map(|dst| { - // Create a destination node for (DST @ CURRENT_LOC). - (*dst, self.make_dep_node(*dst, location)) - }) - .collect() - } - - /// Updates the last-mutated location for `dst` to the given `location`. - fn apply_mutation( - &self, - state: &mut InstructionState<'tcx>, - location: Location, - mutated: Place<'tcx>, - ) { - self.find_outputs(state, mutated, location) - .into_iter() - .for_each(|(dst, _)| { - // Create a destination node for (DST @ CURRENT_LOC). - - // Clear all previous mutations. - let dst_mutations = state.last_mutation.entry(dst).or_default(); - dst_mutations.clear(); - - // Register that `dst` is mutated at the current location. - dst_mutations.insert(RichLocation::Location(location)); - }) - } - - /// Resolve a function [`Operand`] to a specific [`DefId`] and generic arguments if possible. - pub(crate) fn operand_to_def_id( - &self, - func: &Operand<'tcx>, - ) -> Option<(DefId, &'tcx List>)> { - let ty = func.ty(&self.body, self.tcx()); - utils::type_as_fn(self.tcx(), ty) - } - - fn fmt_fn(&self, def_id: DefId) -> String { - self.tcx().def_path_str(def_id) - } - - /// Special case behavior for calls to functions used in desugaring async functions. - /// - /// Ensures that functions like `Pin::new_unchecked` are not modularly-approximated. 
- fn can_approximate_async_functions( - &self, - def_id: DefId, - ) -> Option> { - let lang_items = self.tcx().lang_items(); - if Some(def_id) == lang_items.new_unchecked_fn() { - Some(Self::approximate_new_unchecked) - } else if Some(def_id) == lang_items.into_future_fn() - // FIXME: better way to get retrieve this stdlib DefId? - || self.tcx().def_path_str(def_id) == "::into_future" - { - Some(Self::approximate_into_future) - } else { - None - } - } - - fn approximate_into_future( - &self, - vis: &mut dyn Visitor<'tcx>, - args: &[Operand<'tcx>], - destination: Place<'tcx>, - location: Location, - ) { - trace!("Handling into_future as assign for {destination:?}"); - let [op] = args else { - unreachable!(); - }; - vis.visit_assign(&destination, &Rvalue::Use(op.clone()), location); - } - - fn approximate_new_unchecked( - &self, - vis: &mut dyn Visitor<'tcx>, - args: &[Operand<'tcx>], - destination: Place<'tcx>, - location: Location, - ) { - let lang_items = self.tcx().lang_items(); - let [op] = args else { - unreachable!(); - }; - let mut operands = IndexVec::new(); - operands.push(op.clone()); - let TyKind::Adt(adt_id, generics) = destination.ty(&self.body, self.tcx()).ty.kind() else { - unreachable!() - }; - assert_eq!(adt_id.did(), lang_items.pin_type().unwrap()); - let aggregate_kind = - AggregateKind::Adt(adt_id.did(), VariantIdx::from_u32(0), generics, None, None); - let rvalue = Rvalue::Aggregate(Box::new(aggregate_kind), operands); - trace!("Handling new_unchecked as assign for {destination:?}"); - vis.visit_assign(&destination, &rvalue, location); - } - - fn determine_call_handling<'b>( - &'b self, - location: Location, - func: &Operand<'tcx>, - args: &'b [Operand<'tcx>], - ) -> Option> { - let tcx = self.tcx(); - - let (called_def_id, generic_args) = self.operand_to_def_id(func)?; - trace!("Resolved call to function: {}", self.fmt_fn(called_def_id)); - - // Monomorphize the called function with the known generic_args. 
- let param_env = tcx.param_env_reveal_all_normalized(self.def_id); - let resolved_fn = - utils::try_resolve_function(self.tcx(), called_def_id, param_env, generic_args)?; - trace!("resolved to instance {resolved_fn:?}"); - let resolved_def_id = resolved_fn.def_id(); - if log_enabled!(Level::Trace) && called_def_id != resolved_def_id { - let (called, resolved) = (self.fmt_fn(called_def_id), self.fmt_fn(resolved_def_id)); - trace!(" `{called}` monomorphized to `{resolved}`",); - } - - if is_non_default_trait_method(tcx, resolved_def_id).is_some() { - trace!(" bailing because is unresolvable trait method"); - return None; - } - - if let Some(handler) = self.can_approximate_async_functions(resolved_def_id) { - return Some(CallHandling::ApproxAsyncSM(handler)); - }; - - let call_kind = match self.classify_call_kind(called_def_id, resolved_def_id, args) { - Ok(cc) => cc, - Err(async_err) => { - if let Some(cb) = self.call_change_callback() { - cb.on_inline_miss( - resolved_fn, - location, - self.root, - InlineMissReason::Async(async_err), - ) - } - return None; - } - }; - - let calling_convention = CallingConvention::from_call_kind(&call_kind, args); - - trace!( - " Handling call! with kind {}", - match &call_kind { - CallKind::Direct => "direct", - CallKind::Indirect => "indirect", - CallKind::AsyncPoll { .. } => "async poll", - } - ); - - // Recursively generate the PDG for the child function. - - let cache_key = resolved_fn; - - let is_cached = self.memo.is_in_cache(cache_key); - - let call_changes = self.call_change_callback().map(|callback| { - let info = CallInfo { - callee: resolved_fn, - call_string: self.make_call_string(location), - is_cached, - async_parent: if let CallKind::AsyncPoll(resolution, _loc, _) = call_kind { - // Special case for async. We ask for skipping not on the closure, but - // on the "async" function that created it. This is needed for - // consistency in skipping. 
Normally, when "poll" is inlined, mutations - // introduced by the creator of the future are not recorded and instead - // handled here, on the closure. But if the closure is skipped we need - // those mutations to occur. To ensure this we always ask for the - // "CallChanges" on the creator so that both creator and closure have - // the same view of whether they are inlined or "Skip"ped. - Some(resolution) - } else { - None - }, - }; - callback.on_inline(info) - }); - - // Handle async functions at the time of polling, not when the future is created. - if is_async(tcx, resolved_def_id) { - trace!(" Bailing because func is async"); - - // If a skip was requested then "poll" will not be inlined later so we - // bail with "None" here and perform the mutations. Otherwise we bail with - // "Some", knowing that handling "poll" later will handle the mutations. - return (!matches!( - &call_changes, - Some(CallChanges { - skip: SkipCall::Skip, - .. - }) - )) - .then_some(CallHandling::ApproxAsyncFn); - } - - if matches!( - call_changes, - Some(CallChanges { - skip: SkipCall::Skip, - .. - }) - ) { - trace!(" Bailing because user callback said to bail"); - return None; - } - let Some(descriptor) = self.memo.construct_for(cache_key) else { - trace!(" Bailing because cache lookup {cache_key} failed"); - return None; - }; - Some(CallHandling::Ready { - descriptor, - calling_convention, - }) - } - - /// Attempt to inline a call to a function, returning None if call is not inline-able. - fn handle_call( - &self, - state: &mut InstructionState<'tcx>, - location: Location, - func: &Operand<'tcx>, - args: &[Operand<'tcx>], - destination: Place<'tcx>, - ) -> Option<()> { - // Note: my comments here will use "child" to refer to the callee and - // "parent" to refer to the caller, since the words are most visually distinct. 
- - let preamble = self.determine_call_handling(location, func, args)?; - - trace!("Call handling is {}", preamble.as_ref()); - - let (child_constructor, calling_convention) = match preamble { - CallHandling::Ready { - descriptor, - calling_convention, - } => (descriptor, calling_convention), - CallHandling::ApproxAsyncFn => { - // Register a synthetic assignment of `future = (arg0, arg1, ...)`. - let rvalue = Rvalue::Aggregate( - Box::new(AggregateKind::Tuple), - IndexVec::from_iter(args.iter().cloned()), - ); - self.modular_mutation_visitor(state) - .visit_assign(&destination, &rvalue, location); - return Some(()); - } - CallHandling::ApproxAsyncSM(handler) => { - handler( - self, - &mut self.modular_mutation_visitor(state), - args, - destination, - location, - ); - return Some(()); - } - }; - - let parentable_dsts = child_constructor.parentable_dsts(); - let parent_body = &self.body; - let translate_to_parent = |child: Place<'tcx>| -> Option> { - calling_convention.translate_to_parent( - child, - self.async_info(), - self.tcx(), - parent_body, - self.def_id.to_def_id(), - destination, - ) - }; - - // For each destination node CHILD that is parentable to PLACE, - // add an edge from CHILD -> PLACE. - // - // PRECISION TODO: for a given child place, we only want to connect - // the *last* nodes in the child function to the parent, not *all* of them. 
- trace!("CHILD -> PARENT EDGES:"); - for (child_dst, _) in parentable_dsts { - if let Some(parent_place) = translate_to_parent(child_dst.place) { - self.apply_mutation(state, location, parent_place); - } - } - - Some(()) - } - - fn modular_mutation_visitor<'b: 'a>( - &'b self, - state: &'a mut InstructionState<'tcx>, - ) -> ModularMutationVisitor<'b, 'tcx, impl FnMut(Location, Mutation<'tcx>) + 'b> { - ModularMutationVisitor::new( - &self.place_info, - move |location, mutation: Mutation<'tcx>| { - self.apply_mutation(state, location, mutation.mutated) - }, - ) - } - - pub(super) fn generic_args(&self) -> GenericArgsRef<'tcx> { - self.root.args - } - - fn handle_terminator( - &self, - terminator: &Terminator<'tcx>, - state: &mut InstructionState<'tcx>, - location: Location, - time: Time, - ) { - if let TerminatorKind::Call { - func, - args, - destination, - .. - } = &terminator.kind - { - if self - .handle_call(state, location, func, args, *destination) - .is_none() - { - trace!("Terminator {:?} failed the preamble", terminator.kind); - self.terminator_visitor(state, time) - .visit_terminator(terminator, location) - } - } else { - // Fallback: call the visitor - self.terminator_visitor(state, time) - .visit_terminator(terminator, location) - } - } - - pub(crate) fn construct_partial(&'a self) -> PartialGraph<'tcx> { - if let Some(g) = self.try_handle_as_async() { - return g; - } - - let mut analysis = self - .into_engine(self.tcx(), &self.body) - .iterate_to_fixpoint(); - - let mut final_state = PartialGraph::new( - Asyncness::No, - self.generic_args(), - self.def_id.to_def_id(), - self.body.arg_count, - ); - - analysis.visit_reachable_with(&self.body, &mut final_state); - - let all_returns = self.body.all_returns().map(|ret| ret.block).collect_vec(); - let mut analysis = analysis.into_results_cursor(&self.body); - for block in all_returns { - analysis.seek_to_block_end(block); - let return_state = analysis.get(); - for (place, locations) in 
&return_state.last_mutation { - let ret_kind = if place.local == RETURN_PLACE { - TargetUse::Return - } else if let Some(num) = other_as_arg(*place, &self.body) { - TargetUse::MutArg(num) - } else { - continue; - }; - for location in locations { - let src = self.make_dep_node(*place, *location); - let dst = self.make_dep_node(*place, RichLocation::End); - let edge = DepEdge::data( - self.make_call_string(self.body.terminator_loc(block)), - SourceUse::Operand, - ret_kind, - ); - final_state.edges.insert((src, dst, edge)); - } - } - } - - final_state - } - - /// Determine the type of call-site. - /// - /// The error case is if we tried to resolve this as async and failed. We - /// know it *is* async but we couldn't determine the information needed to - /// analyze the function, therefore we will have to approximate it. - fn classify_call_kind<'b>( - &'b self, - def_id: DefId, - resolved_def_id: DefId, - original_args: &'b [Operand<'tcx>], - ) -> Result, String> { - match self.try_poll_call_kind(def_id, original_args) { - AsyncDeterminationResult::Resolved(r) => Ok(r), - AsyncDeterminationResult::NotAsync => Ok(self - .try_indirect_call_kind(resolved_def_id) - .unwrap_or(CallKind::Direct)), - AsyncDeterminationResult::Unresolvable(reason) => Err(reason), - } - } - - fn try_indirect_call_kind(&self, def_id: DefId) -> Option> { - // let lang_items = self.tcx.lang_items(); - // let my_impl = self.tcx.impl_of_method(def_id)?; - // let my_trait = self.tcx.trait_id_of_impl(my_impl)?; - // (Some(my_trait) == lang_items.fn_trait() - // || Some(my_trait) == lang_items.fn_mut_trait() - // || Some(my_trait) == lang_items.fn_once_trait()) - // .then_some(CallKind::Indirect) - self.tcx().is_closure(def_id).then_some(CallKind::Indirect) - } - - fn terminator_visitor<'b: 'a>( - &'b self, - state: &'b mut InstructionState<'tcx>, - time: Time, - ) -> ModularMutationVisitor<'b, 'tcx, impl FnMut(Location, Mutation<'tcx>) + 'b> { - let mut vis = self.modular_mutation_visitor(state); - 
vis.set_time(time); - vis - } -} - -pub enum CallKind<'tcx> { - /// A standard function call like `f(x)`. - Direct, - /// A call to a function variable, like `fn foo(f: impl Fn()) { f() }` - Indirect, - /// A poll to an async function, like `f.await`. - AsyncPoll(Instance<'tcx>, Location, Place<'tcx>), -} - -type ApproximationHandler<'tcx, 'a> = - fn(&LocalAnalysis<'tcx, 'a>, &mut dyn Visitor<'tcx>, &[Operand<'tcx>], Place<'tcx>, Location); - -pub(crate) trait TransformCallString { - fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self; -} - -impl TransformCallString for CallString { - fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { - f(*self) - } -} - -impl TransformCallString for DepNode<'_> { - fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { - Self { - at: f(self.at), - ..*self - } - } -} - -impl TransformCallString for DepEdge { - fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { - Self { - at: f(self.at), - ..*self - } - } -} - -pub(crate) fn push_call_string_root( - old: &T, - new_root: GlobalLocation, -) -> T { - old.transform_call_string(|c| c.push_front(new_root)) -} - impl<'tcx> PartialGraph<'tcx> { pub fn to_petgraph(&self) -> DepGraph<'tcx> { let domain = self; @@ -1266,55 +511,3 @@ impl<'tcx> PartialGraph<'tcx> { } } } - -#[derive(strum::AsRefStr)] -enum CallHandling<'tcx, 'a> { - ApproxAsyncFn, - Ready { - calling_convention: CallingConvention<'tcx, 'a>, - descriptor: &'a PartialGraph<'tcx>, - }, - ApproxAsyncSM(ApproximationHandler<'tcx, 'a>), -} - -impl<'tcx, 'a> df::AnalysisDomain<'tcx> for &'a LocalAnalysis<'tcx, 'a> { - type Domain = InstructionState<'tcx>; - - const NAME: &'static str = "GraphConstructor"; - - fn bottom_value(&self, _body: &Body<'tcx>) -> Self::Domain { - InstructionState::default() - } - - fn initialize_start_block(&self, _body: &Body<'tcx>, _state: &mut Self::Domain) {} -} - -impl<'a, 'tcx> df::Analysis<'tcx> 
for &'a LocalAnalysis<'tcx, 'a> { - fn apply_statement_effect( - &mut self, - state: &mut Self::Domain, - statement: &Statement<'tcx>, - location: Location, - ) { - self.modular_mutation_visitor(state) - .visit_statement(statement, location) - } - - fn apply_terminator_effect<'mir>( - &mut self, - state: &mut Self::Domain, - terminator: &'mir Terminator<'tcx>, - location: Location, - ) -> TerminatorEdges<'mir, 'tcx> { - self.handle_terminator(terminator, state, location, Time::Unspecified); - terminator.edges() - } - - fn apply_call_return_effect( - &mut self, - _state: &mut Self::Domain, - _block: BasicBlock, - _return_places: rustc_middle::mir::CallReturnPlaces<'_, 'tcx>, - ) { - } -} diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 68ec404aef..a8bf0de7d4 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -7,7 +7,7 @@ use std::{ rc::Rc, }; -use flowistry_pdg::CallString; +use flowistry_pdg::{CallString, GlobalLocation}; use internment::Intern; use petgraph::{dot, graph::DiGraph}; use rustc_hash::{FxHashMap, FxHashSet}; @@ -24,7 +24,7 @@ use rustc_utils::PlaceExt; pub use flowistry_pdg::{RichLocation, SourceUse, TargetUse}; use serde::{Deserialize, Serialize}; -use crate::{construct::TransformCallString, utils::Captures, Asyncness}; +use crate::{utils::Captures, Asyncness}; /// A node in the program dependency graph. /// @@ -403,7 +403,7 @@ impl<'tcx> TransformCallString for PartialGraph<'tcx> { /// Abstracts over how previously written [`Artifact`]s are retrieved, allowing /// the user of this module to chose where to store them. -pub trait ArtifactLoader<'tcx> { +pub trait GraphLoader<'tcx> { fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>>; } @@ -413,20 +413,55 @@ pub type Artifact<'tcx> = FxHashMap>; /// An [`ArtifactLoader`] that always returns `None`. 
pub struct NoLoader; -impl<'tcx> ArtifactLoader<'tcx> for NoLoader { +impl<'tcx> GraphLoader<'tcx> for NoLoader { fn load(&self, _: DefId) -> Option<&PartialGraph<'tcx>> { None } } -impl<'tcx, T: ArtifactLoader<'tcx>> ArtifactLoader<'tcx> for Rc { +impl<'tcx, T: GraphLoader<'tcx>> GraphLoader<'tcx> for Rc { fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>> { (**self).load(function) } } -impl<'tcx, T: ArtifactLoader<'tcx>> ArtifactLoader<'tcx> for Box { +impl<'tcx, T: GraphLoader<'tcx>> GraphLoader<'tcx> for Box { fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>> { (**self).load(function) } } + +pub(crate) trait TransformCallString { + fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self; +} + +impl TransformCallString for CallString { + fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { + f(*self) + } +} + +impl TransformCallString for DepNode<'_> { + fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { + Self { + at: f(self.at), + ..*self + } + } +} + +impl TransformCallString for DepEdge { + fn transform_call_string(&self, f: impl Fn(CallString) -> CallString) -> Self { + Self { + at: f(self.at), + ..*self + } + } +} + +pub(crate) fn push_call_string_root( + old: &T, + new_root: GlobalLocation, +) -> T { + old.transform_call_string(|c| c.push_front(new_root)) +} diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index 999fb80fd6..6b0c1a5846 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -16,7 +16,7 @@ extern crate rustc_target; extern crate rustc_type_ir; pub use async_support::{determine_async, is_async_trait_fn, Asyncness}; -pub use graph::{Artifact, ArtifactLoader, DepGraph, NoLoader, PartialGraph}; +pub use graph::{Artifact, DepGraph, GraphLoader, NoLoader, PartialGraph}; pub mod callback; pub use crate::construct::MemoPdgConstructor; 
pub use callback::{ @@ -24,10 +24,12 @@ pub use callback::{ }; use rustc_middle::ty::{Instance, TyCtxt}; +mod approximation; mod async_support; mod calling_convention; mod construct; pub mod graph; +mod local_analysis; mod mutation; pub mod utils; diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs new file mode 100644 index 0000000000..32cc1bb5d0 --- /dev/null +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -0,0 +1,725 @@ +use std::{collections::HashSet, iter, rc::Rc}; + +use flowistry::mir::placeinfo::PlaceInfo; +use flowistry_pdg::{CallString, GlobalLocation, RichLocation}; +use itertools::Itertools; +use log::{debug, log_enabled, trace, Level}; + +use rustc_borrowck::consumers::{places_conflict, BodyWithBorrowckFacts, PlaceConflictBias}; +use rustc_hash::{FxHashMap, FxHashSet}; +use rustc_hir::def_id::{DefId, LocalDefId}; +use rustc_index::IndexVec; +use rustc_middle::{ + mir::{ + visit::Visitor, AggregateKind, BasicBlock, Body, Location, Operand, Place, PlaceElem, + Rvalue, Statement, Terminator, TerminatorEdges, TerminatorKind, RETURN_PLACE, + }, + ty::{GenericArg, GenericArgsRef, Instance, List, TyCtxt}, +}; +use rustc_mir_dataflow::{self as df, fmt::DebugWithContext, Analysis}; + +use rustc_utils::{ + mir::{borrowck_facts, control_dependencies::ControlDependencies}, + BodyExt, PlaceExt, +}; + +use crate::{ + approximation::ApproximationHandler, + async_support::*, + calling_convention::*, + graph::{DepEdge, DepNode, PartialGraph, SourceUse, TargetUse}, + mutation::{ModularMutationVisitor, Mutation, Time}, + utils::{self, is_async, is_non_default_trait_method, try_monomorphize}, + Asyncness, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, MemoPdgConstructor, + SkipCall, +}; + +#[derive(PartialEq, Eq, Default, Clone, Debug)] +pub(crate) struct InstructionState<'tcx> { + last_mutation: FxHashMap, FxHashSet>, +} + +impl DebugWithContext for 
InstructionState<'_> {} + +impl<'tcx> df::JoinSemiLattice for InstructionState<'tcx> { + fn join(&mut self, other: &Self) -> bool { + utils::hashmap_join( + &mut self.last_mutation, + &other.last_mutation, + utils::hashset_join, + ) + } +} + +pub(crate) struct LocalAnalysis<'tcx, 'a> { + pub(crate) memo: &'a MemoPdgConstructor<'tcx>, + pub(super) root: Instance<'tcx>, + body_with_facts: &'tcx BodyWithBorrowckFacts<'tcx>, + pub(crate) body: Body<'tcx>, + pub(crate) def_id: LocalDefId, + pub(crate) place_info: PlaceInfo<'tcx>, + control_dependencies: ControlDependencies, + pub(crate) body_assignments: utils::BodyAssignments, + start_loc: FxHashSet, +} + +impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { + /// Creates [`GraphConstructor`] for a function resolved as `fn_resolution` in a given `calling_context`. + pub(crate) fn new( + memo: &'a MemoPdgConstructor<'tcx>, + root: Instance<'tcx>, + ) -> LocalAnalysis<'tcx, 'a> { + let tcx = memo.tcx; + let def_id = root.def_id().expect_local(); + let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, def_id); + let param_env = tcx.param_env_reveal_all_normalized(def_id); + // let param_env = match &calling_context { + // Some(cx) => cx.param_env, + // None => ParamEnv::reveal_all(), + // }; + let body = try_monomorphize(root, tcx, param_env, &body_with_facts.body); + + if memo.dump_mir { + use std::io::Write; + let path = tcx.def_path_str(def_id) + ".mir"; + let mut f = std::fs::File::create(path.as_str()).unwrap(); + write!(f, "{}", body.to_string(tcx).unwrap()).unwrap(); + debug!("Dumped debug MIR {path}"); + } + + let place_info = PlaceInfo::build(tcx, def_id.to_def_id(), body_with_facts); + let control_dependencies = body.control_dependencies(); + + let mut start_loc = FxHashSet::default(); + start_loc.insert(RichLocation::Start); + + let body_assignments = utils::find_body_assignments(&body); + + LocalAnalysis { + memo, + root, + body_with_facts, + body, + place_info, + control_dependencies, + start_loc, + 
def_id, + body_assignments, + } + } + + fn make_dep_node( + &self, + place: Place<'tcx>, + location: impl Into, + ) -> DepNode<'tcx> { + DepNode::new( + place, + self.make_call_string(location), + self.tcx(), + &self.body, + self.place_info.children(place).iter().any(|p| *p != place), + ) + } + + /// Returns all pairs of `(src, edge)`` such that the given `location` is control-dependent on `edge` + /// with input `src`. + pub(crate) fn find_control_inputs(&self, location: Location) -> Vec<(DepNode<'tcx>, DepEdge)> { + let mut blocks_seen = HashSet::::from_iter(Some(location.block)); + let mut block_queue = vec![location.block]; + let mut out = vec![]; + while let Some(block) = block_queue.pop() { + if let Some(ctrl_deps) = self.control_dependencies.dependent_on(block) { + for dep in ctrl_deps.iter() { + let ctrl_loc = self.body.terminator_loc(dep); + let Terminator { + kind: TerminatorKind::SwitchInt { discr, .. }, + .. + } = self.body.basic_blocks[dep].terminator() + else { + if blocks_seen.insert(dep) { + block_queue.push(dep); + } + continue; + }; + let Some(ctrl_place) = discr.place() else { + continue; + }; + let at = self.make_call_string(ctrl_loc); + let src = self.make_dep_node(ctrl_place, ctrl_loc); + let edge = DepEdge::control(at, SourceUse::Operand, TargetUse::Assign); + out.push((src, edge)); + } + } + } + out + } + + fn call_change_callback(&self) -> Option<&dyn CallChangeCallback<'tcx>> { + self.memo.call_change_callback.as_ref().map(Rc::as_ref) + } + + pub(crate) fn async_info(&self) -> &AsyncInfo { + &self.memo.async_info + } + + pub(crate) fn make_call_string(&self, location: impl Into) -> CallString { + CallString::single(GlobalLocation { + function: self.root.def_id(), + location: location.into(), + }) + } + + /// Returns the aliases of `place`. See [`PlaceInfo::aliases`] for details. 
+ pub(crate) fn aliases(&'a self, place: Place<'tcx>) -> impl Iterator> + 'a { + // MASSIVE HACK ALERT: + // The issue is that monomorphization erases regions, due to how it's implemented in rustc. + // However, Flowistry's alias analysis uses regions to figure out aliases. + // To workaround this incompatibility, when we receive a monomorphized place, we try to + // recompute its type in the context of the original region-containing body as far as possible. + // + // For example, say _2: (&'0 impl Foo,) in the original body and _2: (&(i32, i32),) in the monomorphized body. + // Say we ask for aliases (*(_2.0)).0. Then we will retype ((*_2.0).0).0 and receive back (*_2.0: &'0 impl Foo). + // We can ask for the aliases in the context of the original body, receiving e.g. {_1}. + // Then we reproject the aliases with the remaining projection, to create {_1.0}. + // + // This is a massive hack bc it's inefficient and I'm not certain that it's sound. + let place_retyped = utils::retype_place( + place, + self.tcx(), + &self.body_with_facts.body, + self.def_id.to_def_id(), + ); + self.place_info.aliases(place_retyped).iter().map(|alias| { + let mut projection = alias.projection.to_vec(); + projection.extend(&place.projection[place_retyped.projection.len()..]); + Place::make(alias.local, &projection, self.tcx()) + }) + } + + pub(crate) fn tcx(&self) -> TyCtxt<'tcx> { + self.memo.tcx + } + + /// Returns all nodes `src` such that `src` is: + /// 1. Part of the value of `input` + /// 2. The most-recently modified location for `src` + pub(crate) fn find_data_inputs( + &self, + state: &InstructionState<'tcx>, + input: Place<'tcx>, + ) -> Vec> { + // Include all sources of indirection (each reference in the chain) as relevant places. 
+ let provenance = input + .refs_in_projection() + .map(|(place_ref, _)| Place::from_ref(place_ref, self.tcx())); + let inputs = iter::once(input).chain(provenance); + + inputs + // **POINTER-SENSITIVITY:** + // If `input` involves indirection via dereferences, then resolve it to the direct places it could point to. + .flat_map(|place| self.aliases(place)) + .flat_map(|alias| { + // **FIELD-SENSITIVITY:** + // Find all places that have been mutated which conflict with `alias.` + let conflicts = state + .last_mutation + .iter() + .map(|(k, locs)| (*k, locs)) + .filter(move |(place, _)| { + if place.is_indirect() && place.is_arg(&self.body) { + // HACK: `places_conflict` seems to consider it a bug is `borrow_place` + // includes a dereference, which should only happen if `borrow_place` + // is an argument. So we special case that condition and just compare for local equality. + // + // TODO: this is not field-sensitive! + place.local == alias.local + } else { + let mut place = *place; + if let Some((PlaceElem::Deref, rest)) = place.projection.split_last() { + let mut new_place = place; + new_place.projection = self.tcx().mk_place_elems(rest); + if new_place.ty(&self.body, self.tcx()).ty.is_box() { + if new_place.is_indirect() { + // TODO might be unsound: We assume that if + // there are other indirections in here, + // there is an alias that does not have + // indirections in it. + return false; + } + place = new_place; + } + } + places_conflict( + self.tcx(), + &self.body, + place, + alias, + PlaceConflictBias::Overlap, + ) + } + }); + + // Special case: if the `alias` is an un-mutated argument, then include it as a conflict + // coming from the special start location. 
+ let alias_last_mut = if alias.is_arg(&self.body) { + Some((alias, &self.start_loc)) + } else { + None + }; + + // For each `conflict`` last mutated at the locations `last_mut`: + conflicts + .chain(alias_last_mut) + .flat_map(|(conflict, last_mut_locs)| { + // For each last mutated location: + last_mut_locs.iter().map(move |last_mut_loc| { + // Return @ as an input node. + self.make_dep_node(conflict, *last_mut_loc) + }) + }) + }) + .collect() + } + + pub(crate) fn find_outputs( + &self, + mutated: Place<'tcx>, + location: Location, + ) -> Vec<(Place<'tcx>, DepNode<'tcx>)> { + // **POINTER-SENSITIVITY:** + // If `mutated` involves indirection via dereferences, then resolve it to the direct places it could point to. + let aliases = self.aliases(mutated).collect_vec(); + + // **FIELD-SENSITIVITY:** we do NOT deal with fields on *writes* (in this function), + // only on *reads* (in `add_input_to_op`). + + // For each mutated `dst`: + aliases + .iter() + .map(|dst| { + // Create a destination node for (DST @ CURRENT_LOC). + (*dst, self.make_dep_node(*dst, location)) + }) + .collect() + } + + /// Updates the last-mutated location for `dst` to the given `location`. + fn apply_mutation( + &self, + state: &mut InstructionState<'tcx>, + location: Location, + mutated: Place<'tcx>, + ) { + self.find_outputs(mutated, location) + .into_iter() + .for_each(|(dst, _)| { + // Create a destination node for (DST @ CURRENT_LOC). + + // Clear all previous mutations. + let dst_mutations = state.last_mutation.entry(dst).or_default(); + dst_mutations.clear(); + + // Register that `dst` is mutated at the current location. + dst_mutations.insert(RichLocation::Location(location)); + }) + } + + /// Resolve a function [`Operand`] to a specific [`DefId`] and generic arguments if possible. 
+ pub(crate) fn operand_to_def_id( + &self, + func: &Operand<'tcx>, + ) -> Option<(DefId, &'tcx List>)> { + let ty = func.ty(&self.body, self.tcx()); + utils::type_as_fn(self.tcx(), ty) + } + + fn fmt_fn(&self, def_id: DefId) -> String { + self.tcx().def_path_str(def_id) + } + + pub(crate) fn determine_call_handling<'b>( + &'b self, + location: Location, + func: &Operand<'tcx>, + args: &'b [Operand<'tcx>], + ) -> Option> { + let tcx = self.tcx(); + + let (called_def_id, generic_args) = self.operand_to_def_id(func)?; + trace!("Resolved call to function: {}", self.fmt_fn(called_def_id)); + + // Monomorphize the called function with the known generic_args. + let param_env = tcx.param_env_reveal_all_normalized(self.def_id); + let resolved_fn = + utils::try_resolve_function(self.tcx(), called_def_id, param_env, generic_args)?; + trace!("resolved to instance {resolved_fn:?}"); + let resolved_def_id = resolved_fn.def_id(); + if log_enabled!(Level::Trace) && called_def_id != resolved_def_id { + let (called, resolved) = (self.fmt_fn(called_def_id), self.fmt_fn(resolved_def_id)); + trace!(" `{called}` monomorphized to `{resolved}`",); + } + + if is_non_default_trait_method(tcx, resolved_def_id).is_some() { + trace!(" bailing because is unresolvable trait method"); + return None; + } + + if let Some(handler) = self.can_approximate_async_functions(resolved_def_id) { + return Some(CallHandling::ApproxAsyncSM(handler)); + }; + + let call_kind = match self.classify_call_kind(called_def_id, resolved_def_id, args) { + Ok(cc) => cc, + Err(async_err) => { + if let Some(cb) = self.call_change_callback() { + cb.on_inline_miss( + resolved_fn, + location, + self.root, + InlineMissReason::Async(async_err), + ) + } + return None; + } + }; + + let calling_convention = CallingConvention::from_call_kind(&call_kind, args); + + trace!( + " Handling call! with kind {}", + match &call_kind { + CallKind::Direct => "direct", + CallKind::Indirect => "indirect", + CallKind::AsyncPoll { .. 
} => "async poll", + } + ); + + // Recursively generate the PDG for the child function. + + let cache_key = resolved_fn; + + let is_cached = self.memo.is_in_cache(cache_key); + + let call_changes = self.call_change_callback().map(|callback| { + let info = CallInfo { + callee: resolved_fn, + call_string: self.make_call_string(location), + is_cached, + async_parent: if let CallKind::AsyncPoll(resolution, _loc, _) = call_kind { + // Special case for async. We ask for skipping not on the closure, but + // on the "async" function that created it. This is needed for + // consistency in skipping. Normally, when "poll" is inlined, mutations + // introduced by the creator of the future are not recorded and instead + // handled here, on the closure. But if the closure is skipped we need + // those mutations to occur. To ensure this we always ask for the + // "CallChanges" on the creator so that both creator and closure have + // the same view of whether they are inlined or "Skip"ped. + Some(resolution) + } else { + None + }, + }; + callback.on_inline(info) + }); + + // Handle async functions at the time of polling, not when the future is created. + if is_async(tcx, resolved_def_id) { + trace!(" Bailing because func is async"); + + // If a skip was requested then "poll" will not be inlined later so we + // bail with "None" here and perform the mutations. Otherwise we bail with + // "Some", knowing that handling "poll" later will handle the mutations. + return (!matches!( + &call_changes, + Some(CallChanges { + skip: SkipCall::Skip, + .. + }) + )) + .then_some(CallHandling::ApproxAsyncFn); + } + + if matches!( + call_changes, + Some(CallChanges { + skip: SkipCall::Skip, + .. 
+ }) + ) { + trace!(" Bailing because user callback said to bail"); + return None; + } + let Some(descriptor) = self.memo.construct_for(cache_key) else { + trace!(" Bailing because cache lookup {cache_key} failed"); + return None; + }; + Some(CallHandling::Ready { + descriptor, + calling_convention, + }) + } + + /// Attempt to inline a call to a function, returning None if call is not inline-able. + fn handle_call( + &self, + state: &mut InstructionState<'tcx>, + location: Location, + func: &Operand<'tcx>, + args: &[Operand<'tcx>], + destination: Place<'tcx>, + ) -> Option<()> { + // Note: my comments here will use "child" to refer to the callee and + // "parent" to refer to the caller, since the words are most visually distinct. + + let preamble = self.determine_call_handling(location, func, args)?; + + trace!("Call handling is {}", preamble.as_ref()); + + let (child_constructor, calling_convention) = match preamble { + CallHandling::Ready { + descriptor, + calling_convention, + } => (descriptor, calling_convention), + CallHandling::ApproxAsyncFn => { + // Register a synthetic assignment of `future = (arg0, arg1, ...)`. + let rvalue = Rvalue::Aggregate( + Box::new(AggregateKind::Tuple), + IndexVec::from_iter(args.iter().cloned()), + ); + self.modular_mutation_visitor(state) + .visit_assign(&destination, &rvalue, location); + return Some(()); + } + CallHandling::ApproxAsyncSM(handler) => { + handler( + self, + &mut self.modular_mutation_visitor(state), + args, + destination, + location, + ); + return Some(()); + } + }; + + let parentable_dsts = child_constructor.parentable_dsts(); + let parent_body = &self.body; + let translate_to_parent = |child: Place<'tcx>| -> Option> { + calling_convention.translate_to_parent( + child, + self.async_info(), + self.tcx(), + parent_body, + self.def_id.to_def_id(), + destination, + ) + }; + + // For each destination node CHILD that is parentable to PLACE, + // add an edge from CHILD -> PLACE. 
+ // + // PRECISION TODO: for a given child place, we only want to connect + // the *last* nodes in the child function to the parent, not *all* of them. + trace!("CHILD -> PARENT EDGES:"); + for (child_dst, _) in parentable_dsts { + if let Some(parent_place) = translate_to_parent(child_dst.place) { + self.apply_mutation(state, location, parent_place); + } + } + + Some(()) + } + + fn modular_mutation_visitor<'b: 'a>( + &'b self, + state: &'a mut InstructionState<'tcx>, + ) -> ModularMutationVisitor<'b, 'tcx, impl FnMut(Location, Mutation<'tcx>) + 'b> { + ModularMutationVisitor::new( + &self.place_info, + move |location, mutation: Mutation<'tcx>| { + self.apply_mutation(state, location, mutation.mutated) + }, + ) + } + + pub(super) fn generic_args(&self) -> GenericArgsRef<'tcx> { + self.root.args + } + + fn handle_terminator( + &self, + terminator: &Terminator<'tcx>, + state: &mut InstructionState<'tcx>, + location: Location, + time: Time, + ) { + if let TerminatorKind::Call { + func, + args, + destination, + .. 
+ } = &terminator.kind + { + if self + .handle_call(state, location, func, args, *destination) + .is_none() + { + trace!("Terminator {:?} failed the preamble", terminator.kind); + self.terminator_visitor(state, time) + .visit_terminator(terminator, location) + } + } else { + // Fallback: call the visitor + self.terminator_visitor(state, time) + .visit_terminator(terminator, location) + } + } + + pub(crate) fn construct_partial(&'a self) -> PartialGraph<'tcx> { + if let Some(g) = self.try_handle_as_async() { + return g; + } + + let mut analysis = self + .into_engine(self.tcx(), &self.body) + .iterate_to_fixpoint(); + + let mut final_state = PartialGraph::new( + Asyncness::No, + self.generic_args(), + self.def_id.to_def_id(), + self.body.arg_count, + ); + + analysis.visit_reachable_with(&self.body, &mut final_state); + + let all_returns = self.body.all_returns().map(|ret| ret.block).collect_vec(); + let mut analysis = analysis.into_results_cursor(&self.body); + for block in all_returns { + analysis.seek_to_block_end(block); + let return_state = analysis.get(); + for (place, locations) in &return_state.last_mutation { + let ret_kind = if place.local == RETURN_PLACE { + TargetUse::Return + } else if let Some(num) = other_as_arg(*place, &self.body) { + TargetUse::MutArg(num) + } else { + continue; + }; + for location in locations { + let src = self.make_dep_node(*place, *location); + let dst = self.make_dep_node(*place, RichLocation::End); + let edge = DepEdge::data( + self.make_call_string(self.body.terminator_loc(block)), + SourceUse::Operand, + ret_kind, + ); + final_state.edges.insert((src, dst, edge)); + } + } + } + + final_state + } + + /// Determine the type of call-site. + /// + /// The error case is if we tried to resolve this as async and failed. We + /// know it *is* async but we couldn't determine the information needed to + /// analyze the function, therefore we will have to approximate it. 
+ fn classify_call_kind<'b>( + &'b self, + def_id: DefId, + resolved_def_id: DefId, + original_args: &'b [Operand<'tcx>], + ) -> Result, String> { + match self.try_poll_call_kind(def_id, original_args) { + AsyncDeterminationResult::Resolved(r) => Ok(r), + AsyncDeterminationResult::NotAsync => Ok(self + .try_indirect_call_kind(resolved_def_id) + .unwrap_or(CallKind::Direct)), + AsyncDeterminationResult::Unresolvable(reason) => Err(reason), + } + } + + fn try_indirect_call_kind(&self, def_id: DefId) -> Option> { + self.tcx().is_closure(def_id).then_some(CallKind::Indirect) + } + + fn terminator_visitor<'b: 'a>( + &'b self, + state: &'b mut InstructionState<'tcx>, + time: Time, + ) -> ModularMutationVisitor<'b, 'tcx, impl FnMut(Location, Mutation<'tcx>) + 'b> { + let mut vis = self.modular_mutation_visitor(state); + vis.set_time(time); + vis + } +} + +impl<'tcx, 'a> df::AnalysisDomain<'tcx> for &'a LocalAnalysis<'tcx, 'a> { + type Domain = InstructionState<'tcx>; + + const NAME: &'static str = "LocalDGPConstruction"; + + fn bottom_value(&self, _body: &Body<'tcx>) -> Self::Domain { + InstructionState::default() + } + + fn initialize_start_block(&self, _body: &Body<'tcx>, _state: &mut Self::Domain) {} +} + +impl<'a, 'tcx> df::Analysis<'tcx> for &'a LocalAnalysis<'tcx, 'a> { + fn apply_statement_effect( + &mut self, + state: &mut Self::Domain, + statement: &Statement<'tcx>, + location: Location, + ) { + self.modular_mutation_visitor(state) + .visit_statement(statement, location) + } + + fn apply_terminator_effect<'mir>( + &mut self, + state: &mut Self::Domain, + terminator: &'mir Terminator<'tcx>, + location: Location, + ) -> TerminatorEdges<'mir, 'tcx> { + self.handle_terminator(terminator, state, location, Time::Unspecified); + terminator.edges() + } + + fn apply_call_return_effect( + &mut self, + _state: &mut Self::Domain, + _block: BasicBlock, + _return_places: rustc_middle::mir::CallReturnPlaces<'_, 'tcx>, + ) { + } +} + +pub enum CallKind<'tcx> { + /// A standard 
function call like `f(x)`. + Direct, + /// A call to a function variable, like `fn foo(f: impl Fn()) { f() }` + Indirect, + /// A poll to an async function, like `f.await`. + AsyncPoll(Instance<'tcx>, Location, Place<'tcx>), +} + +#[derive(strum::AsRefStr)] +pub(crate) enum CallHandling<'tcx, 'a> { + ApproxAsyncFn, + Ready { + calling_convention: CallingConvention<'tcx, 'a>, + descriptor: &'a PartialGraph<'tcx>, + }, + ApproxAsyncSM(ApproximationHandler<'tcx, 'a>), +} + +fn other_as_arg<'tcx>(place: Place<'tcx>, body: &Body<'tcx>) -> Option { + (body.local_kind(place.local) == rustc_middle::mir::LocalKind::Arg) + .then(|| place.local.as_u32() as u8 - 1) +} diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index d0f96cbe33..f4f8621122 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -1,6 +1,7 @@ use std::{collections::hash_map::Entry, hash::Hash}; use either::Either; + use itertools::Itertools; use log::trace; use rustc_hash::{FxHashMap, FxHashSet}; diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 91a5ce035f..ff300d1b29 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -15,7 +15,7 @@ use std::path::Path; use std::{fs::File, io::Read, rc::Rc}; use flowistry_pdg_construction::{ - graph::InternedString, ArtifactLoader, Asyncness, DepGraph, MemoPdgConstructor, PartialGraph, + graph::InternedString, GraphLoader, Asyncness, DepGraph, MemoPdgConstructor, PartialGraph, }; use rustc_hash::FxHashMap; @@ -62,7 +62,7 @@ pub enum MetadataLoaderError { use MetadataLoaderError::*; -impl<'tcx> ArtifactLoader<'tcx> for MetadataLoader<'tcx> { +impl<'tcx> GraphLoader<'tcx> for MetadataLoader<'tcx> { fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>> { let res = self .get_metadata(function.krate) From 
d88d98ef2fc4965d3ff7278b86bbc17db926878a Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Fri, 31 May 2024 15:00:49 -0700 Subject: [PATCH 54/95] Unused trait import --- crates/flowistry_pdg_construction/src/construct.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index d2fd3724f4..43caa459ef 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -26,7 +26,7 @@ use rustc_middle::{ }; use rustc_mir_dataflow::{AnalysisDomain, Results, ResultsVisitor}; use rustc_span::ErrorGuaranteed; -use rustc_utils::{cache::Cache, PlaceExt}; +use rustc_utils::cache::Cache; use crate::{ async_support::*, From 0f56d7fa4db6e7debf6d876457d0d10845384021 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Fri, 31 May 2024 15:34:25 -0700 Subject: [PATCH 55/95] Fmt --- crates/paralegal-flow/src/ana/metadata.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index ff300d1b29..0fa2a5dd21 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -15,7 +15,7 @@ use std::path::Path; use std::{fs::File, io::Read, rc::Rc}; use flowistry_pdg_construction::{ - graph::InternedString, GraphLoader, Asyncness, DepGraph, MemoPdgConstructor, PartialGraph, + graph::InternedString, Asyncness, DepGraph, GraphLoader, MemoPdgConstructor, PartialGraph, }; use rustc_hash::FxHashMap; From 52370079851f880d668e098a69fbc2e53a909484 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 5 Jun 2024 14:47:55 -0700 Subject: [PATCH 56/95] Delaying emitting errors --- Cargo.lock | 1 + Cargo.toml | 1 + crates/flowistry_pdg_construction/Cargo.toml | 1 + .../src/async_support.rs | 15 +- .../src/construct.rs | 194 +++++++++++------- .../flowistry_pdg_construction/src/graph.rs | 24 ++- 
crates/flowistry_pdg_construction/src/lib.rs | 7 +- .../src/local_analysis.rs | 79 ++++--- .../flowistry_pdg_construction/src/utils.rs | 39 ++-- crates/paralegal-flow/Cargo.toml | 2 +- crates/paralegal-flow/src/ana/metadata.rs | 41 ++-- 11 files changed, 249 insertions(+), 155 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 521d977739..f5c4e054a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -494,6 +494,7 @@ dependencies = [ "rustc_utils 0.7.4-nightly-2023-08-25 (git+https://github.com/JustusAdam/rustc_plugin?rev=e990ded60afc928f76293fb9ad265c58405da1a7)", "serde", "strum", + "thiserror", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 625b66952b..2158df52df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ indexical = "0.3.1" serde = "1.0.188" petgraph = { version = "0.6", features = ["serde-1"] } strum = { version = "0.25", features = ["derive"] } +thiserror = "1" # rustc_utils = { version = "=0.7.4-nightly-2023-08-25", features = [ # "indexical", # ] } diff --git a/crates/flowistry_pdg_construction/Cargo.toml b/crates/flowistry_pdg_construction/Cargo.toml index a5320feb31..365af8352b 100644 --- a/crates/flowistry_pdg_construction/Cargo.toml +++ b/crates/flowistry_pdg_construction/Cargo.toml @@ -25,6 +25,7 @@ flowistry_pdg = { version = "0.1.0", path = "../flowistry_pdg", features = [ flowistry = { workspace = true } serde = { workspace = true, features = ["derive"] } strum = { workspace = true } +thiserror = { workspace = true } [dev-dependencies] rustc_utils = { workspace = true, features = ["indexical", "test"] } diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index 7f426b7465..3abd85bff7 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -195,11 +195,16 @@ pub enum AsyncDeterminationResult { } impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { - pub(crate) fn try_handle_as_async(&self) -> 
Option> { - let (generator_fn, location, asyncness) = - determine_async(self.tcx(), self.def_id, &self.body)?; + pub(crate) fn try_handle_as_async(&self) -> anyhow::Result>> { + let Some((generator_fn, location, asyncness)) = + determine_async(self.tcx(), self.def_id, &self.body) + else { + return Ok(None); + }; - let g = self.memo.construct_for(generator_fn)?; + let Some(g) = self.memo.construct_for(generator_fn)? else { + return Ok(None); + }; let gloc = GlobalLocation { function: self.def_id.to_def_id(), location: flowistry_pdg::RichLocation::Location(location), @@ -208,7 +213,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { //let g_generics = std::mem::replace(&mut new_g.graph.generics, self.generic_args()); new_g.asyncness = asyncness; new_g.monos.insert(CallString::single(gloc), new_g.generics); - Some(new_g) + Ok(Some(new_g)) } pub(crate) fn try_poll_call_kind<'b>( diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 43caa459ef..9ffcdf895c 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -10,6 +10,7 @@ use std::rc::Rc; +use anyhow::anyhow; use either::Either; use flowistry_pdg::{CallString, GlobalLocation}; @@ -18,14 +19,14 @@ use log::trace; use petgraph::graph::DiGraph; use rustc_hash::FxHashMap; -use rustc_hir::def_id::LocalDefId; +use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_index::IndexVec; +use rustc_macros::{Decodable, Encodable}; use rustc_middle::{ mir::{visit::Visitor, AggregateKind, Location, Place, Rvalue, Terminator, TerminatorKind}, ty::{GenericArgsRef, Instance, TyCtxt}, }; use rustc_mir_dataflow::{AnalysisDomain, Results, ResultsVisitor}; -use rustc_span::ErrorGuaranteed; use rustc_utils::cache::Cache; use crate::{ @@ -52,6 +53,39 @@ pub struct MemoPdgConstructor<'tcx> { pub(crate) loader: Box + 'tcx>, } +#[derive(Debug, thiserror::Error, Encodable, Decodable, Clone)] +pub enum ConstructionErr 
{ + // Would prefer to make `generics` `GenericArgsRef<'tcx>` but the `Error` + // implementation only allows `'static` types. + #[error("failed to resolve an instance for {function:?} with generic arguments {generics}")] + InstanceResolutionFailed { function: DefId, generics: String }, + #[error("entered impossible state")] + Impossible, + #[error("failed to load external function {function:?}")] + FailedLoadingExternalFunction { function: DefId }, + #[error("failed with rustc error")] + RustcReportedError, + #[error("crate exists but item is not found {function:?}")] + CrateExistsButItemIsNotFound { function: DefId }, + #[error("could not create generic arguments for {function:?} because too mah predicates were present ({number})")] + TooManyPredicatesForSynthesizingGenerics { function: DefId, number: u32 }, + #[error("found bound variables in predicates of {function:?}")] + BoundVariablesInPredicates { function: DefId }, + #[error("has trait ref with binder {function:?}")] + TraitRefWithBinder { function: DefId }, + #[error("cannot use constants as generic parameters in controllers")] + ConstantInGenerics { function: DefId }, +} + +impl ConstructionErr { + pub fn instance_resolution_failed(function: DefId, generics: GenericArgsRef) -> Self { + Self::InstanceResolutionFailed { + function, + generics: format!("{generics:?}"), + } + } +} + impl<'tcx> MemoPdgConstructor<'tcx> { /// Initialize the constructor, parameterized over an [`ArtifactLoader`] for /// retrieving PDGs of functions from dependencies. @@ -84,29 +118,38 @@ impl<'tcx> MemoPdgConstructor<'tcx> { /// Construct the intermediate PDG for this function. Instantiates any /// generic arguments as `dyn `. 
- pub fn construct_root<'a>(&'a self, function: LocalDefId) -> Option<&'a PartialGraph<'tcx>> { - let generics = manufacture_substs_for(self.tcx, function.to_def_id()).unwrap(); + pub fn construct_root<'a>( + &'a self, + function: LocalDefId, + ) -> Result<&'a PartialGraph<'tcx>, ConstructionErr> { + let generics = manufacture_substs_for(self.tcx, function.to_def_id())?; let resolution = try_resolve_function( self.tcx, function.to_def_id(), self.tcx.param_env_reveal_all_normalized(function), generics, - )?; + ) + .ok_or_else(|| { + ConstructionErr::instance_resolution_failed(function.to_def_id(), generics) + })?; self.construct_for(resolution) + .and_then(|f| f.ok_or(ConstructionErr::Impossible)) } pub(crate) fn construct_for<'a>( &'a self, resolution: Instance<'tcx>, - ) -> Option<&'a PartialGraph<'tcx>> { + ) -> Result>, ConstructionErr> { let def_id = resolution.def_id(); let generics = resolution.args; if let Some(local) = def_id.as_local() { - self.pdg_cache.get_maybe_recursive((local, generics), |_| { - let g = LocalAnalysis::new(self, resolution).construct_partial(); + Ok(self.pdg_cache.get_maybe_recursive((local, generics), |_| { + let g = LocalAnalysis::new(self, resolution) + .construct_partial() + .unwrap(); g.check_invariants(); g - }) + })) } else { self.loader.load(def_id) } @@ -117,23 +160,16 @@ impl<'tcx> MemoPdgConstructor<'tcx> { if let Some(local) = resolution.def_id().as_local() { self.pdg_cache.is_in_cache(&(local, resolution.args)) } else { - self.loader.load(resolution.def_id()).is_some() + matches!(self.loader.load(resolution.def_id()), Ok(Some(_))) } } /// Construct a final PDG for this function. Same as /// [`Self::construct_root`] this instantiates all generics as `dyn`. 
- pub fn construct_graph(&self, function: LocalDefId) -> Result, ErrorGuaranteed> { - let _args = manufacture_substs_for(self.tcx, function.to_def_id())?; - let g = self - .construct_root(function) - .ok_or_else(|| { - self.tcx.sess.span_err( - self.tcx.def_span(function), - "Could not construct graph for this function", - ) - })? - .to_petgraph(); + pub fn construct_graph(&self, function: LocalDefId) -> Result, ConstructionErr> { + let _args = manufacture_substs_for(self.tcx, function.to_def_id()) + .map_err(|_| anyhow!("rustc error")); + let g = self.construct_root(function)?.to_petgraph(); Ok(g) } } @@ -194,29 +230,29 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, &'mir LocalAnalysis<'t return; } - if self - .handle_as_inline(results, state, terminator, location) - .is_none() - { - trace!("Handling terminator {:?} as not inlined", terminator.kind); - let mut arg_vis = ModularMutationVisitor::new( - &results.analysis.place_info, - move |location, mutation| { - self.register_mutation( - results, - state, - Inputs::Unresolved { - places: mutation.inputs, - }, - Either::Left(mutation.mutated), - location, - mutation.mutation_reason, - ) - }, - ); - arg_vis.set_time(Time::Before); - arg_vis.visit_terminator(terminator, location); + match self.handle_as_inline(results, state, terminator, location) { + Ok(false) => (), + Ok(true) => return, + Err(e) => { + results.analysis.tcx().sess.warn(e.to_string()); + } } + trace!("Handling terminator {:?} as not inlined", terminator.kind); + let mut arg_vis = + ModularMutationVisitor::new(&results.analysis.place_info, move |location, mutation| { + self.register_mutation( + results, + state, + Inputs::Unresolved { + places: mutation.inputs, + }, + Either::Left(mutation.mutated), + location, + mutation.mutation_reason, + ) + }); + arg_vis.set_time(Time::Before); + arg_vis.visit_terminator(terminator, location); } fn visit_terminator_after_primary_effect( @@ -231,7 +267,7 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, 
Results<'tcx, &'mir LocalAnalysis<'t if matches!( constructor.determine_call_handling(location, func, args), - Some(CallHandling::Ready { .. }) + Ok(Some(CallHandling::Ready { .. })) ) { return; } @@ -276,13 +312,14 @@ impl<'tcx> PartialGraph<'tcx> { }) } + /// returns whether we were able to successfully handle this as inline fn handle_as_inline<'a>( &mut self, results: &Results<'tcx, &'a LocalAnalysis<'tcx, 'a>>, state: &<&'a LocalAnalysis<'tcx, 'a> as AnalysisDomain<'tcx>>::Domain, terminator: &Terminator<'tcx>, location: Location, - ) -> Option<()> { + ) -> anyhow::Result { let TerminatorKind::Call { func, args, @@ -290,7 +327,7 @@ impl<'tcx> PartialGraph<'tcx> { .. } = &terminator.kind else { - return None; + return Ok(false); }; let constructor = results.analysis; let gloc = GlobalLocation { @@ -298,36 +335,39 @@ impl<'tcx> PartialGraph<'tcx> { function: constructor.def_id.to_def_id(), }; - let (child_descriptor, calling_convention) = - match constructor.determine_call_handling(location, func, args)? { - CallHandling::Ready { - calling_convention, - descriptor, - } => (descriptor, calling_convention), - CallHandling::ApproxAsyncFn => { - // Register a synthetic assignment of `future = (arg0, arg1, ...)`. - let rvalue = Rvalue::Aggregate( - Box::new(AggregateKind::Tuple), - IndexVec::from_iter(args.iter().cloned()), - ); - self.modular_mutation_visitor(results, state).visit_assign( - destination, - &rvalue, - location, - ); - return Some(()); - } - CallHandling::ApproxAsyncSM(how) => { - how( - constructor, - &mut self.modular_mutation_visitor(results, state), - args, - *destination, - location, - ); - return Some(()); - } - }; + let Some(handling) = constructor.determine_call_handling(location, func, args)? 
else { + return Ok(false); + }; + + let (child_descriptor, calling_convention) = match handling { + CallHandling::Ready { + calling_convention, + descriptor, + } => (descriptor, calling_convention), + CallHandling::ApproxAsyncFn => { + // Register a synthetic assignment of `future = (arg0, arg1, ...)`. + let rvalue = Rvalue::Aggregate( + Box::new(AggregateKind::Tuple), + IndexVec::from_iter(args.iter().cloned()), + ); + self.modular_mutation_visitor(results, state).visit_assign( + destination, + &rvalue, + location, + ); + return Ok(true); + } + CallHandling::ApproxAsyncSM(how) => { + how( + constructor, + &mut self.modular_mutation_visitor(results, state), + args, + *destination, + location, + ); + return Ok(true); + } + }; let child_graph = push_call_string_root(child_descriptor, gloc); @@ -389,7 +429,7 @@ impl<'tcx> PartialGraph<'tcx> { self.monos.extend(child_graph.monos); self.monos .insert(CallString::single(gloc), child_descriptor.generics); - Some(()) + Ok(true) } fn register_mutation<'a>( diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index a8bf0de7d4..7e81fcf81f 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -24,7 +24,7 @@ use rustc_utils::PlaceExt; pub use flowistry_pdg::{RichLocation, SourceUse, TargetUse}; use serde::{Deserialize, Serialize}; -use crate::{utils::Captures, Asyncness}; +use crate::{construct::ConstructionErr, utils::Captures, Asyncness}; /// A node in the program dependency graph. /// @@ -404,29 +404,39 @@ impl<'tcx> TransformCallString for PartialGraph<'tcx> { /// Abstracts over how previously written [`Artifact`]s are retrieved, allowing /// the user of this module to chose where to store them. pub trait GraphLoader<'tcx> { - fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>>; + /// Try loading the graph for this function. 
+ /// + /// This is intended to return `Err` in cases where an expectation is + /// violated. For instance if we request a function from a crate that + /// *should* have been analyzed or if `function` does not refer to a + /// function item. + /// + /// This should return `Ok(None)` in cases where the target is not expected + /// to have its partial graph present. For instance if `function` refers to + /// an item in a crate that was not selected for analysis. + fn load(&self, function: DefId) -> Result>, ConstructionErr>; } /// Intermediate data that gets stored for each crate. pub type Artifact<'tcx> = FxHashMap>; -/// An [`ArtifactLoader`] that always returns `None`. +/// A [`GraphLoader`] that always returns `Ok(None)`. pub struct NoLoader; impl<'tcx> GraphLoader<'tcx> for NoLoader { - fn load(&self, _: DefId) -> Option<&PartialGraph<'tcx>> { - None + fn load(&self, _: DefId) -> Result>, ConstructionErr> { + Ok(None) } } impl<'tcx, T: GraphLoader<'tcx>> GraphLoader<'tcx> for Rc { - fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>> { + fn load(&self, function: DefId) -> Result>, ConstructionErr> { (**self).load(function) } } impl<'tcx, T: GraphLoader<'tcx>> GraphLoader<'tcx> for Box { - fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>> { + fn load(&self, function: DefId) -> Result>, ConstructionErr> { (**self).load(function) } } diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index 6b0c1a5846..e689597b6a 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -16,6 +16,7 @@ extern crate rustc_target; extern crate rustc_type_ir; pub use async_support::{determine_async, is_async_trait_fn, Asyncness}; +pub use construct::ConstructionErr; pub use graph::{Artifact, DepGraph, GraphLoader, NoLoader, PartialGraph}; pub mod callback; pub use crate::construct::MemoPdgConstructor; @@ -36,5 +37,9 @@ pub mod utils; /// Computes a
global program dependence graph (PDG) starting from the root function specified by `def_id`. pub fn compute_pdg<'tcx>(tcx: TyCtxt<'tcx>, params: Instance<'tcx>) -> DepGraph<'tcx> { let constructor = MemoPdgConstructor::new(tcx, NoLoader); - constructor.construct_for(params).unwrap().to_petgraph() + constructor + .construct_for(params) + .and_then(|f| f.ok_or(ConstructionErr::Impossible.into())) + .unwrap() + .to_petgraph() } diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index 32cc1bb5d0..88b5d97e8d 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -1,5 +1,6 @@ use std::{collections::HashSet, iter, rc::Rc}; +use anyhow::anyhow; use flowistry::mir::placeinfo::PlaceInfo; use flowistry_pdg::{CallString, GlobalLocation, RichLocation}; use itertools::Itertools; @@ -27,6 +28,7 @@ use crate::{ approximation::ApproximationHandler, async_support::*, calling_convention::*, + construct::ConstructionErr, graph::{DepEdge, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, utils::{self, is_async, is_non_default_trait_method, try_monomorphize}, @@ -343,16 +345,21 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { location: Location, func: &Operand<'tcx>, args: &'b [Operand<'tcx>], - ) -> Option> { + ) -> anyhow::Result>> { let tcx = self.tcx(); - let (called_def_id, generic_args) = self.operand_to_def_id(func)?; + let (called_def_id, generic_args) = self + .operand_to_def_id(func) + .ok_or_else(|| anyhow!("operand {func:?} is not of function type"))?; trace!("Resolved call to function: {}", self.fmt_fn(called_def_id)); // Monomorphize the called function with the known generic_args. 
let param_env = tcx.param_env_reveal_all_normalized(self.def_id); let resolved_fn = - utils::try_resolve_function(self.tcx(), called_def_id, param_env, generic_args)?; + utils::try_resolve_function(self.tcx(), called_def_id, param_env, generic_args) + .ok_or_else(|| { + ConstructionErr::instance_resolution_failed(called_def_id, generic_args) + })?; trace!("resolved to instance {resolved_fn:?}"); let resolved_def_id = resolved_fn.def_id(); if log_enabled!(Level::Trace) && called_def_id != resolved_def_id { @@ -362,11 +369,11 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { if is_non_default_trait_method(tcx, resolved_def_id).is_some() { trace!(" bailing because is unresolvable trait method"); - return None; + return Ok(None); } if let Some(handler) = self.can_approximate_async_functions(resolved_def_id) { - return Some(CallHandling::ApproxAsyncSM(handler)); + return Ok(Some(CallHandling::ApproxAsyncSM(handler))); }; let call_kind = match self.classify_call_kind(called_def_id, resolved_def_id, args) { @@ -380,7 +387,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { InlineMissReason::Async(async_err), ) } - return None; + return Ok(None); } }; @@ -430,14 +437,14 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { // If a skip was requested then "poll" will not be inlined later so we // bail with "None" here and perform the mutations. Otherwise we bail with // "Some", knowing that handling "poll" later will handle the mutations. - return (!matches!( + return Ok((!matches!( &call_changes, Some(CallChanges { skip: SkipCall::Skip, .. }) )) - .then_some(CallHandling::ApproxAsyncFn); + .then_some(CallHandling::ApproxAsyncFn)); } if matches!( @@ -448,19 +455,21 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { }) ) { trace!(" Bailing because user callback said to bail"); - return None; + return Ok(None); } - let Some(descriptor) = self.memo.construct_for(cache_key) else { + let Some(descriptor) = self.memo.construct_for(cache_key)? 
else { trace!(" Bailing because cache lookup {cache_key} failed"); - return None; + return Ok(None); }; - Some(CallHandling::Ready { + Ok(Some(CallHandling::Ready { descriptor, calling_convention, - }) + })) } - /// Attempt to inline a call to a function, returning None if call is not inline-able. + /// Attempt to inline a call to a function. + /// + /// The return indicates whether we were successfully able to perform the inlining. fn handle_call( &self, state: &mut InstructionState<'tcx>, @@ -468,11 +477,13 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { func: &Operand<'tcx>, args: &[Operand<'tcx>], destination: Place<'tcx>, - ) -> Option<()> { + ) -> anyhow::Result { // Note: my comments here will use "child" to refer to the callee and // "parent" to refer to the caller, since the words are most visually distinct. - let preamble = self.determine_call_handling(location, func, args)?; + let Some(preamble) = self.determine_call_handling(location, func, args)? else { + return Ok(false); + }; trace!("Call handling is {}", preamble.as_ref()); @@ -489,7 +500,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { ); self.modular_mutation_visitor(state) .visit_assign(&destination, &rvalue, location); - return Some(()); + return Ok(true); } CallHandling::ApproxAsyncSM(handler) => { handler( @@ -499,7 +510,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { destination, location, ); - return Some(()); + return Ok(true); } }; @@ -528,7 +539,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { } } - Some(()) + Ok(true) } fn modular_mutation_visitor<'b: 'a>( @@ -561,24 +572,24 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { .. 
} = &terminator.kind { - if self - .handle_call(state, location, func, args, *destination) - .is_none() - { - trace!("Terminator {:?} failed the preamble", terminator.kind); - self.terminator_visitor(state, time) - .visit_terminator(terminator, location) + match self.handle_call(state, location, func, args, *destination) { + Err(e) => { + self.tcx().sess.warn(e.to_string()); + } + Ok(false) => { + trace!("Terminator {:?} failed the preamble", terminator.kind); + } + Ok(true) => return, } - } else { - // Fallback: call the visitor - self.terminator_visitor(state, time) - .visit_terminator(terminator, location) } + // Fallback: call the visitor + self.terminator_visitor(state, time) + .visit_terminator(terminator, location) } - pub(crate) fn construct_partial(&'a self) -> PartialGraph<'tcx> { - if let Some(g) = self.try_handle_as_async() { - return g; + pub(crate) fn construct_partial(&'a self) -> anyhow::Result> { + if let Some(g) = self.try_handle_as_async()? { + return Ok(g); } let mut analysis = self @@ -620,7 +631,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { } } - final_state + Ok(final_state) } /// Determine the type of call-site. 
diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index f4f8621122..82d7450c89 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -20,6 +20,8 @@ use rustc_span::ErrorGuaranteed; use rustc_type_ir::{fold::TypeFoldable, AliasKind}; use rustc_utils::{BodyExt, PlaceExt}; +use crate::construct::ConstructionErr; + pub trait Captures<'a> {} impl<'a, T: ?Sized> Captures<'a> for T {} @@ -216,7 +218,7 @@ pub fn ty_resolve<'tcx>(ty: Ty<'tcx>, tcx: TyCtxt<'tcx>) -> Ty<'tcx> { pub fn manufacture_substs_for( tcx: TyCtxt<'_>, function: DefId, -) -> Result<&List>, ErrorGuaranteed> { +) -> Result<&List>, ConstructionErr> { use rustc_middle::ty::{ Binder, BoundRegionKind, DynKind, ExistentialPredicate, ExistentialProjection, ExistentialTraitRef, GenericParamDefKind, ImplPolarity, ParamTy, Region, TraitPredicate, @@ -245,17 +247,14 @@ pub fn manufacture_substs_for( ))) } GenericParamDefKind::Const { .. } => { - return Err(tcx.sess.span_err( - tcx.def_span(param.def_id), - "Cannot use constants as generic parameters in controllers", - )) + return Err(ConstructionErr::ConstantInGenerics { function }); } GenericParamDefKind::Type { .. } => (), }; let param_as_ty = ParamTy::for_def(param); let constraints = predicates.predicates.iter().enumerate().rev().filter_map( - |(pidx, clause)| { + |(_pidx, clause)| { trace!(" Trying clause {clause:?}"); let pred = if let Some(trait_ref) = clause.as_trait_clause() { trace!(" is trait clause"); @@ -264,10 +263,7 @@ pub fn manufacture_substs_for( return None; }; let Some(TraitPredicate { trait_ref, .. 
}) = trait_ref.no_bound_vars() else { - return Some(Err(tcx.sess.span_err( - tcx.def_span(param.def_id), - format!("Trait ref had binder {trait_ref:?}"), - ))); + return Some(Err(ConstructionErr::TraitRefWithBinder { function })); }; if !matches!(trait_ref.self_ty().kind(), TyKind::Param(p) if *p == param_as_ty) { @@ -284,9 +280,9 @@ pub fn manufacture_substs_for( } else if let Some(pred) = clause.as_projection_clause() { trace!(" is projection clause"); let Some(pred) = pred.no_bound_vars() else { - return Some(Err(tcx - .sess - .span_err(predicates.spans[pidx], "Bound vars in predicate"))); + return Some(Err(ConstructionErr::BoundVariablesInPredicates { + function: function, + })); }; if !matches!(pred.self_ty().kind(), TyKind::Param(p) if *p == param_as_ty) { trace!(" Bailing because self type is not param type"); @@ -308,10 +304,21 @@ pub fn manufacture_substs_for( let mut predicates = constraints.collect::, _>>()?; trace!(" collected predicates {predicates:?}"); match predicates.len() { - 0 => predicates.push(Binder::dummy(ExistentialPredicate::Trait(ExistentialTraitRef { def_id: tcx.get_diagnostic_item(rustc_span::sym::Any).expect("The `Any` item is not defined."), args: List::empty() }))), + 0 => predicates.push(Binder::dummy(ExistentialPredicate::Trait( + ExistentialTraitRef { + def_id: tcx + .get_diagnostic_item(rustc_span::sym::Any) + .expect("The `Any` item is not defined."), + args: List::empty(), + }, + ))), 1 => (), - _ => - return Err(tcx.sess.span_err(tcx.def_span(function), format!("Could not create dynamic arguments for this function because more than one predicate were required: {predicates:?}"))), + _ => { + return Err(ConstructionErr::TooManyPredicatesForSynthesizingGenerics { + function: function, + number: predicates.len() as u32, + }) + } }; let poly_predicate = tcx.mk_poly_existential_predicates_from_iter(predicates.into_iter()); trace!(" poly predicate {poly_predicate:?}"); diff --git a/crates/paralegal-flow/Cargo.toml 
b/crates/paralegal-flow/Cargo.toml index 85edf0cb5c..662a41380c 100644 --- a/crates/paralegal-flow/Cargo.toml +++ b/crates/paralegal-flow/Cargo.toml @@ -43,7 +43,7 @@ enum-map = "2.7" serial_test = "2.0.0" itertools = "0.12" anyhow = "1.0.72" -thiserror = "1" +thiserror = { workspace = true } serde_bare = "0.5.0" toml = "0.7" diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 0fa2a5dd21..ac8907dcc3 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -15,7 +15,8 @@ use std::path::Path; use std::{fs::File, io::Read, rc::Rc}; use flowistry_pdg_construction::{ - graph::InternedString, Asyncness, DepGraph, GraphLoader, MemoPdgConstructor, PartialGraph, + graph::InternedString, Asyncness, ConstructionErr, DepGraph, GraphLoader, MemoPdgConstructor, + PartialGraph, }; use rustc_hash::FxHashMap; @@ -31,7 +32,7 @@ use rustc_middle::{ }; use rustc_serialize::{Decodable, Encodable}; -use anyhow::Result; +use anyhow::{anyhow, Result}; use rustc_utils::{cache::Cache, mir::borrowck_facts}; use thiserror::Error; @@ -63,13 +64,18 @@ pub enum MetadataLoaderError { use MetadataLoaderError::*; impl<'tcx> GraphLoader<'tcx> for MetadataLoader<'tcx> { - fn load(&self, function: DefId) -> Option<&PartialGraph<'tcx>> { - let res = self - .get_metadata(function.krate) - .ok()? + fn load(&self, function: DefId) -> Result>, ConstructionErr> { + let Ok(meta) = self.get_metadata(function.krate) else { + return Ok(None); + }; + let res = meta .pdgs - .get(&function.index); - res + .get(&function.index) + .ok_or(ConstructionErr::CrateExistsButItemIsNotFound { function })? 
+ .as_ref() + .map_err(Clone::clone)?; + + Ok(Some(res)) } } @@ -99,10 +105,8 @@ impl<'tcx> MetadataLoader<'tcx> { let pdgs = emit_targets .into_iter() .map(|t| { - ( - t.local_def_index, - (*constructor.construct_root(t).unwrap()).clone(), - ) + let graph = constructor.construct_root(t); + (t.local_def_index, graph.map(Clone::clone)) }) .collect::>(); let meta = Metadata::from_pdgs(tcx, pdgs, marker_ctx.db()); @@ -152,13 +156,15 @@ impl<'tcx> MetadataLoader<'tcx> { } } +pub type PdgMap<'tcx> = FxHashMap, ConstructionErr>>; + /// Intermediate artifacts stored on disc for every crate. /// /// Contains PDGs and reduced information about the source code that is needed /// downstream. #[derive(Clone, Debug, TyEncodable, TyDecodable)] pub struct Metadata<'tcx> { - pub pdgs: FxHashMap>, + pub pdgs: PdgMap<'tcx>, pub bodies: FxHashMap>, pub local_annotations: HashMap>, pub reachable_markers: HashMap<(DefIndex, GenericArgsRef<'tcx>), Box<[InternedString]>>, @@ -177,12 +183,13 @@ impl<'tcx> Metadata<'tcx> { /// record from rustc and return a serializable metadata artifact. pub fn from_pdgs( tcx: TyCtxt<'tcx>, - pdgs: FxHashMap>, + pdgs: PdgMap<'tcx>, markers: &MarkerDatabase<'tcx>, ) -> Self { let mut bodies: FxHashMap = Default::default(); for call_string in pdgs .values() + .filter_map(|e| e.as_ref().ok()) .flat_map(|subgraph| subgraph.mentioned_call_string()) { for location in call_string.iter() { @@ -263,6 +270,8 @@ impl<'tcx> MetadataLoader<'tcx> { .pdgs .get(&key.index) .ok_or(PdgForItemMissing(key))? + .as_ref() + .map_err(Clone::clone)? .get_mono(cs) .ok_or(NoGenericsKnownForCallSite(cs))?) } @@ -273,6 +282,8 @@ impl<'tcx> MetadataLoader<'tcx> { .pdgs .get(&key.index) .ok_or(PdgForItemMissing(key))? + .as_ref() + .map_err(Clone::clone)? .to_petgraph()) } @@ -283,6 +294,8 @@ impl<'tcx> MetadataLoader<'tcx> { .ok()? .pdgs .get(&key.index)? + .as_ref() + .ok()? 
.asyncness(), ) })() From 514d8f7a712c9d7837bd376e795803372b2fd474 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 5 Jun 2024 16:31:21 -0700 Subject: [PATCH 57/95] Accumulate and propagate errors --- .../src/async_support.rs | 6 +- .../src/construct.rs | 176 ++++++++++++------ .../flowistry_pdg_construction/src/graph.rs | 10 +- crates/flowistry_pdg_construction/src/lib.rs | 3 +- .../src/local_analysis.rs | 100 +++++----- crates/paralegal-flow/src/ana/metadata.rs | 47 +++-- 6 files changed, 213 insertions(+), 129 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index 3abd85bff7..a175a0d114 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -17,7 +17,7 @@ use rustc_middle::{ use crate::{ graph::push_call_string_root, local_analysis::{CallKind, LocalAnalysis}, - utils, PartialGraph, + utils, ConstructionErr, PartialGraph, }; /// Describe in which way a function is `async`. 
@@ -195,7 +195,9 @@ pub enum AsyncDeterminationResult { } impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { - pub(crate) fn try_handle_as_async(&self) -> anyhow::Result>> { + pub(crate) fn try_handle_as_async( + &self, + ) -> Result>, Vec> { let Some((generator_fn, location, asyncness)) = determine_async(self.tcx(), self.def_id, &self.body) else { diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 9ffcdf895c..31e0f62934 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -15,15 +15,18 @@ use either::Either; use flowistry_pdg::{CallString, GlobalLocation}; +use itertools::Itertools; use log::trace; use petgraph::graph::DiGraph; -use rustc_hash::FxHashMap; +use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_index::IndexVec; use rustc_macros::{Decodable, Encodable}; use rustc_middle::{ - mir::{visit::Visitor, AggregateKind, Location, Place, Rvalue, Terminator, TerminatorKind}, + mir::{ + visit::Visitor, AggregateKind, Location, Operand, Place, Rvalue, Terminator, TerminatorKind, + }, ty::{GenericArgsRef, Instance, TyCtxt}, }; use rustc_mir_dataflow::{AnalysisDomain, Results, ResultsVisitor}; @@ -53,7 +56,7 @@ pub struct MemoPdgConstructor<'tcx> { pub(crate) loader: Box + 'tcx>, } -#[derive(Debug, thiserror::Error, Encodable, Decodable, Clone)] +#[derive(Debug, thiserror::Error, Encodable, Decodable, Clone, Hash, Eq, PartialEq)] pub enum ConstructionErr { // Would prefer to make `generics` `GenericArgsRef<'tcx>` but the `Error` // implementation only allows `'static` types. 
@@ -67,7 +70,7 @@ pub enum ConstructionErr { RustcReportedError, #[error("crate exists but item is not found {function:?}")] CrateExistsButItemIsNotFound { function: DefId }, - #[error("could not create generic arguments for {function:?} because too mah predicates were present ({number})")] + #[error("could not create generic arguments for {function:?} because too many predicates were present ({number})")] TooManyPredicatesForSynthesizingGenerics { function: DefId, number: u32 }, #[error("found bound variables in predicates of {function:?}")] BoundVariablesInPredicates { function: DefId }, @@ -75,6 +78,8 @@ pub enum ConstructionErr { TraitRefWithBinder { function: DefId }, #[error("cannot use constants as generic parameters in controllers")] ConstantInGenerics { function: DefId }, + #[error("operand is not function type {op}")] + OperandIsNotFunctionType { op: String }, } impl ConstructionErr { @@ -84,6 +89,12 @@ impl ConstructionErr { generics: format!("{generics:?}"), } } + + pub fn operand_is_not_function_type(op: &Operand) -> Self { + Self::OperandIsNotFunctionType { + op: format!("{op:?}"), + } + } } impl<'tcx> MemoPdgConstructor<'tcx> { @@ -121,8 +132,9 @@ impl<'tcx> MemoPdgConstructor<'tcx> { pub fn construct_root<'a>( &'a self, function: LocalDefId, - ) -> Result<&'a PartialGraph<'tcx>, ConstructionErr> { - let generics = manufacture_substs_for(self.tcx, function.to_def_id())?; + ) -> Result<&'a PartialGraph<'tcx>, Vec> { + let generics = + manufacture_substs_for(self.tcx, function.to_def_id()).map_err(|i| vec![i])?; let resolution = try_resolve_function( self.tcx, function.to_def_id(), @@ -130,26 +142,31 @@ impl<'tcx> MemoPdgConstructor<'tcx> { generics, ) .ok_or_else(|| { - ConstructionErr::instance_resolution_failed(function.to_def_id(), generics) + vec![ConstructionErr::instance_resolution_failed( + function.to_def_id(), + generics, + )] })?; self.construct_for(resolution) - .and_then(|f| f.ok_or(ConstructionErr::Impossible)) + .and_then(|f| 
f.ok_or(vec![ConstructionErr::Impossible])) } pub(crate) fn construct_for<'a>( &'a self, resolution: Instance<'tcx>, - ) -> Result>, ConstructionErr> { + ) -> Result>, Vec> { let def_id = resolution.def_id(); let generics = resolution.args; if let Some(local) = def_id.as_local() { - Ok(self.pdg_cache.get_maybe_recursive((local, generics), |_| { - let g = LocalAnalysis::new(self, resolution) - .construct_partial() - .unwrap(); - g.check_invariants(); - g - })) + self.pdg_cache + .get_maybe_recursive((local, generics), |_| { + let g = LocalAnalysis::new(self, resolution).construct_partial()?; + g.check_invariants(); + Ok(g) + }) + .map(Result::as_ref) + .transpose() + .map_err(Clone::clone) } else { self.loader.load(def_id) } @@ -166,7 +183,10 @@ impl<'tcx> MemoPdgConstructor<'tcx> { /// Construct a final PDG for this function. Same as /// [`Self::construct_root`] this instantiates all generics as `dyn`. - pub fn construct_graph(&self, function: LocalDefId) -> Result, ConstructionErr> { + pub fn construct_graph( + &self, + function: LocalDefId, + ) -> Result, Vec> { let _args = manufacture_substs_for(self.tcx, function.to_def_id()) .map_err(|_| anyhow!("rustc error")); let g = self.construct_root(function)?.to_petgraph(); @@ -174,19 +194,45 @@ impl<'tcx> MemoPdgConstructor<'tcx> { } } -impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>> - for PartialGraph<'tcx> +pub(crate) struct WithConstructionErrors { + pub(crate) inner: A, + pub errors: FxHashSet, +} + +impl WithConstructionErrors { + pub fn new(inner: A) -> Self { + Self { + inner, + errors: Default::default(), + } + } + + pub fn into_result(self) -> Result> { + if self.errors.is_empty() { + Ok(self.inner) + } else { + Err(self.errors.into_iter().collect()) + } + } +} + +type DfResults<'mir, 'tcx> = Results<'tcx, DfAna<'mir, 'tcx>>; + +type DfAna<'mir, 'tcx> = WithConstructionErrors<&'mir LocalAnalysis<'tcx, 'mir>>; + +impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, 
DfResults<'mir, 'tcx>> + for WithConstructionErrors> { - type FlowState = <&'mir LocalAnalysis<'tcx, 'mir> as AnalysisDomain<'tcx>>::Domain; + type FlowState = as AnalysisDomain<'tcx>>::Domain; fn visit_statement_before_primary_effect( &mut self, - results: &Results<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>, + results: &DfResults<'mir, 'tcx>, state: &Self::FlowState, statement: &'mir rustc_middle::mir::Statement<'tcx>, location: Location, ) { - let mut vis = self.modular_mutation_visitor(results, state); + let mut vis = self.inner.modular_mutation_visitor(results, state); vis.visit_statement(statement, location) } @@ -209,14 +255,14 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, &'mir LocalAnalysis<'t /// call site. fn visit_terminator_before_primary_effect( &mut self, - results: &Results<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>, + results: &DfResults<'mir, 'tcx>, state: &Self::FlowState, terminator: &'mir rustc_middle::mir::Terminator<'tcx>, location: Location, ) { if let TerminatorKind::SwitchInt { discr, .. 
} = &terminator.kind { if let Some(place) = discr.place() { - self.register_mutation( + self.inner.register_mutation( results, state, Inputs::Unresolved { @@ -230,17 +276,19 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, &'mir LocalAnalysis<'t return; } - match self.handle_as_inline(results, state, terminator, location) { + match self + .inner + .handle_as_inline(results, state, terminator, location) + { Ok(false) => (), Ok(true) => return, - Err(e) => { - results.analysis.tcx().sess.warn(e.to_string()); - } + Err(e) => self.errors.extend(e), } trace!("Handling terminator {:?} as not inlined", terminator.kind); - let mut arg_vis = - ModularMutationVisitor::new(&results.analysis.place_info, move |location, mutation| { - self.register_mutation( + let mut arg_vis = ModularMutationVisitor::new( + &results.analysis.inner.place_info, + move |location, mutation| { + self.inner.register_mutation( results, state, Inputs::Unresolved { @@ -250,20 +298,21 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, &'mir LocalAnalysis<'t location, mutation.mutation_reason, ) - }); + }, + ); arg_vis.set_time(Time::Before); arg_vis.visit_terminator(terminator, location); } fn visit_terminator_after_primary_effect( &mut self, - results: &Results<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>, - state: &<&'mir LocalAnalysis<'tcx, 'mir> as AnalysisDomain<'tcx>>::Domain, + results: &DfResults<'mir, 'tcx>, + state: &Self::FlowState, terminator: &'mir rustc_middle::mir::Terminator<'tcx>, location: Location, ) { if let TerminatorKind::Call { func, args, .. 
} = &terminator.kind { - let constructor = results.analysis; + let constructor = results.analysis.inner; if matches!( constructor.determine_call_handling(location, func, args), @@ -274,9 +323,10 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, &'mir LocalAnalysis<'t } trace!("Handling terminator {:?} as not inlined", terminator.kind); - let mut arg_vis = - ModularMutationVisitor::new(&results.analysis.place_info, move |location, mutation| { - self.register_mutation( + let mut arg_vis = ModularMutationVisitor::new( + &results.analysis.inner.place_info, + move |location, mutation| { + self.inner.register_mutation( results, state, Inputs::Unresolved { @@ -286,7 +336,8 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, &'mir LocalAnalysis<'t location, mutation.mutation_reason, ) - }); + }, + ); arg_vis.set_time(Time::After); arg_vis.visit_terminator(terminator, location); } @@ -295,31 +346,34 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, Results<'tcx, &'mir LocalAnalysis<'t impl<'tcx> PartialGraph<'tcx> { fn modular_mutation_visitor<'a, 'mir>( &'a mut self, - results: &'a Results<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>, + results: &'a DfResults<'mir, 'tcx>, state: &'a InstructionState<'tcx>, ) -> ModularMutationVisitor<'a, 'tcx, impl FnMut(Location, Mutation<'tcx>) + 'a> { - ModularMutationVisitor::new(&results.analysis.place_info, move |location, mutation| { - self.register_mutation( - results, - state, - Inputs::Unresolved { - places: mutation.inputs, - }, - Either::Left(mutation.mutated), - location, - mutation.mutation_reason, - ) - }) + ModularMutationVisitor::new( + &results.analysis.inner.place_info, + move |location, mutation| { + self.register_mutation( + results, + state, + Inputs::Unresolved { + places: mutation.inputs, + }, + Either::Left(mutation.mutated), + location, + mutation.mutation_reason, + ) + }, + ) } /// returns whether we were able to successfully handle this as inline fn handle_as_inline<'a>( &mut self, - results: 
&Results<'tcx, &'a LocalAnalysis<'tcx, 'a>>, - state: &<&'a LocalAnalysis<'tcx, 'a> as AnalysisDomain<'tcx>>::Domain, + results: &DfResults<'a, 'tcx>, + state: &'a InstructionState<'tcx>, terminator: &Terminator<'tcx>, location: Location, - ) -> anyhow::Result { + ) -> Result> { let TerminatorKind::Call { func, args, @@ -329,7 +383,7 @@ impl<'tcx> PartialGraph<'tcx> { else { return Ok(false); }; - let constructor = results.analysis; + let constructor = results.analysis.inner; let gloc = GlobalLocation { location: location.into(), function: constructor.def_id.to_def_id(), @@ -434,7 +488,7 @@ impl<'tcx> PartialGraph<'tcx> { fn register_mutation<'a>( &mut self, - results: &Results<'tcx, &'a LocalAnalysis<'tcx, 'a>>, + results: &DfResults<'a, 'tcx>, state: &InstructionState<'tcx>, inputs: Inputs<'tcx>, mutated: Either, DepNode<'tcx>>, @@ -442,7 +496,7 @@ impl<'tcx> PartialGraph<'tcx> { target_use: TargetUse, ) { trace!("Registering mutation to {mutated:?} with inputs {inputs:?} at {location:?}"); - let constructor = results.analysis; + let constructor = results.analysis.inner; let ctrl_inputs = constructor.find_control_inputs(location); trace!(" Found control inputs {ctrl_inputs:?}"); @@ -468,8 +522,7 @@ impl<'tcx> PartialGraph<'tcx> { let outputs = match mutated { Either::Right(node) => vec![node], - Either::Left(place) => results - .analysis + Either::Left(place) => constructor .find_outputs(place, location) .into_iter() .map(|t| t.1) @@ -504,7 +557,8 @@ impl<'tcx> PartialGraph<'tcx> { } } -type PdgCache<'tcx> = Rc), PartialGraph<'tcx>>>; +type PdgCache<'tcx> = + Rc), Result, Vec>>>; #[derive(Debug)] enum Inputs<'tcx> { diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 7e81fcf81f..bd92bd1d2e 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -401,6 +401,8 @@ impl<'tcx> TransformCallString for PartialGraph<'tcx> { } } +pub type 
GraphLoaderError = Vec; + /// Abstracts over how previously written [`Artifact`]s are retrieved, allowing /// the user of this module to chose where to store them. pub trait GraphLoader<'tcx> { @@ -414,7 +416,7 @@ pub trait GraphLoader<'tcx> { /// This should return `Ok(None)` in cases where the target is not expected /// to have it's partial graph present. For instance if `function` refers to /// an item in a crate that was not selected for analysis. - fn load(&self, function: DefId) -> Result>, ConstructionErr>; + fn load(&self, function: DefId) -> Result>, GraphLoaderError>; } /// Intermediate data that gets stored for each crate. @@ -424,19 +426,19 @@ pub type Artifact<'tcx> = FxHashMap>; pub struct NoLoader; impl<'tcx> GraphLoader<'tcx> for NoLoader { - fn load(&self, _: DefId) -> Result>, ConstructionErr> { + fn load(&self, _: DefId) -> Result>, GraphLoaderError> { Ok(None) } } impl<'tcx, T: GraphLoader<'tcx>> GraphLoader<'tcx> for Rc { - fn load(&self, function: DefId) -> Result>, ConstructionErr> { + fn load(&self, function: DefId) -> Result>, GraphLoaderError> { (**self).load(function) } } impl<'tcx, T: GraphLoader<'tcx>> GraphLoader<'tcx> for Box { - fn load(&self, function: DefId) -> Result>, ConstructionErr> { + fn load(&self, function: DefId) -> Result>, GraphLoaderError> { (**self).load(function) } } diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index e689597b6a..850c6220ee 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -39,7 +39,8 @@ pub fn compute_pdg<'tcx>(tcx: TyCtxt<'tcx>, params: Instance<'tcx>) -> DepGraph< let constructor = MemoPdgConstructor::new(tcx, NoLoader); constructor .construct_for(params) - .and_then(|f| f.ok_or(ConstructionErr::Impossible.into())) + .unwrap() + .ok_or(ConstructionErr::Impossible) .unwrap() .to_petgraph() } diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs 
b/crates/flowistry_pdg_construction/src/local_analysis.rs index 88b5d97e8d..b484d3426e 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -28,7 +28,7 @@ use crate::{ approximation::ApproximationHandler, async_support::*, calling_convention::*, - construct::ConstructionErr, + construct::{ConstructionErr, WithConstructionErrors}, graph::{DepEdge, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, utils::{self, is_async, is_non_default_trait_method, try_monomorphize}, @@ -345,12 +345,12 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { location: Location, func: &Operand<'tcx>, args: &'b [Operand<'tcx>], - ) -> anyhow::Result>> { + ) -> Result>, Vec> { let tcx = self.tcx(); let (called_def_id, generic_args) = self .operand_to_def_id(func) - .ok_or_else(|| anyhow!("operand {func:?} is not of function type"))?; + .ok_or_else(|| vec![ConstructionErr::operand_is_not_function_type(func)])?; trace!("Resolved call to function: {}", self.fmt_fn(called_def_id)); // Monomorphize the called function with the known generic_args. @@ -358,7 +358,10 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { let resolved_fn = utils::try_resolve_function(self.tcx(), called_def_id, param_env, generic_args) .ok_or_else(|| { - ConstructionErr::instance_resolution_failed(called_def_id, generic_args) + vec![ConstructionErr::instance_resolution_failed( + called_def_id, + generic_args, + )] })?; trace!("resolved to instance {resolved_fn:?}"); let resolved_def_id = resolved_fn.def_id(); @@ -477,7 +480,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { func: &Operand<'tcx>, args: &[Operand<'tcx>], destination: Place<'tcx>, - ) -> anyhow::Result { + ) -> Result> { // Note: my comments here will use "child" to refer to the callee and // "parent" to refer to the caller, since the words are most visually distinct. 
@@ -558,53 +561,30 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { self.root.args } - fn handle_terminator( - &self, - terminator: &Terminator<'tcx>, - state: &mut InstructionState<'tcx>, - location: Location, - time: Time, - ) { - if let TerminatorKind::Call { - func, - args, - destination, - .. - } = &terminator.kind - { - match self.handle_call(state, location, func, args, *destination) { - Err(e) => { - self.tcx().sess.warn(e.to_string()); - } - Ok(false) => { - trace!("Terminator {:?} failed the preamble", terminator.kind); - } - Ok(true) => return, - } - } - // Fallback: call the visitor - self.terminator_visitor(state, time) - .visit_terminator(terminator, location) - } - - pub(crate) fn construct_partial(&'a self) -> anyhow::Result> { + pub(crate) fn construct_partial(&'a self) -> Result, Vec> { if let Some(g) = self.try_handle_as_async()? { return Ok(g); } - let mut analysis = self + let mut analysis = WithConstructionErrors::new(self) .into_engine(self.tcx(), &self.body) .iterate_to_fixpoint(); - let mut final_state = PartialGraph::new( + if !analysis.analysis.errors.is_empty() { + return Err(analysis.analysis.errors.into_iter().collect()); + } + + let mut final_state = WithConstructionErrors::new(PartialGraph::new( Asyncness::No, self.generic_args(), self.def_id.to_def_id(), self.body.arg_count, - ); + )); analysis.visit_reachable_with(&self.body, &mut final_state); + let mut final_state = final_state.into_result()?; + let all_returns = self.body.all_returns().map(|ret| ret.block).collect_vec(); let mut analysis = analysis.into_results_cursor(&self.body); for block in all_returns { @@ -669,10 +649,45 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { } } -impl<'tcx, 'a> df::AnalysisDomain<'tcx> for &'a LocalAnalysis<'tcx, 'a> { +impl<'tcx, 'a> WithConstructionErrors<&'_ LocalAnalysis<'tcx, 'a>> { + fn handle_terminator( + &mut self, + terminator: &Terminator<'tcx>, + state: &mut InstructionState<'tcx>, + location: Location, + time: Time, + ) { + if let 
TerminatorKind::Call { + func, + args, + destination, + .. + } = &terminator.kind + { + match self + .inner + .handle_call(state, location, func, args, *destination) + { + Err(e) => { + self.errors.extend(e); + } + Ok(false) => { + trace!("Terminator {:?} failed the preamble", terminator.kind); + } + Ok(true) => return, + } + } + // Fallback: call the visitor + self.inner + .terminator_visitor(state, time) + .visit_terminator(terminator, location) + } +} + +impl<'tcx, 'a> df::AnalysisDomain<'tcx> for WithConstructionErrors<&'a LocalAnalysis<'tcx, 'a>> { type Domain = InstructionState<'tcx>; - const NAME: &'static str = "LocalDGPConstruction"; + const NAME: &'static str = "LocalPdgConstruction"; fn bottom_value(&self, _body: &Body<'tcx>) -> Self::Domain { InstructionState::default() @@ -681,14 +696,15 @@ impl<'tcx, 'a> df::AnalysisDomain<'tcx> for &'a LocalAnalysis<'tcx, 'a> { fn initialize_start_block(&self, _body: &Body<'tcx>, _state: &mut Self::Domain) {} } -impl<'a, 'tcx> df::Analysis<'tcx> for &'a LocalAnalysis<'tcx, 'a> { +impl<'a, 'tcx> df::Analysis<'tcx> for WithConstructionErrors<&'a LocalAnalysis<'tcx, 'a>> { fn apply_statement_effect( &mut self, state: &mut Self::Domain, statement: &Statement<'tcx>, location: Location, ) { - self.modular_mutation_visitor(state) + self.inner + .modular_mutation_visitor(state) .visit_statement(statement, location) } diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index ac8907dcc3..341f1e7f37 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -11,7 +11,7 @@ use crate::{ Args, DefId, HashMap, MarkerCtx, }; -use std::path::Path; +use std::{fmt::write, path::Path}; use std::{fs::File, io::Read, rc::Rc}; use flowistry_pdg_construction::{ @@ -64,14 +64,14 @@ pub enum MetadataLoaderError { use MetadataLoaderError::*; impl<'tcx> GraphLoader<'tcx> for MetadataLoader<'tcx> { - fn load(&self, function: DefId) -> Result>, 
ConstructionErr> { + fn load(&self, function: DefId) -> Result>, Vec> { let Ok(meta) = self.get_metadata(function.krate) else { return Ok(None); }; let res = meta .pdgs .get(&function.index) - .ok_or(ConstructionErr::CrateExistsButItemIsNotFound { function })? + .ok_or_else(|| vec![ConstructionErr::CrateExistsButItemIsNotFound { function }])? .as_ref() .map_err(Clone::clone)?; @@ -156,7 +156,22 @@ impl<'tcx> MetadataLoader<'tcx> { } } -pub type PdgMap<'tcx> = FxHashMap, ConstructionErr>>; +#[derive(Debug)] +struct ConstructionErrors(Vec); + +impl std::error::Error for ConstructionErrors {} + +impl std::fmt::Display for ConstructionErrors { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for e in self.0.iter() { + e.fmt(f)?; + f.write_str(", ")?; + } + Ok(()) + } +} + +pub type PdgMap<'tcx> = FxHashMap, Vec>>; /// Intermediate artifacts stored on disc for every crate. /// @@ -257,6 +272,12 @@ impl<'tcx> MetadataLoader<'tcx> { Ok(meta) } + pub fn get_partial_graph(&self, key: DefId) -> Result<&PartialGraph<'tcx>> { + let meta = self.get_metadata(key.krate)?; + let result = meta.pdgs.get(&key.index).ok_or(PdgForItemMissing(key))?; + Ok(result.as_ref().map_err(|e| ConstructionErrors(e.clone()))?) + } + pub fn get_body_info(&self, key: DefId) -> Result<&BodyInfo<'tcx>> { let meta = self.get_metadata(key.krate)?; let res = meta.bodies.get(&key.index).ok_or(NoSuchItemInCate(key)); @@ -265,26 +286,14 @@ impl<'tcx> MetadataLoader<'tcx> { pub fn get_mono(&self, cs: CallString) -> Result> { let key = cs.root().function; - let meta = self.get_metadata(key.krate)?; - Ok(meta - .pdgs - .get(&key.index) - .ok_or(PdgForItemMissing(key))? - .as_ref() - .map_err(Clone::clone)? + Ok(self + .get_partial_graph(key)? .get_mono(cs) .ok_or(NoGenericsKnownForCallSite(cs))?) } pub fn get_pdg(&self, key: DefId) -> Result> { - Ok(self - .get_metadata(key.krate)? - .pdgs - .get(&key.index) - .ok_or(PdgForItemMissing(key))? - .as_ref() - .map_err(Clone::clone)? 
- .to_petgraph()) + Ok(self.get_partial_graph(key)?.to_petgraph()) } pub fn get_asyncness(&self, key: DefId) -> Asyncness { From 95e4cea81eb916d7d9354bb12fb3c7d8793fd35d Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 5 Jun 2024 16:41:52 -0700 Subject: [PATCH 58/95] Make instance resolution that fails on dyn a warning --- .../src/local_analysis.rs | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index b484d3426e..b5685b295f 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -1,6 +1,5 @@ use std::{collections::HashSet, iter, rc::Rc}; -use anyhow::anyhow; use flowistry::mir::placeinfo::PlaceInfo; use flowistry_pdg::{CallString, GlobalLocation, RichLocation}; use itertools::Itertools; @@ -15,7 +14,7 @@ use rustc_middle::{ visit::Visitor, AggregateKind, BasicBlock, Body, Location, Operand, Place, PlaceElem, Rvalue, Statement, Terminator, TerminatorEdges, TerminatorKind, RETURN_PLACE, }, - ty::{GenericArg, GenericArgsRef, Instance, List, TyCtxt}, + ty::{GenericArg, GenericArgKind, GenericArgsRef, Instance, List, TyCtxt, TyKind}, }; use rustc_mir_dataflow::{self as df, fmt::DebugWithContext, Analysis}; @@ -355,14 +354,20 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { // Monomorphize the called function with the known generic_args. 
let param_env = tcx.param_env_reveal_all_normalized(self.def_id); - let resolved_fn = + let Some(resolved_fn) = utils::try_resolve_function(self.tcx(), called_def_id, param_env, generic_args) - .ok_or_else(|| { - vec![ConstructionErr::instance_resolution_failed( - called_def_id, - generic_args, - )] - })?; + else { + if let Some(d) = generic_args.iter().find(|arg| matches!(arg.unpack(), GenericArgKind::Type(t) if matches!(t.kind(), TyKind::Dynamic(..)))) { + self.tcx().sess.span_warn(self.tcx().def_span(called_def_id), format!("could not resolve instance due to dynamic argument: {d:?}")); + return Ok(None); + } else { + return Err( + vec![ConstructionErr::instance_resolution_failed( + called_def_id, + generic_args, + )]); + } + }; trace!("resolved to instance {resolved_fn:?}"); let resolved_def_id = resolved_fn.def_id(); if log_enabled!(Level::Trace) && called_def_id != resolved_def_id { From a85ffd1078d3d6f44475bb723ee92cfb5974cc3f Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 6 Jun 2024 15:38:20 -0700 Subject: [PATCH 59/95] Serializable async errors --- .../src/async_support.rs | 159 +++++++++++--- .../src/callback.rs | 6 +- .../src/construct.rs | 201 ++++++++++++++---- .../flowistry_pdg_construction/src/graph.rs | 10 +- crates/flowistry_pdg_construction/src/lib.rs | 2 +- .../src/local_analysis.rs | 51 +++-- .../flowistry_pdg_construction/tests/pdg.rs | 59 +++++ .../paralegal-flow/src/ana/graph_converter.rs | 16 +- crates/paralegal-flow/src/ana/metadata.rs | 95 ++++++--- 9 files changed, 462 insertions(+), 137 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index a175a0d114..94e4f31ea8 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -1,20 +1,22 @@ -use std::rc::Rc; +use std::{borrow::Cow, fmt::Display, rc::Rc}; use either::Either; use flowistry_pdg::{CallString, GlobalLocation}; use 
itertools::Itertools; use rustc_abi::{FieldIdx, VariantIdx}; use rustc_hir::def_id::{DefId, LocalDefId}; -use rustc_macros::{Decodable, Encodable}; +use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ mir::{ - AggregateKind, BasicBlock, Body, Location, Operand, Place, Rvalue, Statement, + AggregateKind, BasicBlock, Body, Location, Operand, Place, Rvalue, SourceInfo, Statement, StatementKind, Terminator, TerminatorKind, }, ty::{GenericArgsRef, Instance, TyCtxt}, }; +use rustc_span::Span; use crate::{ + construct::EmittableError, graph::push_call_string_root, local_analysis::{CallKind, LocalAnalysis}, utils, ConstructionErr, PartialGraph, @@ -188,16 +190,96 @@ pub fn determine_async<'tcx>( } #[derive(Debug, Clone, PartialEq, Eq)] -pub enum AsyncDeterminationResult { +pub enum AsyncDeterminationResult<'tcx, T> { Resolved(T), - Unresolvable(String), + Unresolvable(ConstructionErr<'tcx>), NotAsync, } +#[derive(Debug, Encodable, Decodable, Clone, Hash, Eq, PartialEq)] +pub enum OperandShapeViolation { + IsNotAPlace, + IsNotLocal, + HasNoAssignments, + WrongNumberOfAssignments(u16), +} + +impl Display for OperandShapeViolation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use OperandShapeViolation::*; + if let WrongNumberOfAssignments(n) = self { + return write!(f, "wrong number of assignments, expected 1, got {n}"); + }; + let str = match self { + IsNotAPlace => "is not a place", + IsNotLocal => "is not local", + HasNoAssignments => "is never assigned", + WrongNumberOfAssignments(..) 
=> unreachable!(), + }; + f.write_str(str) + } +} + +#[derive(Debug, Encodable, Decodable, Clone, Hash, Eq, PartialEq)] +pub enum AsyncResolutionErr { + WrongOperandShape { + span: Span, + reason: OperandShapeViolation, + }, + PinnedAssignmentIsNotACall { + span: Span, + }, + AssignmentToPinNewIsNotAStatement { + span: Span, + }, + AssignmentToAliasOfPinNewInputIsNotACall { + span: Span, + }, + AssignmentToIntoFutureInputIsNotACall { + span: Span, + }, + ChaseTargetIsNotAFunction { + span: Span, + }, +} + +impl<'tcx> EmittableError<'tcx> for AsyncResolutionErr { + fn span(&self, _tcx: TyCtxt<'tcx>) -> Option { + use AsyncResolutionErr::*; + match self { + WrongOperandShape { span, .. } + | PinnedAssignmentIsNotACall { span } + | AssignmentToAliasOfPinNewInputIsNotACall { span } + | AssignmentToIntoFutureInputIsNotACall { span } + | ChaseTargetIsNotAFunction { span } + | AssignmentToPinNewIsNotAStatement { span } => Some(*span), + } + } + + fn msg(&self, _tcx: TyCtxt<'tcx>, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use AsyncResolutionErr::*; + if let WrongOperandShape { reason, .. } = self { + return write!(f, "operator has an unexpected shape: {reason}"); + } + f.write_str(match self { + PinnedAssignmentIsNotACall { .. } => "pinned assignment is not a call", + AssignmentToPinNewIsNotAStatement { .. } => "assignment to Pin::new is not a statement", + AssignmentToAliasOfPinNewInputIsNotACall { .. } => { + "assignment to Pin::new input is not a call" + } + AssignmentToIntoFutureInputIsNotACall { .. } => { + "assignment to into_future input is not a call" + } + ChaseTargetIsNotAFunction { .. } => "chase target is not a function", + WrongOperandShape { .. 
} => unreachable!(), + }) + } +} + impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { pub(crate) fn try_handle_as_async( &self, - ) -> Result>, Vec> { + ) -> Result>, Vec>> { let Some((generator_fn, location, asyncness)) = determine_async(self.tcx(), self.def_id, &self.body) else { @@ -222,10 +304,11 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { &'b self, def_id: DefId, original_args: &'b [Operand<'tcx>], - ) -> AsyncDeterminationResult> { + span: Span, + ) -> AsyncDeterminationResult<'tcx, CallKind<'tcx>> { let lang_items = self.tcx().lang_items(); if lang_items.future_poll_fn() == Some(def_id) { - match self.find_async_args(original_args) { + match self.find_async_args(original_args, span) { Ok((fun, loc, args)) => { AsyncDeterminationResult::Resolved(CallKind::AsyncPoll(fun, loc, args)) } @@ -240,26 +323,42 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { fn find_async_args<'b>( &'b self, args: &'b [Operand<'tcx>], - ) -> Result<(Instance<'tcx>, Location, Place<'tcx>), String> { + call_span: Span, + ) -> Result<(Instance<'tcx>, Location, Place<'tcx>), ConstructionErr<'tcx>> { + macro_rules! async_err { + ($msg:expr) => { + return Err(ConstructionErr::AsyncResolutionErr($msg)) + }; + } macro_rules! 
let_assert { - ($p:pat = $e:expr, $($arg:tt)*) => { + ($p:pat = $e:expr, $msg:expr) => { let $p = $e else { - let msg = format!($($arg)*); - return Err(format!("Abandoning attempt to handle async because pattern {} could not be matched to {:?}: {}", stringify!($p), $e, msg)); + async_err!($msg); }; - } + }; } - let get_def_for_op = |op: &Operand<'tcx>| -> Result { - let_assert!(Some(place) = op.place(), "Arg is not a place"); + let get_def_for_op = |op: &Operand<'tcx>| -> Result { + let mk_err = |reason| AsyncResolutionErr::WrongOperandShape { + span: call_span, + reason, + }; + let_assert!( + Some(place) = op.place(), + mk_err(OperandShapeViolation::IsNotAPlace) + ); let_assert!( Some(local) = place.as_local(), - "Place {place:?} is not a local" + mk_err(OperandShapeViolation::IsNotLocal) ); let_assert!( Some(locs) = &self.body_assignments.get(&local), - "Local has no assignments" + mk_err(OperandShapeViolation::HasNoAssignments) ); - assert!(locs.len() == 1); + if locs.len() != 1 { + async_err!(mk_err(OperandShapeViolation::WrongNumberOfAssignments( + locs.len() as u16, + ))); + } Ok(locs[0]) }; @@ -271,7 +370,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { }, .. }) = &self.body.stmt_at(get_def_for_op(&args[0])?), - "Pinned assignment is not a call" + AsyncResolutionErr::PinnedAssignmentIsNotACall { span: call_span } ); debug_assert!(new_pin_args.len() == 1); @@ -286,7 +385,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { kind: StatementKind::Assign(box (_, Rvalue::Use(future2))), .. }) = &self.body.stmt_at(get_def_for_op(&Operand::Move(future))?), - "Assignment to pin::new input is not a statement" + AsyncResolutionErr::AssignmentToPinNewIsNotAStatement { span: call_span } ); let_assert!( @@ -297,7 +396,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { }, .. 
}) = &self.body.stmt_at(get_def_for_op(future2)?), - "Assignment to alias of pin::new input is not a call" + AsyncResolutionErr::AssignmentToAliasOfPinNewInputIsNotACall { span: call_span } ); let mut chase_target = Err(&into_future_args[0]); @@ -325,17 +424,23 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { ), )) => Ok((*def_id, *generic_args, *lhs, async_fn_call_loc)), StatementKind::Assign(box (_, Rvalue::Use(target))) => { - let (op, generics) = self - .operand_to_def_id(target) - .ok_or_else(|| "Nope".to_string())?; + let Some((op, generics)) = self.operand_to_def_id(target) else { + async_err!(AsyncResolutionErr::ChaseTargetIsNotAFunction { + span: call_span + }) + }; Ok((op, generics, target.place().unwrap(), async_fn_call_loc)) } _ => { - panic!("Assignment to into_future input is not a call: {stmt:?}"); + async_err!(AsyncResolutionErr::AssignmentToIntoFutureInputIsNotACall { + span: call_span, + }); } }, _ => { - panic!("Assignment to into_future input is not a call: {stmt:?}"); + async_err!(AsyncResolutionErr::AssignmentToIntoFutureInputIsNotACall { + span: call_span, + }); } }; } @@ -348,7 +453,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { self.tcx().param_env_reveal_all_normalized(self.def_id), generics, ) - .ok_or("Resolving function failed")?; + .ok_or_else(|| ConstructionErr::instance_resolution_failed(op, generics, call_span))?; Ok((resolution, async_fn_call_loc, calling_convention)) } diff --git a/crates/flowistry_pdg_construction/src/callback.rs b/crates/flowistry_pdg_construction/src/callback.rs index 68f21e6e28..db0b69b170 100644 --- a/crates/flowistry_pdg_construction/src/callback.rs +++ b/crates/flowistry_pdg_construction/src/callback.rs @@ -3,6 +3,8 @@ use flowistry_pdg::{rustc_portable::Location, CallString}; use rustc_middle::ty::Instance; +use crate::ConstructionErr; + pub trait CallChangeCallback<'tcx> { fn on_inline(&self, info: CallInfo<'tcx>) -> CallChanges; @@ -33,8 +35,8 @@ impl<'tcx> CallChangeCallback<'tcx> for 
CallChangeCallbackFn<'tcx> { } #[derive(Debug)] -pub enum InlineMissReason { - Async(String), +pub enum InlineMissReason<'tcx> { + Async(ConstructionErr<'tcx>), } impl Default for CallChanges { diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 31e0f62934..8887d3a29f 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -8,7 +8,7 @@ //! [`InstructionState`] at each instruction in the procedure. //! 2. [`PartialGraph`] implements [`ResultsVisitor`] over the analysis result -use std::rc::Rc; +use std::{borrow::Cow, fmt::Display, rc::Rc}; use anyhow::anyhow; use either::Either; @@ -22,7 +22,7 @@ use petgraph::graph::DiGraph; use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_index::IndexVec; -use rustc_macros::{Decodable, Encodable}; +use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ mir::{ visit::Visitor, AggregateKind, Location, Operand, Place, Rvalue, Terminator, TerminatorKind, @@ -30,6 +30,7 @@ use rustc_middle::{ ty::{GenericArgsRef, Instance, TyCtxt}, }; use rustc_mir_dataflow::{AnalysisDomain, Results, ResultsVisitor}; +use rustc_span::Span; use rustc_utils::cache::Cache; use crate::{ @@ -56,37 +57,139 @@ pub struct MemoPdgConstructor<'tcx> { pub(crate) loader: Box + 'tcx>, } -#[derive(Debug, thiserror::Error, Encodable, Decodable, Clone, Hash, Eq, PartialEq)] -pub enum ConstructionErr { - // Would prefer to make `generics` `GenericArgsRef<'tcx>` but the `Error` - // implementation only allows `'static` types. 
- #[error("failed to resolve an instance for {function:?} with generic arguments {generics}")] - InstanceResolutionFailed { function: DefId, generics: String }, - #[error("entered impossible state")] +#[derive(Debug, TyEncodable, TyDecodable, Clone, Hash, Eq, PartialEq)] +pub enum ConstructionErr<'tcx> { + InstanceResolutionFailed { + function: DefId, + generics: GenericArgsRef<'tcx>, + span: Span, + }, Impossible, - #[error("failed to load external function {function:?}")] - FailedLoadingExternalFunction { function: DefId }, - #[error("failed with rustc error")] + FailedLoadingExternalFunction { + function: DefId, + span: Span, + }, RustcReportedError, - #[error("crate exists but item is not found {function:?}")] - CrateExistsButItemIsNotFound { function: DefId }, - #[error("could not create generic arguments for {function:?} because too many predicates were present ({number})")] - TooManyPredicatesForSynthesizingGenerics { function: DefId, number: u32 }, - #[error("found bound variables in predicates of {function:?}")] - BoundVariablesInPredicates { function: DefId }, - #[error("has trait ref with binder {function:?}")] - TraitRefWithBinder { function: DefId }, - #[error("cannot use constants as generic parameters in controllers")] - ConstantInGenerics { function: DefId }, - #[error("operand is not function type {op}")] - OperandIsNotFunctionType { op: String }, + CrateExistsButItemIsNotFound { + function: DefId, + }, + TooManyPredicatesForSynthesizingGenerics { + function: DefId, + number: u32, + }, + BoundVariablesInPredicates { + function: DefId, + }, + TraitRefWithBinder { + function: DefId, + }, + ConstantInGenerics { + function: DefId, + }, + OperandIsNotFunctionType { + op: String, + }, + AsyncResolutionErr(AsyncResolutionErr), +} + +pub trait EmittableError<'tcx> { + fn span(&self, _tcx: TyCtxt<'tcx>) -> Option { + None + } + fn msg(&self, tcx: TyCtxt<'tcx>, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result; + + fn emit(&self, tcx: TyCtxt<'tcx>) { + 
default_emit_error(self, tcx) + } +} + +pub fn default_emit_error<'tcx>(e: &(impl EmittableError<'tcx> + ?Sized), tcx: TyCtxt<'tcx>) { + struct FmtWithTcx<'tcx, A> { + tcx: TyCtxt<'tcx>, + inner: A, + } + impl<'tcx, A: EmittableError<'tcx> + ?Sized> Display for FmtWithTcx<'tcx, &'_ A> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.inner.msg(self.tcx, f) + } + } + + let msg = format!("{}", FmtWithTcx { tcx, inner: e }); + if let Some(span) = e.span(tcx) { + tcx.sess.span_err(span, msg); + } else { + tcx.sess.err(msg); + } } -impl ConstructionErr { - pub fn instance_resolution_failed(function: DefId, generics: GenericArgsRef) -> Self { +impl<'tcx> EmittableError<'tcx> for ConstructionErr<'tcx> { + fn span(&self, tcx: TyCtxt<'tcx>) -> Option { + use ConstructionErr::*; + match self { + AsyncResolutionErr(e) => e.span(tcx), + InstanceResolutionFailed { span, .. } | FailedLoadingExternalFunction { span, .. } => { + Some(*span) + } + BoundVariablesInPredicates { function } + | TraitRefWithBinder { function } + | ConstantInGenerics { function } => Some(tcx.def_span(*function)), + _ => None, + } + } + + fn msg(&self, tcx: TyCtxt, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use ConstructionErr::*; + match self { + InstanceResolutionFailed { + function, generics, .. + } => write!( + f, + "could not resolve instance for {} with generics {generics:?}", + tcx.def_path_debug_str(*function) + ), + Impossible => f.write_str("internal compiler error, this state should be impossible"), + FailedLoadingExternalFunction { function, .. } => write!( + f, + "failed loading external function {}", + tcx.def_path_debug_str(*function) + ), + RustcReportedError => f.write_str("see previously reported errors"), + CrateExistsButItemIsNotFound { function } => write!( + f, + "found a crate for item {}, but could not find a PDG for it", + tcx.def_path_debug_str(*function) + ), + TooManyPredicatesForSynthesizingGenerics { number, .. 
} => write!( + f, + "only one predicate can be synthesized to a `dyn`, found {number}" + ), + BoundVariablesInPredicates { .. } => { + f.write_str("bound variables in predicates are not supported") + } + TraitRefWithBinder { .. } => { + f.write_str("trait refs for `dyn` synthesis cannot have binders") + } + ConstantInGenerics { .. } => { + f.write_str("constants in generics for are not supported for analysis entrypoints") + } + OperandIsNotFunctionType { op } => { + write!(f, "operand {op} is not of function type") + } + AsyncResolutionErr(e) => e.msg(tcx, f), + } + } +} + +impl<'tcx> ConstructionErr<'tcx> { + pub fn instance_resolution_failed( + function: DefId, + generics: GenericArgsRef<'tcx>, + span: Span, + ) -> Self { Self::InstanceResolutionFailed { function, - generics: format!("{generics:?}"), + generics, + span, } } @@ -132,7 +235,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { pub fn construct_root<'a>( &'a self, function: LocalDefId, - ) -> Result<&'a PartialGraph<'tcx>, Vec> { + ) -> Result<&'a PartialGraph<'tcx>, Vec>> { let generics = manufacture_substs_for(self.tcx, function.to_def_id()).map_err(|i| vec![i])?; let resolution = try_resolve_function( @@ -145,6 +248,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { vec![ConstructionErr::instance_resolution_failed( function.to_def_id(), generics, + self.tcx.def_span(function), )] })?; self.construct_for(resolution) @@ -154,7 +258,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { pub(crate) fn construct_for<'a>( &'a self, resolution: Instance<'tcx>, - ) -> Result>, Vec> { + ) -> Result>, Vec>> { let def_id = resolution.def_id(); let generics = resolution.args; if let Some(local) = def_id.as_local() { @@ -186,7 +290,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { pub fn construct_graph( &self, function: LocalDefId, - ) -> Result, Vec> { + ) -> Result, Vec>> { let _args = manufacture_substs_for(self.tcx, function.to_def_id()) .map_err(|_| anyhow!("rustc error")); let g = self.construct_root(function)?.to_petgraph(); @@ -194,12 
+298,12 @@ impl<'tcx> MemoPdgConstructor<'tcx> { } } -pub(crate) struct WithConstructionErrors { +pub(crate) struct WithConstructionErrors<'tcx, A> { pub(crate) inner: A, - pub errors: FxHashSet, + pub errors: FxHashSet>, } -impl WithConstructionErrors { +impl<'tcx, A> WithConstructionErrors<'tcx, A> { pub fn new(inner: A) -> Self { Self { inner, @@ -207,7 +311,7 @@ impl WithConstructionErrors { } } - pub fn into_result(self) -> Result> { + pub fn into_result(self) -> Result>> { if self.errors.is_empty() { Ok(self.inner) } else { @@ -218,10 +322,10 @@ impl WithConstructionErrors { type DfResults<'mir, 'tcx> = Results<'tcx, DfAna<'mir, 'tcx>>; -type DfAna<'mir, 'tcx> = WithConstructionErrors<&'mir LocalAnalysis<'tcx, 'mir>>; +type DfAna<'mir, 'tcx> = WithConstructionErrors<'tcx, &'mir LocalAnalysis<'tcx, 'mir>>; impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, DfResults<'mir, 'tcx>> - for WithConstructionErrors> + for WithConstructionErrors<'tcx, PartialGraph<'tcx>> { type FlowState = as AnalysisDomain<'tcx>>::Domain; @@ -315,7 +419,12 @@ impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx, DfResults<'mir, 'tcx>> let constructor = results.analysis.inner; if matches!( - constructor.determine_call_handling(location, func, args), + constructor.determine_call_handling( + location, + func, + args, + terminator.source_info.span + ), Ok(Some(CallHandling::Ready { .. })) ) { return; @@ -373,7 +482,7 @@ impl<'tcx> PartialGraph<'tcx> { state: &'a InstructionState<'tcx>, terminator: &Terminator<'tcx>, location: Location, - ) -> Result> { + ) -> Result>> { let TerminatorKind::Call { func, args, @@ -389,7 +498,13 @@ impl<'tcx> PartialGraph<'tcx> { function: constructor.def_id.to_def_id(), }; - let Some(handling) = constructor.determine_call_handling(location, func, args)? else { + let Some(handling) = constructor.determine_call_handling( + location, + func, + args, + terminator.source_info.span, + )? 
+ else { return Ok(false); }; @@ -557,8 +672,12 @@ impl<'tcx> PartialGraph<'tcx> { } } -type PdgCache<'tcx> = - Rc), Result, Vec>>>; +type PdgCache<'tcx> = Rc< + Cache< + (LocalDefId, GenericArgsRef<'tcx>), + Result, Vec>>, + >, +>; #[derive(Debug)] enum Inputs<'tcx> { diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index bd92bd1d2e..a87db44e9d 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -401,7 +401,7 @@ impl<'tcx> TransformCallString for PartialGraph<'tcx> { } } -pub type GraphLoaderError = Vec; +pub type GraphLoaderError<'tcx> = Vec>; /// Abstracts over how previously written [`Artifact`]s are retrieved, allowing /// the user of this module to chose where to store them. @@ -416,7 +416,7 @@ pub trait GraphLoader<'tcx> { /// This should return `Ok(None)` in cases where the target is not expected /// to have it's partial graph present. For instance if `function` refers to /// an item in a crate that was not selected for analysis. - fn load(&self, function: DefId) -> Result>, GraphLoaderError>; + fn load(&self, function: DefId) -> Result>, GraphLoaderError<'tcx>>; } /// Intermediate data that gets stored for each crate. 
@@ -426,19 +426,19 @@ pub type Artifact<'tcx> = FxHashMap>; pub struct NoLoader; impl<'tcx> GraphLoader<'tcx> for NoLoader { - fn load(&self, _: DefId) -> Result>, GraphLoaderError> { + fn load(&self, _: DefId) -> Result>, GraphLoaderError<'tcx>> { Ok(None) } } impl<'tcx, T: GraphLoader<'tcx>> GraphLoader<'tcx> for Rc { - fn load(&self, function: DefId) -> Result>, GraphLoaderError> { + fn load(&self, function: DefId) -> Result>, GraphLoaderError<'tcx>> { (**self).load(function) } } impl<'tcx, T: GraphLoader<'tcx>> GraphLoader<'tcx> for Box { - fn load(&self, function: DefId) -> Result>, GraphLoaderError> { + fn load(&self, function: DefId) -> Result>, GraphLoaderError<'tcx>> { (**self).load(function) } } diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index 850c6220ee..e4e0d8a03f 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -19,7 +19,7 @@ pub use async_support::{determine_async, is_async_trait_fn, Asyncness}; pub use construct::ConstructionErr; pub use graph::{Artifact, DepGraph, GraphLoader, NoLoader, PartialGraph}; pub mod callback; -pub use crate::construct::MemoPdgConstructor; +pub use crate::construct::{default_emit_error, EmittableError, MemoPdgConstructor}; pub use callback::{ CallChangeCallback, CallChangeCallbackFn, CallChanges, CallInfo, InlineMissReason, SkipCall, }; diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index b5685b295f..29a9003dda 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -18,6 +18,7 @@ use rustc_middle::{ }; use rustc_mir_dataflow::{self as df, fmt::DebugWithContext, Analysis}; +use rustc_span::Span; use rustc_utils::{ mir::{borrowck_facts, control_dependencies::ControlDependencies}, BodyExt, PlaceExt, @@ -344,7 +345,8 @@ impl<'tcx, 'a> 
LocalAnalysis<'tcx, 'a> { location: Location, func: &Operand<'tcx>, args: &'b [Operand<'tcx>], - ) -> Result>, Vec> { + span: Span, + ) -> Result>, Vec>> { let tcx = self.tcx(); let (called_def_id, generic_args) = self @@ -365,6 +367,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { vec![ConstructionErr::instance_resolution_failed( called_def_id, generic_args, + span )]); } }; @@ -384,18 +387,10 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { return Ok(Some(CallHandling::ApproxAsyncSM(handler))); }; - let call_kind = match self.classify_call_kind(called_def_id, resolved_def_id, args) { + let call_kind = match self.classify_call_kind(called_def_id, resolved_def_id, args, span) { Ok(cc) => cc, Err(async_err) => { - if let Some(cb) = self.call_change_callback() { - cb.on_inline_miss( - resolved_fn, - location, - self.root, - InlineMissReason::Async(async_err), - ) - } - return Ok(None); + return Err(vec![async_err]); } }; @@ -485,11 +480,12 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { func: &Operand<'tcx>, args: &[Operand<'tcx>], destination: Place<'tcx>, - ) -> Result> { + span: Span, + ) -> Result>> { // Note: my comments here will use "child" to refer to the callee and // "parent" to refer to the caller, since the words are most visually distinct. - let Some(preamble) = self.determine_call_handling(location, func, args)? else { + let Some(preamble) = self.determine_call_handling(location, func, args, span)? else { return Ok(false); }; @@ -566,7 +562,9 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { self.root.args } - pub(crate) fn construct_partial(&'a self) -> Result, Vec> { + pub(crate) fn construct_partial( + &'a self, + ) -> Result, Vec>> { if let Some(g) = self.try_handle_as_async()? 
{ return Ok(g); } @@ -629,8 +627,9 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { def_id: DefId, resolved_def_id: DefId, original_args: &'b [Operand<'tcx>], - ) -> Result, String> { - match self.try_poll_call_kind(def_id, original_args) { + span: Span, + ) -> Result, ConstructionErr<'tcx>> { + match self.try_poll_call_kind(def_id, original_args, span) { AsyncDeterminationResult::Resolved(r) => Ok(r), AsyncDeterminationResult::NotAsync => Ok(self .try_indirect_call_kind(resolved_def_id) @@ -654,7 +653,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { } } -impl<'tcx, 'a> WithConstructionErrors<&'_ LocalAnalysis<'tcx, 'a>> { +impl<'tcx, 'a> WithConstructionErrors<'tcx, &'_ LocalAnalysis<'tcx, 'a>> { fn handle_terminator( &mut self, terminator: &Terminator<'tcx>, @@ -669,10 +668,14 @@ impl<'tcx, 'a> WithConstructionErrors<&'_ LocalAnalysis<'tcx, 'a>> { .. } = &terminator.kind { - match self - .inner - .handle_call(state, location, func, args, *destination) - { + match self.inner.handle_call( + state, + location, + func, + args, + *destination, + terminator.source_info.span, + ) { Err(e) => { self.errors.extend(e); } @@ -689,7 +692,9 @@ impl<'tcx, 'a> WithConstructionErrors<&'_ LocalAnalysis<'tcx, 'a>> { } } -impl<'tcx, 'a> df::AnalysisDomain<'tcx> for WithConstructionErrors<&'a LocalAnalysis<'tcx, 'a>> { +impl<'tcx, 'a> df::AnalysisDomain<'tcx> + for WithConstructionErrors<'tcx, &'a LocalAnalysis<'tcx, 'a>> +{ type Domain = InstructionState<'tcx>; const NAME: &'static str = "LocalPdgConstruction"; @@ -701,7 +706,7 @@ impl<'tcx, 'a> df::AnalysisDomain<'tcx> for WithConstructionErrors<&'a LocalAnal fn initialize_start_block(&self, _body: &Body<'tcx>, _state: &mut Self::Domain) {} } -impl<'a, 'tcx> df::Analysis<'tcx> for WithConstructionErrors<&'a LocalAnalysis<'tcx, 'a>> { +impl<'a, 'tcx> df::Analysis<'tcx> for WithConstructionErrors<'tcx, &'a LocalAnalysis<'tcx, 'a>> { fn apply_statement_effect( &mut self, state: &mut Self::Domain, diff --git 
a/crates/flowistry_pdg_construction/tests/pdg.rs b/crates/flowistry_pdg_construction/tests/pdg.rs index 36309021fc..e78d53f757 100644 --- a/crates/flowistry_pdg_construction/tests/pdg.rs +++ b/crates/flowistry_pdg_construction/tests/pdg.rs @@ -3,6 +3,7 @@ extern crate either; extern crate rustc_hir; extern crate rustc_middle; +extern crate rustc_span; use std::collections::HashSet; @@ -17,6 +18,7 @@ use rustc_middle::{ mir::{Terminator, TerminatorKind}, ty::TyCtxt, }; +use rustc_span::Symbol; use rustc_utils::{mir::borrowck_facts, source_map::find_bodies::find_bodies}; fn get_main(tcx: TyCtxt<'_>) -> LocalDefId { @@ -781,3 +783,60 @@ pdg_test! { } }, } + +pdg_test! { + spawn_and_loop_await, + { + use std::future::Future; + use std::task::{Poll, Context}; + use std::pin::Pin; + + struct JoinHandle(Box>); + + impl Future for JoinHandle { + type Output = T; + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + self.map_unchecked_mut(|p| p.0.as_mut()).poll(cx) + } + } + + pub fn spawn(future: F) -> JoinHandle + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + JoinHandle(Box::new(future)) + } + + pub async fn main() { + let mut tasks = vec![]; + for i in [0,1] { + let task: JoinHandle<_> = spawn(async move { + println!("{i}"); + Ok::<_, String>(0) + }); + tasks.push(task); + } + + for h in tasks { + if let Err(e) = h.await { + panic!("{e}") + } + } + } + }, + |tcx, params| { + params.with_call_change_callback(CallChangeCallbackFn::new(move |info| { + let name = tcx.opt_item_name(info.callee.def_id()); + let name2 = tcx.opt_parent(info.callee.def_id()).and_then(|c| tcx.opt_item_name(c)); + let is_spawn = |name: Option<&Symbol>| name.map_or(false, |n| n.as_str().contains("spawn")); + let mut changes = CallChanges::default(); + if is_spawn(name.as_ref()) || is_spawn(name2.as_ref()) + { + changes = changes.with_skip(SkipCall::Skip); + }; + changes + })); + }, + (i -> h) +} diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs 
b/crates/paralegal-flow/src/ana/graph_converter.rs index aa8528fc2e..ddbe71c2f2 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -13,15 +13,16 @@ use rustc_middle::{ use std::{cell::RefCell, fmt::Display, rc::Rc}; use super::{ - default_index, metadata::BodyInfo, path_for_item, src_loc_for_span, RustcInstructionKind, - SPDGGenerator, + default_index, + metadata::{BodyInfo, MetadataLoaderError}, + path_for_item, src_loc_for_span, RustcInstructionKind, SPDGGenerator, }; -use anyhow::Result; +use anyhow::{anyhow, Result}; use either::Either; use flowistry_pdg_construction::{ graph::{DepEdge, DepEdgeKind, DepGraph, DepNode}, utils::try_monomorphize, - CallChangeCallback, CallChanges, CallInfo, InlineMissReason, + CallChangeCallback, CallChanges, CallInfo, EmittableError, InlineMissReason, SkipCall::Skip, }; use petgraph::{ @@ -66,7 +67,10 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { target: &'a FnToAnalyze, ) -> Result { let local_def_id = target.def_id; - let dep_graph = Self::create_flowistry_graph(generator, local_def_id)?; + let dep_graph = Self::create_flowistry_graph(generator, local_def_id).map_err(|e| { + e.emit(generator.tcx); + anyhow!("construction error") + })?; if generator.opts.dbg().dump_flowistry_pdg() { dep_graph.generate_graphviz(format!( @@ -332,7 +336,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { fn create_flowistry_graph( generator: &SPDGGenerator<'tcx>, def_id: LocalDefId, - ) -> Result> { + ) -> Result, MetadataLoaderError<'tcx>> { generator.metadata_loader.get_pdg(def_id.to_def_id()) } diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 341f1e7f37..22d8fcb0d2 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -11,12 +11,12 @@ use crate::{ Args, DefId, HashMap, MarkerCtx, }; -use std::{fmt::write, path::Path}; +use std::path::Path; use 
std::{fs::File, io::Read, rc::Rc}; use flowistry_pdg_construction::{ - graph::InternedString, Asyncness, ConstructionErr, DepGraph, GraphLoader, MemoPdgConstructor, - PartialGraph, + default_emit_error, graph::InternedString, Asyncness, ConstructionErr, DepGraph, + EmittableError, GraphLoader, MemoPdgConstructor, PartialGraph, }; use rustc_hash::FxHashMap; @@ -32,9 +32,7 @@ use rustc_middle::{ }; use rustc_serialize::{Decodable, Encodable}; -use anyhow::{anyhow, Result}; use rustc_utils::{cache::Cache, mir::borrowck_facts}; -use thiserror::Error; use super::{ encoder::{ParalegalDecoder, ParalegalEncoder}, @@ -49,22 +47,56 @@ pub struct MetadataLoader<'tcx> { } /// The types of errors that can arise from interacting with the [`MetadataLoader`]. -#[derive(Debug, Error)] -pub enum MetadataLoaderError { - #[error("no pdg for item {:?}", .0)] +#[derive(Debug)] +pub enum MetadataLoaderError<'tcx> { PdgForItemMissing(DefId), - #[error("no metadata for crate {}", tls::with(|tcx| tcx.crate_name(*.0)))] MetadataForCrateMissing(CrateNum), - #[error("no generics known for call site {0}")] NoGenericsKnownForCallSite(CallString), - #[error("no metadata for item {:?} in crate {}", .0, tls::with(|tcx| tcx.crate_name(.0.krate)))] NoSuchItemInCate(DefId), + ConstructionErrors(Vec>), +} + +impl<'tcx> EmittableError<'tcx> for MetadataLoaderError<'tcx> { + fn msg(&self, tcx: TyCtxt<'tcx>, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use MetadataLoaderError::*; + match self { + PdgForItemMissing(def) => { + write!(f, "found no pdg for item {}", tcx.def_path_debug_str(*def)).into() + } + MetadataForCrateMissing(krate) => { + write!(f, "no metadata found for crate {}", tcx.crate_name(*krate)).into() + } + NoGenericsKnownForCallSite(cs) => { + write!(f, "no generics known for call site {cs}").into() + } + NoSuchItemInCate(it) => write!( + f, + "no such item {} found in crate {}", + tcx.def_path_debug_str(*it), + tcx.crate_name(it.krate) + ), + ConstructionErrors(e) => 
f.write_str("construction errors"), + } + } + + fn emit(&self, tcx: TyCtxt<'tcx>) { + if let MetadataLoaderError::ConstructionErrors(e) = self { + for e in e { + e.emit(tcx); + } + return; + } + default_emit_error(self, tcx) + } } use MetadataLoaderError::*; impl<'tcx> GraphLoader<'tcx> for MetadataLoader<'tcx> { - fn load(&self, function: DefId) -> Result>, Vec> { + fn load( + &self, + function: DefId, + ) -> Result>, Vec>> { let Ok(meta) = self.get_metadata(function.krate) else { return Ok(None); }; @@ -157,21 +189,9 @@ impl<'tcx> MetadataLoader<'tcx> { } #[derive(Debug)] -struct ConstructionErrors(Vec); +struct ConstructionErrors<'tcx>(Vec>); -impl std::error::Error for ConstructionErrors {} - -impl std::fmt::Display for ConstructionErrors { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for e in self.0.iter() { - e.fmt(f)?; - f.write_str(", ")?; - } - Ok(()) - } -} - -pub type PdgMap<'tcx> = FxHashMap, Vec>>; +pub type PdgMap<'tcx> = FxHashMap, Vec>>>; /// Intermediate artifacts stored on disc for every crate. /// @@ -249,7 +269,10 @@ impl<'tcx> MetadataLoader<'tcx> { }) } - pub fn get_metadata(&self, key: CrateNum) -> Result<&Metadata<'tcx>> { + pub fn get_metadata( + &self, + key: CrateNum, + ) -> Result<&Metadata<'tcx>, MetadataLoaderError<'tcx>> { let meta = self .cache .get(key, |_| { @@ -272,19 +295,27 @@ impl<'tcx> MetadataLoader<'tcx> { Ok(meta) } - pub fn get_partial_graph(&self, key: DefId) -> Result<&PartialGraph<'tcx>> { + pub fn get_partial_graph( + &self, + key: DefId, + ) -> Result<&PartialGraph<'tcx>, MetadataLoaderError<'tcx>> { let meta = self.get_metadata(key.krate)?; let result = meta.pdgs.get(&key.index).ok_or(PdgForItemMissing(key))?; - Ok(result.as_ref().map_err(|e| ConstructionErrors(e.clone()))?) + Ok(result + .as_ref() + .map_err(|e| MetadataLoaderError::ConstructionErrors(e.clone()))?) 
} - pub fn get_body_info(&self, key: DefId) -> Result<&BodyInfo<'tcx>> { + pub fn get_body_info(&self, key: DefId) -> Result<&BodyInfo<'tcx>, MetadataLoaderError<'tcx>> { let meta = self.get_metadata(key.krate)?; let res = meta.bodies.get(&key.index).ok_or(NoSuchItemInCate(key)); Ok(res?) } - pub fn get_mono(&self, cs: CallString) -> Result> { + pub fn get_mono( + &self, + cs: CallString, + ) -> Result, MetadataLoaderError<'tcx>> { let key = cs.root().function; Ok(self .get_partial_graph(key)? @@ -292,7 +323,7 @@ impl<'tcx> MetadataLoader<'tcx> { .ok_or(NoGenericsKnownForCallSite(cs))?) } - pub fn get_pdg(&self, key: DefId) -> Result> { + pub fn get_pdg(&self, key: DefId) -> Result, MetadataLoaderError<'tcx>> { Ok(self.get_partial_graph(key)?.to_petgraph()) } From 22cc70763b47eb13ccb197da541be646bfacd6b8 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Fri, 7 Jun 2024 11:00:46 -0700 Subject: [PATCH 60/95] Remove unused code --- crates/flowistry_pdg_construction/src/calling_convention.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/flowistry_pdg_construction/src/calling_convention.rs b/crates/flowistry_pdg_construction/src/calling_convention.rs index 7e151a226b..cfbf541acc 100644 --- a/crates/flowistry_pdg_construction/src/calling_convention.rs +++ b/crates/flowistry_pdg_construction/src/calling_convention.rs @@ -107,7 +107,6 @@ impl<'tcx, 'a> CallingConvention<'tcx, 'a> { (closure_arg.place()?, &child.projection[..]) } else { let tuple_arg = tupled_arguments.place()?; - let _projection = child.projection.to_vec(); let field = FieldIdx::from_usize(child.local.as_usize() - 2); let field_ty = tuple_arg.ty(parent_body, tcx).field_ty(tcx, field); ( From 9b8a3ff51ef656b623419c254854c257bdf8f415 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sat, 8 Jun 2024 10:51:44 -0700 Subject: [PATCH 61/95] Clippy --- .../src/async_support.rs | 6 +++--- .../src/construct.rs | 5 ++--- .../src/local_analysis.rs | 3 +-- .../flowistry_pdg_construction/src/utils.rs | 6 +++--- 
crates/paralegal-flow/src/ana/metadata.rs | 21 +++++++++---------- 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index 94e4f31ea8..4f52b2f34f 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -1,14 +1,14 @@ -use std::{borrow::Cow, fmt::Display, rc::Rc}; +use std::{fmt::Display, rc::Rc}; use either::Either; use flowistry_pdg::{CallString, GlobalLocation}; use itertools::Itertools; use rustc_abi::{FieldIdx, VariantIdx}; use rustc_hir::def_id::{DefId, LocalDefId}; -use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; +use rustc_macros::{Decodable, Encodable}; use rustc_middle::{ mir::{ - AggregateKind, BasicBlock, Body, Location, Operand, Place, Rvalue, SourceInfo, Statement, + AggregateKind, BasicBlock, Body, Location, Operand, Place, Rvalue, Statement, StatementKind, Terminator, TerminatorKind, }, ty::{GenericArgsRef, Instance, TyCtxt}, diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 8887d3a29f..1e8fcc8cd3 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -8,21 +8,20 @@ //! [`InstructionState`] at each instruction in the procedure. //! 2. 
[`PartialGraph`] implements [`ResultsVisitor`] over the analysis result -use std::{borrow::Cow, fmt::Display, rc::Rc}; +use std::{fmt::Display, rc::Rc}; use anyhow::anyhow; use either::Either; use flowistry_pdg::{CallString, GlobalLocation}; -use itertools::Itertools; use log::trace; use petgraph::graph::DiGraph; use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_index::IndexVec; -use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; +use rustc_macros::{TyDecodable, TyEncodable}; use rustc_middle::{ mir::{ visit::Visitor, AggregateKind, Location, Operand, Place, Rvalue, Terminator, TerminatorKind, diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index 29a9003dda..cd4ccbe175 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -32,8 +32,7 @@ use crate::{ graph::{DepEdge, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, utils::{self, is_async, is_non_default_trait_method, try_monomorphize}, - Asyncness, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, MemoPdgConstructor, - SkipCall, + Asyncness, CallChangeCallback, CallChanges, CallInfo, MemoPdgConstructor, SkipCall, }; #[derive(PartialEq, Eq, Default, Clone, Debug)] diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 82d7450c89..022b4f04f7 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -16,7 +16,7 @@ use rustc_middle::{ self, EarlyBinder, GenericArg, GenericArgsRef, Instance, List, ParamEnv, Ty, TyCtxt, TyKind, }, }; -use rustc_span::ErrorGuaranteed; + use rustc_type_ir::{fold::TypeFoldable, AliasKind}; use rustc_utils::{BodyExt, PlaceExt}; @@ -281,7 +281,7 @@ pub fn manufacture_substs_for( trace!(" is projection 
clause"); let Some(pred) = pred.no_bound_vars() else { return Some(Err(ConstructionErr::BoundVariablesInPredicates { - function: function, + function, })); }; if !matches!(pred.self_ty().kind(), TyKind::Param(p) if *p == param_as_ty) { @@ -315,7 +315,7 @@ pub fn manufacture_substs_for( 1 => (), _ => { return Err(ConstructionErr::TooManyPredicatesForSynthesizingGenerics { - function: function, + function, number: predicates.len() as u32, }) } diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 22d8fcb0d2..a865cae9de 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -28,7 +28,7 @@ use rustc_middle::{ BasicBlock, BasicBlockData, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, Location, Statement, Terminator, TerminatorKind, }, - ty::{tls, EarlyBinder, GenericArgsRef, Ty, TyCtxt}, + ty::{EarlyBinder, GenericArgsRef, Ty, TyCtxt}, }; use rustc_serialize::{Decodable, Encodable}; @@ -61,13 +61,13 @@ impl<'tcx> EmittableError<'tcx> for MetadataLoaderError<'tcx> { use MetadataLoaderError::*; match self { PdgForItemMissing(def) => { - write!(f, "found no pdg for item {}", tcx.def_path_debug_str(*def)).into() + write!(f, "found no pdg for item {}", tcx.def_path_debug_str(*def)) } MetadataForCrateMissing(krate) => { - write!(f, "no metadata found for crate {}", tcx.crate_name(*krate)).into() + write!(f, "no metadata found for crate {}", tcx.crate_name(*krate)) } NoGenericsKnownForCallSite(cs) => { - write!(f, "no generics known for call site {cs}").into() + write!(f, "no generics known for call site {cs}") } NoSuchItemInCate(it) => write!( f, @@ -75,7 +75,7 @@ impl<'tcx> EmittableError<'tcx> for MetadataLoaderError<'tcx> { tcx.def_path_debug_str(*it), tcx.crate_name(it.krate) ), - ConstructionErrors(e) => f.write_str("construction errors"), + ConstructionErrors(_e) => f.write_str("construction errors"), } } @@ -301,15 +301,15 @@ impl<'tcx> MetadataLoader<'tcx> { 
) -> Result<&PartialGraph<'tcx>, MetadataLoaderError<'tcx>> { let meta = self.get_metadata(key.krate)?; let result = meta.pdgs.get(&key.index).ok_or(PdgForItemMissing(key))?; - Ok(result + result .as_ref() - .map_err(|e| MetadataLoaderError::ConstructionErrors(e.clone()))?) + .map_err(|e| MetadataLoaderError::ConstructionErrors(e.clone())) } pub fn get_body_info(&self, key: DefId) -> Result<&BodyInfo<'tcx>, MetadataLoaderError<'tcx>> { let meta = self.get_metadata(key.krate)?; let res = meta.bodies.get(&key.index).ok_or(NoSuchItemInCate(key)); - Ok(res?) + res } pub fn get_mono( @@ -317,10 +317,9 @@ impl<'tcx> MetadataLoader<'tcx> { cs: CallString, ) -> Result, MetadataLoaderError<'tcx>> { let key = cs.root().function; - Ok(self - .get_partial_graph(key)? + self.get_partial_graph(key)? .get_mono(cs) - .ok_or(NoGenericsKnownForCallSite(cs))?) + .ok_or(NoGenericsKnownForCallSite(cs)) } pub fn get_pdg(&self, key: DefId) -> Result, MetadataLoaderError<'tcx>> { From 38c4888509bfc83dba043548537c119b15e10b66 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 10 Jun 2024 14:37:45 -0700 Subject: [PATCH 62/95] Clippy --- crates/paralegal-flow/src/utils/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index 3f1c7b03da..5eb242ccce 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -4,7 +4,7 @@ extern crate smallvec; use rustc_target::spec::abi::Abi; use thiserror::Error; -use smallvec::SmallVec; + use crate::{desc::Identifier, rustc_span::ErrorGuaranteed, Either, Symbol, TyCtxt}; From a0d1027b9f2b5539919d66a685469c83414f010f Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 10 Jun 2024 15:59:44 -0700 Subject: [PATCH 63/95] Fmt --- crates/paralegal-flow/src/utils/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index 
5eb242ccce..9088750433 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -4,8 +4,6 @@ extern crate smallvec; use rustc_target::spec::abi::Abi; use thiserror::Error; - - use crate::{desc::Identifier, rustc_span::ErrorGuaranteed, Either, Symbol, TyCtxt}; pub use flowistry_pdg_construction::utils::is_non_default_trait_method; From be9551910679c1c8696e5eba27c3c17a306b2993 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 10 Jun 2024 16:00:27 -0700 Subject: [PATCH 64/95] Reenable MIR dumping --- crates/paralegal-flow/src/ana/metadata.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index a865cae9de..42cb130151 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -130,10 +130,12 @@ impl<'tcx> MetadataLoader<'tcx> { let emit_targets = collector.emit_target_collector; let marker_ctx: MarkerCtx = collector.marker_ctx.into(); let mut constructor = MemoPdgConstructor::new(tcx, self.clone()); - constructor.with_call_change_callback(MyCallback { - tcx, - judge: InlineJudge::new(marker_ctx.clone(), tcx, args.anactrl()), - }); + constructor + .with_call_change_callback(MyCallback { + tcx, + judge: InlineJudge::new(marker_ctx.clone(), tcx, args.anactrl()), + }) + .with_dump_mir(args.dbg().dump_mir()); let pdgs = emit_targets .into_iter() .map(|t| { From 7eac28962f22440302b624d3946ff6a526609a58 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 10 Jun 2024 16:00:58 -0700 Subject: [PATCH 65/95] Make calling convention matching more explicit --- .../src/calling_convention.rs | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/calling_convention.rs b/crates/flowistry_pdg_construction/src/calling_convention.rs index cfbf541acc..161402488c 100644 --- 
a/crates/flowistry_pdg_construction/src/calling_convention.rs +++ b/crates/flowistry_pdg_construction/src/calling_convention.rs @@ -66,24 +66,26 @@ impl<'tcx, 'a> CallingConvention<'tcx, 'a> { ) -> Option<(Place<'tcx>, &[PlaceElem<'tcx>])> { let result = match self { // Async return must be handled special, because it gets wrapped in `Poll::Ready` - Self::Async { .. } if child.local == RETURN_PLACE => { - let in_poll = destination.project_deeper( - &[PlaceElem::Downcast(None, async_info.poll_ready_variant_idx)], - tcx, - ); - let field_idx = async_info.poll_ready_field_idx; - let child_inner_return_type = in_poll - .ty(parent_body.local_decls(), tcx) - .field_ty(tcx, field_idx); - ( - in_poll.project_deeper( - &[PlaceElem::Field(field_idx, child_inner_return_type)], + _ if child.local == RETURN_PLACE => match self { + Self::Async { .. } => { + let in_poll = destination.project_deeper( + &[PlaceElem::Downcast(None, async_info.poll_ready_variant_idx)], tcx, - ), - &child.projection[..], - ) - } - _ if child.local == RETURN_PLACE => (destination, &child.projection[..]), + ); + let field_idx = async_info.poll_ready_field_idx; + let child_inner_return_type = in_poll + .ty(parent_body.local_decls(), tcx) + .field_ty(tcx, field_idx); + ( + in_poll.project_deeper( + &[PlaceElem::Field(field_idx, child_inner_return_type)], + tcx, + ), + &child.projection[..], + ) + } + _ => (destination, &child.projection[..]), + }, // Map arguments to the argument array Self::Direct(args) => ( args[child.local.as_usize() - 1].place()?, From 7b4468a9c40d6514c4345511e6878916e19ef82a Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 10 Jun 2024 16:15:48 -0700 Subject: [PATCH 66/95] New test case for calling async_trait --- crates/paralegal-policy/tests/lemmy.rs | 43 +++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/crates/paralegal-policy/tests/lemmy.rs b/crates/paralegal-policy/tests/lemmy.rs index a9ca283fc1..72ff44de23 100644 --- 
a/crates/paralegal-policy/tests/lemmy.rs +++ b/crates/paralegal-policy/tests/lemmy.rs @@ -4,7 +4,7 @@ use std::{collections::hash_map::RandomState, sync::Arc}; use helpers::{Result, Test}; use paralegal_policy::{ - assert_error, assert_warning, Context, Diagnostics, EdgeSelection, NodeExt, + assert_error, assert_warning, Context, Diagnostics, EdgeSelection, NodeExt, NodeQueries, }; use paralegal_spdg::{GlobalNode, Identifier}; @@ -128,6 +128,47 @@ fn support_calling_async_trait_0_1_53() -> Result<()> { test.run(calling_async_trait_policy) } +#[test] +fn call_async_trait_single_inline() -> Result<()> { + let mut test = Test::new(stringify!( + #[paralegal::marker(marked, return)] + fn apply_marker(i: T) -> T { + i + } + + struct Ctx; + #[async_trait::async_trait(?Send)] + trait Trait { + async fn transform(&self, i: usize) -> usize; + } + + #[async_trait::async_trait(?Send)] + impl Trait for Ctx { + async fn transform(&self, i: usize) -> usize { + apply_marker(i) + } + } + + #[paralegal::analyze] + async fn main() { + assert_eq!(Ctx.transform(0).await, 0); + } + ))?; + test.with_dep(["async-trait@=0.1.53"]); + test.run(|ctx| { + let marked = ctx + .marked_nodes(Identifier::new_intern("marked")) + .collect::>(); + assert!(!marked.is_empty()); + for src in marked.iter() { + for sink in marked.iter() { + assert!(src == sink || !src.flows_to(*sink, &ctx, EdgeSelection::Data)); + } + } + Ok(()) + }) +} + #[test] fn transitive_control_flow() -> Result<()> { let test = Test::new(stringify!( From ad5ed281d51562fd58324e373447f7d35ae1250b Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 10 Jun 2024 16:20:18 -0700 Subject: [PATCH 67/95] Test old version and latest --- crates/paralegal-policy/tests/lemmy.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/crates/paralegal-policy/tests/lemmy.rs b/crates/paralegal-policy/tests/lemmy.rs index 72ff44de23..a35710834b 100644 --- a/crates/paralegal-policy/tests/lemmy.rs +++ 
b/crates/paralegal-policy/tests/lemmy.rs @@ -128,8 +128,7 @@ fn support_calling_async_trait_0_1_53() -> Result<()> { test.run(calling_async_trait_policy) } -#[test] -fn call_async_trait_single_inline() -> Result<()> { +fn call_async_trait_single_inline_with_version(v: &str) -> Result<()> { let mut test = Test::new(stringify!( #[paralegal::marker(marked, return)] fn apply_marker(i: T) -> T { @@ -154,7 +153,7 @@ fn call_async_trait_single_inline() -> Result<()> { assert_eq!(Ctx.transform(0).await, 0); } ))?; - test.with_dep(["async-trait@=0.1.53"]); + test.with_dep([v]); test.run(|ctx| { let marked = ctx .marked_nodes(Identifier::new_intern("marked")) @@ -169,6 +168,16 @@ fn call_async_trait_single_inline() -> Result<()> { }) } +#[test] +fn call_async_trait_single_inline_0_1_53() -> Result<()> { + call_async_trait_single_inline_with_version("async_trait@=0.1.53") +} + +#[test] +fn call_async_trait_single_inline_latest() -> Result<()> { + call_async_trait_single_inline_with_version("async_trait") +} + #[test] fn transitive_control_flow() -> Result<()> { let test = Test::new(stringify!( From 6c6cc753c2abb7772477218065b78a7a6eabf46c Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 10 Jun 2024 18:30:52 -0700 Subject: [PATCH 68/95] Rename error types --- .../src/async_support.rs | 14 ++--- .../src/callback.rs | 4 +- .../src/construct.rs | 34 ++++++------ .../flowistry_pdg_construction/src/graph.rs | 4 +- crates/flowistry_pdg_construction/src/lib.rs | 4 +- .../src/local_analysis.rs | 16 +++--- .../flowistry_pdg_construction/src/utils.rs | 14 +++-- .../paralegal-flow/src/ana/graph_converter.rs | 4 +- crates/paralegal-flow/src/ana/metadata.rs | 52 +++++++++---------- 9 files changed, 69 insertions(+), 77 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index 4f52b2f34f..e974a13163 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ 
b/crates/flowistry_pdg_construction/src/async_support.rs @@ -19,7 +19,7 @@ use crate::{ construct::EmittableError, graph::push_call_string_root, local_analysis::{CallKind, LocalAnalysis}, - utils, ConstructionErr, PartialGraph, + utils, Error, PartialGraph, }; /// Describe in which way a function is `async`. @@ -192,7 +192,7 @@ pub fn determine_async<'tcx>( #[derive(Debug, Clone, PartialEq, Eq)] pub enum AsyncDeterminationResult<'tcx, T> { Resolved(T), - Unresolvable(ConstructionErr<'tcx>), + Unresolvable(Error<'tcx>), NotAsync, } @@ -279,7 +279,7 @@ impl<'tcx> EmittableError<'tcx> for AsyncResolutionErr { impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { pub(crate) fn try_handle_as_async( &self, - ) -> Result>, Vec>> { + ) -> Result>, Vec>> { let Some((generator_fn, location, asyncness)) = determine_async(self.tcx(), self.def_id, &self.body) else { @@ -324,10 +324,10 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { &'b self, args: &'b [Operand<'tcx>], call_span: Span, - ) -> Result<(Instance<'tcx>, Location, Place<'tcx>), ConstructionErr<'tcx>> { + ) -> Result<(Instance<'tcx>, Location, Place<'tcx>), Error<'tcx>> { macro_rules! async_err { ($msg:expr) => { - return Err(ConstructionErr::AsyncResolutionErr($msg)) + return Err(Error::AsyncResolutionErr($msg)) }; } macro_rules! 
let_assert { @@ -337,7 +337,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { }; }; } - let get_def_for_op = |op: &Operand<'tcx>| -> Result { + let get_def_for_op = |op: &Operand<'tcx>| -> Result { let mk_err = |reason| AsyncResolutionErr::WrongOperandShape { span: call_span, reason, @@ -453,7 +453,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { self.tcx().param_env_reveal_all_normalized(self.def_id), generics, ) - .ok_or_else(|| ConstructionErr::instance_resolution_failed(op, generics, call_span))?; + .ok_or_else(|| Error::instance_resolution_failed(op, generics, call_span))?; Ok((resolution, async_fn_call_loc, calling_convention)) } diff --git a/crates/flowistry_pdg_construction/src/callback.rs b/crates/flowistry_pdg_construction/src/callback.rs index db0b69b170..503eae8372 100644 --- a/crates/flowistry_pdg_construction/src/callback.rs +++ b/crates/flowistry_pdg_construction/src/callback.rs @@ -3,7 +3,7 @@ use flowistry_pdg::{rustc_portable::Location, CallString}; use rustc_middle::ty::Instance; -use crate::ConstructionErr; +use crate::Error; pub trait CallChangeCallback<'tcx> { fn on_inline(&self, info: CallInfo<'tcx>) -> CallChanges; @@ -36,7 +36,7 @@ impl<'tcx> CallChangeCallback<'tcx> for CallChangeCallbackFn<'tcx> { #[derive(Debug)] pub enum InlineMissReason<'tcx> { - Async(ConstructionErr<'tcx>), + Async(Error<'tcx>), } impl Default for CallChanges { diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 1e8fcc8cd3..4fefc3041e 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -57,7 +57,7 @@ pub struct MemoPdgConstructor<'tcx> { } #[derive(Debug, TyEncodable, TyDecodable, Clone, Hash, Eq, PartialEq)] -pub enum ConstructionErr<'tcx> { +pub enum Error<'tcx> { InstanceResolutionFailed { function: DefId, generics: GenericArgsRef<'tcx>, @@ -121,9 +121,9 @@ pub fn default_emit_error<'tcx>(e: &(impl EmittableError<'tcx> + ?Sized), 
tcx: T } } -impl<'tcx> EmittableError<'tcx> for ConstructionErr<'tcx> { +impl<'tcx> EmittableError<'tcx> for Error<'tcx> { fn span(&self, tcx: TyCtxt<'tcx>) -> Option { - use ConstructionErr::*; + use Error::*; match self { AsyncResolutionErr(e) => e.span(tcx), InstanceResolutionFailed { span, .. } | FailedLoadingExternalFunction { span, .. } => { @@ -137,7 +137,7 @@ impl<'tcx> EmittableError<'tcx> for ConstructionErr<'tcx> { } fn msg(&self, tcx: TyCtxt, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use ConstructionErr::*; + use Error::*; match self { InstanceResolutionFailed { function, generics, .. @@ -179,7 +179,7 @@ impl<'tcx> EmittableError<'tcx> for ConstructionErr<'tcx> { } } -impl<'tcx> ConstructionErr<'tcx> { +impl<'tcx> Error<'tcx> { pub fn instance_resolution_failed( function: DefId, generics: GenericArgsRef<'tcx>, @@ -234,7 +234,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { pub fn construct_root<'a>( &'a self, function: LocalDefId, - ) -> Result<&'a PartialGraph<'tcx>, Vec>> { + ) -> Result<&'a PartialGraph<'tcx>, Vec>> { let generics = manufacture_substs_for(self.tcx, function.to_def_id()).map_err(|i| vec![i])?; let resolution = try_resolve_function( @@ -244,20 +244,20 @@ impl<'tcx> MemoPdgConstructor<'tcx> { generics, ) .ok_or_else(|| { - vec![ConstructionErr::instance_resolution_failed( + vec![Error::instance_resolution_failed( function.to_def_id(), generics, self.tcx.def_span(function), )] })?; self.construct_for(resolution) - .and_then(|f| f.ok_or(vec![ConstructionErr::Impossible])) + .and_then(|f| f.ok_or(vec![Error::Impossible])) } pub(crate) fn construct_for<'a>( &'a self, resolution: Instance<'tcx>, - ) -> Result>, Vec>> { + ) -> Result>, Vec>> { let def_id = resolution.def_id(); let generics = resolution.args; if let Some(local) = def_id.as_local() { @@ -289,7 +289,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { pub fn construct_graph( &self, function: LocalDefId, - ) -> Result, Vec>> { + ) -> Result, Vec>> { let _args = 
manufacture_substs_for(self.tcx, function.to_def_id()) .map_err(|_| anyhow!("rustc error")); let g = self.construct_root(function)?.to_petgraph(); @@ -299,7 +299,7 @@ impl<'tcx> MemoPdgConstructor<'tcx> { pub(crate) struct WithConstructionErrors<'tcx, A> { pub(crate) inner: A, - pub errors: FxHashSet>, + pub errors: FxHashSet>, } impl<'tcx, A> WithConstructionErrors<'tcx, A> { @@ -310,7 +310,7 @@ impl<'tcx, A> WithConstructionErrors<'tcx, A> { } } - pub fn into_result(self) -> Result>> { + pub fn into_result(self) -> Result>> { if self.errors.is_empty() { Ok(self.inner) } else { @@ -481,7 +481,7 @@ impl<'tcx> PartialGraph<'tcx> { state: &'a InstructionState<'tcx>, terminator: &Terminator<'tcx>, location: Location, - ) -> Result>> { + ) -> Result>> { let TerminatorKind::Call { func, args, @@ -671,12 +671,8 @@ impl<'tcx> PartialGraph<'tcx> { } } -type PdgCache<'tcx> = Rc< - Cache< - (LocalDefId, GenericArgsRef<'tcx>), - Result, Vec>>, - >, ->; +type PdgCache<'tcx> = + Rc), Result, Vec>>>>; #[derive(Debug)] enum Inputs<'tcx> { diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index a87db44e9d..4a3d4eef1e 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -24,7 +24,7 @@ use rustc_utils::PlaceExt; pub use flowistry_pdg::{RichLocation, SourceUse, TargetUse}; use serde::{Deserialize, Serialize}; -use crate::{construct::ConstructionErr, utils::Captures, Asyncness}; +use crate::{construct::Error, utils::Captures, Asyncness}; /// A node in the program dependency graph. /// @@ -401,7 +401,7 @@ impl<'tcx> TransformCallString for PartialGraph<'tcx> { } } -pub type GraphLoaderError<'tcx> = Vec>; +pub type GraphLoaderError<'tcx> = Vec>; /// Abstracts over how previously written [`Artifact`]s are retrieved, allowing /// the user of this module to chose where to store them. 
diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index e4e0d8a03f..1b8bbae962 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -16,7 +16,7 @@ extern crate rustc_target; extern crate rustc_type_ir; pub use async_support::{determine_async, is_async_trait_fn, Asyncness}; -pub use construct::ConstructionErr; +pub use construct::Error; pub use graph::{Artifact, DepGraph, GraphLoader, NoLoader, PartialGraph}; pub mod callback; pub use crate::construct::{default_emit_error, EmittableError, MemoPdgConstructor}; @@ -40,7 +40,7 @@ pub fn compute_pdg<'tcx>(tcx: TyCtxt<'tcx>, params: Instance<'tcx>) -> DepGraph< constructor .construct_for(params) .unwrap() - .ok_or(ConstructionErr::Impossible) + .ok_or(Error::Impossible) .unwrap() .to_petgraph() } diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index f9e403d097..88c6d98964 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -28,7 +28,7 @@ use crate::{ approximation::ApproximationHandler, async_support::*, calling_convention::*, - construct::{ConstructionErr, WithConstructionErrors}, + construct::{Error, WithConstructionErrors}, graph::{DepEdge, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, utils::{self, is_async, is_non_default_trait_method, try_monomorphize}, @@ -345,12 +345,14 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { func: &Operand<'tcx>, args: &'b [Operand<'tcx>], span: Span, - ) -> Result>, Vec>> { + ) -> Result>, Vec>> { let tcx = self.tcx(); + trace!("Considering call at {location:?} in {:?}", self.def_id); + let (called_def_id, generic_args) = self .operand_to_def_id(func) - .ok_or_else(|| vec![ConstructionErr::operand_is_not_function_type(func)])?; + .ok_or_else(|| 
vec![Error::operand_is_not_function_type(func)])?; trace!("Resolved call to function: {}", self.fmt_fn(called_def_id)); // Monomorphize the called function with the known generic_args. @@ -363,7 +365,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { return Ok(None); } else { return Err( - vec![ConstructionErr::instance_resolution_failed( + vec![Error::instance_resolution_failed( called_def_id, generic_args, span @@ -480,7 +482,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { args: &[Operand<'tcx>], destination: Place<'tcx>, span: Span, - ) -> Result>> { + ) -> Result>> { // Note: my comments here will use "child" to refer to the callee and // "parent" to refer to the caller, since the words are most visually distinct. @@ -563,7 +565,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { pub(crate) fn construct_partial( &'a self, - ) -> Result, Vec>> { + ) -> Result, Vec>> { if let Some(g) = self.try_handle_as_async()? { return Ok(g); } @@ -627,7 +629,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { resolved_def_id: DefId, original_args: &'b [Operand<'tcx>], span: Span, - ) -> Result, ConstructionErr<'tcx>> { + ) -> Result, Error<'tcx>> { match self.try_poll_call_kind(def_id, original_args, span) { AsyncDeterminationResult::Resolved(r) => Ok(r), AsyncDeterminationResult::NotAsync => Ok(self diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 022b4f04f7..1fcfd013c4 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -20,7 +20,7 @@ use rustc_middle::{ use rustc_type_ir::{fold::TypeFoldable, AliasKind}; use rustc_utils::{BodyExt, PlaceExt}; -use crate::construct::ConstructionErr; +use crate::construct::Error; pub trait Captures<'a> {} impl<'a, T: ?Sized> Captures<'a> for T {} @@ -218,7 +218,7 @@ pub fn ty_resolve<'tcx>(ty: Ty<'tcx>, tcx: TyCtxt<'tcx>) -> Ty<'tcx> { pub fn manufacture_substs_for( tcx: TyCtxt<'_>, function: DefId, -) -> Result<&List>, 
ConstructionErr> { +) -> Result<&List>, Error> { use rustc_middle::ty::{ Binder, BoundRegionKind, DynKind, ExistentialPredicate, ExistentialProjection, ExistentialTraitRef, GenericParamDefKind, ImplPolarity, ParamTy, Region, TraitPredicate, @@ -247,7 +247,7 @@ pub fn manufacture_substs_for( ))) } GenericParamDefKind::Const { .. } => { - return Err(ConstructionErr::ConstantInGenerics { function }); + return Err(Error::ConstantInGenerics { function }); } GenericParamDefKind::Type { .. } => (), }; @@ -263,7 +263,7 @@ pub fn manufacture_substs_for( return None; }; let Some(TraitPredicate { trait_ref, .. }) = trait_ref.no_bound_vars() else { - return Some(Err(ConstructionErr::TraitRefWithBinder { function })); + return Some(Err(Error::TraitRefWithBinder { function })); }; if !matches!(trait_ref.self_ty().kind(), TyKind::Param(p) if *p == param_as_ty) { @@ -280,9 +280,7 @@ pub fn manufacture_substs_for( } else if let Some(pred) = clause.as_projection_clause() { trace!(" is projection clause"); let Some(pred) = pred.no_bound_vars() else { - return Some(Err(ConstructionErr::BoundVariablesInPredicates { - function, - })); + return Some(Err(Error::BoundVariablesInPredicates { function })); }; if !matches!(pred.self_ty().kind(), TyKind::Param(p) if *p == param_as_ty) { trace!(" Bailing because self type is not param type"); @@ -314,7 +312,7 @@ pub fn manufacture_substs_for( ))), 1 => (), _ => { - return Err(ConstructionErr::TooManyPredicatesForSynthesizingGenerics { + return Err(Error::TooManyPredicatesForSynthesizingGenerics { function, number: predicates.len() as u32, }) diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index ddbe71c2f2..2992372e48 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -14,7 +14,7 @@ use std::{cell::RefCell, fmt::Display, rc::Rc}; use super::{ default_index, - metadata::{BodyInfo, MetadataLoaderError}, + 
metadata::{BodyInfo, Error}, path_for_item, src_loc_for_span, RustcInstructionKind, SPDGGenerator, }; use anyhow::{anyhow, Result}; @@ -336,7 +336,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { fn create_flowistry_graph( generator: &SPDGGenerator<'tcx>, def_id: LocalDefId, - ) -> Result, MetadataLoaderError<'tcx>> { + ) -> Result, Error<'tcx>> { generator.metadata_loader.get_pdg(def_id.to_def_id()) } diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 42cb130151..8a40f7556b 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -15,7 +15,7 @@ use std::path::Path; use std::{fs::File, io::Read, rc::Rc}; use flowistry_pdg_construction::{ - default_emit_error, graph::InternedString, Asyncness, ConstructionErr, DepGraph, + self as construct, default_emit_error, graph::InternedString, Asyncness, DepGraph, EmittableError, GraphLoader, MemoPdgConstructor, PartialGraph, }; @@ -48,17 +48,17 @@ pub struct MetadataLoader<'tcx> { /// The types of errors that can arise from interacting with the [`MetadataLoader`]. 
#[derive(Debug)] -pub enum MetadataLoaderError<'tcx> { +pub enum Error<'tcx> { PdgForItemMissing(DefId), MetadataForCrateMissing(CrateNum), NoGenericsKnownForCallSite(CallString), NoSuchItemInCate(DefId), - ConstructionErrors(Vec>), + ConstructionErrors(Vec>), } -impl<'tcx> EmittableError<'tcx> for MetadataLoaderError<'tcx> { +impl<'tcx> EmittableError<'tcx> for Error<'tcx> { fn msg(&self, tcx: TyCtxt<'tcx>, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use MetadataLoaderError::*; + use Error::*; match self { PdgForItemMissing(def) => { write!(f, "found no pdg for item {}", tcx.def_path_debug_str(*def)) @@ -80,7 +80,7 @@ impl<'tcx> EmittableError<'tcx> for MetadataLoaderError<'tcx> { } fn emit(&self, tcx: TyCtxt<'tcx>) { - if let MetadataLoaderError::ConstructionErrors(e) = self { + if let Error::ConstructionErrors(e) = self { for e in e { e.emit(tcx); } @@ -90,20 +90,20 @@ impl<'tcx> EmittableError<'tcx> for MetadataLoaderError<'tcx> { } } -use MetadataLoaderError::*; +use Error::*; impl<'tcx> GraphLoader<'tcx> for MetadataLoader<'tcx> { fn load( &self, function: DefId, - ) -> Result>, Vec>> { + ) -> Result>, Vec>> { let Ok(meta) = self.get_metadata(function.krate) else { return Ok(None); }; let res = meta .pdgs .get(&function.index) - .ok_or_else(|| vec![ConstructionErr::CrateExistsButItemIsNotFound { function }])? + .ok_or_else(|| vec![construct::Error::CrateExistsButItemIsNotFound { function }])? 
.as_ref() .map_err(Clone::clone)?; @@ -138,9 +138,13 @@ impl<'tcx> MetadataLoader<'tcx> { .with_dump_mir(args.dbg().dump_mir()); let pdgs = emit_targets .into_iter() - .map(|t| { + .filter_map(|t| { + // if tcx.def_path_str(t) != "lemmy_api_crud::match_websocket_operation_crud" { + // return None; + // } + println!("Constructing for {:?}", tcx.def_path_str(t)); let graph = constructor.construct_root(t); - (t.local_def_index, graph.map(Clone::clone)) + Some((t.local_def_index, graph.map(Clone::clone))) }) .collect::>(); let meta = Metadata::from_pdgs(tcx, pdgs, marker_ctx.db()); @@ -191,9 +195,10 @@ impl<'tcx> MetadataLoader<'tcx> { } #[derive(Debug)] -struct ConstructionErrors<'tcx>(Vec>); +struct ConstructionErrors<'tcx>(Vec>); -pub type PdgMap<'tcx> = FxHashMap, Vec>>>; +pub type PdgMap<'tcx> = + FxHashMap, Vec>>>; /// Intermediate artifacts stored on disc for every crate. /// @@ -271,10 +276,7 @@ impl<'tcx> MetadataLoader<'tcx> { }) } - pub fn get_metadata( - &self, - key: CrateNum, - ) -> Result<&Metadata<'tcx>, MetadataLoaderError<'tcx>> { + pub fn get_metadata(&self, key: CrateNum) -> Result<&Metadata<'tcx>, Error<'tcx>> { let meta = self .cache .get(key, |_| { @@ -297,34 +299,28 @@ impl<'tcx> MetadataLoader<'tcx> { Ok(meta) } - pub fn get_partial_graph( - &self, - key: DefId, - ) -> Result<&PartialGraph<'tcx>, MetadataLoaderError<'tcx>> { + pub fn get_partial_graph(&self, key: DefId) -> Result<&PartialGraph<'tcx>, Error<'tcx>> { let meta = self.get_metadata(key.krate)?; let result = meta.pdgs.get(&key.index).ok_or(PdgForItemMissing(key))?; result .as_ref() - .map_err(|e| MetadataLoaderError::ConstructionErrors(e.clone())) + .map_err(|e| Error::ConstructionErrors(e.clone())) } - pub fn get_body_info(&self, key: DefId) -> Result<&BodyInfo<'tcx>, MetadataLoaderError<'tcx>> { + pub fn get_body_info(&self, key: DefId) -> Result<&BodyInfo<'tcx>, Error<'tcx>> { let meta = self.get_metadata(key.krate)?; let res = 
meta.bodies.get(&key.index).ok_or(NoSuchItemInCate(key)); res } - pub fn get_mono( - &self, - cs: CallString, - ) -> Result, MetadataLoaderError<'tcx>> { + pub fn get_mono(&self, cs: CallString) -> Result, Error<'tcx>> { let key = cs.root().function; self.get_partial_graph(key)? .get_mono(cs) .ok_or(NoGenericsKnownForCallSite(cs)) } - pub fn get_pdg(&self, key: DefId) -> Result, MetadataLoaderError<'tcx>> { + pub fn get_pdg(&self, key: DefId) -> Result, Error<'tcx>> { Ok(self.get_partial_graph(key)?.to_petgraph()) } From 3c6f7ed9f6f3f5404d8d3382cf84aed76c8587ee Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 11 Jun 2024 15:35:56 -0700 Subject: [PATCH 69/95] Handle top-level async workarounds on the paralegal side --- .../src/async_support.rs | 45 ++-------- .../src/construct.rs | 8 +- .../flowistry_pdg_construction/src/graph.rs | 18 +--- crates/flowistry_pdg_construction/src/lib.rs | 2 +- .../src/local_analysis.rs | 11 +-- .../paralegal-flow/src/ana/graph_converter.rs | 17 +++- crates/paralegal-flow/src/ana/metadata.rs | 85 ++++++++++++++----- crates/paralegal-flow/src/ana/mod.rs | 9 +- crates/paralegal-flow/src/discover.rs | 2 +- crates/paralegal-flow/src/lib.rs | 4 +- crates/paralegal-policy/tests/helpers/mod.rs | 20 ++++- crates/paralegal-policy/tests/lemmy.rs | 6 +- 12 files changed, 127 insertions(+), 100 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index e974a13163..2298516626 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -17,9 +17,8 @@ use rustc_span::Span; use crate::{ construct::EmittableError, - graph::push_call_string_root, local_analysis::{CallKind, LocalAnalysis}, - utils, Error, PartialGraph, + utils, Error, }; /// Describe in which way a function is `async`. 
@@ -27,16 +26,9 @@ use crate::{ /// Critically distinguishes between a normal `async fn` and an /// `#[async_trait]`. #[derive(Debug, Clone, Copy, Decodable, Encodable)] -pub enum Asyncness { - No, - AsyncFn, - AsyncTrait, -} - -impl Asyncness { - pub fn is_async(self) -> bool { - !matches!(self, Asyncness::No) - } +pub enum AsyncType { + Fn, + Trait, } /// Stores ids that are needed to construct projections around async functions. @@ -174,13 +166,13 @@ pub fn determine_async<'tcx>( tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<'tcx>, -) -> Option<(Instance<'tcx>, Location, Asyncness)> { +) -> Option<(Instance<'tcx>, Location, AsyncType)> { let ((generator_def_id, args, loc), asyncness) = if tcx.asyncness(def_id).is_async() { - (get_async_generator(body), Asyncness::AsyncFn) + (get_async_generator(body), AsyncType::Fn) } else { ( try_as_async_trait_function(tcx, def_id.to_def_id(), body)?, - Asyncness::AsyncTrait, + AsyncType::Trait, ) }; let param_env = tcx.param_env_reveal_all_normalized(def_id); @@ -277,29 +269,6 @@ impl<'tcx> EmittableError<'tcx> for AsyncResolutionErr { } impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { - pub(crate) fn try_handle_as_async( - &self, - ) -> Result>, Vec>> { - let Some((generator_fn, location, asyncness)) = - determine_async(self.tcx(), self.def_id, &self.body) - else { - return Ok(None); - }; - - let Some(g) = self.memo.construct_for(generator_fn)? 
else { - return Ok(None); - }; - let gloc = GlobalLocation { - function: self.def_id.to_def_id(), - location: flowistry_pdg::RichLocation::Location(location), - }; - let mut new_g = push_call_string_root(g, gloc); - //let g_generics = std::mem::replace(&mut new_g.graph.generics, self.generic_args()); - new_g.asyncness = asyncness; - new_g.monos.insert(CallString::single(gloc), new_g.generics); - Ok(Some(new_g)) - } - pub(crate) fn try_poll_call_kind<'b>( &'b self, def_id: DefId, diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 4fefc3041e..b6e989c8c7 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -8,7 +8,7 @@ //! [`InstructionState`] at each instruction in the procedure. //! 2. [`PartialGraph`] implements [`ResultsVisitor`] over the analysis result -use std::{fmt::Display, rc::Rc}; +use std::{borrow::Cow, fmt::Display, rc::Rc}; use anyhow::anyhow; use either::Either; @@ -26,11 +26,11 @@ use rustc_middle::{ mir::{ visit::Visitor, AggregateKind, Location, Operand, Place, Rvalue, Terminator, TerminatorKind, }, - ty::{GenericArgsRef, Instance, TyCtxt}, + ty::{GenericArgsRef, Instance, ParamEnv, TyCtxt}, }; use rustc_mir_dataflow::{AnalysisDomain, Results, ResultsVisitor}; use rustc_span::Span; -use rustc_utils::cache::Cache; +use rustc_utils::{cache::Cache, mir::borrowck_facts}; use crate::{ async_support::*, @@ -39,7 +39,7 @@ use crate::{ }, local_analysis::{CallHandling, InstructionState, LocalAnalysis}, mutation::{ModularMutationVisitor, Mutation, Time}, - utils::{manufacture_substs_for, try_resolve_function}, + utils::{manufacture_substs_for, try_monomorphize, try_resolve_function}, CallChangeCallback, GraphLoader, }; diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 4a3d4eef1e..df4db31f47 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ 
b/crates/flowistry_pdg_construction/src/graph.rs @@ -14,7 +14,7 @@ use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hir::def_id::{DefId, DefIndex}; use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ - mir::{Body, Place}, + mir::{Body, Location, Place}, ty::{GenericArgsRef, TyCtxt}, }; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; @@ -24,7 +24,7 @@ use rustc_utils::PlaceExt; pub use flowistry_pdg::{RichLocation, SourceUse, TargetUse}; use serde::{Deserialize, Serialize}; -use crate::{construct::Error, utils::Captures, Asyncness}; +use crate::{construct::Error, utils::Captures, AsyncType}; /// A node in the program dependency graph. /// @@ -292,7 +292,6 @@ pub struct PartialGraph<'tcx> { pub(crate) edges: FxHashSet<(DepNode<'tcx>, DepNode<'tcx>, DepEdge)>, pub(crate) monos: FxHashMap>, pub(crate) generics: GenericArgsRef<'tcx>, - pub(crate) asyncness: Asyncness, def_id: DefId, arg_count: usize, } @@ -316,22 +315,12 @@ impl<'tcx> PartialGraph<'tcx> { } } - pub fn asyncness(&self) -> Asyncness { - self.asyncness - } - - pub fn new( - asyncness: Asyncness, - generics: GenericArgsRef<'tcx>, - def_id: DefId, - arg_count: usize, - ) -> Self { + pub fn new(generics: GenericArgsRef<'tcx>, def_id: DefId, arg_count: usize) -> Self { Self { nodes: Default::default(), edges: Default::default(), monos: Default::default(), generics, - asyncness, def_id, arg_count, } @@ -377,7 +366,6 @@ impl<'tcx> TransformCallString for PartialGraph<'tcx> { let recurse_node = |n: &DepNode<'tcx>| n.transform_call_string(&f); Self { generics: self.generics, - asyncness: self.asyncness, nodes: self.nodes.iter().map(recurse_node).collect(), edges: self .edges diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index 1b8bbae962..dd617d47e8 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -15,7 +15,7 @@ extern crate rustc_span; extern 
crate rustc_target; extern crate rustc_type_ir; -pub use async_support::{determine_async, is_async_trait_fn, Asyncness}; +pub use async_support::{determine_async, is_async_trait_fn, AsyncType}; pub use construct::Error; pub use graph::{Artifact, DepGraph, GraphLoader, NoLoader, PartialGraph}; pub mod callback; diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index 88c6d98964..7e8c36422d 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -32,7 +32,7 @@ use crate::{ graph::{DepEdge, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, utils::{self, is_async, is_non_default_trait_method, try_monomorphize}, - Asyncness, CallChangeCallback, CallChanges, CallInfo, MemoPdgConstructor, SkipCall, + AsyncType, CallChangeCallback, CallChanges, CallInfo, MemoPdgConstructor, SkipCall, }; #[derive(PartialEq, Eq, Default, Clone, Debug)] @@ -563,13 +563,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { self.root.args } - pub(crate) fn construct_partial( - &'a self, - ) -> Result, Vec>> { - if let Some(g) = self.try_handle_as_async()? 
{ - return Ok(g); - } - + pub(crate) fn construct_partial(&'a self) -> Result, Vec>> { let mut analysis = WithConstructionErrors::new(self) .into_engine(self.tcx(), &self.body) .iterate_to_fixpoint(); @@ -579,7 +573,6 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { } let mut final_state = WithConstructionErrors::new(PartialGraph::new( - Asyncness::No, self.generic_args(), self.def_id.to_def_id(), self.body.arg_count, diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 2992372e48..f7193ac020 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -14,12 +14,13 @@ use std::{cell::RefCell, fmt::Display, rc::Rc}; use super::{ default_index, - metadata::{BodyInfo, Error}, + metadata::{AsyncStatus, BodyInfo, Error}, path_for_item, src_loc_for_span, RustcInstructionKind, SPDGGenerator, }; use anyhow::{anyhow, Result}; use either::Either; use flowistry_pdg_construction::{ + determine_async, graph::{DepEdge, DepEdgeKind, DepGraph, DepNode}, utils::try_monomorphize, CallChangeCallback, CallChanges, CallInfo, EmittableError, InlineMissReason, @@ -332,12 +333,22 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { } /// Create an initial flowistry graph for the function identified by - /// `local_def_id`. + /// `def_id`. fn create_flowistry_graph( generator: &SPDGGenerator<'tcx>, def_id: LocalDefId, ) -> Result, Error<'tcx>> { - generator.metadata_loader.get_pdg(def_id.to_def_id()) + // We only demand a local def id to ensure that this is always called in + // the same crate. + let def_id = def_id.to_def_id(); + Ok(match generator.metadata_loader.get_asyncness(def_id) { + AsyncStatus::NotAsync => generator.metadata_loader.get_partial_graph(def_id), + AsyncStatus::Async { + generator_id, + asyncness: _, + } => generator.metadata_loader.get_partial_graph(generator_id), + }? + .to_petgraph()) } /// Consume the generator and compile the [`SPDG`]. 
diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 8a40f7556b..3a1d051484 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -14,15 +14,16 @@ use crate::{ use std::path::Path; use std::{fs::File, io::Read, rc::Rc}; +use construct::determine_async; use flowistry_pdg_construction::{ - self as construct, default_emit_error, graph::InternedString, Asyncness, DepGraph, + self as construct, default_emit_error, graph::InternedString, AsyncType, DepGraph, EmittableError, GraphLoader, MemoPdgConstructor, PartialGraph, }; use rustc_hash::FxHashMap; use rustc_hir::def_id::{CrateNum, DefIndex, LocalDefId, LOCAL_CRATE}; use rustc_index::IndexVec; -use rustc_macros::{TyDecodable, TyEncodable}; +use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ mir::{ BasicBlock, BasicBlockData, HasLocalDecls, Local, LocalDecl, LocalDecls, LocalKind, @@ -107,7 +108,7 @@ impl<'tcx> GraphLoader<'tcx> for MetadataLoader<'tcx> { .as_ref() .map_err(Clone::clone)?; - Ok(Some(res)) + Ok(Some(&res.graph)) } } @@ -123,7 +124,7 @@ impl<'tcx> MetadataLoader<'tcx> { self: Rc, args: &'static Args, path: impl AsRef, - ) -> (Vec, MarkerCtx<'tcx>) { + ) -> (Vec, MarkerCtx<'tcx>, MemoPdgConstructor<'tcx>) { let tcx = self.tcx; let mut collector = CollectingVisitor::new(tcx, args, self.clone()); collector.run(); @@ -143,8 +144,23 @@ impl<'tcx> MetadataLoader<'tcx> { // return None; // } println!("Constructing for {:?}", tcx.def_path_str(t)); - let graph = constructor.construct_root(t); - Some((t.local_def_index, graph.map(Clone::clone))) + let graph = constructor.construct_root(t).map(|graph| { + let body = borrowck_facts::get_body_with_borrowck_facts(tcx, t); + // MONOMORPHIZATION: normally we need to monomorphize the + // body, but here we don't because generics can't change + // whether a function has async structure. 
+ let async_status = determine_async(tcx, t, &body.body) + .map(|(inst, _loc, asyncness)| AsyncStatus::Async { + generator_id: inst.def_id().index, + asyncness, + }) + .unwrap_or(AsyncStatus::NotAsync); + PdgInfo { + graph: graph.clone(), + async_status, + } + }); + Some((t.local_def_index, graph)) }) .collect::>(); let meta = Metadata::from_pdgs(tcx, pdgs, marker_ctx.db()); @@ -152,7 +168,7 @@ impl<'tcx> MetadataLoader<'tcx> { debug!("Writing metadata to {}", path.display()); meta.write(path, tcx); self.cache.get(LOCAL_CRATE, |_| Some(meta)); - (collector.functions_to_analyze, marker_ctx) + (collector.functions_to_analyze, marker_ctx, constructor) } pub fn get_annotations(&self, key: DefId) -> &[Annotation] { @@ -194,11 +210,41 @@ impl<'tcx> MetadataLoader<'tcx> { } } -#[derive(Debug)] -struct ConstructionErrors<'tcx>(Vec>); +#[derive(Clone, Debug, TyEncodable, TyDecodable)] +pub struct PdgInfo<'tcx> { + pub graph: PartialGraph<'tcx>, + pub async_status: AsyncStatus, +} + +#[derive(Clone, Copy, Debug, Encodable, Decodable)] +pub enum AsyncStatus { + NotAsync, + Async { + generator_id: Def, + asyncness: AsyncType, + }, +} + +impl AsyncStatus { + pub fn is_async(&self) -> bool { + matches!(self, Self::Async { .. }) + } -pub type PdgMap<'tcx> = - FxHashMap, Vec>>>; + fn map_index(&self, f: impl FnOnce(&Def) -> D) -> AsyncStatus { + match self { + Self::NotAsync => AsyncStatus::NotAsync, + Self::Async { + generator_id, + asyncness, + } => AsyncStatus::Async { + generator_id: f(generator_id), + asyncness: *asyncness, + }, + } + } +} + +pub type PdgMap<'tcx> = FxHashMap, Vec>>>; /// Intermediate artifacts stored on disc for every crate. 
/// @@ -232,7 +278,7 @@ impl<'tcx> Metadata<'tcx> { for call_string in pdgs .values() .filter_map(|e| e.as_ref().ok()) - .flat_map(|subgraph| subgraph.mentioned_call_string()) + .flat_map(|subgraph| subgraph.graph.mentioned_call_string()) { for location in call_string.iter() { if let Some(local) = location.function.as_local() { @@ -305,6 +351,7 @@ impl<'tcx> MetadataLoader<'tcx> { result .as_ref() .map_err(|e| Error::ConstructionErrors(e.clone())) + .map(|e| &e.graph) } pub fn get_body_info(&self, key: DefId) -> Result<&BodyInfo<'tcx>, Error<'tcx>> { @@ -320,11 +367,7 @@ impl<'tcx> MetadataLoader<'tcx> { .ok_or(NoGenericsKnownForCallSite(cs)) } - pub fn get_pdg(&self, key: DefId) -> Result, Error<'tcx>> { - Ok(self.get_partial_graph(key)?.to_petgraph()) - } - - pub fn get_asyncness(&self, key: DefId) -> Asyncness { + pub fn get_asyncness(&self, key: DefId) -> AsyncStatus { (|| { Some( self.get_metadata(key.krate) @@ -333,10 +376,14 @@ impl<'tcx> MetadataLoader<'tcx> { .get(&key.index)? .as_ref() .ok()? - .asyncness(), + .async_status + .map_index(|i| DefId { + krate: key.krate, + index: *i, + }), ) })() - .unwrap_or(Asyncness::No) + .unwrap_or(AsyncStatus::NotAsync) } } diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 08bb829d18..0bdea3d9e0 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -4,6 +4,8 @@ //! [`CollectingVisitor`](crate::discover::CollectingVisitor) and then calling //! [`analyze`](SPDGGenerator::analyze). 
+use std::rc::Rc; + use crate::{ ann::{Annotation, MarkerAnnotation}, desc::*, @@ -12,9 +14,9 @@ use crate::{ DefId, HashMap, HashSet, LogLevelConfig, MarkerCtx, Symbol, }; -use std::rc::Rc; - use anyhow::Result; + +use flowistry_pdg_construction::MemoPdgConstructor; use itertools::Itertools; use petgraph::visit::GraphBase; @@ -39,6 +41,7 @@ pub struct SPDGGenerator<'tcx> { pub opts: &'static crate::Args, pub tcx: TyCtxt<'tcx>, marker_ctx: MarkerCtx<'tcx>, + constructor: MemoPdgConstructor<'tcx>, metadata_loader: Rc>, } @@ -47,6 +50,7 @@ impl<'tcx> SPDGGenerator<'tcx> { marker_ctx: MarkerCtx<'tcx>, opts: &'static crate::Args, tcx: TyCtxt<'tcx>, + constructor: MemoPdgConstructor<'tcx>, metadata_loader: Rc>, ) -> Self { Self { @@ -54,6 +58,7 @@ impl<'tcx> SPDGGenerator<'tcx> { opts, tcx, metadata_loader, + constructor, } } diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index c58683fa54..77c3145bb5 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -127,6 +127,7 @@ impl<'tcx> intravisit::Visitor<'tcx> for CollectingVisitor<'tcx> { _s: Span, id: LocalDefId, ) { + self.emit_target_collector.push(id); match &kind { FnKind::ItemFn(name, _, _) | FnKind::Method(name, _) => { if self.should_analyze_function(id) { @@ -135,7 +136,6 @@ impl<'tcx> intravisit::Visitor<'tcx> for CollectingVisitor<'tcx> { def_id: id, }); } - self.emit_target_collector.push(id) } _ => (), } diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index a99a52d131..bb895effda 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -153,12 +153,12 @@ impl Callbacks { let intermediate_out_file = intermediate_out_file_path(tcx)?; - let (analysis_targets, mctx) = loader + let (analysis_targets, mctx, constructor) = loader .clone() .collect_and_emit_metadata(self.opts, intermediate_out_file); tcx.sess.abort_if_errors(); - let mut gen = SPDGGenerator::new(mctx, 
self.opts, tcx, loader); + let mut gen = SPDGGenerator::new(mctx, self.opts, tcx, constructor, loader); let compilation = if !analysis_targets.is_empty() { let desc = gen.analyze(analysis_targets)?; diff --git a/crates/paralegal-policy/tests/helpers/mod.rs b/crates/paralegal-policy/tests/helpers/mod.rs index 181eb34616..cffb4c13cd 100644 --- a/crates/paralegal-policy/tests/helpers/mod.rs +++ b/crates/paralegal-policy/tests/helpers/mod.rs @@ -8,7 +8,7 @@ use std::{ path::{Path, PathBuf}, process::Command, sync::Arc, - time::SystemTime, + time::{Duration, Instant, SystemTime, SystemTimeError, UNIX_EPOCH}, }; use anyhow::anyhow; @@ -34,14 +34,16 @@ lazy_static::lazy_static! { fn temporary_directory(to_hash: &impl Hash) -> Result { let tmpdir = env::temp_dir(); - let secs = SystemTime::now().duration_since(SystemTime::UNIX_EPOCH)?; let mut hasher = DefaultHasher::new(); - secs.hash(&mut hasher); to_hash.hash(&mut hasher); + let t = SystemTime::now().duration_since(UNIX_EPOCH)?; + t.hash(&mut hasher); let hash = hasher.finish(); let short_hash = hash % 0x1_000_000; let path = tmpdir.join(format!("test-crate-{short_hash:06x}")); - fs::create_dir(&path)?; + if !path.exists() { + fs::create_dir(&path)?; + } Ok(path) } @@ -70,6 +72,16 @@ impl Test { pub fn new(code: impl Into) -> Result { let code = code.into(); let tempdir = temporary_directory(&code)?; + for entry in fs::read_dir(&tempdir)? 
{ + let f = entry?; + let typ = f.file_type()?; + if typ.is_dir() { + fs::remove_dir_all(f.path())?; + } else if typ.is_file() { + fs::remove_file(f.path())?; + } + } + println!("Running in {}", tempdir.display()); Ok(Self { code, external_ann_file_name: tempdir.join("external_annotations.toml"), diff --git a/crates/paralegal-policy/tests/lemmy.rs b/crates/paralegal-policy/tests/lemmy.rs index a35710834b..2e99cf7cff 100644 --- a/crates/paralegal-policy/tests/lemmy.rs +++ b/crates/paralegal-policy/tests/lemmy.rs @@ -169,13 +169,15 @@ fn call_async_trait_single_inline_with_version(v: &str) -> Result<()> { } #[test] +#[ignore = "No support yet for calling `async_trait` functions, as that requires (a form of) `dyn` handling"] fn call_async_trait_single_inline_0_1_53() -> Result<()> { - call_async_trait_single_inline_with_version("async_trait@=0.1.53") + call_async_trait_single_inline_with_version("async-trait@=0.1.53") } #[test] +#[ignore = "No support yet for calling `async_trait` functions, as that requires (a form of) `dyn` handling"] fn call_async_trait_single_inline_latest() -> Result<()> { - call_async_trait_single_inline_with_version("async_trait") + call_async_trait_single_inline_with_version("async-trait") } #[test] From 872d30761c5cb2718d89fb78552999520ca0c27d Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 11 Jun 2024 16:06:17 -0700 Subject: [PATCH 70/95] Defer normalization errors --- .../src/construct.rs | 43 ++++++++++++++++--- crates/flowistry_pdg_construction/src/lib.rs | 4 +- .../src/local_analysis.rs | 14 ++++-- .../flowistry_pdg_construction/src/utils.rs | 15 +++++-- .../paralegal-flow/src/ana/graph_converter.rs | 22 +++++++--- crates/paralegal-flow/src/ann/db.rs | 17 +++++--- 6 files changed, 88 insertions(+), 27 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index b6e989c8c7..fa14d76209 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ 
b/crates/flowistry_pdg_construction/src/construct.rs @@ -26,7 +26,9 @@ use rustc_middle::{ mir::{ visit::Visitor, AggregateKind, Location, Operand, Place, Rvalue, Terminator, TerminatorKind, }, - ty::{GenericArgsRef, Instance, ParamEnv, TyCtxt}, + ty::{ + normalize_erasing_regions::NormalizationError, GenericArgsRef, Instance, ParamEnv, TyCtxt, + }, }; use rustc_mir_dataflow::{AnalysisDomain, Results, ResultsVisitor}; use rustc_span::Span; @@ -89,6 +91,11 @@ pub enum Error<'tcx> { op: String, }, AsyncResolutionErr(AsyncResolutionErr), + NormalizationError { + instance: Instance<'tcx>, + span: Span, + error: String, + }, } pub trait EmittableError<'tcx> { @@ -102,6 +109,24 @@ pub trait EmittableError<'tcx> { } } +pub trait UnwrapEmittable<'tcx> { + type Inner; + fn unwrap_emittable(self, tcx: TyCtxt<'tcx>) -> Self::Inner; +} + +impl<'tcx, T, E: EmittableError<'tcx>> UnwrapEmittable<'tcx> for Result { + type Inner = T; + fn unwrap_emittable(self, tcx: TyCtxt<'tcx>) -> Self::Inner { + match self { + Result::Ok(inner) => inner, + Result::Err(e) => { + default_emit_error(&e, tcx); + panic!("unwrap") + } + } + } +} + pub fn default_emit_error<'tcx>(e: &(impl EmittableError<'tcx> + ?Sized), tcx: TyCtxt<'tcx>) { struct FmtWithTcx<'tcx, A> { tcx: TyCtxt<'tcx>, @@ -126,9 +151,9 @@ impl<'tcx> EmittableError<'tcx> for Error<'tcx> { use Error::*; match self { AsyncResolutionErr(e) => e.span(tcx), - InstanceResolutionFailed { span, .. } | FailedLoadingExternalFunction { span, .. } => { - Some(*span) - } + InstanceResolutionFailed { span, .. } + | FailedLoadingExternalFunction { span, .. } + | NormalizationError { span, .. 
} => Some(*span), BoundVariablesInPredicates { function } | TraitRefWithBinder { function } | ConstantInGenerics { function } => Some(tcx.def_span(*function)), @@ -175,6 +200,12 @@ impl<'tcx> EmittableError<'tcx> for Error<'tcx> { write!(f, "operand {op} is not of function type") } AsyncResolutionErr(e) => e.msg(tcx, f), + NormalizationError { + instance, error, .. + } => write!( + f, + "failed to normalize with instance {instance:?} because {error}" + ), } } } @@ -263,7 +294,9 @@ impl<'tcx> MemoPdgConstructor<'tcx> { if let Some(local) = def_id.as_local() { self.pdg_cache .get_maybe_recursive((local, generics), |_| { - let g = LocalAnalysis::new(self, resolution).construct_partial()?; + let g = LocalAnalysis::new(self, resolution) + .map_err(|e| vec![e])? + .construct_partial()?; g.check_invariants(); Ok(g) }) diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index dd617d47e8..c306579d04 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -19,7 +19,9 @@ pub use async_support::{determine_async, is_async_trait_fn, AsyncType}; pub use construct::Error; pub use graph::{Artifact, DepGraph, GraphLoader, NoLoader, PartialGraph}; pub mod callback; -pub use crate::construct::{default_emit_error, EmittableError, MemoPdgConstructor}; +pub use crate::construct::{ + default_emit_error, EmittableError, MemoPdgConstructor, UnwrapEmittable, +}; pub use callback::{ CallChangeCallback, CallChangeCallbackFn, CallChanges, CallInfo, InlineMissReason, SkipCall, }; diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index 7e8c36422d..5a3b6ae8b8 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -69,7 +69,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { pub(crate) fn new( memo: &'a MemoPdgConstructor<'tcx>, root: 
Instance<'tcx>, - ) -> LocalAnalysis<'tcx, 'a> { + ) -> Result, Error<'tcx>> { let tcx = memo.tcx; let def_id = root.def_id().expect_local(); let body_with_facts = borrowck_facts::get_body_with_borrowck_facts(tcx, def_id); @@ -78,7 +78,13 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { // Some(cx) => cx.param_env, // None => ParamEnv::reveal_all(), // }; - let body = try_monomorphize(root, tcx, param_env, &body_with_facts.body); + let body = try_monomorphize( + root, + tcx, + param_env, + &body_with_facts.body, + tcx.def_span(root.def_id()), + )?; if memo.dump_mir { use std::io::Write; @@ -96,7 +102,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { let body_assignments = utils::find_body_assignments(&body); - LocalAnalysis { + Ok(LocalAnalysis { memo, root, body_with_facts, @@ -106,7 +112,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { start_loc, def_id, body_assignments, - } + }) } fn make_dep_node( diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 1fcfd013c4..605f01c7cf 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -1,4 +1,4 @@ -use std::{collections::hash_map::Entry, hash::Hash}; +use std::{collections::hash_map::Entry, fmt::Debug, hash::Hash}; use either::Either; @@ -17,6 +17,7 @@ use rustc_middle::{ }, }; +use rustc_span::Span; use rustc_type_ir::{fold::TypeFoldable, AliasKind}; use rustc_utils::{BodyExt, PlaceExt}; @@ -58,15 +59,21 @@ pub fn try_monomorphize<'tcx, 'a, T>( tcx: TyCtxt<'tcx>, param_env: ParamEnv<'tcx>, t: &'a T, -) -> T + span: Span, +) -> Result> where - T: TypeFoldable> + Clone, + T: TypeFoldable> + Clone + Debug, { - inst.subst_mir_and_normalize_erasing_regions( + inst.try_subst_mir_and_normalize_erasing_regions( tcx, param_env, EarlyBinder::bind(tcx.erase_regions(t.clone())), ) + .map_err(|e| Error::NormalizationError { + instance: inst, + span, + error: format!("{e:?}"), + }) } /// Attempt to interpret this type as a 
statically determinable function and its diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index f7193ac020..e2832615ed 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -20,11 +20,11 @@ use super::{ use anyhow::{anyhow, Result}; use either::Either; use flowistry_pdg_construction::{ - determine_async, graph::{DepEdge, DepEdgeKind, DepGraph, DepNode}, utils::try_monomorphize, CallChangeCallback, CallChanges, CallInfo, EmittableError, InlineMissReason, SkipCall::Skip, + UnwrapEmittable, }; use petgraph::{ visit::{IntoNodeReferences, NodeIndexable, NodeRef}, @@ -227,7 +227,8 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { &self, at: CallString, place: mir::PlaceRef<'tcx>, - ) -> Option> { + span: rustc_span::Span, + ) -> Result>, Error<'tcx>> { let tcx = self.tcx(); let body = self .generator @@ -240,7 +241,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { // from the base place. let place = if self.entrypoint_is_async() && place.local.as_u32() == 1 && at.len() == 2 { if place.projection.is_empty() { - return None; + return Ok(None); } // in the case of targeting the top-level async closure (e.g. async args) // we'll keep the first projection. @@ -267,9 +268,10 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { ) .unwrap() .unwrap(); - let resolution = try_monomorphize(instance, tcx, ty::ParamEnv::reveal_all(), &raw_ty); + let resolution = try_monomorphize(instance, tcx, ty::ParamEnv::reveal_all(), &raw_ty, span) + .map_err(|e| Error::ConstructionErrors(vec![e]))?; //println!("Resolved to {resolution:?}"); - Some(resolution) + Ok(Some(resolution)) } /// Fetch annotations item identified by this `id`. 
@@ -307,7 +309,10 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { fn handle_node_types(&mut self, old_node: Node, weight: &DepNode<'tcx>) { let i = self.new_node_for(old_node); - let Some(place_ty) = self.determine_place_type(weight.at, weight.place.as_ref()) else { + let Some(place_ty) = self + .determine_place_type(weight.at, weight.place.as_ref(), weight.span) + .unwrap_emittable(self.tcx()) + else { return; }; // Restore after fixing https://github.com/brownsys/paralegal/issues/138 @@ -315,7 +320,10 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { let deep = true; let mut node_types = self.type_is_marked(place_ty, deep).collect::>(); for (p, _) in weight.place.iter_projections() { - if let Some(place_ty) = self.determine_place_type(weight.at, p) { + if let Some(place_ty) = self + .determine_place_type(weight.at, p, weight.span) + .unwrap_emittable(self.tcx()) + { node_types.extend(self.type_is_marked(place_ty, false)); } } diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index e0faaeae50..6d0d5e6b9f 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -183,6 +183,7 @@ impl<'tcx> MarkerCtx<'tcx> { /// If the transitive marker cache did not contain the answer, this is what /// computes it. 
fn compute_reachable_markers(&self, res: Instance<'tcx>) -> Box<[InternedString]> { + let tcx = self.tcx(); trace!("Computing reachable markers for {res:?}"); let Some(local) = res.def_id().as_local() else { trace!(" Is not local"); @@ -192,17 +193,21 @@ impl<'tcx> MarkerCtx<'tcx> { trace!(" Is marked"); return Box::new([]); } - let Some(body) = self.tcx().body_for_def_id_default_policy(local) else { + let Some(body) = tcx.body_for_def_id_default_policy(local) else { trace!(" Cannot find body"); return Box::new([]); }; - let mono_body = try_monomorphize( + let Ok(mono_body) = try_monomorphize( res, - self.tcx(), - self.tcx().param_env_reveal_all_normalized(local), + tcx, + tcx.param_env_reveal_all_normalized(local), &body.body, - ); - if let Some((async_fn, ..)) = determine_async(self.tcx(), local, &mono_body) { + tcx.def_span(res.def_id()), + ) else { + trace!(" monomorphization error"); + return Box::new([]); + }; + if let Some((async_fn, ..)) = determine_async(tcx, local, &mono_body) { return self.get_reachable_markers(async_fn).into(); } mono_body From 7ca7d6d47f52e5685815ff1b38669389ffafa5f8 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 13 Jun 2024 15:26:31 -0700 Subject: [PATCH 71/95] Alternate test harness + clone test case --- Cargo.lock | 30 ++- Cargo.toml | 12 +- .../src/calling_convention.rs | 4 +- .../src/construct.rs | 19 +- .../flowistry_pdg_construction/src/graph.rs | 27 ++- .../src/local_analysis.rs | 50 +++-- .../flowistry_pdg_construction/src/utils.rs | 194 ++++++++++++++++-- crates/paralegal-flow/src/ana/metadata.rs | 14 +- crates/paralegal-flow/src/args.rs | 17 ++ crates/paralegal-flow/src/lib.rs | 54 +++-- crates/paralegal-flow/src/test_utils.rs | 80 +++++++- crates/paralegal-flow/tests/clone-test.rs | 74 +++++++ 12 files changed, 469 insertions(+), 106 deletions(-) create mode 100644 crates/paralegal-flow/tests/clone-test.rs diff --git a/Cargo.lock b/Cargo.lock index 9f8b071ad3..5a99554845 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ 
-1111,14 +1111,6 @@ checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustc_plugin" version = "0.7.4-nightly-2023-08-25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1348edfa020dbe4807a4d99272332dadcbbedff6b587accb95faefe20d2c7129" -replace = "rustc_plugin 0.7.4-nightly-2023-08-25 (git+https://github.com/JustusAdam/rustc_plugin?rev=aa83f5740fa7eb5b8e3e1ee417b29536e87cc864)" - -[[package]] -name = "rustc_plugin" -version = "0.7.4-nightly-2023-08-25" -source = "git+https://github.com/JustusAdam/rustc_plugin?rev=aa83f5740fa7eb5b8e3e1ee417b29536e87cc864#aa83f5740fa7eb5b8e3e1ee417b29536e87cc864" dependencies = [ "cargo_metadata", "log", @@ -1129,22 +1121,21 @@ dependencies = [ ] [[package]] -name = "rustc_tools_util" -version = "0.1.1" +name = "rustc_plugin" +version = "0.7.4-nightly-2023-08-25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3c5a95edfa0c893236ae4778bb7c4752760e4c0d245e19b5eff33c5aa5eb9dc" +checksum = "1348edfa020dbe4807a4d99272332dadcbbedff6b587accb95faefe20d2c7129" +replace = "rustc_plugin 0.7.4-nightly-2023-08-25" [[package]] -name = "rustc_utils" -version = "0.7.4-nightly-2023-08-25" +name = "rustc_tools_util" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09428c7086894369685cca54a516acc0f0ab6d0e5a628c094ba83bfddaf1aedf" -replace = "rustc_utils 0.7.4-nightly-2023-08-25 (git+https://github.com/JustusAdam/rustc_plugin?rev=aa83f5740fa7eb5b8e3e1ee417b29536e87cc864)" +checksum = "b3c5a95edfa0c893236ae4778bb7c4752760e4c0d245e19b5eff33c5aa5eb9dc" [[package]] name = "rustc_utils" version = "0.7.4-nightly-2023-08-25" -source = "git+https://github.com/JustusAdam/rustc_plugin?rev=aa83f5740fa7eb5b8e3e1ee417b29536e87cc864#aa83f5740fa7eb5b8e3e1ee417b29536e87cc864" dependencies = [ "anyhow", "cfg-if", @@ -1154,6 +1145,13 @@ dependencies = [ "textwrap", ] +[[package]] +name = "rustc_utils" +version = 
"0.7.4-nightly-2023-08-25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09428c7086894369685cca54a516acc0f0ab6d0e5a628c094ba83bfddaf1aedf" +replace = "rustc_utils 0.7.4-nightly-2023-08-25" + [[package]] name = "rustix" version = "0.38.21" diff --git a/Cargo.toml b/Cargo.toml index 253bbdce95..a04a933c28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,10 +27,10 @@ flowistry = { git = "https://github.com/brownsys/flowistry", rev = "b9210041eb84 debug = true [replace] -# "rustc_utils:0.7.4-nightly-2023-08-25" = { path = "../rustc_plugin/crates/rustc_utils" } -# "rustc_plugin:0.7.4-nightly-2023-08-25" = { path = "../rustc_plugin/crates/rustc_plugin" } +"rustc_utils:0.7.4-nightly-2023-08-25" = { path = "../rustc_plugin/crates/rustc_utils" } +"rustc_plugin:0.7.4-nightly-2023-08-25" = { path = "../rustc_plugin/crates/rustc_plugin" } -"rustc_utils:0.7.4-nightly-2023-08-25" = { git = "https://github.com/JustusAdam/rustc_plugin", rev = "aa83f5740fa7eb5b8e3e1ee417b29536e87cc864", features = [ - "indexical", -] } -"rustc_plugin:0.7.4-nightly-2023-08-25" = { git = "https://github.com/JustusAdam/rustc_plugin", rev = "aa83f5740fa7eb5b8e3e1ee417b29536e87cc864" } +# "rustc_utils:0.7.4-nightly-2023-08-25" = { git = "https://github.com/JustusAdam/rustc_plugin", rev = "aa83f5740fa7eb5b8e3e1ee417b29536e87cc864", features = [ +# "indexical", +# ] } +# "rustc_plugin:0.7.4-nightly-2023-08-25" = { git = "https://github.com/JustusAdam/rustc_plugin", rev = "aa83f5740fa7eb5b8e3e1ee417b29536e87cc864" } diff --git a/crates/flowistry_pdg_construction/src/calling_convention.rs b/crates/flowistry_pdg_construction/src/calling_convention.rs index 161402488c..c00b5cb729 100644 --- a/crates/flowistry_pdg_construction/src/calling_convention.rs +++ b/crates/flowistry_pdg_construction/src/calling_convention.rs @@ -3,7 +3,7 @@ use log::trace; use rustc_abi::FieldIdx; use rustc_middle::{ - mir::{Body, HasLocalDecls, Operand, Place, PlaceElem, RETURN_PLACE}, + 
mir::{tcx::PlaceTy, Body, HasLocalDecls, Operand, Place, PlaceElem, RETURN_PLACE}, ty::TyCtxt, }; @@ -41,6 +41,7 @@ impl<'tcx, 'a> CallingConvention<'tcx, 'a> { parent_body: &Body<'tcx>, parent_def_id: DefId, destination: Place<'tcx>, + target_ty: Option>, ) -> Option> { trace!(" Translating child place: {child:?}"); let (parent_place, child_projection) = @@ -53,6 +54,7 @@ impl<'tcx, 'a> CallingConvention<'tcx, 'a> { tcx, parent_body, parent_def_id, + target_ty, )) } diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index fa14d76209..b85d967e6b 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -292,17 +292,30 @@ impl<'tcx> MemoPdgConstructor<'tcx> { let def_id = resolution.def_id(); let generics = resolution.args; if let Some(local) = def_id.as_local() { - self.pdg_cache + let r = self + .pdg_cache .get_maybe_recursive((local, generics), |_| { let g = LocalAnalysis::new(self, resolution) .map_err(|e| vec![e])? 
.construct_partial()?; + trace!( + "Computed new for {} {generics:?}", + self.tcx.def_path_str(local) + ); g.check_invariants(); Ok(g) }) .map(Result::as_ref) .transpose() - .map_err(Clone::clone) + .map_err(Clone::clone)?; + if let Some(g) = r { + trace!( + "Found pdg for {} with {:?}", + self.tcx.def_path_str(local), + g.generics + ) + }; + Ok(r) } else { self.loader.load(def_id) } @@ -583,6 +596,7 @@ impl<'tcx> PartialGraph<'tcx> { &constructor.body, constructor.def_id.to_def_id(), *destination, + Some(child_src.place.ty(child_descriptor, constructor.tcx())), ) { self.register_mutation( results, @@ -611,6 +625,7 @@ impl<'tcx> PartialGraph<'tcx> { &constructor.body, constructor.def_id.to_def_id(), *destination, + Some(child_dst.place.ty(child_descriptor, constructor.tcx())), ) { self.register_mutation( results, diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index df4db31f47..94a131a3e5 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -12,9 +12,10 @@ use internment::Intern; use petgraph::{dot, graph::DiGraph}; use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hir::def_id::{DefId, DefIndex}; +use rustc_index::IndexVec; use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ - mir::{Body, Location, Place}, + mir::{Body, HasLocalDecls, Local, LocalDecl, LocalDecls, Location, Place}, ty::{GenericArgsRef, TyCtxt}, }; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; @@ -294,6 +295,13 @@ pub struct PartialGraph<'tcx> { pub(crate) generics: GenericArgsRef<'tcx>, def_id: DefId, arg_count: usize, + local_decls: IndexVec>, +} + +impl<'tcx> HasLocalDecls<'tcx> for PartialGraph<'tcx> { + fn local_decls(&self) -> &LocalDecls<'tcx> { + &self.local_decls + } } impl<'tcx> PartialGraph<'tcx> { @@ -315,7 +323,12 @@ impl<'tcx> PartialGraph<'tcx> { } } - pub fn new(generics: GenericArgsRef<'tcx>, def_id: DefId, 
arg_count: usize) -> Self { + pub fn new( + generics: GenericArgsRef<'tcx>, + def_id: DefId, + arg_count: usize, + local_decls: &LocalDecls<'tcx>, + ) -> Self { Self { nodes: Default::default(), edges: Default::default(), @@ -323,6 +336,7 @@ impl<'tcx> PartialGraph<'tcx> { generics, def_id, arg_count, + local_decls: local_decls.to_owned(), } } @@ -332,8 +346,10 @@ impl<'tcx> PartialGraph<'tcx> { self.edges .iter() .map(|(src, _, _)| *src) + .filter(|n| n.at.leaf().location.is_start()) .filter_map(move |a| Some((a, as_arg(&a, self.def_id, self.arg_count)?))) - .filter(|(node, _)| node.at.leaf().location.is_start()) + .collect::>() + .into_iter() } pub(crate) fn parentable_dsts<'a>( @@ -342,8 +358,10 @@ impl<'tcx> PartialGraph<'tcx> { self.edges .iter() .map(|(_, dst, _)| *dst) + .filter(|n| n.at.leaf().location.is_end()) .filter_map(move |a| Some((a, as_arg(&a, self.def_id, self.arg_count)?))) - .filter(|node| node.0.at.leaf().location.is_end()) + .collect::>() + .into_iter() } } @@ -385,6 +403,7 @@ impl<'tcx> TransformCallString for PartialGraph<'tcx> { .collect(), def_id: self.def_id, arg_count: self.arg_count, + local_decls: self.local_decls.to_owned(), } } } diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index 5a3b6ae8b8..1981d3842c 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -11,8 +11,8 @@ use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_index::IndexVec; use rustc_middle::{ mir::{ - visit::Visitor, AggregateKind, BasicBlock, Body, Location, Operand, Place, PlaceElem, - Rvalue, Statement, Terminator, TerminatorEdges, TerminatorKind, RETURN_PLACE, + visit::Visitor, AggregateKind, BasicBlock, Body, HasLocalDecls, Location, Operand, Place, + PlaceElem, Rvalue, Statement, Terminator, TerminatorEdges, TerminatorKind, RETURN_PLACE, }, ty::{GenericArg, GenericArgKind, GenericArgsRef, Instance, 
List, TyCtxt, TyKind}, }; @@ -31,8 +31,8 @@ use crate::{ construct::{Error, WithConstructionErrors}, graph::{DepEdge, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, - utils::{self, is_async, is_non_default_trait_method, try_monomorphize}, - AsyncType, CallChangeCallback, CallChanges, CallInfo, MemoPdgConstructor, SkipCall, + utils::{self, is_async, is_non_default_trait_method, try_monomorphize, SimpleTyEquiv}, + CallChangeCallback, CallChanges, CallInfo, MemoPdgConstructor, SkipCall, }; #[derive(PartialEq, Eq, Default, Clone, Debug)] @@ -196,11 +196,21 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { self.tcx(), &self.body_with_facts.body, self.def_id.to_def_id(), + None, ); - self.place_info.aliases(place_retyped).iter().map(|alias| { + self.place_info.aliases(place_retyped).iter().map(move |alias| { let mut projection = alias.projection.to_vec(); projection.extend(&place.projection[place_retyped.projection.len()..]); - Place::make(alias.local, &projection, self.tcx()) + let p = Place::make(alias.local, &projection, self.tcx()); + let t1 = place.ty(&self.body, self.tcx()); + let t2 = p.ty(&self.body, self.tcx()); + if !t1.equiv(&t2) { + let p1_str = format!("{place:?}"); + let p2_str = format!("{p:?}"); + let l = p1_str.len().max(p2_str.len()); + panic!("Retyping in {} failed to produce an equivalent type.\n Src {p1_str:l$} : {t1:?}\n Dst {p2_str:l$} : {t2:?}", self.tcx().def_path_str(self.def_id)) + } + p }) } @@ -354,7 +364,10 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { ) -> Result>, Vec>> { let tcx = self.tcx(); - trace!("Considering call at {location:?} in {:?}", self.def_id); + println!( + "Considering call at {location:?} in {:?}", + self.tcx().def_path_str(self.def_id) + ); let (called_def_id, generic_args) = self .operand_to_def_id(func) @@ -378,7 +391,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { )]); } }; - trace!("resolved to instance {resolved_fn:?}"); + println!("resolved to instance {resolved_fn:?}"); let 
resolved_def_id = resolved_fn.def_id(); if log_enabled!(Level::Trace) && called_def_id != resolved_def_id { let (called, resolved) = (self.fmt_fn(called_def_id), self.fmt_fn(resolved_def_id)); @@ -527,16 +540,6 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { let parentable_dsts = child_constructor.parentable_dsts(); let parent_body = &self.body; - let translate_to_parent = |child: Place<'tcx>| -> Option> { - calling_convention.translate_to_parent( - child, - self.async_info(), - self.tcx(), - parent_body, - self.def_id.to_def_id(), - destination, - ) - }; // For each destination node CHILD that is parentable to PLACE, // add an edge from CHILD -> PLACE. @@ -545,7 +548,15 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { // the *last* nodes in the child function to the parent, not *all* of them. trace!("CHILD -> PARENT EDGES:"); for (child_dst, _) in parentable_dsts { - if let Some(parent_place) = translate_to_parent(child_dst.place) { + if let Some(parent_place) = calling_convention.translate_to_parent( + child_dst.place, + self.async_info(), + self.tcx(), + parent_body, + self.def_id.to_def_id(), + destination, + Some(child_dst.place.ty(child_constructor, self.tcx())), + ) { self.apply_mutation(state, location, parent_place); } } @@ -582,6 +593,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { self.generic_args(), self.def_id.to_def_id(), self.body.arg_count, + self.body.local_decls(), )); analysis.visit_reachable_with(&self.body, &mut final_state); diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 605f01c7cf..3bc96c22e6 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -13,7 +13,8 @@ use rustc_middle::{ StatementKind, Terminator, TerminatorKind, }, ty::{ - self, EarlyBinder, GenericArg, GenericArgsRef, Instance, List, ParamEnv, Ty, TyCtxt, TyKind, + self, BoundVariableKind, EarlyBinder, GenericArg, GenericArgKind, GenericArgsRef, Instance, + List, 
ParamEnv, Region, Ty, TyCtxt, TyKind, }, }; @@ -90,11 +91,13 @@ pub fn type_as_fn<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<(DefId, Gener } } +/// If `target_ty` is supplied checks that the final type is the same as `target_ty`. pub(crate) fn retype_place<'tcx>( orig: Place<'tcx>, tcx: TyCtxt<'tcx>, body: &Body<'tcx>, def_id: DefId, + target_ty: Option>, ) -> Place<'tcx> { trace!("Retyping {orig:?} in context of {def_id:?}"); @@ -106,23 +109,24 @@ pub(crate) fn retype_place<'tcx>( ty.ty.kind(), TyKind::Alias(..) | TyKind::Param(..) | TyKind::Bound(..) | TyKind::Placeholder(..) ) { + trace!("Breaking on param-like type {:?}", ty.ty); break; } - // Don't continue if we reach a private field - if let ProjectionElem::Field(field, _) = elem { - if let Some(adt_def) = ty.ty.ty_adt_def() { - let field = adt_def - .all_fields() - .nth(field.as_usize()) - .unwrap_or_else(|| { - panic!("ADT for {:?} does not have field {field:?}", ty.ty); - }); - if !field.vis.is_accessible_from(def_id, tcx) { - break; - } - } - } + // // Don't continue if we reach a private field + // if let ProjectionElem::Field(field, _) = elem { + // if let Some(adt_def) = ty.ty.ty_adt_def() { + // let field = adt_def + // .all_fields() + // .nth(field.as_usize()) + // .unwrap_or_else(|| { + // panic!("ADT for {:?} does not have field {field:?}", ty.ty); + // }); + // if !field.vis.is_accessible_from(def_id, tcx) { + // break; + // } + // } + // } trace!( " Projecting {:?}.{new_projection:?} : {:?} with {elem:?}", @@ -152,12 +156,170 @@ pub(crate) fn retype_place<'tcx>( }; new_projection.push(elem); } - let p = Place::make(orig.local, &new_projection, tcx); + + if let Some(target_ty) = target_ty { + if !ty.equiv(&target_ty) { + let p1_str = format!("{orig:?}"); + let p2_str = format!("{p:?}"); + let l = p1_str.len().max(p2_str.len()); + panic!("Retyping in {} failed to produce an equivalent type.\n Src {p1_str:l$} : {target_ty:?}\n Dst {p2_str:l$} : {ty:?}", tcx.def_path_str(def_id)) + } + } + 
trace!(" Final translation: {p:?}"); p } +pub trait SimpleTyEquiv { + fn equiv(&self, other: &Self) -> bool; +} + +impl<'tcx> SimpleTyEquiv for Ty<'tcx> { + fn equiv(&self, other: &Self) -> bool { + self.kind().equiv(other.kind()) + } +} + +impl<'tcx, T: SimpleTyEquiv> SimpleTyEquiv for [T] { + fn equiv(&self, other: &Self) -> bool { + self.iter().zip(other.iter()).all(|(a, b)| a.equiv(b)) + } +} + +impl SimpleTyEquiv for ty::List { + fn equiv(&self, other: &Self) -> bool { + self.as_slice().equiv(other.as_slice()) + } +} + +impl<'tcx> SimpleTyEquiv for GenericArg<'tcx> { + fn equiv(&self, other: &Self) -> bool { + match (&self.unpack(), &other.unpack()) { + (GenericArgKind::Const(a), GenericArgKind::Const(b)) => a == b, + (GenericArgKind::Lifetime(a), GenericArgKind::Lifetime(b)) => a.equiv(b), + (GenericArgKind::Type(a), GenericArgKind::Type(b)) => a.equiv(b), + _ => false, + } + } +} + +impl<'tcx> SimpleTyEquiv for Region<'tcx> { + fn equiv(&self, _other: &Self) -> bool { + true + } +} + +impl<'tcx, T: SimpleTyEquiv> SimpleTyEquiv for ty::Binder<'tcx, T> { + fn equiv(&self, other: &Self) -> bool { + self.bound_vars().equiv(other.bound_vars()) + && self + .as_ref() + .skip_binder() + .equiv(other.as_ref().skip_binder()) + } +} + +impl SimpleTyEquiv for BoundVariableKind { + fn equiv(&self, other: &Self) -> bool { + self == other + } +} + +impl<'tcx> SimpleTyEquiv for ty::TypeAndMut<'tcx> { + fn equiv(&self, other: &Self) -> bool { + self.mutbl == other.mutbl && self.ty.equiv(&other.ty) + } +} + +impl<'tcx> SimpleTyEquiv for ty::FnSig<'tcx> { + fn equiv(&self, other: &Self) -> bool { + let Self { + inputs_and_output, + c_variadic, + unsafety, + abi, + } = *self; + inputs_and_output.equiv(other.inputs_and_output) + && c_variadic == other.c_variadic + && unsafety == other.unsafety + && abi == other.abi + } +} + +impl SimpleTyEquiv for &T { + fn equiv(&self, other: &Self) -> bool { + (*self).equiv(*other) + } +} + +impl<'tcx> SimpleTyEquiv for ty::AliasTy<'tcx> { + 
fn equiv(&self, other: &Self) -> bool { + self.def_id == other.def_id && self.args.equiv(other.args) + } +} + +impl<'tcx> SimpleTyEquiv for ty::ExistentialPredicate<'tcx> { + fn equiv(&self, other: &Self) -> bool { + self == other + } +} + +fn is_wildcard(t: &TyKind<'_>) -> bool { + matches!( + t, + TyKind::Param(..) | TyKind::Alias(..) | TyKind::Bound(..) | TyKind::Placeholder(..) + ) +} + +impl<'tcx> SimpleTyEquiv for TyKind<'tcx> { + fn equiv(&self, other: &Self) -> bool { + use rustc_type_ir::TyKind::*; + match (self, other) { + _ if is_wildcard(self) || is_wildcard(other) => true, + (Int(a_i), Int(b_i)) => a_i == b_i, + (Uint(a_u), Uint(b_u)) => a_u == b_u, + (Float(a_f), Float(b_f)) => a_f == b_f, + (Adt(a_d, a_s), Adt(b_d, b_s)) => a_d == b_d && a_s.equiv(b_s), + (Foreign(a_d), Foreign(b_d)) => a_d == b_d, + (Array(a_t, a_c), Array(b_t, b_c)) => a_t.equiv(b_t) && a_c == b_c, + (Slice(a_t), Slice(b_t)) => a_t.equiv(b_t), + (RawPtr(a_t), RawPtr(b_t)) => a_t.equiv(b_t), + (Ref(a_r, a_t, a_m), Ref(b_r, b_t, b_m)) => { + a_r.equiv(b_r) && a_t.equiv(b_t) && a_m == b_m + } + (FnDef(a_d, a_s), FnDef(b_d, b_s)) => a_d == b_d && a_s.equiv(b_s), + (FnPtr(a_s), FnPtr(b_s)) => a_s.equiv(b_s), + (Dynamic(a_p, a_r, a_repr), Dynamic(b_p, b_r, b_repr)) => { + a_p.equiv(b_p) && a_r.equiv(b_r) && a_repr == b_repr + } + (Closure(a_d, a_s), Closure(b_d, b_s)) => a_d == b_d && a_s.equiv(b_s), + (Generator(a_d, a_s, a_m), Generator(b_d, b_s, b_m)) => { + a_d == b_d && a_s.equiv(b_s) && a_m == b_m + } + (GeneratorWitness(a_g), GeneratorWitness(b_g)) => a_g.equiv(b_g), + (GeneratorWitnessMIR(a_d, a_s), GeneratorWitnessMIR(b_d, b_s)) => { + a_d == b_d && a_s.equiv(b_s) + } + (Tuple(a_t), Tuple(b_t)) => a_t.equiv(b_t), + (Alias(a_i, a_p), Alias(b_i, b_p)) => a_i == b_i && a_p.equiv(b_p), + (Param(a_p), Param(b_p)) => a_p == b_p, + (Bound(a_d, a_b), Bound(b_d, b_b)) => a_d == b_d && a_b == b_b, + (Placeholder(a_p), Placeholder(b_p)) => a_p == b_p, + (Infer(_a_t), Infer(_b_t)) => 
unreachable!(), + (Error(a_e), Error(b_e)) => a_e == b_e, + (Bool, Bool) | (Char, Char) | (Str, Str) | (Never, Never) => true, + _ => false, + } + } +} + +impl<'tcx> SimpleTyEquiv for PlaceTy<'tcx> { + fn equiv(&self, other: &Self) -> bool { + self.variant_index == other.variant_index && self.ty.equiv(&other.ty) + } +} + pub(crate) fn hashset_join( hs1: &mut FxHashSet, hs2: &FxHashSet, diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 3a1d051484..047f4fe566 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -62,7 +62,7 @@ impl<'tcx> EmittableError<'tcx> for Error<'tcx> { use Error::*; match self { PdgForItemMissing(def) => { - write!(f, "found no pdg for item {}", tcx.def_path_debug_str(*def)) + write!(f, "found no pdg for item {}", tcx.def_path_str(*def)) } MetadataForCrateMissing(krate) => { write!(f, "no metadata found for crate {}", tcx.crate_name(*krate)) @@ -123,7 +123,7 @@ impl<'tcx> MetadataLoader<'tcx> { pub fn collect_and_emit_metadata( self: Rc, args: &'static Args, - path: impl AsRef, + path: Option>, ) -> (Vec, MarkerCtx<'tcx>, MemoPdgConstructor<'tcx>) { let tcx = self.tcx; let mut collector = CollectingVisitor::new(tcx, args, self.clone()); @@ -140,7 +140,7 @@ impl<'tcx> MetadataLoader<'tcx> { let pdgs = emit_targets .into_iter() .filter_map(|t| { - // if tcx.def_path_str(t) != "lemmy_api_crud::match_websocket_operation_crud" { + // if tcx.def_path_str(t) != "::clone" { // return None; // } println!("Constructing for {:?}", tcx.def_path_str(t)); @@ -164,9 +164,11 @@ impl<'tcx> MetadataLoader<'tcx> { }) .collect::>(); let meta = Metadata::from_pdgs(tcx, pdgs, marker_ctx.db()); - let path = path.as_ref(); - debug!("Writing metadata to {}", path.display()); - meta.write(path, tcx); + if let Some(path) = path { + let path = path.as_ref(); + debug!("Writing metadata to {}", path.display()); + meta.write(path, tcx); + } self.cache.get(LOCAL_CRATE, 
|_| Some(meta)); (collector.functions_to_analyze, marker_ctx, constructor) } diff --git a/crates/paralegal-flow/src/args.rs b/crates/paralegal-flow/src/args.rs index edc2a24a85..827f0b9d36 100644 --- a/crates/paralegal-flow/src/args.rs +++ b/crates/paralegal-flow/src/args.rs @@ -376,6 +376,23 @@ impl Args { pub fn attach_to_debugger(&self) -> Option { self.attach_to_debugger } + + pub fn setup_logging(&self) { + let lvl = self.verbosity(); + // //let lvl = log::LevelFilter::Debug; + if simple_logger::SimpleLogger::new() + .with_level(lvl) + .with_module_level("flowistry", lvl) + .with_module_level("rustc_utils", log::LevelFilter::Error) + .without_timestamps() + .init() + .is_ok() + { + if matches!(*self.direct_debug(), LogLevelConfig::Targeted(..)) { + log::set_max_level(log::LevelFilter::Warn); + } + } + } } #[derive(serde::Serialize, serde::Deserialize, clap::Args, Default)] diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index bb895effda..ab9efae5bf 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -49,6 +49,7 @@ use std::collections::{HashMap, HashSet}; use std::path::PathBuf; use std::{fmt::Display, time::Instant}; +use desc::ProgramDescription; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_middle::ty; use rustc_span::Symbol; @@ -114,6 +115,7 @@ struct ArgWrapper { struct Callbacks { opts: &'static Args, stats: Stats, + persist_metadata: bool, } /// Create the name of the file in which to store intermediate artifacts. 
@@ -147,22 +149,7 @@ fn intermediate_out_file_path(tcx: TyCtxt) -> Result { impl Callbacks { fn in_context(&mut self, tcx: TyCtxt) -> Result { - tcx.sess.abort_if_errors(); - - let loader = MetadataLoader::new(tcx); - - let intermediate_out_file = intermediate_out_file_path(tcx)?; - - let (analysis_targets, mctx, constructor) = loader - .clone() - .collect_and_emit_metadata(self.opts, intermediate_out_file); - tcx.sess.abort_if_errors(); - - let mut gen = SPDGGenerator::new(mctx, self.opts, tcx, constructor, loader); - - let compilation = if !analysis_targets.is_empty() { - let desc = gen.analyze(analysis_targets)?; - + let compilation = if let Some(desc) = self.run_compilation(tcx)? { if self.opts.dbg().dump_spdg() { let out = std::fs::File::create("call-only-flow.gv").unwrap(); paralegal_spdg::dot::dump(&desc, out).unwrap(); @@ -184,6 +171,26 @@ impl Callbacks { }; Ok(compilation) } + + fn run_compilation(&self, tcx: TyCtxt) -> Result> { + tcx.sess.abort_if_errors(); + + let loader = MetadataLoader::new(tcx); + + let (analysis_targets, mctx, constructor) = loader.clone().collect_and_emit_metadata( + self.opts, + self.persist_metadata + .then(|| intermediate_out_file_path(tcx)) + .transpose()?, + ); + tcx.sess.abort_if_errors(); + + let mut gen = SPDGGenerator::new(mctx, self.opts, tcx, constructor, loader); + + (!analysis_targets.is_empty()) + .then(|| gen.analyze(analysis_targets)) + .transpose() + } } struct NoopCallbacks {} @@ -195,6 +202,7 @@ impl Callbacks { Self { opts, stats: Default::default(), + persist_metadata: true, } } } @@ -379,18 +387,8 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { debug!("Is target, compiling"); - let lvl = plugin_args.verbosity(); - // //let lvl = log::LevelFilter::Debug; - simple_logger::SimpleLogger::new() - .with_level(lvl) - .with_module_level("flowistry", lvl) - .with_module_level("rustc_utils", log::LevelFilter::Error) - .without_timestamps() - .init() - .unwrap(); - if matches!(*plugin_args.direct_debug(), 
LogLevelConfig::Targeted(..)) { - log::set_max_level(log::LevelFilter::Warn); - } + plugin_args.setup_logging(); + let opts = Box::leak(Box::new(plugin_args)); compiler_args.extend([ diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index 6c0bd76380..324c2ac60f 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -5,9 +5,10 @@ extern crate rustc_middle; extern crate rustc_span; use crate::{ + args::{Args, ClapArgs}, desc::{Identifier, ProgramDescription}, utils::Print, - HashSet, + Callbacks, HashSet, }; use std::fmt::{Debug, Formatter}; use std::hash::{Hash, Hasher}; @@ -19,12 +20,14 @@ use paralegal_spdg::{ DefInfo, EdgeInfo, Endpoint, Node, SPDG, }; -use flowistry_pdg::CallString; +use clap::Parser; +use flowistry_pdg::{rustc_portable::LocalDefId, CallString}; use itertools::Itertools; use petgraph::visit::{Control, Data, DfsEvent, EdgeRef, FilterEdge, GraphBase, IntoEdges}; use petgraph::visit::{IntoNeighbors, IntoNodeReferences}; use petgraph::visit::{NodeRef as _, Visitable}; use petgraph::Direction; +use rustc_utils::test_utils::{DUMMY_FILE, DUMMY_FILE_NAME, DUMMY_MOD_NAME}; use std::path::Path; lazy_static! { @@ -167,6 +170,63 @@ macro_rules! define_flow_test_template { }; } +lazy_static! 
{ + static ref OPTS: Args = Default::default(); +} + +pub struct InlineTestBuilder { + ctrl_name: String, + input: String, +} + +impl InlineTestBuilder { + pub fn new(input: impl Into) -> Self { + Self { + input: input.into(), + ctrl_name: "main".into(), + } + } + + pub fn check(&self, check: impl FnOnce(CtrlRef) + Send) { + #[derive(clap::Parser)] + struct TopLevelArgs { + #[clap(flatten)] + args: ClapArgs, + } + + // TODO make this --analyze work + let args = Args::try_from( + TopLevelArgs::parse_from([ + "".into(), + "--analyze".into(), + format!("{}::{}", DUMMY_MOD_NAME, self.ctrl_name), + ]) + .args, + ) + .unwrap(); + + args.setup_logging(); + + rustc_utils::test_utils::compile_with_args( + &self.input, + [ + "--cfg", + "paralegal", + "-Zcrate-attr=feature(register_tool)", + "-Zcrate-attr=register_tool(paralegal_flow)", + ], + move |tcx| { + let mut memo = Callbacks::new(Box::leak(Box::new(args))); + memo.persist_metadata = false; + let pdg = memo.run_compilation(tcx).unwrap().unwrap(); + let graph = PreFrg::from_description(pdg); + let cref = graph.ctrl(&self.ctrl_name); + check(cref) + }, + ) + } +} + pub trait HasGraph<'g>: Sized + Copy { fn graph(self) -> &'g PreFrg; @@ -267,14 +327,18 @@ impl PreFrg { crate::consts::FLOW_GRAPH_OUT_NAME )) .unwrap(); - let name_map = desc - .def_info - .iter() - .map(|(def_id, info)| (info.name, *def_id)) - .into_group_map(); - Self { desc, name_map } + Self::from_description(desc) }) } + + pub fn from_description(desc: ProgramDescription) -> Self { + let name_map = desc + .def_info + .iter() + .map(|(def_id, info)| (info.name, *def_id)) + .into_group_map(); + Self { desc, name_map } + } } #[derive(Clone)] diff --git a/crates/paralegal-flow/tests/clone-test.rs b/crates/paralegal-flow/tests/clone-test.rs new file mode 100644 index 0000000000..84a14ac208 --- /dev/null +++ b/crates/paralegal-flow/tests/clone-test.rs @@ -0,0 +1,74 @@ +use paralegal_flow::test_utils::InlineTestBuilder; + +#[test] +fn clone_nesting() { + 
InlineTestBuilder::new(stringify!( + #[derive(Clone)] + enum Opt { + Empty, + Filled(T), + } + + #[derive(Clone)] + struct AStruct { + f: usize, + g: usize, + } + + #[derive(Clone)] + enum AnEnum { + Var1(usize), + Var2(String), + } + + fn main() { + let v0 = Opt::Filled(AStruct { f: 0, g: 0 }).clone(); + let v2 = Opt::Filled(AnEnum::Var1(0)).clone(); + } + )) + .check(|ctr| {}) +} + +#[test] +fn clone_test_2() { + InlineTestBuilder::new(stringify!( + #[derive(Clone)] + pub(crate) enum IdOrNestedObject { + Id(Url), + NestedObject(Kind), + } + + #[derive(Clone)] + struct Url(String); + + #[derive(Clone)] + pub struct Vote { + pub(crate) to: Vec, + } + + #[derive(Clone)] + pub struct AnnounceActivity { + pub(crate) object: IdOrNestedObject, + } + #[derive(Clone)] + pub struct Tombstone { + pub(crate) id: Url, + } + + #[derive(Clone)] + pub struct Delete { + pub(crate) object: IdOrNestedObject, + } + + #[derive(Clone)] + pub enum AnnouncableActivities { + Vote(Vote), + Delete(Delete), + } + + fn main() { + let v = AnnouncableActivities::Vote(Vote { to: vec![] }).clone(); + } + )) + .check(|_g| {}) +} From a2fd8051ede7ae12eabc49fe93eb290b9a460ce7 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 13 Jun 2024 15:57:04 -0700 Subject: [PATCH 72/95] Perhaps the dumbest error ever --- crates/flowistry_pdg_construction/src/construct.rs | 2 ++ crates/flowistry_pdg_construction/src/graph.rs | 4 ++-- crates/paralegal-flow/tests/clone-test.rs | 14 +++++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index b85d967e6b..2c5f90c3d0 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -585,6 +585,8 @@ impl<'tcx> PartialGraph<'tcx> { let child_graph = push_call_string_root(child_descriptor, gloc); + trace!("Child graph has generics {:?}", child_descriptor.generics); + // For each 
source node CHILD that is parentable to PLACE, // add an edge from PLACE -> CHILD. trace!("PARENT -> CHILD EDGES:"); diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 94a131a3e5..28fd2e65dc 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -346,7 +346,7 @@ impl<'tcx> PartialGraph<'tcx> { self.edges .iter() .map(|(src, _, _)| *src) - .filter(|n| n.at.leaf().location.is_start()) + .filter(|n| n.at.len() == 1 && n.at.leaf().location.is_start()) .filter_map(move |a| Some((a, as_arg(&a, self.def_id, self.arg_count)?))) .collect::>() .into_iter() @@ -358,7 +358,7 @@ impl<'tcx> PartialGraph<'tcx> { self.edges .iter() .map(|(_, dst, _)| *dst) - .filter(|n| n.at.leaf().location.is_end()) + .filter(|n| n.at.len() == 1 && n.at.leaf().location.is_end()) .filter_map(move |a| Some((a, as_arg(&a, self.def_id, self.arg_count)?))) .collect::>() .into_iter() diff --git a/crates/paralegal-flow/tests/clone-test.rs b/crates/paralegal-flow/tests/clone-test.rs index 84a14ac208..2523d32829 100644 --- a/crates/paralegal-flow/tests/clone-test.rs +++ b/crates/paralegal-flow/tests/clone-test.rs @@ -43,16 +43,22 @@ fn clone_test_2() { #[derive(Clone)] pub struct Vote { - pub(crate) to: Vec, + pub(crate) to: Vec, } + #[derive(Clone)] + struct VoteUrl(String); + + #[derive(Clone)] + struct TombstoneUrl(String); + #[derive(Clone)] pub struct AnnounceActivity { pub(crate) object: IdOrNestedObject, } #[derive(Clone)] pub struct Tombstone { - pub(crate) id: Url, + pub(crate) id: TombstoneUrl, } #[derive(Clone)] @@ -66,9 +72,7 @@ fn clone_test_2() { Delete(Delete), } - fn main() { - let v = AnnouncableActivities::Vote(Vote { to: vec![] }).clone(); - } + fn main() {} )) .check(|_g| {}) } From 2080a43ba24d098670c52f92c7157803bd91405d Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 13 Jun 2024 17:10:37 -0700 Subject: [PATCH 73/95] Equiv check doesn't actually work --- 
.../src/local_analysis.rs | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index 1981d3842c..efa131225a 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -198,20 +198,23 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { self.def_id.to_def_id(), None, ); - self.place_info.aliases(place_retyped).iter().map(move |alias| { - let mut projection = alias.projection.to_vec(); - projection.extend(&place.projection[place_retyped.projection.len()..]); - let p = Place::make(alias.local, &projection, self.tcx()); - let t1 = place.ty(&self.body, self.tcx()); - let t2 = p.ty(&self.body, self.tcx()); - if !t1.equiv(&t2) { - let p1_str = format!("{place:?}"); - let p2_str = format!("{p:?}"); - let l = p1_str.len().max(p2_str.len()); - panic!("Retyping in {} failed to produce an equivalent type.\n Src {p1_str:l$} : {t1:?}\n Dst {p2_str:l$} : {t2:?}", self.tcx().def_path_str(self.def_id)) - } - p - }) + self.place_info + .aliases(place_retyped) + .iter() + .map(move |alias| { + let mut projection = alias.projection.to_vec(); + projection.extend(&place.projection[place_retyped.projection.len()..]); + let p = Place::make(alias.local, &projection, self.tcx()); + // let t1 = place.ty(&self.body, self.tcx()); + // let t2 = p.ty(&self.body, self.tcx()); + // if !t1.equiv(&t2) { + // let p1_str = format!("{place:?}"); + // let p2_str = format!("{p:?}"); + // let l = p1_str.len().max(p2_str.len()); + // panic!("Retyping in {} failed to produce an equivalent type.\n Src {p1_str:l$} : {t1:?}\n Dst {p2_str:l$} : {t2:?}", self.tcx().def_path_str(self.def_id)) + // } + p + }) } pub(crate) fn tcx(&self) -> TyCtxt<'tcx> { @@ -364,7 +367,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { ) -> Result>, Vec>> { let tcx = self.tcx(); - println!( + trace!( "Considering call at 
{location:?} in {:?}", self.tcx().def_path_str(self.def_id) ); @@ -391,7 +394,6 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { )]); } }; - println!("resolved to instance {resolved_fn:?}"); let resolved_def_id = resolved_fn.def_id(); if log_enabled!(Level::Trace) && called_def_id != resolved_def_id { let (called, resolved) = (self.fmt_fn(called_def_id), self.fmt_fn(resolved_def_id)); From 0a924166568f20706871a99153ef2530638f4a82 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 13 Jun 2024 17:10:47 -0700 Subject: [PATCH 74/95] Enable profiling env var --- crates/paralegal-flow/src/lib.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index ab9efae5bf..2975a84b2a 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -391,6 +391,18 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { let opts = Box::leak(Box::new(plugin_args)); + const RERUN_VAR: &str = "RERUN_WITH_DEBUGGER"; + if let Ok(debugger) = std::env::var(RERUN_VAR) { + println!("Restarting with debugger '{debugger}'"); + let mut dsplit = debugger.split(' '); + let mut cmd = std::process::Command::new(dsplit.next().unwrap()); + cmd.args(dsplit) + .args(std::env::args()) + .env_remove(RERUN_VAR); + println!("{cmd:?}"); + std::process::exit(cmd.status().unwrap().code().unwrap_or(0)); + } + compiler_args.extend([ "--cfg".into(), "paralegal".into(), From ce66f3a84552249206ec0958ed18ef318f067b56 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 17 Jun 2024 19:02:00 -0700 Subject: [PATCH 75/95] Use commit ref, update tests --- Cargo.lock | 30 ++++++++++--------- Cargo.toml | 15 ++++++---- .../flowistry_pdg_construction/tests/pdg.rs | 6 ++-- crates/paralegal-flow/src/test_utils.rs | 28 +++++++++-------- 4 files changed, 44 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a99554845..3802f32de6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1111,6 +1111,14 @@ checksum = 
"d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustc_plugin" version = "0.7.4-nightly-2023-08-25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1348edfa020dbe4807a4d99272332dadcbbedff6b587accb95faefe20d2c7129" +replace = "rustc_plugin 0.7.4-nightly-2023-08-25 (git+https://github.com/JustusAdam/rustc_plugin?rev=d4fefb5c0344cdf4812b4877d5b03cb19a2c4672)" + +[[package]] +name = "rustc_plugin" +version = "0.7.4-nightly-2023-08-25" +source = "git+https://github.com/JustusAdam/rustc_plugin?rev=d4fefb5c0344cdf4812b4877d5b03cb19a2c4672#d4fefb5c0344cdf4812b4877d5b03cb19a2c4672" dependencies = [ "cargo_metadata", "log", @@ -1120,13 +1128,6 @@ dependencies = [ "toml", ] -[[package]] -name = "rustc_plugin" -version = "0.7.4-nightly-2023-08-25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1348edfa020dbe4807a4d99272332dadcbbedff6b587accb95faefe20d2c7129" -replace = "rustc_plugin 0.7.4-nightly-2023-08-25" - [[package]] name = "rustc_tools_util" version = "0.1.1" @@ -1136,6 +1137,14 @@ checksum = "b3c5a95edfa0c893236ae4778bb7c4752760e4c0d245e19b5eff33c5aa5eb9dc" [[package]] name = "rustc_utils" version = "0.7.4-nightly-2023-08-25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09428c7086894369685cca54a516acc0f0ab6d0e5a628c094ba83bfddaf1aedf" +replace = "rustc_utils 0.7.4-nightly-2023-08-25 (git+https://github.com/JustusAdam/rustc_plugin?rev=d4fefb5c0344cdf4812b4877d5b03cb19a2c4672)" + +[[package]] +name = "rustc_utils" +version = "0.7.4-nightly-2023-08-25" +source = "git+https://github.com/JustusAdam/rustc_plugin?rev=d4fefb5c0344cdf4812b4877d5b03cb19a2c4672#d4fefb5c0344cdf4812b4877d5b03cb19a2c4672" dependencies = [ "anyhow", "cfg-if", @@ -1145,13 +1154,6 @@ dependencies = [ "textwrap", ] -[[package]] -name = "rustc_utils" -version = "0.7.4-nightly-2023-08-25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"09428c7086894369685cca54a516acc0f0ab6d0e5a628c094ba83bfddaf1aedf" -replace = "rustc_utils 0.7.4-nightly-2023-08-25" - [[package]] name = "rustix" version = "0.38.21" diff --git a/Cargo.toml b/Cargo.toml index a04a933c28..0647fdf6fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,10 +27,13 @@ flowistry = { git = "https://github.com/brownsys/flowistry", rev = "b9210041eb84 debug = true [replace] -"rustc_utils:0.7.4-nightly-2023-08-25" = { path = "../rustc_plugin/crates/rustc_utils" } -"rustc_plugin:0.7.4-nightly-2023-08-25" = { path = "../rustc_plugin/crates/rustc_plugin" } +# "rustc_utils:0.7.4-nightly-2023-08-25" = { path = "../rustc_plugin/crates/rustc_utils" } +# "rustc_plugin:0.7.4-nightly-2023-08-25" = { path = "../rustc_plugin/crates/rustc_plugin" } -# "rustc_utils:0.7.4-nightly-2023-08-25" = { git = "https://github.com/JustusAdam/rustc_plugin", rev = "aa83f5740fa7eb5b8e3e1ee417b29536e87cc864", features = [ -# "indexical", -# ] } -# "rustc_plugin:0.7.4-nightly-2023-08-25" = { git = "https://github.com/JustusAdam/rustc_plugin", rev = "aa83f5740fa7eb5b8e3e1ee417b29536e87cc864" } +[replace."rustc_utils:0.7.4-nightly-2023-08-25"] +git = "https://github.com/JustusAdam/rustc_plugin" +rev = "d4fefb5c0344cdf4812b4877d5b03cb19a2c4672" + +[replace."rustc_plugin:0.7.4-nightly-2023-08-25"] +git = "https://github.com/JustusAdam/rustc_plugin" +rev = "d4fefb5c0344cdf4812b4877d5b03cb19a2c4672" diff --git a/crates/flowistry_pdg_construction/tests/pdg.rs b/crates/flowistry_pdg_construction/tests/pdg.rs index e78d53f757..13e32a4b1a 100644 --- a/crates/flowistry_pdg_construction/tests/pdg.rs +++ b/crates/flowistry_pdg_construction/tests/pdg.rs @@ -19,7 +19,9 @@ use rustc_middle::{ ty::TyCtxt, }; use rustc_span::Symbol; -use rustc_utils::{mir::borrowck_facts, source_map::find_bodies::find_bodies}; +use rustc_utils::{ + mir::borrowck_facts, source_map::find_bodies::find_bodies, test_utils::CompileResult, +}; fn get_main(tcx: TyCtxt<'_>) -> LocalDefId { find_bodies(tcx) @@ -38,7 
+40,7 @@ fn pdg( tests: impl for<'tcx> FnOnce(TyCtxt<'tcx>, DepGraph<'tcx>) + Send, ) { let _ = env_logger::try_init(); - rustc_utils::test_utils::compile(input, move |tcx| { + rustc_utils::test_utils::CompileBuilder::new(input).compile(move |CompileResult { tcx }| { let def_id = get_main(tcx); let mut memo = MemoPdgConstructor::new(tcx, NoLoader); configure(tcx, &mut memo); diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index 324c2ac60f..17079a3262 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -21,13 +21,13 @@ use paralegal_spdg::{ }; use clap::Parser; -use flowistry_pdg::{rustc_portable::LocalDefId, CallString}; +use flowistry_pdg::CallString; use itertools::Itertools; use petgraph::visit::{Control, Data, DfsEvent, EdgeRef, FilterEdge, GraphBase, IntoEdges}; use petgraph::visit::{IntoNeighbors, IntoNodeReferences}; use petgraph::visit::{NodeRef as _, Visitable}; use petgraph::Direction; -use rustc_utils::test_utils::{DUMMY_FILE, DUMMY_FILE_NAME, DUMMY_MOD_NAME}; +use rustc_utils::test_utils::{CompileResult, DUMMY_MOD_NAME}; use std::path::Path; lazy_static! 
{ @@ -207,23 +207,25 @@ impl InlineTestBuilder { args.setup_logging(); - rustc_utils::test_utils::compile_with_args( - &self.input, - [ - "--cfg", - "paralegal", - "-Zcrate-attr=feature(register_tool)", - "-Zcrate-attr=register_tool(paralegal_flow)", - ], - move |tcx| { + rustc_utils::test_utils::CompileBuilder::new(&self.input) + .with_args( + [ + "--cfg", + "paralegal", + "-Zcrate-attr=feature(register_tool)", + "-Zcrate-attr=register_tool(paralegal_flow)", + ] + .into_iter() + .map(ToOwned::to_owned), + ) + .compile(move |CompileResult { tcx }| { let mut memo = Callbacks::new(Box::leak(Box::new(args))); memo.persist_metadata = false; let pdg = memo.run_compilation(tcx).unwrap().unwrap(); let graph = PreFrg::from_description(pdg); let cref = graph.ctrl(&self.ctrl_name); check(cref) - }, - ) + }) } } From 9129b560b382e31d19cc20b5d1fd122c9d1ded33 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 18 Jun 2024 10:48:55 -0700 Subject: [PATCH 76/95] Single source for rustc flags --- crates/paralegal-flow/src/lib.rs | 14 ++++++++------ crates/paralegal-flow/src/test_utils.rs | 13 ++----------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 2975a84b2a..5ee4213243 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -253,6 +253,13 @@ fn add_to_rustflags(new: impl IntoIterator) -> Result<(), std::en Ok(()) } +pub const PARALEGAL_RUSTC_FLAGS: [&str; 4] = [ + "--cfg", + "paralegal", + "-Zcrate-attr=feature(register_tool)", + "-Zcrate-attr=register_tool(paralegal_flow)", +]; + impl rustc_plugin::RustcPlugin for DfppPlugin { type Args = Args; @@ -403,12 +410,7 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { std::process::exit(cmd.status().unwrap().code().unwrap_or(0)); } - compiler_args.extend([ - "--cfg".into(), - "paralegal".into(), - "-Zcrate-attr=feature(register_tool)".into(), - "-Zcrate-attr=register_tool(paralegal_flow)".into(), - ]); + 
compiler_args.extend(PARALEGAL_RUSTC_FLAGS.iter().copied().map(ToOwned::to_owned)); if let Some(dbg) = opts.attach_to_debugger() { dbg.attach() diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index 17079a3262..9bcbb86335 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -8,7 +8,7 @@ use crate::{ args::{Args, ClapArgs}, desc::{Identifier, ProgramDescription}, utils::Print, - Callbacks, HashSet, + Callbacks, HashSet, PARALEGAL_RUSTC_FLAGS, }; use std::fmt::{Debug, Formatter}; use std::hash::{Hash, Hasher}; @@ -208,16 +208,7 @@ impl InlineTestBuilder { args.setup_logging(); rustc_utils::test_utils::CompileBuilder::new(&self.input) - .with_args( - [ - "--cfg", - "paralegal", - "-Zcrate-attr=feature(register_tool)", - "-Zcrate-attr=register_tool(paralegal_flow)", - ] - .into_iter() - .map(ToOwned::to_owned), - ) + .with_args(PARALEGAL_RUSTC_FLAGS.iter().copied().map(ToOwned::to_owned)) .compile(move |CompileResult { tcx }| { let mut memo = Callbacks::new(Box::leak(Box::new(args))); memo.persist_metadata = false; From 2f668d73df93af3bdd08d249652e51a7241263fe Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 18 Jun 2024 21:16:55 -0700 Subject: [PATCH 77/95] Add stats back to make griswold compile --- .../paralegal-flow/src/ana/graph_converter.rs | 1 + crates/paralegal-flow/src/ana/mod.rs | 9 +- crates/paralegal-flow/src/lib.rs | 2 + crates/paralegal-policy/src/context.rs | 106 +++++++++--------- crates/paralegal-spdg/src/lib.rs | 21 +++- crates/paralegal-spdg/src/ser.rs | 3 +- 6 files changed, 87 insertions(+), 55 deletions(-) diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index e2832615ed..a93135c6f1 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -381,6 +381,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { .into_iter() 
.map(|(k, v)| (k, Types(v.into()))) .collect(), + statistics: Default::default(), } } diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 0bdea3d9e0..8468be1936 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -4,7 +4,7 @@ //! [`CollectingVisitor`](crate::discover::CollectingVisitor) and then calling //! [`analyze`](SPDGGenerator::analyze). -use std::rc::Rc; +use std::{rc::Rc, time::Duration}; use crate::{ ann::{Annotation, MarkerAnnotation}, @@ -147,6 +147,13 @@ impl<'tcx> SPDGGenerator<'tcx> { instruction_info, controllers, def_info, + rustc_time: Duration::ZERO, + marker_annotation_count: 0, + dedup_locs: 0, + dedup_functions: 0, + seen_locs: 0, + seen_functions: 0, + analyzed_spans: Default::default(), } } diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 5ee4213243..c8fec637fe 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -157,6 +157,7 @@ impl Callbacks { let ser = Instant::now(); desc.canonical_write(self.opts.result_path()).unwrap(); + println!("Wrote graph to {}", self.opts.result_path().display()); self.stats .record_timed(TimedStat::Serialization, ser.elapsed()); @@ -167,6 +168,7 @@ impl Callbacks { rustc_driver::Compilation::Continue } } else { + println!("No compilation artifact"); rustc_driver::Compilation::Continue }; Ok(compilation) diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index 2b074c004c..55cec4379e 100644 --- a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -1,13 +1,17 @@ +use std::collections::BTreeMap; +use std::fs::File; +use std::io::BufReader; use std::time::{Duration, Instant}; use std::vec; use std::{io::Write, process::exit, sync::Arc}; +use paralegal_spdg::rustc_portable::defid_as_local; pub use paralegal_spdg::rustc_portable::{DefId, LocalDefId}; use 
paralegal_spdg::traverse::{generic_flows_to, EdgeSelection}; use paralegal_spdg::{ - CallString, DisplayNode, Endpoint, GlobalNode, HashMap, HashSet, Identifier, InstructionInfo, - IntoIterGlobalNodes, Node as SPDGNode, NodeCluster, NodeInfo, ProgramDescription, SPDGImpl, - Span, TypeId, SPDG, + CallString, DefKind, DisplayNode, Endpoint, GlobalNode, HashMap, HashSet, Identifier, + InstructionInfo, IntoIterGlobalNodes, Node as SPDGNode, NodeCluster, NodeInfo, + ProgramDescription, SPDGImpl, Span, TypeId, SPDG, }; use anyhow::{anyhow, bail, Result}; @@ -605,55 +609,53 @@ impl Context { node.get_location(self) } - // #[doc(hidden)] - // pub fn write_analyzed_code( - // &self, - // mut out: impl Write, - // include_signatures: bool, - // ) -> std::io::Result<()> { - // let ordered_span_set = self - // .desc - // .analyzed_spans - // .values() - // .zip(std::iter::repeat(true)) - // .chain( - // include_signatures - // .then(|| { - // self.desc - // .def_info - // .iter() - // .filter(|(did, _)| { - // !matches!(defid_as_local(**did), Some(local) - // if self.desc.analyzed_spans.contains_key(&local) - // ) - // }) - // .map(|(_, i)| (&i.src_info, matches!(i.kind, DefKind::Type))) - // }) - // .into_iter() - // .flatten(), - // ) - // .collect::>(); - // let mut current_file = None; - // for (s, is_complete) in ordered_span_set { - // if Some(&s.source_file.file_path) != current_file { - // writeln!(out, "// {}", s.source_file.file_path)?; - // current_file = Some(&s.source_file.file_path); - // } - // let file = BufReader::new(File::open(&s.source_file.abs_file_path).unwrap()); - // for l in file - // .lines() - // .skip(s.start.line as usize - 1) - // .take((s.end.line - s.start.line + 1) as usize) - // { - // writeln!(out, "{}", l.unwrap()).unwrap() - // } - // if !is_complete { - // writeln!(out, "unreachable!() }}")?; - // } - // } - - // Ok(()) - // } + #[doc(hidden)] + pub fn write_analyzed_code( + &self, + mut out: impl Write, + include_signatures: bool, + ) -> 
std::io::Result<()> { + use std::io::BufRead; + + let ordered_span_set = self + .desc + .analyzed_spans + .values() + .zip(std::iter::repeat(true)) + .chain( + include_signatures + .then(|| { + self.desc + .def_info + .iter() + .filter(|(did, _)| self.desc.analyzed_spans.contains_key(&did)) + .map(|(_, i)| (&i.src_info, matches!(i.kind, DefKind::Type))) + }) + .into_iter() + .flatten(), + ) + .collect::>(); + let mut current_file = None; + for (s, is_complete) in ordered_span_set { + if Some(&s.source_file.file_path) != current_file { + writeln!(out, "// {}", s.source_file.file_path)?; + current_file = Some(&s.source_file.file_path); + } + let file = BufReader::new(File::open(&s.source_file.abs_file_path).unwrap()); + for l in file + .lines() + .skip(s.start.line as usize - 1) + .take((s.end.line - s.start.line + 1) as usize) + { + writeln!(out, "{}", l.unwrap()).unwrap() + } + if !is_complete { + writeln!(out, "unreachable!() }}")?; + } + } + + Ok(()) + } } /// Context queries conveniently accessible on nodes diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index 32df9a1d6d..9fa21eb6c1 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -348,6 +348,23 @@ pub struct ProgramDescription { #[cfg_attr(feature = "rustc", serde(with = "ser_defid_map"))] /// Metadata about the `DefId`s pub def_info: HashMap, + + /// INFO: Not implemented, always 0 + pub rustc_time: Duration, + /// INFO: Not implemented, always 0 + pub marker_annotation_count: u32, + /// INFO: Not implemented, always 0 + pub dedup_functions: u32, + /// INFO: Not implemented, always 0 + pub dedup_locs: u32, + /// INFO: Not implemented, always 0 + pub seen_locs: u32, + /// INFO: Not implemented, always 0 + pub seen_functions: u32, + #[cfg_attr(not(feature = "rustc"), serde(with = "serde_map_via_vec"))] + #[cfg_attr(feature = "rustc", serde(with = "ser_defid_map"))] + /// INFO: Not implemented, always emtpy + pub analyzed_spans: HashMap, } 
/// Metadata about a type @@ -817,9 +834,11 @@ pub struct SPDG { /// that this contains multiple types for a single node, because it hold /// top-level types and subtypes that may be marked. pub type_assigns: HashMap, + /// INFO: Not Implemented, always zero + pub statistics: SPDGStats, } -#[derive(Clone, Serialize, Deserialize, Debug)] +#[derive(Clone, Serialize, Deserialize, Debug, Default)] /// Statistics about the code that produced an SPDG pub struct SPDGStats { /// The number of unique lines of code we generated a PDG for. This means diff --git a/crates/paralegal-spdg/src/ser.rs b/crates/paralegal-spdg/src/ser.rs index 59ff56be98..a1e1e96e59 100644 --- a/crates/paralegal-spdg/src/ser.rs +++ b/crates/paralegal-spdg/src/ser.rs @@ -47,7 +47,8 @@ impl ProgramDescription { /// Read `self` using the configured serialization format pub fn canonical_read(path: impl AsRef) -> Result { let path = path.as_ref(); - let in_file = File::open(path)?; + let in_file = File::open(path) + .with_context(|| format!("Reading PDG file from {}", path.display()))?; cfg_if! 
{ if #[cfg(feature = "binenc")] { let read = bincode::deserialize_from( From 59fca3515127dfe4041be27cbe1b9e6e74635c20 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 26 Jun 2024 20:32:48 -0400 Subject: [PATCH 78/95] Whoops --- crates/paralegal-flow/src/test_utils.rs | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index 490e454cc5..ce36922628 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -232,20 +232,11 @@ impl InlineTestBuilder { args.setup_logging(); rustc_utils::test_utils::CompileBuilder::new(&self.input) - .with_args( - [ - "--cfg", - "paralegal", - "-Zcrate-attr=feature(register_tool)", - "-Zcrate-attr=register_tool(paralegal_flow)", - ] - .into_iter() - .map(ToOwned::to_owned), - ) - .compile(move |result| { - let tcx = result.tcx; - let memo = crate::Callbacks::new(Box::leak(Box::new(args))); - let pdg = memo.run(tcx).unwrap(); + .with_args(PARALEGAL_RUSTC_FLAGS.iter().copied().map(ToOwned::to_owned)) + .compile(move |CompileResult { tcx }| { + let mut memo = Callbacks::new(Box::leak(Box::new(args))); + memo.persist_metadata = false; + let pdg = memo.run_compilation(tcx).unwrap().unwrap(); let graph = PreFrg::from_description(pdg); let cref = graph.ctrl(&self.ctrl_name); check(cref) From 2d51cc68b41463cf487a5a0ab5a4750d58a9711a Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Fri, 28 Jun 2024 12:51:00 -0400 Subject: [PATCH 79/95] Fix parentable_{srcs|dsts} --- .../src/construct.rs | 6 ++-- .../flowistry_pdg_construction/src/graph.rs | 32 ++++++++++++----- .../src/local_analysis.rs | 2 +- .../tests/call_chain_analysis_tests.rs | 34 ++++++++++++++----- 4 files changed, 54 insertions(+), 20 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 2c5f90c3d0..7558c700e5 100644 --- 
a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -587,10 +587,12 @@ impl<'tcx> PartialGraph<'tcx> { trace!("Child graph has generics {:?}", child_descriptor.generics); + let is_root = |n: CallString| n.len() == 2; + // For each source node CHILD that is parentable to PLACE, // add an edge from PLACE -> CHILD. trace!("PARENT -> CHILD EDGES:"); - for (child_src, _kind) in child_graph.parentable_srcs() { + for (child_src, _kind) in child_graph.parentable_srcs(is_root) { if let Some(parent_place) = calling_convention.translate_to_parent( child_src.place, constructor.async_info(), @@ -619,7 +621,7 @@ impl<'tcx> PartialGraph<'tcx> { // PRECISION TODO: for a given child place, we only want to connect // the *last* nodes in the child function to the parent, not *all* of them. trace!("CHILD -> PARENT EDGES:"); - for (child_dst, kind) in child_graph.parentable_dsts() { + for (child_dst, kind) in child_graph.parentable_dsts(is_root) { if let Some(parent_place) = calling_convention.translate_to_parent( child_dst.place, constructor.async_info(), diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 28fd2e65dc..047cc35a1c 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -10,6 +10,7 @@ use std::{ use flowistry_pdg::{CallString, GlobalLocation}; use internment::Intern; use petgraph::{dot, graph::DiGraph}; + use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hir::def_id::{DefId, DefIndex}; use rustc_index::IndexVec; @@ -340,28 +341,43 @@ impl<'tcx> PartialGraph<'tcx> { } } + /// Returns the set of source places that the parent can access (write to) + /// + /// Parameterized by a `is_at_root` function which returns whether a given + /// call string refers to a location in the outermost function. 
This is + /// necessary, because consumers of [`PartialGraph`] manipulate the call + /// string and as such we cannot assume that `.len() == 1` necessarily refers + /// to a root location. (TODO we probably should maintain that invariant) pub(crate) fn parentable_srcs<'a>( &'a self, - ) -> impl Iterator, Option)> + 'a { + is_at_root: impl Fn(CallString) -> bool, + ) -> FxHashSet<(DepNode<'tcx>, Option)> { self.edges .iter() .map(|(src, _, _)| *src) - .filter(|n| n.at.len() == 1 && n.at.leaf().location.is_start()) + .filter(|n| is_at_root(n.at) && n.at.leaf().location.is_start()) .filter_map(move |a| Some((a, as_arg(&a, self.def_id, self.arg_count)?))) - .collect::>() - .into_iter() + .collect() } + /// Returns the set of destination places that the parent can access (read + /// from) + /// + /// Parameterized by a `is_at_root` function which returns whether a given + /// call string refers to a location in the outermost function. This is + /// necessary, because consumers of [`PartialGraph`] manipulate the call + /// string and as such we cannot assume that `.len() == 1` necessarily refers + /// to a root location. 
(TODO we probably should maintain that invariant) pub(crate) fn parentable_dsts<'a>( &'a self, - ) -> impl Iterator, Option)> + 'a { + is_at_root: impl Fn(CallString) -> bool, + ) -> FxHashSet<(DepNode<'tcx>, Option)> { self.edges .iter() .map(|(_, dst, _)| *dst) - .filter(|n| n.at.len() == 1 && n.at.leaf().location.is_end()) + .filter(|n| is_at_root(n.at) && n.at.leaf().location.is_end()) .filter_map(move |a| Some((a, as_arg(&a, self.def_id, self.arg_count)?))) - .collect::>() - .into_iter() + .collect() } } diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index efa131225a..9108831c3f 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -540,7 +540,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { } }; - let parentable_dsts = child_constructor.parentable_dsts(); + let parentable_dsts = child_constructor.parentable_dsts(|n| n.len() == 1); let parent_body = &self.body; // For each destination node CHILD that is parentable to PLACE, diff --git a/crates/paralegal-flow/tests/call_chain_analysis_tests.rs b/crates/paralegal-flow/tests/call_chain_analysis_tests.rs index 91f38fb74b..cee4f5f449 100644 --- a/crates/paralegal-flow/tests/call_chain_analysis_tests.rs +++ b/crates/paralegal-flow/tests/call_chain_analysis_tests.rs @@ -28,16 +28,32 @@ define_test!(without_return: ctrl -> { assert!(src.output().flows_to_data(&dest)); }); -define_test!(with_return: ctrl -> { - let src_fn = ctrl.function("source"); - let src = ctrl.call_site(&src_fn); - let ctrl = ctrl.ctrl("with_return"); - let dest_fn = ctrl.function("receiver"); - let dest_sink = ctrl.call_site(&dest_fn); - let dest = dest_sink.input().nth(0).unwrap(); +#[test] +fn with_return() { + InlineTestBuilder::new(stringify!( + #[paralegal_flow::marker(hello, return)] + fn source() -> i32 { + 0 + } + fn callee(x: i32) -> i32 { + source() + } + 
#[paralegal_flow::marker(there, arguments = [0])] + fn receiver(x: i32) {} - assert!(src.output().flows_to_data(&dest)); -}); + fn main(x: i32) { + receiver(callee(x)); + } + )).check(|ctrl| { + let src_fn = ctrl.function("source"); + let src = ctrl.call_site(&src_fn); + let dest_fn = ctrl.function("receiver"); + let dest_sink = ctrl.call_site(&dest_fn); + let dest = dest_sink.input().nth(0).unwrap(); + + assert!(src.output().flows_to_data(&dest)); + }) +} define_test!(on_mut_var: ctrl -> { let src_fn = ctrl.function("source"); From a0131da368a77acf9efa501531c782fc0c2a5ba9 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Fri, 28 Jun 2024 13:33:06 -0400 Subject: [PATCH 80/95] Allow dyn Any as wildcard --- crates/flowistry_pdg_construction/src/utils.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 3bc96c22e6..3e2556829c 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -13,8 +13,8 @@ use rustc_middle::{ StatementKind, Terminator, TerminatorKind, }, ty::{ - self, BoundVariableKind, EarlyBinder, GenericArg, GenericArgKind, GenericArgsRef, Instance, - List, ParamEnv, Region, Ty, TyCtxt, TyKind, + self, Binder, BoundVariableKind, EarlyBinder, GenericArg, GenericArgKind, GenericArgsRef, + Instance, List, ParamEnv, Region, Ty, TyCtxt, TyKind, }, }; @@ -269,6 +269,14 @@ fn is_wildcard(t: &TyKind<'_>) -> bool { matches!( t, TyKind::Param(..) | TyKind::Alias(..) | TyKind::Bound(..) | TyKind::Placeholder(..) 
+ ) || matches!(t, + TyKind::Dynamic(pred, _, _) if matches!( + pred.first().copied().and_then(Binder::no_bound_vars), + Some(ty::ExistentialPredicate::Trait(tref)) + if tref.def_id == ty::tls::with(|tcx| tcx + .get_diagnostic_item(rustc_span::sym::Any) + .expect("The `Any` item is not defined.")) + ) ) } From 5a9912b104f337a4baa31f32d557087f45bfae1d Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Fri, 28 Jun 2024 13:35:18 -0400 Subject: [PATCH 81/95] Formatting --- crates/flowistry_pdg_construction/src/async_support.rs | 2 +- crates/flowistry_pdg_construction/src/construct.rs | 10 ++++------ crates/flowistry_pdg_construction/src/graph.rs | 4 ++-- .../flowistry_pdg_construction/src/local_analysis.rs | 2 +- crates/paralegal-flow/src/ana/metadata.rs | 4 ++-- crates/paralegal-flow/src/test_utils.rs | 3 +-- .../paralegal-flow/tests/call_chain_analysis_tests.rs | 3 ++- crates/paralegal-flow/tests/clone-test.rs | 2 +- crates/paralegal-policy/src/context.rs | 3 +-- crates/paralegal-policy/tests/helpers/mod.rs | 2 +- 10 files changed, 16 insertions(+), 19 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index 2298516626..6ab7da151d 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -1,7 +1,7 @@ use std::{fmt::Display, rc::Rc}; use either::Either; -use flowistry_pdg::{CallString, GlobalLocation}; + use itertools::Itertools; use rustc_abi::{FieldIdx, VariantIdx}; use rustc_hir::def_id::{DefId, LocalDefId}; diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 7558c700e5..371aecd8ab 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -8,7 +8,7 @@ //! [`InstructionState`] at each instruction in the procedure. //! 2. 
[`PartialGraph`] implements [`ResultsVisitor`] over the analysis result -use std::{borrow::Cow, fmt::Display, rc::Rc}; +use std::{fmt::Display, rc::Rc}; use anyhow::anyhow; use either::Either; @@ -26,13 +26,11 @@ use rustc_middle::{ mir::{ visit::Visitor, AggregateKind, Location, Operand, Place, Rvalue, Terminator, TerminatorKind, }, - ty::{ - normalize_erasing_regions::NormalizationError, GenericArgsRef, Instance, ParamEnv, TyCtxt, - }, + ty::{GenericArgsRef, Instance, TyCtxt}, }; use rustc_mir_dataflow::{AnalysisDomain, Results, ResultsVisitor}; use rustc_span::Span; -use rustc_utils::{cache::Cache, mir::borrowck_facts}; +use rustc_utils::cache::Cache; use crate::{ async_support::*, @@ -41,7 +39,7 @@ use crate::{ }, local_analysis::{CallHandling, InstructionState, LocalAnalysis}, mutation::{ModularMutationVisitor, Mutation, Time}, - utils::{manufacture_substs_for, try_monomorphize, try_resolve_function}, + utils::{manufacture_substs_for, try_resolve_function}, CallChangeCallback, GraphLoader, }; diff --git a/crates/flowistry_pdg_construction/src/graph.rs b/crates/flowistry_pdg_construction/src/graph.rs index 047cc35a1c..7275eaba95 100644 --- a/crates/flowistry_pdg_construction/src/graph.rs +++ b/crates/flowistry_pdg_construction/src/graph.rs @@ -16,7 +16,7 @@ use rustc_hir::def_id::{DefId, DefIndex}; use rustc_index::IndexVec; use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; use rustc_middle::{ - mir::{Body, HasLocalDecls, Local, LocalDecl, LocalDecls, Location, Place}, + mir::{Body, HasLocalDecls, Local, LocalDecl, LocalDecls, Place}, ty::{GenericArgsRef, TyCtxt}, }; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; @@ -26,7 +26,7 @@ use rustc_utils::PlaceExt; pub use flowistry_pdg::{RichLocation, SourceUse, TargetUse}; use serde::{Deserialize, Serialize}; -use crate::{construct::Error, utils::Captures, AsyncType}; +use crate::{construct::Error, utils::Captures}; /// A node in the program dependency graph. 
/// diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index 9108831c3f..b44886d01e 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -31,7 +31,7 @@ use crate::{ construct::{Error, WithConstructionErrors}, graph::{DepEdge, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, - utils::{self, is_async, is_non_default_trait_method, try_monomorphize, SimpleTyEquiv}, + utils::{self, is_async, is_non_default_trait_method, try_monomorphize}, CallChangeCallback, CallChanges, CallInfo, MemoPdgConstructor, SkipCall, }; diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 047f4fe566..3dd3f86a90 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -16,8 +16,8 @@ use std::{fs::File, io::Read, rc::Rc}; use construct::determine_async; use flowistry_pdg_construction::{ - self as construct, default_emit_error, graph::InternedString, AsyncType, DepGraph, - EmittableError, GraphLoader, MemoPdgConstructor, PartialGraph, + self as construct, default_emit_error, graph::InternedString, AsyncType, EmittableError, + GraphLoader, MemoPdgConstructor, PartialGraph, }; use rustc_hash::FxHashMap; diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index ce36922628..8a97fc24b5 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -5,7 +5,6 @@ extern crate rustc_middle; extern crate rustc_span; use crate::{ - args::{Args, ClapArgs}, desc::{Identifier, ProgramDescription}, utils::Print, Callbacks, HashSet, PARALEGAL_RUSTC_FLAGS, @@ -27,7 +26,7 @@ use petgraph::visit::{Control, Data, DfsEvent, EdgeRef, FilterEdge, GraphBase, I use petgraph::visit::{IntoNeighbors, IntoNodeReferences}; use 
petgraph::visit::{NodeRef as _, Visitable}; use petgraph::Direction; -use rustc_utils::test_utils::{CompileResult, DUMMY_MOD_NAME}; +use rustc_utils::test_utils::CompileResult; use std::path::Path; lazy_static! { diff --git a/crates/paralegal-flow/tests/call_chain_analysis_tests.rs b/crates/paralegal-flow/tests/call_chain_analysis_tests.rs index cee4f5f449..70f915f161 100644 --- a/crates/paralegal-flow/tests/call_chain_analysis_tests.rs +++ b/crates/paralegal-flow/tests/call_chain_analysis_tests.rs @@ -44,7 +44,8 @@ fn with_return() { fn main(x: i32) { receiver(callee(x)); } - )).check(|ctrl| { + )) + .check(|ctrl| { let src_fn = ctrl.function("source"); let src = ctrl.call_site(&src_fn); let dest_fn = ctrl.function("receiver"); diff --git a/crates/paralegal-flow/tests/clone-test.rs b/crates/paralegal-flow/tests/clone-test.rs index 2523d32829..faf0764b89 100644 --- a/crates/paralegal-flow/tests/clone-test.rs +++ b/crates/paralegal-flow/tests/clone-test.rs @@ -26,7 +26,7 @@ fn clone_nesting() { let v2 = Opt::Filled(AnEnum::Var1(0)).clone(); } )) - .check(|ctr| {}) + .check(|_ctr| {}) } #[test] diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index 55cec4379e..f7c5195309 100644 --- a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -5,7 +5,6 @@ use std::time::{Duration, Instant}; use std::vec; use std::{io::Write, process::exit, sync::Arc}; -use paralegal_spdg::rustc_portable::defid_as_local; pub use paralegal_spdg::rustc_portable::{DefId, LocalDefId}; use paralegal_spdg::traverse::{generic_flows_to, EdgeSelection}; use paralegal_spdg::{ @@ -628,7 +627,7 @@ impl Context { self.desc .def_info .iter() - .filter(|(did, _)| self.desc.analyzed_spans.contains_key(&did)) + .filter(|(did, _)| self.desc.analyzed_spans.contains_key(did)) .map(|(_, i)| (&i.src_info, matches!(i.kind, DefKind::Type))) }) .into_iter() diff --git a/crates/paralegal-policy/tests/helpers/mod.rs 
b/crates/paralegal-policy/tests/helpers/mod.rs index cffb4c13cd..00dad610c3 100644 --- a/crates/paralegal-policy/tests/helpers/mod.rs +++ b/crates/paralegal-policy/tests/helpers/mod.rs @@ -8,7 +8,7 @@ use std::{ path::{Path, PathBuf}, process::Command, sync::Arc, - time::{Duration, Instant, SystemTime, SystemTimeError, UNIX_EPOCH}, + time::{SystemTime, UNIX_EPOCH}, }; use anyhow::anyhow; From fa0575704baf378e0c20e12615a345e1a371c315 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sun, 30 Jun 2024 13:10:35 -0400 Subject: [PATCH 82/95] Allow custom output for context --- crates/paralegal-policy/src/context.rs | 4 ++-- crates/paralegal-policy/src/lib.rs | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index f7c5195309..d758427ab8 100644 --- a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -258,8 +258,8 @@ impl Context { } /// Dispatch and drain all queued diagnostics without aborting the program. - pub fn emit_diagnostics(&self, w: impl Write) -> std::io::Result { - self.diagnostics.emit(w) + pub fn emit_diagnostics(&self) -> std::io::Result { + self.diagnostics.emit((self.config.get_output_writer)()) } /// Returns all nodes that are in any of the PDGs diff --git a/crates/paralegal-policy/src/lib.rs b/crates/paralegal-policy/src/lib.rs index be8729c0a8..6cf05e1ff7 100644 --- a/crates/paralegal-policy/src/lib.rs +++ b/crates/paralegal-policy/src/lib.rs @@ -245,7 +245,7 @@ impl GraphLocation { let start = Instant::now(); let result = prop(ctx.clone())?; - let success = ctx.emit_diagnostics(std::io::stdout())?; + let success = ctx.emit_diagnostics()?; Ok(PolicyReturn { success, result, @@ -283,6 +283,12 @@ pub struct Config { /// Whether tho precompute an index for `flows_to` queries with /// `EdgeSelection::Data` or whether to use a new DFS every time. 
pub use_flows_to_index: bool, + /// Where to write output to + pub get_output_writer: fn() -> Box, +} + +fn default_output() -> Box { + Box::new(std::io::stdout()) } impl Default for Config { @@ -290,6 +296,7 @@ impl Default for Config { Config { always_happens_before_tracing: algo::ahb::TraceLevel::StartAndEnd, use_flows_to_index: false, + get_output_writer: default_output, } } } From 5b3999bc9a637ac7d7b7fd310b73151543c76071 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sun, 30 Jun 2024 13:51:29 -0400 Subject: [PATCH 83/95] Adjust output mechanism --- crates/paralegal-policy/src/algo/ahb.rs | 2 +- crates/paralegal-policy/src/context.rs | 9 +++++---- crates/paralegal-policy/src/lib.rs | 11 ++--------- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/crates/paralegal-policy/src/algo/ahb.rs b/crates/paralegal-policy/src/algo/ahb.rs index ad8c32f8ff..9ca762bf4e 100644 --- a/crates/paralegal-policy/src/algo/ahb.rs +++ b/crates/paralegal-policy/src/algo/ahb.rs @@ -143,7 +143,7 @@ impl crate::Context { .map(|i| (i.controller_id(), i.local_node())) .into_group_map(); - let mut trace = Trace::new(self.config.always_happens_before_tracing); + let mut trace = Trace::new(self.config.lock().unwrap().always_happens_before_tracing); let select_data = |e: <&SPDGImpl as IntoEdgeReferences>::EdgeRef| e.weight().is_data(); diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index d758427ab8..a82357e858 100644 --- a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -1,6 +1,7 @@ use std::collections::BTreeMap; use std::fs::File; use std::io::BufReader; +use std::sync::Mutex; use std::time::{Duration, Instant}; use std::vec; use std::{io::Write, process::exit, sync::Arc}; @@ -99,14 +100,13 @@ fn bfs_iter< /// [`Self::emit_diagnostics`]. If you used /// [`super::GraphLocation::with_context`] this will be done automatically for /// you. 
-#[derive(Debug)] pub struct Context { marker_to_ids: MarkerIndex, desc: ProgramDescription, flows_to: Option, pub(crate) diagnostics: DiagnosticsRecorder, name_map: HashMap>, - pub(crate) config: Arc, + pub(crate) config: Arc>, pub(crate) stats: ContextStats, } @@ -142,7 +142,7 @@ impl Context { flows_to, diagnostics: Default::default(), name_map, - config: Arc::new(config), + config: Arc::new(Mutex::new(config)), stats: ContextStats { pdg_construction: None, precomputation: start.elapsed(), @@ -259,7 +259,8 @@ impl Context { /// Dispatch and drain all queued diagnostics without aborting the program. pub fn emit_diagnostics(&self) -> std::io::Result { - self.diagnostics.emit((self.config.get_output_writer)()) + self.diagnostics + .emit(&mut self.config.lock().unwrap().output_writer) } /// Returns all nodes that are in any of the PDGs diff --git a/crates/paralegal-policy/src/lib.rs b/crates/paralegal-policy/src/lib.rs index 6cf05e1ff7..c064dc2bc9 100644 --- a/crates/paralegal-policy/src/lib.rs +++ b/crates/paralegal-policy/src/lib.rs @@ -49,8 +49,6 @@ #![warn(missing_docs)] -extern crate core; - use anyhow::{ensure, Result}; pub use paralegal_spdg; use paralegal_spdg::utils::TruncatedHumanTime; @@ -276,7 +274,6 @@ impl GraphLocation { } /// Configuration for the framework -#[derive(Clone, Debug)] pub struct Config { /// How much information to retain for error messages in `always_happens_before` pub always_happens_before_tracing: algo::ahb::TraceLevel, @@ -284,11 +281,7 @@ pub struct Config { /// `EdgeSelection::Data` or whether to use a new DFS every time. 
pub use_flows_to_index: bool, /// Where to write output to - pub get_output_writer: fn() -> Box, -} - -fn default_output() -> Box { - Box::new(std::io::stdout()) + pub output_writer: Box, } impl Default for Config { @@ -296,7 +289,7 @@ impl Default for Config { Config { always_happens_before_tracing: algo::ahb::TraceLevel::StartAndEnd, use_flows_to_index: false, - get_output_writer: default_output, + output_writer: Box::new(std::io::stdout()), } } } From 6433d4392f0c8cbcf5b7e11d7c315fc9f2f8e029 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Sun, 30 Jun 2024 14:22:14 -0400 Subject: [PATCH 84/95] Clippy fixes --- .../src/calling_convention.rs | 1 + crates/flowistry_pdg_construction/src/utils.rs | 6 +++--- crates/paralegal-flow/src/ana/metadata.rs | 11 ++++------- crates/paralegal-flow/src/ana/mod.rs | 4 ---- crates/paralegal-flow/src/lib.rs | 4 ++-- 5 files changed, 10 insertions(+), 16 deletions(-) diff --git a/crates/flowistry_pdg_construction/src/calling_convention.rs b/crates/flowistry_pdg_construction/src/calling_convention.rs index c00b5cb729..f86b4cbc97 100644 --- a/crates/flowistry_pdg_construction/src/calling_convention.rs +++ b/crates/flowistry_pdg_construction/src/calling_convention.rs @@ -33,6 +33,7 @@ impl<'tcx, 'a> CallingConvention<'tcx, 'a> { } } + #[allow(clippy::too_many_arguments)] pub(crate) fn translate_to_parent( &self, child: Place<'tcx>, diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 3e2556829c..97631b8a9d 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -181,7 +181,7 @@ impl<'tcx> SimpleTyEquiv for Ty<'tcx> { } } -impl<'tcx, T: SimpleTyEquiv> SimpleTyEquiv for [T] { +impl SimpleTyEquiv for [T] { fn equiv(&self, other: &Self) -> bool { self.iter().zip(other.iter()).all(|(a, b)| a.equiv(b)) } @@ -397,8 +397,8 @@ pub fn manufacture_substs_for( function: DefId, ) -> Result<&List>, Error> { use 
rustc_middle::ty::{ - Binder, BoundRegionKind, DynKind, ExistentialPredicate, ExistentialProjection, - ExistentialTraitRef, GenericParamDefKind, ImplPolarity, ParamTy, Region, TraitPredicate, + BoundRegionKind, DynKind, ExistentialPredicate, ExistentialProjection, ExistentialTraitRef, + GenericParamDefKind, ImplPolarity, ParamTy, TraitPredicate, }; trace!("Manufacturing for {function:?}"); diff --git a/crates/paralegal-flow/src/ana/metadata.rs b/crates/paralegal-flow/src/ana/metadata.rs index 3dd3f86a90..85df051779 100644 --- a/crates/paralegal-flow/src/ana/metadata.rs +++ b/crates/paralegal-flow/src/ana/metadata.rs @@ -124,7 +124,7 @@ impl<'tcx> MetadataLoader<'tcx> { self: Rc, args: &'static Args, path: Option>, - ) -> (Vec, MarkerCtx<'tcx>, MemoPdgConstructor<'tcx>) { + ) -> (Vec, MarkerCtx<'tcx>) { let tcx = self.tcx; let mut collector = CollectingVisitor::new(tcx, args, self.clone()); collector.run(); @@ -139,10 +139,7 @@ impl<'tcx> MetadataLoader<'tcx> { .with_dump_mir(args.dbg().dump_mir()); let pdgs = emit_targets .into_iter() - .filter_map(|t| { - // if tcx.def_path_str(t) != "::clone" { - // return None; - // } + .map(|t| { println!("Constructing for {:?}", tcx.def_path_str(t)); let graph = constructor.construct_root(t).map(|graph| { let body = borrowck_facts::get_body_with_borrowck_facts(tcx, t); @@ -160,7 +157,7 @@ impl<'tcx> MetadataLoader<'tcx> { async_status, } }); - Some((t.local_def_index, graph)) + (t.local_def_index, graph) }) .collect::>(); let meta = Metadata::from_pdgs(tcx, pdgs, marker_ctx.db()); @@ -170,7 +167,7 @@ impl<'tcx> MetadataLoader<'tcx> { meta.write(path, tcx); } self.cache.get(LOCAL_CRATE, |_| Some(meta)); - (collector.functions_to_analyze, marker_ctx, constructor) + (collector.functions_to_analyze, marker_ctx) } pub fn get_annotations(&self, key: DefId) -> &[Annotation] { diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 8468be1936..65006accaf 100644 --- 
a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -16,7 +16,6 @@ use crate::{ use anyhow::Result; -use flowistry_pdg_construction::MemoPdgConstructor; use itertools::Itertools; use petgraph::visit::GraphBase; @@ -41,7 +40,6 @@ pub struct SPDGGenerator<'tcx> { pub opts: &'static crate::Args, pub tcx: TyCtxt<'tcx>, marker_ctx: MarkerCtx<'tcx>, - constructor: MemoPdgConstructor<'tcx>, metadata_loader: Rc>, } @@ -50,7 +48,6 @@ impl<'tcx> SPDGGenerator<'tcx> { marker_ctx: MarkerCtx<'tcx>, opts: &'static crate::Args, tcx: TyCtxt<'tcx>, - constructor: MemoPdgConstructor<'tcx>, metadata_loader: Rc>, ) -> Self { Self { @@ -58,7 +55,6 @@ impl<'tcx> SPDGGenerator<'tcx> { opts, tcx, metadata_loader, - constructor, } } diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index eb3ebe0d25..a3ec539ffd 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -179,7 +179,7 @@ impl Callbacks { let loader = MetadataLoader::new(tcx); - let (analysis_targets, mctx, constructor) = loader.clone().collect_and_emit_metadata( + let (analysis_targets, mctx) = loader.clone().collect_and_emit_metadata( self.opts, self.persist_metadata .then(|| intermediate_out_file_path(tcx)) @@ -187,7 +187,7 @@ impl Callbacks { ); tcx.sess.abort_if_errors(); - let mut gen = SPDGGenerator::new(mctx, self.opts, tcx, constructor, loader); + let mut gen = SPDGGenerator::new(mctx, self.opts, tcx, loader); (!analysis_targets.is_empty()) .then(|| gen.analyze(analysis_targets)) From 1a580545dcd42de3227dbdfb1dfa864203cc12d6 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 1 Jul 2024 14:53:53 -0400 Subject: [PATCH 85/95] Test case for markers on return nodes when control flow is present --- .../paralegal-flow/src/ana/graph_converter.rs | 6 +- crates/paralegal-policy/tests/freedit.rs | 71 +++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs 
b/crates/paralegal-flow/src/ana/graph_converter.rs index a93135c6f1..60952deb9d 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -193,7 +193,10 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { // // Also yikes. This should have better detection of whether // a place is (part of) a function return - let mut in_edges = graph.graph.edges_directed(old_node, Direction::Incoming); + let mut in_edges = graph + .graph + .edges_directed(old_node, Direction::Incoming) + .filter(|e| e.weight().kind == DepEdgeKind::Data); let needs_return_markers = in_edges.clone().next().is_none() || in_edges.any(|e| { let at = e.weight().at; @@ -497,6 +500,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { } } +#[cfg(debug_assertions)] fn assert_edge_location_invariant<'tcx>( tcx: TyCtxt<'tcx>, at: CallString, diff --git a/crates/paralegal-policy/tests/freedit.rs b/crates/paralegal-policy/tests/freedit.rs index 480c371ab2..1674ecfdff 100644 --- a/crates/paralegal-policy/tests/freedit.rs +++ b/crates/paralegal-policy/tests/freedit.rs @@ -263,3 +263,74 @@ fn markers_on_generic_calls() -> Result<()> { Ok(()) }) } + +#[test] +fn finding_utc_now() -> Result<()> { + let mut test = Test::new(stringify!( + use sled::Db; + use chrono::Utc; + use thiserror::Error; + + #[derive(Error, Debug)] + pub enum AppError { + #[error("Sled db error: {}", .0)] + SledError(#[from] sled::Error), + #[error(transparent)] + Utf8Error(#[from] std::str::Utf8Error), + } + + pub async fn clear_invalid(db: &Db, tree_name: &str) -> Result<(), AppError> { + // let tree = db.open_tree(tree_name)?; + // for i in tree.iter() { + // let (k, _) = i?; + // let k_str = std::str::from_utf8(&k)?; + // let time_stamp = k_str + // .split_once('_') + // .and_then(|s| i64::from_str_radix(s.0, 16).ok()); + let time_stamp = Some(0_i64); + if let Some(time_stamp) = time_stamp { + if time_stamp < Utc::now().timestamp() { + panic!() + //tree.remove(k)?; 
+ } + } + //} + Ok(()) + } + + #[paralegal::analyze] + pub async fn user_chron_job() -> ! { + let db = sled::Config::default().open().unwrap(); + loop { + //sleep_seconds(600).await; + clear_invalid(&db, "dummy").await.unwrap() + //sleep_seconds(3600 * 4).await; + } + } + ))?; + test.with_external_annotations( + " + [[\"chrono::Utc::now\"]] + marker = \"time\" + on_return = true + ", + ) + .with_dep([ + "chrono@0.4.38", + "--no-default-features", + "--features", + "clock", + ]) + .with_dep(["sled@0.34.7"]) + .with_dep(["thiserror@1"]); + test.run(|ctx| { + assert_error!( + ctx, + ctx.marked_nodes(Identifier::new_intern("time")) + .next() + .is_some(), + "No time found" + ); + Ok(()) + }) +} From a9c4ed2e05b7c7689b03b481c0db35af2fc25c2c Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Mon, 1 Jul 2024 15:25:36 -0400 Subject: [PATCH 86/95] Enable all policy framework tests --- Makefile.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.toml b/Makefile.toml index ee1c7748ec..6b00347eab 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -74,7 +74,7 @@ command = "cargo" [tasks.policy-framework-tests] description = "Tests related to the correctness of the policy framework." command = "cargo" -args = ["test", "-p", "paralegal-policy", "--lib"] +args = ["test", "-p", "paralegal-policy"] [tasks.guide-project] description = "Build and run the policy from the guide." 
From e21d3b9f0d1284ae2dc3b792b7a60fa4722005b4 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Tue, 2 Jul 2024 21:01:59 +0000 Subject: [PATCH 87/95] Use a clean version --- crates/paralegal-flow/build.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/paralegal-flow/build.rs b/crates/paralegal-flow/build.rs index 7fb9ee8ce6..5e3933e0ea 100644 --- a/crates/paralegal-flow/build.rs +++ b/crates/paralegal-flow/build.rs @@ -1,3 +1,5 @@ +#![feature(string_remove_matches)] + use std::path::PathBuf; use std::process::Command; extern crate chrono; @@ -71,8 +73,10 @@ fn main() { .arg("--version") .output() .unwrap(); + let mut version_str = + String::from_utf8(rustc_version.stdout).unwrap(); + version_str.remove_matches('\n'); println!( - "cargo:rustc-env=RUSTC_VERSION=\"{}\"", - String::from_utf8(rustc_version.stdout).unwrap() + "cargo:rustc-env=RUSTC_VERSION={}", version_str, ); } From 4385ce94154b0dba63c8efcb918ccf57de7c3d76 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 11 Jul 2024 16:24:18 +0000 Subject: [PATCH 88/95] Mimic rustc version --- Cargo.lock | 8 ++++---- Cargo.toml | 6 ++++-- crates/paralegal-flow/src/args.rs | 23 +++++++++++++++++------ crates/paralegal-flow/src/lib.rs | 6 +++++- 4 files changed, 30 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3802f32de6..317068f51d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1113,12 +1113,12 @@ name = "rustc_plugin" version = "0.7.4-nightly-2023-08-25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1348edfa020dbe4807a4d99272332dadcbbedff6b587accb95faefe20d2c7129" -replace = "rustc_plugin 0.7.4-nightly-2023-08-25 (git+https://github.com/JustusAdam/rustc_plugin?rev=d4fefb5c0344cdf4812b4877d5b03cb19a2c4672)" +replace = "rustc_plugin 0.7.4-nightly-2023-08-25 (git+https://github.com/JustusAdam/rustc_plugin?rev=dd382b79fc12ee86bc774c290a00bda32a0d54db)" [[package]] name = "rustc_plugin" version = "0.7.4-nightly-2023-08-25" -source 
= "git+https://github.com/JustusAdam/rustc_plugin?rev=d4fefb5c0344cdf4812b4877d5b03cb19a2c4672#d4fefb5c0344cdf4812b4877d5b03cb19a2c4672" +source = "git+https://github.com/JustusAdam/rustc_plugin?rev=dd382b79fc12ee86bc774c290a00bda32a0d54db#dd382b79fc12ee86bc774c290a00bda32a0d54db" dependencies = [ "cargo_metadata", "log", @@ -1139,12 +1139,12 @@ name = "rustc_utils" version = "0.7.4-nightly-2023-08-25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09428c7086894369685cca54a516acc0f0ab6d0e5a628c094ba83bfddaf1aedf" -replace = "rustc_utils 0.7.4-nightly-2023-08-25 (git+https://github.com/JustusAdam/rustc_plugin?rev=d4fefb5c0344cdf4812b4877d5b03cb19a2c4672)" +replace = "rustc_utils 0.7.4-nightly-2023-08-25 (git+https://github.com/JustusAdam/rustc_plugin?rev=dd382b79fc12ee86bc774c290a00bda32a0d54db)" [[package]] name = "rustc_utils" version = "0.7.4-nightly-2023-08-25" -source = "git+https://github.com/JustusAdam/rustc_plugin?rev=d4fefb5c0344cdf4812b4877d5b03cb19a2c4672#d4fefb5c0344cdf4812b4877d5b03cb19a2c4672" +source = "git+https://github.com/JustusAdam/rustc_plugin?rev=dd382b79fc12ee86bc774c290a00bda32a0d54db#dd382b79fc12ee86bc774c290a00bda32a0d54db" dependencies = [ "anyhow", "cfg-if", diff --git a/Cargo.toml b/Cargo.toml index 0647fdf6fe..a02f24432a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,9 +31,11 @@ debug = true # "rustc_plugin:0.7.4-nightly-2023-08-25" = { path = "../rustc_plugin/crates/rustc_plugin" } [replace."rustc_utils:0.7.4-nightly-2023-08-25"] +# path = "../rustc_plugin/crates/rustc_utils" git = "https://github.com/JustusAdam/rustc_plugin" -rev = "d4fefb5c0344cdf4812b4877d5b03cb19a2c4672" +rev = "dd382b79fc12ee86bc774c290a00bda32a0d54db" [replace."rustc_plugin:0.7.4-nightly-2023-08-25"] +# path = "../rustc_plugin/crates/rustc_plugin" git = "https://github.com/JustusAdam/rustc_plugin" -rev = "d4fefb5c0344cdf4812b4877d5b03cb19a2c4672" +rev = "dd382b79fc12ee86bc774c290a00bda32a0d54db" diff --git 
a/crates/paralegal-flow/src/args.rs b/crates/paralegal-flow/src/args.rs index 38359ebf2e..de905bd343 100644 --- a/crates/paralegal-flow/src/args.rs +++ b/crates/paralegal-flow/src/args.rs @@ -66,17 +66,12 @@ impl TryFrom for Args { .iter() .flat_map(|s| s.split(',').map(ToOwned::to_owned)) .collect(); + let build_config = get_build_config()?; if let Some(from_env) = env_var_expect_unicode("PARALEGAL_ANALYZE")? { anactrl .analyze .extend(from_env.split(',').map(ToOwned::to_owned)); } - let build_config_file = std::path::Path::new("Paralegal.toml"); - let build_config = if build_config_file.exists() { - toml::from_str(&std::fs::read_to_string(build_config_file)?)? - } else { - Default::default() - }; let log_level_config = match debug_target { Some(target) if !target.is_empty() => LogLevelConfig::Targeted(target), _ => LogLevelConfig::Disabled, @@ -107,6 +102,15 @@ impl TryFrom for Args { } } +pub fn get_build_config() -> Result { + let build_config_file = std::path::Path::new("Paralegal.toml"); + Ok(if build_config_file.exists() { + toml::from_str(&std::fs::read_to_string(build_config_file)?)? + } else { + Default::default() + }) +} + #[derive(serde::Serialize, serde::Deserialize, clap::ValueEnum, Clone, Copy)] pub enum Debugger { /// The CodeLLDB debugger. Learn more at . @@ -544,4 +548,11 @@ pub struct DepConfig { pub struct BuildConfig { /// Dependency specific configuration pub dep: crate::HashMap, + /// Overrides what is reported if this tool is called like `rustc + /// --version`. This is sometimes needed when crates attempt to detect the + /// rust version being used. + /// + /// Set this to "inherent" to use the rustc version that paralegal will be + /// using internally. 
+ pub imitate_compiler: Option, } diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index a3ec539ffd..f7b7f91d64 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -81,7 +81,7 @@ pub use paralegal_spdg as desc; pub use crate::ann::db::MarkerCtx; use ana::{MetadataLoader, SPDGGenerator}; -use args::{AnalysisCtrl, Args, ClapArgs, Debugger, LogLevelConfig}; +use args::{get_build_config, AnalysisCtrl, Args, BuildConfig, ClapArgs, Debugger, LogLevelConfig}; use consts::INTERMEDIATE_ARTIFACT_EXT; use desc::utils::write_sep; use stats::{Stats, TimedStat}; @@ -273,6 +273,10 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { "paralegal-flow".into() } + fn reported_driver_version(&self) -> Cow<'static, str> { + env!("RUSTC_VERSION").into() + } + fn args( &self, _target_dir: &rustc_plugin::Utf8Path, From 7df232e001403ebdb7e6c34eab403af3e39e5480 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 11 Jul 2024 16:51:18 +0000 Subject: [PATCH 89/95] Format and clippy --- crates/paralegal-flow/build.rs | 7 ++----- crates/paralegal-flow/src/args.rs | 4 ++-- crates/paralegal-flow/src/lib.rs | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/crates/paralegal-flow/build.rs b/crates/paralegal-flow/build.rs index 5e3933e0ea..d4b1f305bb 100644 --- a/crates/paralegal-flow/build.rs +++ b/crates/paralegal-flow/build.rs @@ -73,10 +73,7 @@ fn main() { .arg("--version") .output() .unwrap(); - let mut version_str = - String::from_utf8(rustc_version.stdout).unwrap(); + let mut version_str = String::from_utf8(rustc_version.stdout).unwrap(); version_str.remove_matches('\n'); - println!( - "cargo:rustc-env=RUSTC_VERSION={}", version_str, - ); + println!("cargo:rustc-env=RUSTC_VERSION={}", version_str,); } diff --git a/crates/paralegal-flow/src/args.rs b/crates/paralegal-flow/src/args.rs index de905bd343..bfb6909188 100644 --- a/crates/paralegal-flow/src/args.rs +++ b/crates/paralegal-flow/src/args.rs @@ -550,8 
+550,8 @@ pub struct BuildConfig { pub dep: crate::HashMap, /// Overrides what is reported if this tool is called like `rustc /// --version`. This is sometimes needed when crates attempt to detect the - /// rust version being used. - /// + /// rust version being used. + /// /// Set this to "inherent" to use the rustc version that paralegal will be /// using internally. pub imitate_compiler: Option, diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index f7b7f91d64..fca946cf54 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -81,7 +81,7 @@ pub use paralegal_spdg as desc; pub use crate::ann::db::MarkerCtx; use ana::{MetadataLoader, SPDGGenerator}; -use args::{get_build_config, AnalysisCtrl, Args, BuildConfig, ClapArgs, Debugger, LogLevelConfig}; +use args::{AnalysisCtrl, Args, ClapArgs, Debugger, LogLevelConfig}; use consts::INTERMEDIATE_ARTIFACT_EXT; use desc::utils::write_sep; use stats::{Stats, TimedStat}; From 879142641a8a45c327d7baf348dd158dffa2b31e Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 11 Jul 2024 22:37:23 +0000 Subject: [PATCH 90/95] Misc small changes --- crates/paralegal-flow/src/args.rs | 17 +++++++---------- crates/paralegal-flow/src/discover.rs | 2 +- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/crates/paralegal-flow/src/args.rs b/crates/paralegal-flow/src/args.rs index bfb6909188..68c359b6a0 100644 --- a/crates/paralegal-flow/src/args.rs +++ b/crates/paralegal-flow/src/args.rs @@ -444,7 +444,7 @@ struct ClapAnalysisCtrl { unconstrained_depth: bool, } -#[derive(serde::Serialize, serde::Deserialize)] +#[derive(serde::Serialize, serde::Deserialize, Default)] pub struct AnalysisCtrl { /// Target this function as analysis target. Command line version of /// `#[paralegal::analyze]`). 
Must be a full rust path and resolve to a @@ -456,15 +456,6 @@ pub struct AnalysisCtrl { inlining_depth: InliningDepth, } -impl Default for AnalysisCtrl { - fn default() -> Self { - Self { - analyze: Vec::new(), - inlining_depth: InliningDepth::Adaptive, - } - } -} - impl TryFrom for AnalysisCtrl { type Error = Error; fn try_from(value: ClapAnalysisCtrl) -> Result { @@ -500,6 +491,12 @@ pub enum InliningDepth { Adaptive, } +impl Default for InliningDepth { + fn default() -> Self { + Self::Adaptive + } +} + impl AnalysisCtrl { /// Externally (via command line) selected analysis targets pub fn selected_targets(&self) -> &[String] { diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index 77c3145bb5..fd9644f3f3 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -68,7 +68,7 @@ impl<'tcx> CollectingVisitor<'tcx> { if let Some(local) = def_id.as_local() { Some(FnToAnalyze { def_id: local, - name: tcx.opt_item_ident(def_id).unwrap(), + name: tcx.opt_item_ident(def_id).expect("analysis target does not have a name"), }) } else { tcx.sess.span_err(tcx.def_span(def_id), "found an external function as analysis target. 
Analysis targets are required to be local."); From 0d404e2e6d0ef5a86600b04c2534f00941afa827 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 17 Jul 2024 16:14:30 +0000 Subject: [PATCH 91/95] Test cases for async + generics interactions --- crates/paralegal-flow/tests/async-generics.rs | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 crates/paralegal-flow/tests/async-generics.rs diff --git a/crates/paralegal-flow/tests/async-generics.rs b/crates/paralegal-flow/tests/async-generics.rs new file mode 100644 index 0000000000..01a6fce954 --- /dev/null +++ b/crates/paralegal-flow/tests/async-generics.rs @@ -0,0 +1,92 @@ +use paralegal_flow::test_utils::InlineTestBuilder; + +#[test] +fn await_on_generic() { + InlineTestBuilder::new(stringify!( + use std::{ + future::{Future}, + task::{Context, Poll}, + pin::Pin + }; + struct AFuture; + + impl Future for AFuture { + type Output = usize; + fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { + unimplemented!() + } + } + + trait Trait { + fn method(&mut self) -> AFuture; + } + + async fn main(mut t: T) -> usize { + t.method().await + } + )) + .check(|_ctrl| {}) +} + +#[test] +fn await_with_inner_generic() { + InlineTestBuilder::new(stringify!( + use std::{ + future::{Future}, + task::{Context, Poll}, + pin::Pin, + }; + struct AFuture<'a, T: ?Sized>(&'a mut T); + + impl<'a, T> Future for AFuture<'a, T> { + type Output = usize; + fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { + unimplemented!() + } + } + + trait Trait { + fn method(&mut self) -> AFuture<'_, Self> { + AFuture(self) + } + } + + async fn main(mut t: T) -> usize { + t.method().await + } + )) + .check(|_ctrl| {}) +} + +#[test] +fn await_with_inner_generic_wrapped() { + InlineTestBuilder::new(stringify!( + use std::{ + future::{Future}, + task::{Context, Poll}, + pin::Pin, + }; + struct AFuture<'a, T: ?Sized>(&'a mut T); + + impl<'a, T: Trait + Unpin + ?Sized> Future for AFuture<'a, T> { + type Output = 
usize; + fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { + unimplemented!() + } + } + + trait Trait: Send + Unpin + 'static { + fn method(&mut self) -> AFuture<'_, Self> + where + Self: Unpin + Sized, + { + AFuture(self) + } + } + + async fn main(mut t: T) -> usize { + t.method().await + } + )) + .check(|_ctrl| {}) +} From 88583b50af79fa52d644757a6df9ab4d7d729f2d Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 17 Jul 2024 16:17:09 +0000 Subject: [PATCH 92/95] Rename test case --- crates/paralegal-flow/tests/async-generics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/paralegal-flow/tests/async-generics.rs b/crates/paralegal-flow/tests/async-generics.rs index 01a6fce954..8aa34849db 100644 --- a/crates/paralegal-flow/tests/async-generics.rs +++ b/crates/paralegal-flow/tests/async-generics.rs @@ -59,7 +59,7 @@ fn await_with_inner_generic() { } #[test] -fn await_with_inner_generic_wrapped() { +fn await_with_inner_generic_constrained() { InlineTestBuilder::new(stringify!( use std::{ future::{Future}, From f8565cd8ad3b45c66a946e5a0c1a442ce77c4fd5 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 17 Jul 2024 16:58:56 +0000 Subject: [PATCH 93/95] Move it to other async test cases --- crates/paralegal-flow/tests/async-generics.rs | 92 ------------------- crates/paralegal-flow/tests/async_tests.rs | 92 +++++++++++++++++++ 2 files changed, 92 insertions(+), 92 deletions(-) delete mode 100644 crates/paralegal-flow/tests/async-generics.rs diff --git a/crates/paralegal-flow/tests/async-generics.rs b/crates/paralegal-flow/tests/async-generics.rs deleted file mode 100644 index 8aa34849db..0000000000 --- a/crates/paralegal-flow/tests/async-generics.rs +++ /dev/null @@ -1,92 +0,0 @@ -use paralegal_flow::test_utils::InlineTestBuilder; - -#[test] -fn await_on_generic() { - InlineTestBuilder::new(stringify!( - use std::{ - future::{Future}, - task::{Context, Poll}, - pin::Pin - }; - struct AFuture; - - impl Future for AFuture { - 
type Output = usize; - fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { - unimplemented!() - } - } - - trait Trait { - fn method(&mut self) -> AFuture; - } - - async fn main(mut t: T) -> usize { - t.method().await - } - )) - .check(|_ctrl| {}) -} - -#[test] -fn await_with_inner_generic() { - InlineTestBuilder::new(stringify!( - use std::{ - future::{Future}, - task::{Context, Poll}, - pin::Pin, - }; - struct AFuture<'a, T: ?Sized>(&'a mut T); - - impl<'a, T> Future for AFuture<'a, T> { - type Output = usize; - fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { - unimplemented!() - } - } - - trait Trait { - fn method(&mut self) -> AFuture<'_, Self> { - AFuture(self) - } - } - - async fn main(mut t: T) -> usize { - t.method().await - } - )) - .check(|_ctrl| {}) -} - -#[test] -fn await_with_inner_generic_constrained() { - InlineTestBuilder::new(stringify!( - use std::{ - future::{Future}, - task::{Context, Poll}, - pin::Pin, - }; - struct AFuture<'a, T: ?Sized>(&'a mut T); - - impl<'a, T: Trait + Unpin + ?Sized> Future for AFuture<'a, T> { - type Output = usize; - fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { - unimplemented!() - } - } - - trait Trait: Send + Unpin + 'static { - fn method(&mut self) -> AFuture<'_, Self> - where - Self: Unpin + Sized, - { - AFuture(self) - } - } - - async fn main(mut t: T) -> usize { - t.method().await - } - )) - .check(|_ctrl| {}) -} diff --git a/crates/paralegal-flow/tests/async_tests.rs b/crates/paralegal-flow/tests/async_tests.rs index 53131d287f..6363f31ced 100644 --- a/crates/paralegal-flow/tests/async_tests.rs +++ b/crates/paralegal-flow/tests/async_tests.rs @@ -259,3 +259,95 @@ define_test!(markers: graph -> { assert!(!output.is_empty()); assert!(input.flows_to_data(&output)); }); + +#[test] +fn await_on_generic() { + InlineTestBuilder::new(stringify!( + use std::{ + future::{Future}, + task::{Context, Poll}, + pin::Pin + }; + struct AFuture; + + impl Future for AFuture { + type Output 
= usize; + fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { + unimplemented!() + } + } + + trait Trait { + fn method(&mut self) -> AFuture; + } + + async fn main(mut t: T) -> usize { + t.method().await + } + )) + .check(|_ctrl| {}) +} + +#[test] +fn await_with_inner_generic() { + InlineTestBuilder::new(stringify!( + use std::{ + future::{Future}, + task::{Context, Poll}, + pin::Pin, + }; + struct AFuture<'a, T: ?Sized>(&'a mut T); + + impl<'a, T> Future for AFuture<'a, T> { + type Output = usize; + fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { + unimplemented!() + } + } + + trait Trait { + fn method(&mut self) -> AFuture<'_, Self> { + AFuture(self) + } + } + + async fn main(mut t: T) -> usize { + t.method().await + } + )) + .check(|_ctrl| {}) +} + +#[test] +#[ignore = "https://github.com/brownsys/paralegal/issues/159"] +fn await_with_inner_generic_constrained() { + InlineTestBuilder::new(stringify!( + use std::{ + future::{Future}, + task::{Context, Poll}, + pin::Pin, + }; + struct AFuture<'a, T: ?Sized>(&'a mut T); + + impl<'a, T: Trait + Unpin + ?Sized> Future for AFuture<'a, T> { + type Output = usize; + fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { + unimplemented!() + } + } + + trait Trait: Send + Unpin + 'static { + fn method(&mut self) -> AFuture<'_, Self> + where + Self: Unpin + Sized, + { + AFuture(self) + } + } + + async fn main(mut t: T) -> usize { + t.method().await + } + )) + .check(|_ctrl| {}) +} From 5ce9ea7a78edc95281a0a90889d7575408d625c9 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 17 Jul 2024 17:12:48 +0000 Subject: [PATCH 94/95] Another test case for async handling issues --- crates/paralegal-policy/tests/misc_async.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/crates/paralegal-policy/tests/misc_async.rs b/crates/paralegal-policy/tests/misc_async.rs index ede82774ea..7cc7a27f62 100644 --- a/crates/paralegal-policy/tests/misc_async.rs +++ 
b/crates/paralegal-policy/tests/misc_async.rs @@ -55,3 +55,20 @@ on_argument = [1] Ok(()) }) } + +#[test] +#[ignore = "https://github.com/brownsys/paralegal/issues/159"] +fn oneshot_channel() -> Result<()> { + let mut test = Test::new(stringify!( + #[paralegal::analyze] + async fn main() { + let (_, receiver) = tokio::sync::oneshot::channel(); + + receiver.await.unwrap() + } + ))?; + + test.with_dep(["tokio", "--features", "sync"]); + + test.run(|_ctx| Ok(())) +} From 504cb5872abe1a832278f47f78aafc628419e15a Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Wed, 17 Jul 2024 20:42:14 +0000 Subject: [PATCH 95/95] Indirect async test case --- crates/paralegal-flow/tests/async_tests.rs | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/crates/paralegal-flow/tests/async_tests.rs b/crates/paralegal-flow/tests/async_tests.rs index 6363f31ced..376c846ed1 100644 --- a/crates/paralegal-flow/tests/async_tests.rs +++ b/crates/paralegal-flow/tests/async_tests.rs @@ -351,3 +351,43 @@ fn await_with_inner_generic_constrained() { )) .check(|_ctrl| {}) } + +#[test] +fn async_through_another_layer() { + InlineTestBuilder::new(stringify!( + async fn maker(x: u32, y: u32) -> u32 { + x + } + + fn get_async(x: u32, y: u32) -> impl std::future::Future<Output = u32> { + maker(y, x) + } + + #[paralegal_flow::marker(source, return)] + fn mark_source<T>(t: T) -> T { + t + } + + #[paralegal_flow::marker(source_2, return)] + fn mark_source_2<T>(t: T) -> T { + t + } + + #[paralegal_flow::marker(sink, arguments = [0])] + fn sink<T>(t: T) {} + + async fn main() { + let src = mark_source(1); + let src2 = mark_source_2(2); + sink(get_async(src, src2).await) + } + )) + .check(|ctrl| { + assert!(!ctrl + .marked(Identifier::new_intern("source")) + .flows_to_any(&ctrl.marked(Identifier::new_intern("sink")))); + assert!(ctrl + .marked(Identifier::new_intern("source_2")) + .flows_to_any(&ctrl.marked(Identifier::new_intern("sink")))); + }) +}