diff --git a/Cargo.lock b/Cargo.lock index e0f83585f7..9e8cc16884 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -838,6 +838,7 @@ dependencies = [ "serde_json", "serial_test", "simple_logger", + "thiserror", "toml", "trait_enum", ] @@ -1243,6 +1244,26 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "thiserror" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "thread_local" version = "1.1.7" diff --git a/crates/paralegal-flow/Cargo.toml b/crates/paralegal-flow/Cargo.toml index b68710570f..1e50164d4a 100644 --- a/crates/paralegal-flow/Cargo.toml +++ b/crates/paralegal-flow/Cargo.toml @@ -49,12 +49,12 @@ serde_json = "1" toml = "0.7" - # This is just for pinning this dependency camino = "= 1.0.9" serial_test = "2.0.0" itertools = "0.11.0" anyhow = "1.0.72" +thiserror = "1" [build-dependencies] chrono = "0.4" diff --git a/crates/paralegal-flow/src/ana/df.rs b/crates/paralegal-flow/src/ana/df.rs index ec71bbd333..b61386296b 100644 --- a/crates/paralegal-flow/src/ana/df.rs +++ b/crates/paralegal-flow/src/ana/df.rs @@ -775,7 +775,7 @@ impl<'a, 'tcx, 'inliner> Analysis<'tcx> for FlowAnalysis<'a, 'tcx, 'inliner> { pub fn compute_flow_internal<'a, 'tcx, 's>( tcx: TyCtxt<'tcx>, - body_id: BodyId, + def_id: DefId, body_with_facts: &'a CachedSimplifedBodyWithFacts<'tcx>, carries_marker: &'s InlineJudge<'tcx>, ) -> FlowResults<'a, 'tcx, 's> { @@ -788,7 +788,6 @@ pub fn compute_flow_internal<'a, 'tcx, 's>( // ); // debug!("{}", body_with_facts.simplified_body().to_string(tcx).unwrap()); - let def_id = tcx.hir().body_owner_def_id(body_id).to_def_id(); let aliases = Aliases::build(tcx, def_id, body_with_facts.body_with_facts()); let location_domain = aliases.location_domain().clone(); diff --git a/crates/paralegal-flow/src/ana/inline/graph.rs b/crates/paralegal-flow/src/ana/inline/graph.rs index 58f20d1676..ee022bf4d3 100644 --- a/crates/paralegal-flow/src/ana/inline/graph.rs +++ b/crates/paralegal-flow/src/ana/inline/graph.rs @@ -1,12 +1,13 @@ use crate::{ ir::{regal, GlobalLocation}, mir, serde, - utils::{time, write_sep, DisplayViaDebug, FnResolution, IntoDefId, TinyBitSet}, - BodyId, Either, HashMap, HashSet, Location, TyCtxt, + utils::{time, write_sep, DisplayViaDebug, FnResolution, TinyBitSet}, + Either, HashMap, HashSet, Location, TyCtxt, }; use super::algebra; +use paralegal_spdg::rustc_portable::DefId; use petgraph::prelude as pg; pub type ArgNum = u32; @@ -275,12 +276,12 @@ impl<'tcx> InlinedGraph<'tcx> { /// Construct the initial graph from a [`regal::Body`] pub fn from_body( - body_id: BodyId, + def_id: DefId, body: ®al::Body<'tcx, DisplayViaDebug>, tcx: TyCtxt<'tcx>, ) -> Self { time("Graph Construction From Regal Body", || { - let equations = to_global_equations(&body.equations, body_id); + let equations = to_global_equations(&body.equations); let mut gwr = InlinedGraph { equations, graph: Default::default(), @@ -301,11 +302,11 @@ impl<'tcx> InlinedGraph<'tcx> { use regal::Target; let from = match d { Target::Call(c) => regal::SimpleLocation::Call(( - GlobalLocation::single(**c, body_id), + GlobalLocation::single(**c, def_id), *call_map.get(c).unwrap_or_else(|| { panic!( "Expected to find call at {c} in function {}", - tcx.def_path_debug_str(body_id.into_def_id(tcx)) + tcx.def_path_debug_str(def_id) ) }), )), @@ -322,7 +323,7 @@ impl<'tcx> InlinedGraph<'tcx> { }; for (&loc, call) in body.calls.iter() { - let n = Node::Call((GlobalLocation::single(*loc, body_id), call.function)); + let n = Node::Call((GlobalLocation::single(*loc, def_id), call.function)); for (idx, deps) in call.arguments.iter().enumerate() { if let Some((_, deps)) = deps { add_dep_edges(n, EdgeType::Data(idx as u32), deps) @@ -341,10 +342,7 @@ impl<'tcx> InlinedGraph<'tcx> { } /// Globalize all locations mentioned in these equations. -fn to_global_equations( - eqs: &Equations>, - _body_id: BodyId, -) -> Equations { +fn to_global_equations(eqs: &Equations>) -> Equations { eqs.iter() .map(|eq| eq.map_bases(|target| GlobalLocal::at_root(**target))) .collect() diff --git a/crates/paralegal-flow/src/ana/inline/mod.rs b/crates/paralegal-flow/src/ana/inline/mod.rs index ffc19fa341..ccedb5da19 100644 --- a/crates/paralegal-flow/src/ana/inline/mod.rs +++ b/crates/paralegal-flow/src/ana/inline/mod.rs @@ -16,7 +16,6 @@ use std::fmt::Write; use crate::{ ana::algebra::{self, Term}, - hir::BodyId, ir::{ flows::CallOnlyFlow, regal::{self, SimpleLocation}, @@ -28,13 +27,13 @@ use crate::{ rustc_target::abi::FieldIdx, ty, utils::{ - body_name_pls, dump_file_pls, time, write_sep, DisplayViaDebug, FnResolution, IntoDefId, - IntoLocalDefId, Print, RecursionBreakingCache, + body_name_pls, dump_file_pls, time, write_sep, DisplayViaDebug, FnResolution, Print, + RecursionBreakingCache, TyCtxtExt, }, AnalysisCtrl, DumpArgs, Either, HashMap, HashSet, MarkerCtx, Symbol, TyCtxt, }; -use rustc_utils::{cache::Cache, mir::borrowck_facts}; +use rustc_utils::cache::Cache; mod graph; mod judge; @@ -178,7 +177,7 @@ impl<'tcx> Inliner<'tcx> { } } -type BodyCache<'tcx> = Cache>>; +type BodyCache<'tcx> = Cache>>; /// Essentially just a bunch of caches of analyses. pub struct Inliner<'tcx> { @@ -187,7 +186,7 @@ pub struct Inliner<'tcx> { /// Memoized graphs that have all their callees inlined. Unlike `base_memo` /// this has to be recursion breaking, since a function may call itself /// (possibly transitively). - inline_memo: RecursionBreakingCache>, + inline_memo: RecursionBreakingCache>, tcx: TyCtxt<'tcx>, ana_ctrl: &'static AnalysisCtrl, dbg_ctrl: &'static DumpArgs, @@ -321,32 +320,22 @@ impl<'tcx> Inliner<'tcx> { /// [`ProcedureGraph::from`] fn get_procedure_graph<'a>( &'a self, - body_id: BodyId, + def_id: DefId, ) -> ®al::Body<'tcx, DisplayViaDebug> { - self.base_memo.get(body_id, |bid| { - regal::compute_from_body_id(self.dbg_ctrl, bid, self.tcx, &self.marker_carrying) + self.base_memo.get(def_id, |_| { + regal::compute_from_def_id(self.dbg_ctrl, def_id, self.tcx, &self.marker_carrying) }) } /// Compute an inlined graph for this `body_id` (memoized) - pub fn get_inlined_graph(&self, body_id: BodyId) -> Option<&InlinedGraph<'tcx>> { - self.inline_memo.get(body_id, |bid| self.inline_graph(bid)) + pub fn get_inlined_graph(&self, def_id: DefId) -> Option<&InlinedGraph<'tcx>> { + self.inline_memo.get(def_id, |bid| self.inline_graph(bid)) } /// Convenience wrapper around [`Self::get_inlined_graph`] fn get_inlined_graph_by_def_id(&self, def_id: LocalDefId) -> Option<&InlinedGraph<'tcx>> { - let hir = self.tcx.hir(); - let body_id = match hir.maybe_body_owned_by(def_id) { - None => { - warn!( - "no body id for {:?}", - self.tcx.def_path_debug_str(def_id.into_def_id(self.tcx)) - ); - return None; - } - Some(b) => b, - }; - self.get_inlined_graph(body_id) + let _hir = self.tcx.hir(); + self.get_inlined_graph(def_id.to_def_id()) } /// Make the set of equations relative to the call site described by `gli` @@ -427,10 +416,8 @@ impl<'tcx> Inliner<'tcx> { } } - fn try_inline_as_async_fn(&self, i_graph: &mut InlinedGraph<'tcx>, body_id: BodyId) -> bool { - let local_def_id = body_id.into_local_def_id(self.tcx); - let body_with_facts = - borrowck_facts::get_simplified_body_with_borrowck_facts(self.tcx, local_def_id); + fn try_inline_as_async_fn(&self, i_graph: &mut InlinedGraph<'tcx>, def_id: DefId) -> bool { + let body_with_facts = self.tcx.body_for_def_id(def_id).unwrap(); let body = body_with_facts.simplified_body(); let num_args = body.args_iter().count(); // XXX This might become invalid if functions other than `async` can create generators @@ -462,7 +449,7 @@ impl<'tcx> Inliner<'tcx> { _ => unreachable!(), }; - let root_location = GlobalLocation::single(return_location, body_id); + let root_location = GlobalLocation::single(return_location, def_id); // Following we must sumilate two code rewrites to the body of this // function to simulate calling the closure. We make the closure @@ -497,11 +484,11 @@ impl<'tcx> Inliner<'tcx> { debug!( "Recognized {} as an async function", - self.tcx.def_path_debug_str(local_def_id.to_def_id()) + self.tcx.def_path_debug_str(def_id) ); self.inline_one_function( i_graph, - body_id, + def_id, closure_fn.expect_local(), &incoming, &outgoing, @@ -523,7 +510,7 @@ impl<'tcx> Inliner<'tcx> { num_inlined, max_call_stack_depth, }: &mut InlinedGraph<'tcx>, - caller_function: BodyId, + caller_function: DefId, inlining_target: LocalDefId, incoming: &[(StdNode<'tcx>, Edge)], outgoing: &[(StdNode<'tcx>, Edge)], @@ -642,11 +629,11 @@ impl<'tcx> Inliner<'tcx> { &self, proc_g: ®al::Body>, i_graph: &mut InlinedGraph<'tcx>, - body_id: BodyId, + def_id: DefId, ) -> EdgeSet<'tcx> { let recursive_analysis_enabled = self.ana_ctrl.use_recursive_analysis(); let mut queue_for_pruning = HashSet::new(); - if recursive_analysis_enabled && self.try_inline_as_async_fn(i_graph, body_id) { + if recursive_analysis_enabled && self.try_inline_as_async_fn(i_graph, def_id) { return queue_for_pruning; }; let targets = i_graph @@ -718,7 +705,7 @@ impl<'tcx> Inliner<'tcx> { .collect::>(); self.inline_one_function( i_graph, - body_id, + def_id, did, &incoming, &outgoing, @@ -779,20 +766,21 @@ impl<'tcx> Inliner<'tcx> { /// In spite of the name of this function it not only inlines the graph but /// also first creates it (with [`Self::get_procedure_graph`]) and globalize /// it ([`to_global_graph`]). - fn inline_graph(&self, body_id: BodyId) -> InlinedGraph<'tcx> { - let proc_g = self.get_procedure_graph(body_id); - let mut gwr = InlinedGraph::from_body(body_id, proc_g, self.tcx); + fn inline_graph(&self, def_id: DefId) -> InlinedGraph<'tcx> { + let local_def_id = def_id.expect_local(); + let proc_g = self.get_procedure_graph(def_id); + let mut gwr = InlinedGraph::from_body(def_id, proc_g, self.tcx); - let name = body_name_pls(self.tcx, body_id).name; + let name = body_name_pls(self.tcx, local_def_id).name; if self.dbg_ctrl.dump_pre_inline_graph() { dump_dot_graph( - dump_file_pls(self.tcx, body_id, "pre-inline.gv").unwrap(), + dump_file_pls(self.tcx, local_def_id, "pre-inline.gv").unwrap(), &gwr, ) .unwrap(); } if self.dbg_ctrl.dump_local_equations() { - let mut eqout = dump_file_pls(self.tcx, body_id, "local.eqs").unwrap(); + let mut eqout = dump_file_pls(self.tcx, local_def_id, "local.eqs").unwrap(); for eq in &gwr.equations { use std::io::Write; writeln!(eqout, "{eq}").unwrap(); @@ -800,7 +788,7 @@ impl<'tcx> Inliner<'tcx> { } let mut queue_for_pruning = time(&format!("Inlining subgraphs into '{name}'"), || { - self.perform_subfunction_inlining(proc_g, &mut gwr, body_id) + self.perform_subfunction_inlining(proc_g, &mut gwr, def_id) }); if self.ana_ctrl.remove_inconsequential_calls().is_enabled() { @@ -808,7 +796,7 @@ impl<'tcx> Inliner<'tcx> { } if self.dbg_ctrl.dump_global_equations() { - let mut eqout = dump_file_pls(self.tcx, body_id, "global.eqs").unwrap(); + let mut eqout = dump_file_pls(self.tcx, local_def_id, "global.eqs").unwrap(); for eq in &gwr.equations { use std::io::Write; writeln!(eqout, "{eq}").unwrap(); @@ -816,7 +804,7 @@ impl<'tcx> Inliner<'tcx> { } if self.dbg_ctrl.dump_inlined_graph() { dump_dot_graph( - dump_file_pls(self.tcx, body_id, "inlined.gv").unwrap(), + dump_file_pls(self.tcx, local_def_id, "inlined.gv").unwrap(), &gwr, ) .unwrap(); @@ -835,15 +823,10 @@ impl<'tcx> Inliner<'tcx> { } queue_for_pruning }; - self.prune_impossible_edges( - &mut gwr, - name, - &edges_to_prune, - body_id.into_local_def_id(self.tcx), - ); + self.prune_impossible_edges(&mut gwr, name, &edges_to_prune, local_def_id); if self.dbg_ctrl.dump_inlined_pruned_graph() { dump_dot_graph( - dump_file_pls(self.tcx, body_id, "inlined-pruned.gv").unwrap(), + dump_file_pls(self.tcx, local_def_id, "inlined-pruned.gv").unwrap(), &gwr, ) .unwrap(); diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index e9b9e34106..5ddf491630 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -11,7 +11,7 @@ use crate::{ use hir::def_id::DefId; use mir::Location; -use rustc_utils::mir::borrowck_facts; +use anyhow::Result; use super::discover::{CallSiteAnnotations, CollectingVisitor, FnToAnalyze}; @@ -27,7 +27,7 @@ impl<'tcx> CollectingVisitor<'tcx> { /// Driver function. Performs the data collection via visit, then calls /// [`Self::analyze`] to construct the Forge friendly description of all /// endpoints. - pub fn run(mut self) -> std::io::Result { + pub fn run(mut self) -> Result { let tcx = self.tcx; tcx.hir().visit_all_item_likes_in_crate(&mut self); //println!("{:?}\n{:?}\n{:?}", self.marked_sinks, self.marked_sources, self.functions_to_analyze); @@ -42,9 +42,8 @@ impl<'tcx> CollectingVisitor<'tcx> { call_site_annotations: &mut CallSiteAnnotations, target: FnToAnalyze, inliner: &inline::Inliner<'tcx>, - ) -> std::io::Result<(Endpoint, Ctrl)> { + ) -> anyhow::Result<(Endpoint, Ctrl)> { let mut flows = Ctrl::default(); - let local_def_id = self.tcx.hir().body_owner_def_id(target.body_id); fn register_call_site( map: &mut CallSiteAnnotations, did: DefId, @@ -57,8 +56,7 @@ impl<'tcx> CollectingVisitor<'tcx> { .or_insert_with(|| ann.iter().flat_map(|a| a.iter()).cloned().collect()); } let tcx = self.tcx; - let controller_body_with_facts = - borrowck_facts::get_simplified_body_with_borrowck_facts(tcx, local_def_id); + let controller_body_with_facts = tcx.body_for_def_id(target.def_id)?; if self.opts.dbg().dump_ctrl_mir() { mir::graphviz::write_mir_fn_graphviz( @@ -74,7 +72,7 @@ impl<'tcx> CollectingVisitor<'tcx> { let flow = { let w = 6; - let i = inliner.get_inlined_graph(target.body_id).unwrap(); + let i = inliner.get_inlined_graph(target.def_id).unwrap(); info!("Graph statistics for {}\n {: CollectingVisitor<'tcx> { ), statement_index: a as usize + 1, }, - target.body_id, + target.def_id, ) }) }; @@ -100,7 +98,7 @@ impl<'tcx> CollectingVisitor<'tcx> { let subtypes = self .marker_ctx .all_type_markers(ty) - .map(|t| t.0.marker) + .map(|t| t.1 .1) .collect::>(); (DataSource::Argument(l.as_usize() - 1), subtypes) }); @@ -136,7 +134,7 @@ impl<'tcx> CollectingVisitor<'tcx> { // We need to make sure to fetch the body again here, because we // might be looking at an inlined location, so the body we operate // on bight not be the `body` we fetched before. - let inner_body_with_facts = tcx.body_for_body_id(inner_body_id); + let inner_body_with_facts = tcx.body_for_def_id(inner_body_id).unwrap(); let inner_body = &inner_body_with_facts.simplified_body(); if !inner_location.is_real(inner_body) { assert!(loc.is_at_root()); @@ -170,7 +168,7 @@ impl<'tcx> CollectingVisitor<'tcx> { } }; let defid = instance.def_id(); - let call_site = CallSite::new(loc, defid, tcx); + let call_site = CallSite::new(loc, defid); // Propagate annotations on the function object to the call site register_call_site( call_site_annotations, @@ -183,7 +181,7 @@ impl<'tcx> CollectingVisitor<'tcx> { let interesting_output_types: HashSet<_> = self .marker_ctx .all_function_markers(instance) - .filter_map(|(_, t)| Some(identifier_for_item(self.tcx, t?.1))) + .filter_map(|(_, t)| Some(t?.1)) .collect(); if !interesting_output_types.is_empty() { flows.types.0.insert( @@ -235,7 +233,7 @@ impl<'tcx> CollectingVisitor<'tcx> { DataSink::Return, ); } - Ok((identifier_for_item(tcx, target.body_id), flows)) + Ok((target.def_id.into_def_id(tcx), flows)) } /// Main analysis driver. Essentially just calls [`Self::handle_target`] @@ -243,8 +241,7 @@ impl<'tcx> CollectingVisitor<'tcx> { /// other setup necessary for the flow graph creation. /// /// Should only be called after the visit. - fn analyze(mut self) -> std::io::Result { - let tcx = self.tcx; + fn analyze(mut self) -> Result { let mut targets = std::mem::take(&mut self.functions_to_analyze); if let LogLevelConfig::Targeted(s) = &*self.opts.debug() { @@ -281,39 +278,99 @@ impl<'tcx> CollectingVisitor<'tcx> { ) }) }) - .collect::>>() - .map(|controllers| ProgramDescription { - controllers, - annotations: self.marker_ctx.local_annotations_found() - .into_iter() - .map(|(k, v)| (k.to_def_id(), v.to_vec())) - .chain(self.marker_ctx.external_annotations().iter().map(|(did, anns)| { - ( - *did, - anns.iter().cloned().map(Annotation::Marker).collect(), - ) - })) - .map(|(did, anns)| { - let def_kind = tcx.def_kind(did); - let obj_type = if def_kind.is_fn_like() { - ObjectType::Function( - tcx.fn_sig(did).skip_binder().skip_binder().inputs().len(), - ) - } else { - // XXX add an actual match here - ObjectType::Type - }; - (identifier_for_item(tcx, did), (anns, obj_type)) - }) - .collect(), - }); - //}); + .collect::>>() + .map(|controllers| self.make_program_description(controllers) + ); info!( "Total number of analyzed functions {}", inliner.cache_size() ); result } + + fn make_program_description(&self, controllers: HashMap) -> ProgramDescription { + let tcx = self.tcx; + let annotations: HashMap, ObjectType)> = self + .marker_ctx + .local_annotations_found() + .into_iter() + .map(|(k, v)| (k.to_def_id(), v.to_vec())) + .chain( + self.marker_ctx + .external_annotations() + .iter() + .map(|(did, anns)| { + (*did, anns.iter().cloned().map(Annotation::Marker).collect()) + }), + ) + .map(|(did, anns)| { + let def_kind = tcx.def_kind(did); + let obj_type = if def_kind.is_fn_like() { + ObjectType::Function(tcx.fn_sig(did).skip_binder().skip_binder().inputs().len()) + } else { + // XXX add an actual match here + ObjectType::Type + }; + (did.into_def_id(tcx), (anns, obj_type)) + }) + .collect(); + let mut known_def_ids = def_ids_from_controllers(&controllers, tcx); + known_def_ids.extend(annotations.keys().copied()); + let def_info = known_def_ids + .into_iter() + .map(|id| (id, def_info_for_item(id, tcx))) + .collect(); + ProgramDescription { + controllers, + annotations, + def_info, + } + } +} + +fn def_info_for_item(id: DefId, tcx: TyCtxt) -> DefInfo { + use hir::def; + let name = crate::utils::identifier_for_item(tcx, id); + let kind = match tcx.def_kind(id) { + kind if kind.is_fn_like() => DefKind::Function, + def::DefKind::Struct + | def::DefKind::AssocTy + | def::DefKind::OpaqueTy + | def::DefKind::TyAlias + | def::DefKind::Enum => DefKind::Type, + _ => unreachable!("{}", tcx.def_path_debug_str(id)), + }; + let def_path = tcx.def_path(id); + let path = std::iter::once(Identifier::new(tcx.crate_name(def_path.krate))) + .chain(def_path.data.iter().filter_map(|segment| { + use hir::definitions::DefPathDataName::*; + match segment.data.name() { + Named(sym) => Some(Identifier::new(sym)), + Anon { .. } => None, + } + })) + .collect(); + DefInfo { name, path, kind } +} + +fn def_ids_from_controllers(map: &HashMap, tcx: TyCtxt) -> HashSet { + map.iter() + .flat_map(|(id, ctrl)| { + let from_dataflow = ctrl.data_flow.iter().flat_map(|(from, to)| { + from.as_function_call() + .into_iter() + .chain(to.iter().filter_map(|sink| sink.as_argument().map(|t| t.0))) + }); + let from_ctrl_flow = ctrl + .ctrl_flow + .iter() + .flat_map(|(from, to)| from.as_function_call().into_iter().chain(to.iter())); + std::iter::once(*id).chain(from_dataflow.chain(from_ctrl_flow).flat_map(|cs| { + std::iter::once(cs.function) + .chain(cs.location.iter().map(|loc| loc.function.into_def_id(tcx))) + })) + }) + .collect() } /// A higher order function that increases the logging level if the `target` diff --git a/crates/paralegal-flow/src/ann_parse.rs b/crates/paralegal-flow/src/ann_parse.rs index ce4ac0eeb0..cbef4842a5 100644 --- a/crates/paralegal-flow/src/ann_parse.rs +++ b/crates/paralegal-flow/src/ann_parse.rs @@ -225,18 +225,15 @@ pub(crate) fn otype_ann_match(ann: &ast::AttrArgs, tcx: TyCtxt) -> Vec>(); - utils::identifier_for_item( - tcx, - utils::resolve::def_path_res(tcx, &segment_vec) - .unwrap_or_else(|err| { - panic!( - "Could not resolve {}: {err:?}", - Print(|f| write_sep(f, "::", &segment_vec, |elem, f| f - .write_str(elem))) - ) - }) - .def_id(), - ) + utils::resolve::def_path_res(tcx, &segment_vec) + .unwrap_or_else(|err| { + panic!( + "Could not resolve {}: {err:?}", + Print(|f| write_sep(f, "::", &segment_vec, |elem, f| f + .write_str(elem))) + ) + }) + .def_id() }) .collect() } diff --git a/crates/paralegal-flow/src/dbg.rs b/crates/paralegal-flow/src/dbg.rs index 3eb5d361ac..435cbd9c16 100644 --- a/crates/paralegal-flow/src/dbg.rs +++ b/crates/paralegal-flow/src/dbg.rs @@ -9,12 +9,12 @@ //! as [TyCtxt]) to get contextual information that is used to make the output //! more useful. use flowistry::indexed::IndexedDomain; -use paralegal_spdg::Identifier; +use paralegal_spdg::{rustc_portable::DefId, Identifier}; use crate::{ ir::CallOnlyFlow, rust::{mir, TyCtxt}, - utils::body_name_pls, + utils::{body_name_pls, TyCtxtExt}, HashMap, HashSet, }; extern crate dot; @@ -72,7 +72,7 @@ pub mod call_only_flow_dot { ir::{CallOnlyFlow, GlobalLocation, GlobalLocationS}, rust::mir::{Statement, StatementKind}, rust::TyCtxt, - utils::{identifier_for_item, AsFnAndArgs, DfppBodyExt, LocationExt}, + utils::{unique_identifier_for_item, AsFnAndArgs, DfppBodyExt, LocationExt, TyCtxtExt}, Either, }; @@ -188,11 +188,7 @@ pub mod call_only_flow_dot { } else { return dot::LabelText::LabelStr("return".into()); }; - let body_with_facts = - rustc_utils::mir::borrowck_facts::get_simplified_body_with_borrowck_facts( - self.tcx, - self.tcx.hir().body_owner_def_id(body_id), - ); + let body_with_facts = self.tcx.body_for_def_id(body_id).unwrap(); let body = &body_with_facts.simplified_body(); let write_label = |s: &mut String| -> std::fmt::Result { write!(s, "{{B{}:{}", loc.block.as_usize(), loc.statement_index)?; @@ -228,7 +224,7 @@ pub mod call_only_flow_dot { match stmt { Either::Right(term) => { if let Ok((fun, args, _)) = term.as_fn_and_args(self.tcx) { - let fun_name = identifier_for_item(self.tcx, fun); + let fun_name = unique_identifier_for_item(self.tcx, fun); write!(s, "{{{{")?; for (i, arg) in args.iter().enumerate() { write!(s, "", i)?; @@ -337,19 +333,15 @@ pub fn write_non_transitive_graph_and_body( ) }) .map(|l| l.innermost_function()) - .collect::>() + .collect::>() .into_iter() .map(|bid| { ( bid, ( - Identifier::new(body_name_pls(tcx, bid).name), + Identifier::new(body_name_pls(tcx, bid.expect_local()).name), BodyProxy::from_body_with_normalize( - rustc_utils::mir::borrowck_facts::get_simplified_body_with_borrowck_facts( - tcx, - tcx.hir().body_owner_def_id(bid), - ) - .simplified_body(), + tcx.body_for_def_id(bid).unwrap().simplified_body(), tcx, ), ), diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index 0a6f09b7fc..0e78724a81 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -43,7 +43,7 @@ pub struct CollectingVisitor<'tcx> { /// [`CollectingVisitor::handle_target`]. pub struct FnToAnalyze { pub name: Ident, - pub body_id: BodyId, + pub def_id: DefId, } impl FnToAnalyze { @@ -99,7 +99,7 @@ impl<'tcx> intravisit::Visitor<'tcx> for CollectingVisitor<'tcx> { { self.functions_to_analyze.push(FnToAnalyze { name: *name, - body_id, + def_id: id.to_def_id(), }); } _ => (), diff --git a/crates/paralegal-flow/src/frg.rs b/crates/paralegal-flow/src/frg.rs index af3b91ed36..3f6abd5d31 100644 --- a/crates/paralegal-flow/src/frg.rs +++ b/crates/paralegal-flow/src/frg.rs @@ -8,7 +8,8 @@ extern crate pretty; use std::hash::Hash; -use crate::{HashSet, ModelCtrl, TyCtxt}; +use crate::{utils::unique_identifier_for_item, HashSet, ModelCtrl, TyCtxt}; +use paralegal_spdg::{rustc_portable::DefId, ShortHash}; use pretty::{DocAllocator, DocBuilder, Pretty}; use crate::desc::{ @@ -116,12 +117,12 @@ where } /// A serialization trait for Forge. -pub trait ToForge<'a, A, D> +pub trait ToForge<'a, 'tcx, A, D> where A: 'a, D: ?std::marker::Sized + DocAllocator<'a, A>, { - fn build_forge(self, alloc: &'a D) -> DocBuilder<'a, D, A>; + fn build_forge(self, tcx: TyCtxt<'tcx>, alloc: &'a D) -> DocBuilder<'a, D, A>; } lazy_static! { @@ -136,8 +137,8 @@ lazy_static! { .collect(); } -impl<'a, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, A, D> for Identifier { - fn build_forge(self, alloc: &'a D) -> DocBuilder<'a, D, A> { +impl<'a, 'tcx, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, 'tcx, A, D> for Identifier { + fn build_forge(self, _tcx: TyCtxt<'tcx>, alloc: &'a D) -> DocBuilder<'a, D, A> { alloc .text("`") .append(alloc.text(if FORGE_RESERVED_SYMBOLS.contains(&self) { @@ -148,38 +149,60 @@ impl<'a, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, A, D> for Identifier { } } -impl<'a, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, A, D> for &'a Identifier { - fn build_forge(self, alloc: &'a D) -> DocBuilder<'a, D, A> { - (*self).build_forge(alloc) +impl<'a, 'tcx, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, 'tcx, A, D> for &'a Identifier { + fn build_forge(self, tcx: TyCtxt<'tcx>, alloc: &'a D) -> DocBuilder<'a, D, A> { + (*self).build_forge(tcx, alloc) } } -impl<'a, A: 'a + Clone, D: DocAllocator<'a, A>, X, Y> ToForge<'a, A, D> for &'a Relation +impl<'a, 'tcx, A: 'a + Clone, D: DocAllocator<'a, A>, X, Y> ToForge<'a, 'tcx, A, D> + for &'a Relation where D::Doc: Clone, - &'a X: ToForge<'a, A, D>, - &'a Y: ToForge<'a, A, D>, + &'a X: ToForge<'a, 'tcx, A, D>, + &'a Y: ToForge<'a, 'tcx, A, D>, { - fn build_forge(self, alloc: &'a D) -> DocBuilder<'a, D, A> { + fn build_forge(self, tcx: TyCtxt<'tcx>, alloc: &'a D) -> DocBuilder<'a, D, A> { alloc.forge_relation(self.0.iter().map(|(src, sinks)| { ( - std::iter::once(src.build_forge(alloc)), - sinks.iter().map(|sink| sink.build_forge(alloc)), + std::iter::once(src.build_forge(tcx, alloc)), + sinks.iter().map(|sink| sink.build_forge(tcx, alloc)), ) })) } } -impl<'a, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, A, D> for &'a str { - fn build_forge(self, alloc: &'a D) -> DocBuilder<'a, D, A> { +impl<'a, 'tcx, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, 'tcx, A, D> for &'a str { + fn build_forge(self, _tcx: TyCtxt<'tcx>, alloc: &'a D) -> DocBuilder<'a, D, A> { alloc.text(self) } } +impl<'a, 'tcx, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, 'tcx, A, D> for DefId { + fn build_forge(self, tcx: TyCtxt<'tcx>, alloc: &'a D) -> DocBuilder<'a, D, A> { + unique_identifier_for_item(tcx, self).build_forge(tcx, alloc) + } +} + +impl<'a, 'tcx, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, 'tcx, A, D> for &'a DefId { + fn build_forge(self, tcx: TyCtxt<'tcx>, alloc: &'a D) -> DocBuilder<'a, D, A> { + (*self).build_forge(tcx, alloc) + } +} + +pub fn call_site_to_string(tcx: TyCtxt, cs: &CallSite) -> String { + format!( + "cs_{}_{}", + unique_identifier_for_item(tcx, cs.function), + ShortHash::new(cs.location), + ) +} + /// Basically a decomposed version of `ToForge` for `DataSink` for the case /// where you have its constituent parts but not a `DataSink`. -fn data_sink_as_forge<'b, A, D: DocAllocator<'b, A>>( +fn data_sink_as_forge<'b, 'tcx, A, D: DocAllocator<'b, A>>( alloc: &'b D, + tcx: TyCtxt<'tcx>, function: &'b CallSite, arg_slot: usize, ) -> DocBuilder<'b, D, A> { @@ -187,58 +210,60 @@ fn data_sink_as_forge<'b, A, D: DocAllocator<'b, A>>( .text("`arg") .append(alloc.as_string(arg_slot)) .append(alloc.text("_")) - .append(function.to_string()) + .append(call_site_to_string(tcx, function)) } -impl<'a, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, A, D> for &'a DataSink { - fn build_forge(self, alloc: &'a D) -> DocBuilder<'a, D, A> { +impl<'a, 'tcx, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, 'tcx, A, D> for &'a DataSink { + fn build_forge(self, tcx: TyCtxt<'tcx>, alloc: &'a D) -> DocBuilder<'a, D, A> { match self { DataSink::Return => alloc.text("`Return"), DataSink::Argument { function, arg_slot } => { - data_sink_as_forge(alloc, function, *arg_slot) + data_sink_as_forge(alloc, tcx, function, *arg_slot) } } } } -impl<'a, A: 'a, D: DocAllocator<'a, A>, T> ToForge<'a, A, D> for &'a HashSet +impl<'a, 'tcx, A: 'a, D: DocAllocator<'a, A>, T> ToForge<'a, 'tcx, A, D> for &'a HashSet where - &'a T: ToForge<'a, A, D>, + &'a T: ToForge<'a, 'tcx, A, D>, { - fn build_forge(self, alloc: &'a D) -> DocBuilder<'a, D, A> { + fn build_forge(self, tcx: TyCtxt<'tcx>, alloc: &'a D) -> DocBuilder<'a, D, A> { if self.is_empty() { alloc.text("none") } else { - alloc.intersperse(self.iter().map(|w| w.build_forge(alloc)), "+") + alloc.intersperse(self.iter().map(|w| w.build_forge(tcx, alloc)), "+") } } } -fn call_site_as_forge<'b, A, D: DocAllocator<'b, A>>( +fn call_site_as_forge<'b, 'tcx, A, D: DocAllocator<'b, A>>( + tcx: TyCtxt<'tcx>, alloc: &'b D, function: &'b CallSite, ) -> DocBuilder<'b, D, A> { - alloc.text("`").append(function.to_string()) + alloc.text("`").append(call_site_to_string(tcx, function)) } -impl<'a, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, A, D> for &'a CallSite { - fn build_forge(self, alloc: &'a D) -> DocBuilder<'a, D, A> { - call_site_as_forge(alloc, self) +impl<'a, 'tcx, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, 'tcx, A, D> for &'a CallSite { + fn build_forge(self, tcx: TyCtxt<'tcx>, alloc: &'a D) -> DocBuilder<'a, D, A> { + call_site_as_forge(tcx, alloc, self) } } -fn data_source_as_forge<'b, A, D: DocAllocator<'b, A>>( +fn data_source_as_forge<'b, 'tcx, A, D: DocAllocator<'b, A>>( src: &'b DataSource, + tcx: TyCtxt<'tcx>, alloc: &'b D, - ctrl: Identifier, + ctrl: DefId, ) -> DocBuilder<'b, D, A> { match src { - DataSource::FunctionCall(f) => call_site_as_forge(alloc, f), + DataSource::FunctionCall(f) => call_site_as_forge(tcx, alloc, f), DataSource::Argument(a) => FormalParameter { function: ctrl, position: *a as u16, } - .build_forge(alloc), + .build_forge(tcx, alloc), } } @@ -418,25 +443,30 @@ where /// Emits `(h[0] + h[1] + ...)` but also handles the empty set correctly (by /// emitting `none`). -fn hash_set_into_forge<'a, A: 'a, D: DocAllocator<'a, A>, T: ToForge<'a, A, D>>( +fn hash_set_into_forge<'a, 'tcx, A: 'a, D: DocAllocator<'a, A>, T: ToForge<'a, 'tcx, A, D>>( h: HashSet, + tcx: TyCtxt<'tcx>, alloc: &'a D, ) -> DocBuilder<'a, D, A> { if h.is_empty() { alloc.text("none") } else { - alloc.intersperse(h.into_iter().map(|w| w.build_forge(alloc)), "+") + alloc.intersperse(h.into_iter().map(|w| w.build_forge(tcx, alloc)), "+") } } -fn hash_set_into_call_site_forge<'a, A: 'a, D: DocAllocator<'a, A>>( +fn hash_set_into_call_site_forge<'a, 'tcx, A: 'a, D: DocAllocator<'a, A>>( h: HashSet<&'a CallSite>, + tcx: TyCtxt<'tcx>, alloc: &'a D, ) -> DocBuilder<'a, D, A> { if h.is_empty() { alloc.text("none") } else { - alloc.intersperse(h.into_iter().map(|w| call_site_as_forge(alloc, w)), "+") + alloc.intersperse( + h.into_iter().map(|w| call_site_as_forge(tcx, alloc, w)), + "+", + ) } } @@ -485,109 +515,43 @@ where #[derive(Hash, PartialEq, Eq)] pub struct FormalParameter { - function: Identifier, + function: DefId, position: u16, } -impl<'a, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, A, D> for FormalParameter { - fn build_forge(self, alloc: &'a D) -> DocBuilder<'a, D, A> { +impl<'a, 'tcx, A: 'a, D: DocAllocator<'a, A>> ToForge<'a, 'tcx, A, D> for FormalParameter { + fn build_forge(self, tcx: TyCtxt<'tcx>, alloc: &'a D) -> DocBuilder<'a, D, A> { alloc .text("`fp") .append(alloc.as_string(self.position)) .append("_") - .append(self.function.as_str().to_string()) + .append( + unique_identifier_for_item(tcx, self.function) + .as_str() + .to_string(), + ) } } -pub trait ProgramDescriptionExt { - fn used_labels(&self) -> HashSet; - - fn all_formal_parameters(&self) -> HashSet; - - fn make_label_sigs<'a, A: Clone + 'a, D: DocAllocator<'a, A>>( - &self, - alloc: &'a D, - ) -> DocBuilder<'a, D, A> - where - D::Doc: Clone; - - fn all_types(&self) -> HashSet<&Identifier>; - - fn make_labels_relation<'a, A: Clone + 'a, D: DocAllocator<'a, A>>( - &'a self, - alloc: &'a D, - ) -> DocBuilder<'a, D, A> - where - D::Doc: Clone; - - fn make_callsite_argument_relation<'a, A: 'a + Clone, D: DocAllocator<'a, A>>( - &'a self, - alloc: &'a D, - ) -> DocBuilder<'a, D, A> - where - D::Doc: Clone; - - fn make_return_func_relation<'a, A: Clone + 'a, D: DocAllocator<'a, A>>( - &'a self, - alloc: &'a D, - ) -> DocBuilder<'a, D, A> - where - D::Doc: Clone; - - fn make_otype_relation<'a, A: 'a + Clone, D: DocAllocator<'a, A>>( - &'a self, - alloc: &'a D, - ) -> DocBuilder<'a, D, A> - where - D::Doc: Clone; - - fn make_formal_param_relation<'a, A: 'a + Clone, D: DocAllocator<'a, A>>( - &'a self, - alloc: &'a D, - ) -> DocBuilder<'a, D, A> - where - D::Doc: Clone; - - fn make_types_relation<'a, A: 'a, D: DocAllocator<'a, A>>( - &'a self, - alloc: &'a D, - version: Version, - ) -> DocBuilder<'a, D, A> - where - D::Doc: Clone, - A: Clone; - - fn make_flow<'a, A: 'a + Clone, D: DocAllocator<'a, A>>( - &'a self, - alloc: &'a D, - version: Version, - ) -> DocBuilder - where - D::Doc: Clone; +pub struct ForgeConverter<'tcx> { + description: ProgramDescription, + tcx: TyCtxt<'tcx>, +} - fn make_ctrl_flow<'a, A: 'a + Clone, D: DocAllocator<'a, A>>( - &'a self, - alloc: &'a D, - version: Version, - ) -> DocBuilder - where - D::Doc: Clone; +impl<'tcx> ForgeConverter<'tcx> { + pub fn desc(&self) -> &ProgramDescription { + &self.description + } - fn serialize_forge<'a, A: 'a + Clone, D: DocAllocator<'a, A>>( - &'a self, - alloc: &'a D, - _tcx: TyCtxt, - model_ctrl: &ModelCtrl, - ) -> DocBuilder<'a, D, A> - where - D::Doc: Clone; -} + pub fn new(description: ProgramDescription, tcx: TyCtxt<'tcx>) -> Self { + Self { description, tcx } + } -impl ProgramDescriptionExt for ProgramDescription { /// Returns all labels in this program description, including the special /// [`name::EXCEPTIONS_LABEL`] label. fn used_labels(&self) -> HashSet { - self.annotations + self.description + .annotations .values() .flat_map(|v| v.0.iter()) .filter_map(Annotation::as_marker) @@ -599,7 +563,8 @@ impl ProgramDescriptionExt for ProgramDescription { } fn all_formal_parameters(&self) -> HashSet { - self.annotations + self.description + .annotations .iter() .flat_map(|(function, (_, oj))| { if let ObjectType::Function(num) = oj { @@ -615,18 +580,23 @@ impl ProgramDescriptionExt for ProgramDescription { }) }) }) - .chain(self.controllers.iter().flat_map(|(&function, ctrl)| { - ctrl.data_flow - .0 - .keys() - .chain(ctrl.types.0.keys()) - .chain(ctrl.ctrl_flow.0.keys()) - .filter_map(|src| src.as_argument()) - .map(move |position| FormalParameter { - function, - position: position as u16, - }) - })) + .chain( + self.description + .controllers + .iter() + .flat_map(|(&function, ctrl)| { + ctrl.data_flow + .0 + .keys() + .chain(ctrl.types.0.keys()) + .chain(ctrl.ctrl_flow.0.keys()) + .filter_map(|src| src.as_argument()) + .map(move |position| FormalParameter { + function, + position: position as u16, + }) + }), + ) .collect() } @@ -646,11 +616,12 @@ impl ProgramDescriptionExt for ProgramDescription { } /// Returns all types mentioned in this program description. - fn all_types(&self) -> HashSet<&Identifier> { - self.annotations + fn all_types(&self) -> HashSet { + self.description + .annotations .iter() .filter(|t| t.1 .1 == ObjectType::Type) - .map(|t| t.0) + .map(|t| *t.0) .collect() } @@ -661,79 +632,89 @@ impl ProgramDescriptionExt for ProgramDescription { where D::Doc: Clone, { + let tcx = self.tcx; alloc - .forge_relation(self.annotations.iter().flat_map(|(id, (anns, _typ))| { - // I've decided to do the more - // complicated thing here which - // restores the old behavior. Is - // this the right thing to do? - // Maybe, maybe not. Only time will - // tell. - // - // The "old behavior" I am restoring is that if there is a label - // `l` on function `f`, then what is actually emitted into forge - // is `cs1_f->l + cs2_f->l`, i.e. the label is being attached to - // each call site of the function in addition to the function - // itself. - // Part of why we choose this behavior is because there is no - // call-site-independent representation for arguments, so the - // label has to be attached to the call site argument. - anns.iter().filter_map(Annotation::as_marker).map(move |a| { - ( - if a.refinement.on_return() { - Some(self.all_sources_with_ctrl().into_iter().filter(|(_, s)| { - s.as_function_call().map_or(false, |c| &c.function == id) - })) - } else { - None - } - .into_iter() - .flatten() - .map(|(ctrl, ds)| data_source_as_forge(ds, alloc, ctrl)) - .chain( - self.all_sinks() - .into_iter() - .filter(|s| { - matches!( - s, - DataSink::Argument{function, arg_slot} if - &function.function == id - && a.refinement - .on_argument() - .is_set(*arg_slot as u32) + .forge_relation( + self.description + .annotations + .iter() + .flat_map(|(id, (anns, _typ))| { + // I've decided to do the more + // complicated thing here which + // restores the old behavior. Is + // this the right thing to do? + // Maybe, maybe not. Only time will + // tell. + // + // The "old behavior" I am restoring is that if there is a label + // `l` on function `f`, then what is actually emitted into forge + // is `cs1_f->l + cs2_f->l`, i.e. the label is being attached to + // each call site of the function in addition to the function + // itself. + // Part of why we choose this behavior is because there is no + // call-site-independent representation for arguments, so the + // label has to be attached to the call site argument. + anns.iter().filter_map(Annotation::as_marker).map(move |a| { + ( + if a.refinement.on_return() { + Some( + self.description + .all_sources_with_ctrl() + .into_iter() + .filter(|(_, s)| { + s.as_function_call() + .map_or(false, |c| &c.function == id) + }), ) - }) - .map(|s| s.build_forge(alloc)), - ) - .chain([id.build_forge(alloc)]) - .chain( - a.refinement - .on_argument() - .into_iter_set_in_domain() - .map(|slot| { - FormalParameter { - function: *id, - position: slot as u16, - } - .build_forge(alloc) - }), - ) - // This is necessary because otherwise captured variables escape - .collect::>() - .into_iter(), - std::iter::once(a.marker.build_forge(alloc)), - ) - }) - })) + } else { + None + } + .into_iter() + .flatten() + .map(|(ctrl, ds)| data_source_as_forge(ds, tcx, alloc, ctrl)) + .chain( + self.description + .all_sinks() + .into_iter() + .filter(|s| { + matches!( + s, + DataSink::Argument{function, arg_slot} if + &function.function == id + && a.refinement + .on_argument() + .is_set(*arg_slot as u32) + ) + }) + .map(|s| s.build_forge(tcx, alloc)), + ) + .chain([id.build_forge(tcx, alloc)]) + .chain(a.refinement.on_argument().into_iter_set_in_domain().map( + |slot| { + FormalParameter { + function: *id, + position: slot as u16, + } + .build_forge(tcx, alloc) + }, + )) + // This is necessary because otherwise captured variables escape + .collect::>() + .into_iter(), + std::iter::once(a.marker.build_forge(tcx, alloc)), + ) + }) + }), + ) .append(" +") .append(alloc.hardline()) .append( - alloc.forge_relation(self.annotations.iter().map(|(id, (anns, _))| { + alloc.forge_relation(self.description.annotations.iter().map(|(id, (anns, _))| { ( anns.iter() .filter_map(Annotation::as_exception) .next() - .map(|_| id.build_forge(alloc)) + .map(|_| id.build_forge(tcx, alloc)) .into_iter(), std::iter::once(alloc.text(name::EXCEPTIONS_LABEL)), ) @@ -748,11 +729,12 @@ impl ProgramDescriptionExt for ProgramDescription { where D::Doc: Clone, { - alloc.forge_relation(self.all_sinks().into_iter().filter_map(|src| { + let tcx = self.tcx; + alloc.forge_relation(self.description.all_sinks().into_iter().filter_map(|src| { src.as_argument().map(|(function, _)| { ( - std::iter::once(src.build_forge(alloc)), - std::iter::once(call_site_as_forge(alloc, function)), + std::iter::once(src.build_forge(tcx, alloc)), + std::iter::once(call_site_as_forge(tcx, alloc, function)), ) }) })) @@ -765,10 +747,11 @@ impl ProgramDescriptionExt for ProgramDescription { where D::Doc: Clone, { - alloc.forge_relation(self.all_call_sites().into_iter().map(|src| { + let tcx = self.tcx; + alloc.forge_relation(self.description.all_call_sites().into_iter().map(|src| { ( - std::iter::once(call_site_as_forge(alloc, src)), - std::iter::once((&src.function).build_forge(alloc)), + std::iter::once(call_site_as_forge(tcx, alloc, src)), + std::iter::once((&src.function).build_forge(tcx, alloc)), ) })) } @@ -780,13 +763,13 @@ impl ProgramDescriptionExt for ProgramDescription { where D::Doc: Clone, { - alloc.forge_relation(self.annotations.iter().map(|(o, (anns, _))| { + let tcx = self.tcx; + alloc.forge_relation(self.description.annotations.iter().map(|(o, (anns, _))| { ( - std::iter::once(o.build_forge(alloc)), + std::iter::once(o.build_forge(tcx, alloc)), anns.iter() .filter_map(Annotation::as_otype) - .flat_map(|v| v.iter()) - .map(|t| t.build_forge(alloc)), + .map(|t| t.build_forge(tcx, alloc)), ) })) } @@ -797,10 +780,11 @@ impl ProgramDescriptionExt for ProgramDescription { where D::Doc: Clone, { + let tcx = self.tcx; alloc.forge_relation(self.all_formal_parameters().into_iter().map(|p| { - let fn_forge = p.function.build_forge(alloc); + let fn_forge = p.function.build_forge(tcx, alloc); ( - std::iter::once(p.build_forge(alloc)), + std::iter::once(p.build_forge(tcx, alloc)), std::iter::once(fn_forge), ) })) @@ -815,19 +799,22 @@ impl ProgramDescriptionExt for ProgramDescription { D::Doc: Clone, A: Clone, { + let tcx = self.tcx; match version { Version::V1 => alloc.forge_relation_with_arity( 3, - self.controllers.iter().map(|(e, ctrl)| { + self.description.controllers.iter().map(|(e, ctrl)| { ( - std::iter::once(e.build_forge(alloc)), + std::iter::once(e.build_forge(tcx, alloc)), std::iter::once( alloc.hardline().append( alloc .forge_relation(ctrl.types.0.iter().map(|(i, desc)| { ( - std::iter::once(data_source_as_forge(i, alloc, *e)), - desc.iter().map(|t| t.build_forge(alloc)), + std::iter::once(data_source_as_forge( + i, tcx, alloc, *e, + )), + desc.iter().map(|t| t.build_forge(tcx, alloc)), ) })) .indent(4), @@ -836,14 +823,16 @@ impl ProgramDescriptionExt for ProgramDescription { ) }), ), - Version::V2 => alloc.forge_relation(self.controllers.iter().flat_map(|(e, ctrl)| { - ctrl.types.0.iter().map(|(i, desc)| { - ( - std::iter::once(data_source_as_forge(i, alloc, *e)), - desc.iter().map(|t| t.build_forge(alloc)), - ) - }) - })), + Version::V2 => { + alloc.forge_relation(self.description.controllers.iter().flat_map(|(e, ctrl)| { + ctrl.types.0.iter().map(|(i, desc)| { + ( + std::iter::once(data_source_as_forge(i, tcx, alloc, *e)), + desc.iter().map(|t| t.build_forge(tcx, alloc)), + ) + }) + })) + } } } @@ -855,12 +844,13 @@ impl ProgramDescriptionExt for ProgramDescription { where D::Doc: Clone, { + let tcx = self.tcx; match version { Version::V1 => alloc.forge_relation_with_arity( 3, - self.controllers.iter().map(|(e, ctrl)| { + self.description.controllers.iter().map(|(e, ctrl)| { ( - std::iter::once(e.build_forge(alloc)), + std::iter::once(e.build_forge(tcx, alloc)), std::iter::once( alloc.hardline().append( //(&ctrl.data_flow).as_forge(alloc) @@ -869,9 +859,9 @@ impl ProgramDescriptionExt for ProgramDescription { |(source, sinks)| { ( std::iter::once(data_source_as_forge( - source, alloc, *e, + source, tcx, alloc, *e, )), - sinks.iter().map(|snk| snk.build_forge(alloc)), + sinks.iter().map(|snk| snk.build_forge(tcx, alloc)), ) }, )) @@ -881,14 +871,16 @@ impl ProgramDescriptionExt for ProgramDescription { ) }), ), - Version::V2 => alloc.forge_relation(self.controllers.iter().flat_map(|(e, ctrl)| { - ctrl.data_flow.0.iter().map(|(src, snks)| { - ( - std::iter::once(data_source_as_forge(src, alloc, *e)), - snks.iter().map(|snk| snk.build_forge(alloc)), - ) - }) - })), + Version::V2 => { + alloc.forge_relation(self.description.controllers.iter().flat_map(|(e, ctrl)| { + ctrl.data_flow.0.iter().map(|(src, snks)| { + ( + std::iter::once(data_source_as_forge(src, tcx, alloc, *e)), + snks.iter().map(|snk| snk.build_forge(tcx, alloc)), + ) + }) + })) + } } } @@ -900,21 +892,24 @@ impl ProgramDescriptionExt for ProgramDescription { where D::Doc: Clone, { + let tcx = self.tcx; match version { Version::V1 => alloc.forge_relation_with_arity( 3, - self.controllers.iter().map(|(e, ctrl)| { + self.description.controllers.iter().map(|(e, ctrl)| { ( - std::iter::once(e.build_forge(alloc)), + std::iter::once(e.build_forge(tcx, alloc)), std::iter::once( alloc.hardline().append( (alloc.forge_relation(ctrl.ctrl_flow.0.iter().map( |(src, sinks)| { ( - std::iter::once(data_source_as_forge(src, alloc, *e)), + std::iter::once(data_source_as_forge( + src, tcx, alloc, *e, + )), sinks .iter() - .map(|sink| call_site_as_forge(alloc, sink)), + .map(|sink| call_site_as_forge(tcx, alloc, sink)), ) }, ))) @@ -924,26 +919,28 @@ impl ProgramDescriptionExt for ProgramDescription { ) }), ), - Version::V2 => alloc.forge_relation(self.controllers.iter().flat_map(|(e, ctrl)| { - ctrl.ctrl_flow.0.iter().map(|(src, snks)| { - ( - std::iter::once(data_source_as_forge(src, alloc, *e)), - snks.iter().map(|snk| call_site_as_forge(alloc, snk)), - ) - }) - })), + Version::V2 => { + alloc.forge_relation(self.description.controllers.iter().flat_map(|(e, ctrl)| { + ctrl.ctrl_flow.0.iter().map(|(src, snks)| { + ( + std::iter::once(data_source_as_forge(src, tcx, alloc, *e)), + snks.iter().map(|snk| call_site_as_forge(tcx, alloc, snk)), + ) + }) + })) + } } } - fn serialize_forge<'a, A: 'a + Clone, D: DocAllocator<'a, A>>( + pub fn serialize_forge<'a, A: 'a + Clone, D: DocAllocator<'a, A>>( &'a self, alloc: &'a D, - _tcx: TyCtxt, model_ctrl: &ModelCtrl, ) -> DocBuilder<'a, D, A> where D::Doc: Clone, { + let tcx = self.tcx; let version = model_ctrl.model_version(); alloc.lines([ if model_ctrl.skip_sigs() { @@ -989,15 +986,15 @@ impl ProgramDescriptionExt for ProgramDescription { alloc .text(name::LABEL) .append(" = ") - .append(hash_set_into_forge(self.used_labels(), alloc)), + .append(hash_set_into_forge(self.used_labels(), tcx, alloc)), alloc.lines(self.used_labels().into_iter().map(|l| { alloc .text(l.as_str().to_owned()) .append(" = ") - .append(l.build_forge(alloc)) + .append(l.build_forge(tcx, alloc)) })), alloc.text(name::CALL_SITE).append(" = ").append( - hash_set_into_call_site_forge(self.all_call_sites(), alloc), + hash_set_into_call_site_forge(self.description.all_call_sites(), tcx, alloc), ), // alloc.text(name::INPUT_ARGUMENT).append(" = ").append( // hash_set_into_forge( @@ -1010,7 +1007,7 @@ impl ProgramDescriptionExt for ProgramDescription { alloc .text(name::FORMAL_PARAMETER) .append(" = ") - .append(hash_set_into_forge(self.all_formal_parameters(), alloc)), + .append(hash_set_into_forge(self.all_formal_parameters(), tcx, alloc)), alloc .text(name::SRC) @@ -1019,18 +1016,20 @@ impl ProgramDescriptionExt for ProgramDescription { [name::FORMAL_PARAMETER, name::CALL_SITE] .into_iter() .collect::>(), + tcx, alloc, )), alloc.text(name::RETURN).append(" = `").append(name::RETURN), alloc.text(name::CALL_ARGUMENT).append(" = ").append( hash_set_into_forge( - self.all_sinks() + self.description.all_sinks() .iter() .filter_map(|ds| match ds { DataSink::Argument { .. } => Some(*ds), _ => None, }) .collect(), + tcx, alloc, ), ), @@ -1041,23 +1040,24 @@ impl ProgramDescriptionExt for ProgramDescription { [name::CALL_ARGUMENT, name::RETURN] .into_iter() .collect::>(), + tcx, alloc, )), alloc .text(name::TYPE) .append(" = ") - .append(hash_set_into_forge(self.all_types(), alloc)), + .append(hash_set_into_forge(self.all_types(), tcx, alloc)), alloc .text(name::CTRL) .append(" = ") .append(hash_set_into_forge( - self.controllers.keys().collect::>(), + self.description.controllers.keys().collect::>(), tcx, alloc, )), alloc .text(name::FUNCTION) .append(" = ") - .append(hash_set_into_forge(self.all_functions(), alloc)) + .append(hash_set_into_forge(self.description.all_functions(), tcx, alloc)) .append(" + ") .append(name::CTRL), alloc @@ -1072,6 +1072,7 @@ impl ProgramDescriptionExt for ProgramDescription { ] .into_iter() .collect::>(), + tcx, alloc, )), alloc.nil(), @@ -1143,14 +1144,14 @@ impl ProgramDescriptionExt for ProgramDescription { alloc.hardline() .append( alloc.forge_relation_with_arity(3, - self.annotations.iter() + self.description.annotations.iter() .flat_map(|(ident, (anns, _))| anns.iter().filter_map(Annotation::as_marker) .flat_map(|label| label.refinement.on_argument().into_iter_set_in_domain().map(|i| ( - std::iter::once(FormalParameter { position: i as u16, function: *ident }.build_forge(alloc)), - std::iter::once(ident.build_forge(alloc).append("->").append(label.marker.as_str()))))) + std::iter::once(FormalParameter { position: i as u16, function: *ident }.build_forge(tcx, alloc)), + std::iter::once(ident.build_forge(tcx, alloc).append("->").append(label.marker.as_str()))))) ) ) .indent(4) @@ -1162,7 +1163,7 @@ impl ProgramDescriptionExt for ProgramDescription { alloc.text(name::CTRL_CALLS).append(" = ").append( alloc.hardline().append( alloc.forge_relation_with_arity(2, - self.controllers.iter().map(|(ctrl, data)| { + self.description.controllers.iter().map(|(ctrl, data)| { let call_sites = data.ctrl_flow.0.iter().flat_map(|(from, to)| from.as_function_call().into_iter().chain(to) ).chain( @@ -1173,8 +1174,8 @@ impl ProgramDescriptionExt for ProgramDescription { ) ).collect::>(); - (std::iter::once(ctrl.build_forge(alloc)), - call_sites.into_iter().map(|cs| cs.build_forge(alloc)) + (std::iter::once(ctrl.build_forge(tcx, alloc)), + call_sites.into_iter().map(|cs| cs.build_forge(tcx, alloc)) ) }) ) diff --git a/crates/paralegal-flow/src/ir/regal.rs b/crates/paralegal-flow/src/ir/regal.rs index 8186f76cca..f1834af5db 100644 --- a/crates/paralegal-flow/src/ir/regal.rs +++ b/crates/paralegal-flow/src/ir/regal.rs @@ -2,10 +2,8 @@ use flowistry::indexed::{ impls::{build_location_arg_domain, LocationOrArg}, IndexedDomain, }; -use rustc_utils::{ - mir::{borrowck_facts, control_dependencies::ControlDependencies}, - BodyExt, -}; +use paralegal_spdg::rustc_portable::DefId; +use rustc_utils::{mir::control_dependencies::ControlDependencies, BodyExt}; use crate::{ ana::{ @@ -15,13 +13,10 @@ use crate::{ }, hir::def_id::LocalDefId, mir::{self, BasicBlock, Field, HasLocalDecls, Location}, - rust::{ - rustc_ast, rustc_hir::BodyId, rustc_index::bit_set::HybridBitSet, - rustc_index::vec::IndexVec, - }, + rust::{rustc_ast, rustc_index::bit_set::HybridBitSet, rustc_index::vec::IndexVec}, utils::{ body_name_pls, dump_file_pls, time, write_sep, AsFnAndArgs, AsFnAndArgsErr, - DisplayViaDebug, FnResolution, IntoLocalDefId, + DisplayViaDebug, FnResolution, TyCtxtExt, }, DumpArgs, Either, HashMap, HashSet, TyCtxt, }; @@ -556,30 +551,29 @@ fn recursive_ctrl_deps< dependencies } -pub fn compute_from_body_id<'tcx>( +pub fn compute_from_def_id<'tcx>( dbg_opts: &DumpArgs, - body_id: BodyId, + def_id: DefId, tcx: TyCtxt<'tcx>, carries_marker: &InlineJudge<'tcx>, ) -> Body<'tcx, DisplayViaDebug> { - let local_def_id = body_id.into_local_def_id(tcx); - info!("Analyzing function {}", body_name_pls(tcx, body_id)); - let body_with_facts = - borrowck_facts::get_simplified_body_with_borrowck_facts(tcx, local_def_id); + let local_def_id = def_id.expect_local(); + info!("Analyzing function {}", body_name_pls(tcx, local_def_id)); + let body_with_facts = tcx.body_for_def_id(def_id).unwrap(); let body = body_with_facts.simplified_body(); - let flow = df::compute_flow_internal(tcx, body_id, body_with_facts, carries_marker); + let flow = df::compute_flow_internal(tcx, def_id, body_with_facts, carries_marker); if dbg_opts.dump_callee_mir() { mir::pretty::write_mir_fn( tcx, body, &mut |_, _| Ok(()), - &mut dump_file_pls(tcx, body_id, "mir").unwrap(), + &mut dump_file_pls(tcx, local_def_id, "mir").unwrap(), ) .unwrap(); } if dbg_opts.dump_dataflow_analysis_result() { use std::io::Write; - let states_out = &mut dump_file_pls(tcx, body_id, "df").unwrap(); + let states_out = &mut dump_file_pls(tcx, local_def_id, "df").unwrap(); for l in body.all_locations() { writeln!(states_out, "{l:?}: {}", flow.state_at(l)).unwrap(); } @@ -594,7 +588,7 @@ pub fn compute_from_body_id<'tcx>( ); let r = Body::construct(flow, equations, tcx, local_def_id, body_with_facts); if dbg_opts.dump_regal_ir() { - let mut out = dump_file_pls(tcx, body_id, "regal").unwrap(); + let mut out = dump_file_pls(tcx, local_def_id, "regal").unwrap(); use std::io::Write; write!(&mut out, "{}", r).unwrap(); } diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 4ec6a1620a..1756a6eb24 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -104,7 +104,10 @@ pub use paralegal_spdg as desc; pub use args::{AnalysisCtrl, Args, DumpArgs, ModelCtrl}; -use crate::{frg::ProgramDescriptionExt, utils::outfile_pls}; +use crate::{ + frg::{call_site_to_string, ForgeConverter}, + utils::outfile_pls, +}; pub use crate::marker_db::MarkerCtx; @@ -185,7 +188,8 @@ impl rustc_driver::Callbacks for Callbacks { .with_extension("ana.frg"); let mut outf = outfile_pls(&result_path)?; let doc_alloc = pretty::BoxAllocator; - let doc: DocBuilder<_, ()> = desc.serialize_forge(&doc_alloc, tcx, self.opts.modelctrl()); + let converter = ForgeConverter::new(desc, tcx); + let doc: DocBuilder<_, ()> = converter.serialize_forge(&doc_alloc, self.opts.modelctrl()); doc.render(100, &mut outf)?; let mut outf_2 = outfile_pls(self.opts.result_path())?; doc.render(100, &mut outf_2)?; @@ -193,7 +197,7 @@ impl rustc_driver::Callbacks for Callbacks { let info_path = compiler.build_output_filenames(compiler.session(), &[]) .with_extension("info.json"); let info = AdditionalInfo { - call_sites: desc.all_call_sites().into_iter().map(|cs| (cs.to_string(), cs.clone())).collect() + call_sites: converter.desc().all_call_sites().into_iter().map(|cs| (call_site_to_string(tcx, cs), cs.clone())).collect() }; serde_json::to_writer(outfile_pls(info_path)?, &info)?; @@ -201,7 +205,7 @@ impl rustc_driver::Callbacks for Callbacks { serde_json::to_writer(outfile_pls(info_path2)?, &info)?; warn!("Due to potential overwrite issues with --result-path (with multiple targets in a crate) outputs were written to {} and {}", self.opts.result_path().display(), &result_path.display()); - Ok::<_, std::io::Error>( + anyhow::Ok( if self.opts.abort_after_analysis() { rustc_driver::Compilation::Stop } else { diff --git a/crates/paralegal-flow/src/marker_db.rs b/crates/paralegal-flow/src/marker_db.rs index baa53e9e58..c246c08b5b 100644 --- a/crates/paralegal-flow/src/marker_db.rs +++ b/crates/paralegal-flow/src/marker_db.rs @@ -6,12 +6,12 @@ use crate::{ args::Args, consts, desc::{Annotation, MarkerAnnotation}, - hir, mir, ty, + mir, ty, utils::{ - AsFnAndArgs, FnResolution, GenericArgExt, IntoBodyId, IntoDefId, IntoHirId, MetaItemMatch, - TyCtxtExt, TyExt, + AsFnAndArgs, FnResolution, GenericArgExt, IntoDefId, IntoHirId, MetaItemMatch, TyCtxtExt, + TyExt, }, - BodyId, DefId, HashMap, LocalDefId, TyCtxt, + DefId, HashMap, LocalDefId, TyCtxt, }; use rustc_utils::cache::{Cache, CopyCache}; @@ -132,26 +132,32 @@ impl<'tcx> MarkerCtx<'tcx> { /// /// XXX Does not take into account reachable type markers pub fn marker_is_reachable(&self, def_id: DefId) -> bool { - self.is_marked(def_id) - || def_id.as_local().map_or(false, |ldid| { - force_into_body_id(self.tcx(), ldid).map_or(false, |body_id| { - self.has_transitive_reachable_markers(body_id) - }) - }) + self.is_marked(def_id) || self.has_transitive_reachable_markers(def_id) } /// Queries the transitive marker cache. - fn has_transitive_reachable_markers(&self, body_id: BodyId) -> bool { + fn has_transitive_reachable_markers(&self, def_id: DefId) -> bool { self.db() .marker_reachable_cache - .get_maybe_recursive(body_id, |_| self.compute_marker_reachable(body_id)) + .get_maybe_recursive(def_id, |_| self.compute_marker_reachable(def_id)) .unwrap_or(false) } /// If the transitive marker cache did not contain the answer, this is what /// computes it. - fn compute_marker_reachable(&self, body_id: BodyId) -> bool { - let body = self.tcx().body_for_body_id(body_id).simplified_body(); + fn compute_marker_reachable(&self, def_id: DefId) -> bool { + let body = match self.tcx().body_for_def_id(def_id) { + Ok(body) => body, + Err(e) => { + warn!( + "Marker reachability for {} was asked but is unknown ({})", + self.tcx().def_path_debug_str(def_id), + e + ); + return false; + } + } + .simplified_body(); body.basic_blocks .iter() .any(|bbdat| self.terminator_carries_marker(&body.local_decls, bbdat.terminator())) @@ -187,30 +193,23 @@ impl<'tcx> MarkerCtx<'tcx> { let tcx = self.tcx(); let hir = tcx.hir(); let id = def_id.force_into_hir_id(tcx); - let sink_matches = hir - .attrs(id) - .iter() - .filter_map(|a| { - a.match_extract(&consts::MARKER_MARKER, |i| { - Annotation::Marker(crate::ann_parse::ann_match_fn(i)) - }).or_else(|| - a.match_extract(&consts::LABEL_MARKER, |i| { - warn!("The `paralegal_flow::label` annotation is deprecated, use `paralegal_flow::marker` instead"); - Annotation::Marker(crate::ann_parse::ann_match_fn(i)) - }) - ) - .or_else(|| { - a.match_extract(&consts::OTYPE_MARKER, |i| { - Annotation::OType(crate::ann_parse::otype_ann_match(i, tcx)) - }) - }) - .or_else(|| { - a.match_extract(&consts::EXCEPTION_MARKER, |i| { - Annotation::Exception(crate::ann_parse::match_exception(i)) - }) - }) - }) - .collect::>(); + let mut sink_matches = vec![]; + for a in hir.attrs(id) { + if let Some(i) = a.match_get_ref(&consts::MARKER_MARKER) { + sink_matches.push(Annotation::Marker(crate::ann_parse::ann_match_fn(i))); + } else if let Some(i) = a.match_get_ref(&consts::LABEL_MARKER) { + warn!("The `paralegal_flow::label` annotation is deprecated, use `paralegal_flow::marker` instead"); + sink_matches.push(Annotation::Marker(crate::ann_parse::ann_match_fn(i))) + } else if let Some(i) = a.match_get_ref(&consts::OTYPE_MARKER) { + sink_matches.extend( + crate::ann_parse::otype_ann_match(i, tcx) + .into_iter() + .map(Annotation::OType), + ); + } else if let Some(i) = a.match_get_ref(&consts::EXCEPTION_MARKER) { + sink_matches.push(Annotation::Exception(crate::ann_parse::match_exception(i))); + } + } if sink_matches.is_empty() { return None; } @@ -219,6 +218,10 @@ impl<'tcx> MarkerCtx<'tcx> { } /// All the markers applied to this type and its subtypes. + /// + /// Returns `(ann, (ty, did))` tuples which are the marker annotation `ann`, + /// the specific type `ty` that it was applied to and the `did` [`Defid`] of + /// that type that was used to look up the annotations. pub fn all_type_markers<'a>( &'a self, ty: ty::Ty<'tcx>, @@ -245,22 +248,6 @@ impl<'tcx> MarkerCtx<'tcx> { } } -fn force_into_body_id(tcx: TyCtxt, defid: LocalDefId) -> Option { - defid.into_body_id(tcx).or_else(|| { - let kind = tcx.def_kind(defid); - let name = tcx.def_path_debug_str(defid.to_def_id()); - if matches!(kind, hir::def::DefKind::AssocFn) { - warn!( - "Inline elision and inling for associated functions is not yet implemented {}", - name - ); - None - } else { - panic!("Could not get a body id for {name}, def kind {kind:?}") - } - }) -} - /// We expect most local items won't have annotations. This structure is much /// smaller (8 bytes) than without the `Box` (24 Bytes). #[allow(clippy::box_collection)] @@ -274,7 +261,7 @@ struct MarkerDatabase<'tcx> { local_annotations: Cache, external_annotations: ExternalMarkers, /// Cache whether markers are reachable transitively. - marker_reachable_cache: CopyCache, + marker_reachable_cache: CopyCache, } impl<'tcx> MarkerDatabase<'tcx> { diff --git a/crates/paralegal-flow/src/serializers.rs b/crates/paralegal-flow/src/serializers.rs index 699cd78f5a..416c09d6f3 100644 --- a/crates/paralegal-flow/src/serializers.rs +++ b/crates/paralegal-flow/src/serializers.rs @@ -13,11 +13,11 @@ //! Some types (such as [`mir::Body`]) first have to be explicitly transformed //! into the respective proxy type. In the case of [`mir::Body`] this can be //! done with [`BodyProxy::from_body_with_normalize`] -use paralegal_spdg::Identifier; +use paralegal_spdg::{rustc_portable::DefId, Identifier}; use serde::Deserialize; use crate::{ - hir, mir, + mir, rust::TyCtxt, serde::{Serialize, Serializer}, utils::{extract_places, read_places_with_provenance, DfppBodyExt}, @@ -118,17 +118,6 @@ impl BodyProxy { } } -/// This exists because of serde's restrictions on how you derive serializers. -/// [`BodyIdProxy`] can be used to serialize a [`BodyId`](hir::BodyId) but if -/// the [`BodyId`](hir::BodyId) is used as e.g. a key in a map or in a vector it -/// does not dispatch to the remote impl on [`BodyIdProxy`]. Implementing the -/// serializers for the map or vector by hand is annoying so instead you can map -/// over the datastructure, wrap each [`BodyId`](hir::BodyId) in this proxy type -/// and then dispatch to the `serialize` impl for the reconstructed data -/// structure. -#[derive(Serialize, Deserialize)] -pub struct BodyIdProxy2(#[serde(with = "paralegal_spdg::rustc_proxies::BodyId")] pub hir::BodyId); - pub mod serde_map_via_vec { //! Serialize a [`HashMap`] by converting it to a [`Vec`], lifting //! restrictions on the types of permissible keys. @@ -180,10 +169,21 @@ pub mod serde_map_via_vec { } } +/// This exists because of serde's restrictions on how you derive serializers. +/// [`BodyIdProxy`] can be used to serialize a [`BodyId`](hir::BodyId) but if +/// the [`BodyId`](hir::BodyId) is used as e.g. a key in a map or in a vector it +/// does not dispatch to the remote impl on [`BodyIdProxy`]. Implementing the +/// serializers for the map or vector by hand is annoying so instead you can map +/// over the datastructure, wrap each [`BodyId`](hir::BodyId) in this proxy type +/// and then dispatch to the `serialize` impl for the reconstructed data +/// structure. +#[derive(Serialize, Deserialize)] +pub struct BodyIdProxy2(#[serde(with = "paralegal_spdg::rustc_proxies::DefId")] pub DefId); + /// A serializable version of [`mir::Body`]s, mapped to their [`hir::BodyId`] so /// that you can resolve the body belonging to a global location (see /// [`IsGlobalLocation::function`]). -pub struct Bodies(pub HashMap); +pub struct Bodies(pub HashMap); impl Serialize for Bodies { fn serialize(&self, serializer: S) -> Result diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index 7ac324bda4..7e39fc2ef2 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -10,11 +10,13 @@ use crate::{ utils::outfile_pls, HashSet, Symbol, }; -use hir::BodyId; + +use paralegal_spdg::{rustc_portable::DefId, DefInfo}; use rustc_middle::mir; use either::Either; +use itertools::Itertools; use std::borrow::Cow; use std::io::prelude::*; use std::path::Path; @@ -513,7 +515,7 @@ impl G { ctrl_name: Identifier::new(s), } } - pub fn ctrl(&self) -> BodyId { + pub fn ctrl(&self) -> DefId { *self .body .0 @@ -522,7 +524,7 @@ impl G { .unwrap() } /// Get the `n`th argument for this `bid` body. - pub fn argument(&self, bid: BodyId, n: usize) -> mir::Location { + pub fn argument(&self, bid: DefId, n: usize) -> mir::Location { let body = &self.body.0[&bid]; body.1 .0 @@ -536,42 +538,43 @@ impl G { pub trait HasGraph<'g>: Sized + Copy { fn graph(self) -> &'g PreFrg; - fn function(self, name: &str) -> FnRef<'g> { + fn function(self, name: impl AsRef) -> FnRef<'g> { + let name = Identifier::new_intern(name.as_ref()); + let id = match self.graph().name_map[&name].as_slice() { + [one] => *one, + [] => panic!("Did not find name {name}"), + _ => panic!("Found too many function matching name {name}"), + }; FnRef { graph: self.graph(), - ident: Identifier::new_intern(name), + ident: id, } } + fn info_for(self, id: DefId) -> &'g DefInfo { + &self.graph().desc.def_info[&id] + } + fn ctrl(self, name: &str) -> CtrlRef<'g> { let ident = Identifier::new_intern(name); CtrlRef { graph: self.graph(), ident, - ctrl: &self.graph().0.controllers[&self.ctrl_hashed(name)], + ctrl: &self.graph().desc.controllers[&self.ctrl_hashed(name)], } } - fn ctrl_hashed(self, name: &str) -> Identifier { - match self - .graph() - .0 - .controllers - .iter() - .filter(|(id, _)| id.as_str().starts_with(name) && id.as_str().len() == name.len() + 7) - .map(|i| i.0) - .collect::>() - .as_slice() - { + fn ctrl_hashed(self, name: &str) -> DefId { + match self.graph().name_map[&Identifier::new_intern(name)].as_slice() { [] => panic!("Could not find controller '{name}'"), - [ctrl] => **ctrl, + [ctrl] => *ctrl, more => panic!("Too many matching controllers, found candidates: {more:?}"), } } fn has_marker(self, marker: &str) -> bool { let marker = Identifier::new_intern(marker); - self.graph().0.annotations.values().any(|v| { + self.graph().desc.annotations.values().any(|v| { v.0.iter() .filter_map(|a| a.as_marker()) .any(|m| m.marker == marker) @@ -579,11 +582,14 @@ pub trait HasGraph<'g>: Sized + Copy { } fn annotations(self) -> &'g AnnotationMap { - &self.graph().0.annotations + &self.graph().desc.annotations } } -pub struct PreFrg(pub ProgramDescription); +pub struct PreFrg { + pub desc: ProgramDescription, + pub name_map: crate::HashMap>, +} impl<'g> HasGraph<'g> for &'g PreFrg { fn graph(self) -> &'g PreFrg { @@ -594,16 +600,17 @@ impl<'g> HasGraph<'g> for &'g PreFrg { impl PreFrg { pub fn from_file_at(dir: &str) -> Self { use_rustc(|| { - Self( - serde_json::from_reader( - &mut std::fs::File::open(format!( - "{dir}/{}", - crate::consts::FLOW_GRAPH_OUT_NAME - )) + let desc: ProgramDescription = serde_json::from_reader( + &mut std::fs::File::open(format!("{dir}/{}", crate::consts::FLOW_GRAPH_OUT_NAME)) .unwrap(), - ) - .unwrap(), ) + .unwrap(); + let name_map = desc + .def_info + .iter() + .map(|(def_id, info)| (info.name, *def_id)) + .into_group_map(); + Self { desc, name_map } }) } } @@ -679,12 +686,7 @@ impl<'g> CtrlRef<'g> { ctrl: Cow::Borrowed(self), }), ) - .filter(|ref_| { - ref_.call_site - .function - .as_str() - .starts_with(ref_.function.ident.as_str()) - }) + .filter(|ref_| ref_.call_site.function == fun.ident) .collect(); all.dedup_by_key(|r| r.call_site); all @@ -695,7 +697,7 @@ impl<'g> CtrlRef<'g> { assert!( cs.len() == 1, "expected only one call site for {}, found {}", - fun.ident, + fun.info_for(fun.ident).name, cs.len() ); cs.pop().unwrap() @@ -714,7 +716,7 @@ impl<'g> HasGraph<'g> for &FnRef<'g> { pub struct FnRef<'g> { graph: &'g PreFrg, - ident: Identifier, + ident: DefId, } impl<'g> FnRef<'g> { diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index c82b22e952..a2a1ee5825 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -1,6 +1,7 @@ //! Utility functions, general purpose structs and extension traits extern crate smallvec; +use thiserror::Error; use paralegal_spdg::{CallSite, DataSource}; use smallvec::SmallVec; @@ -58,16 +59,20 @@ pub trait MetaItemMatch { /// functions see the source for /// [`match_exception`](crate::ann_parse::match_exception) or /// [`ann_match_fn`](crate::ann_parse::ann_match_fn). - fn match_extract A>(&self, path: &[Symbol], parse: F) -> Option; + fn match_extract A>(&self, path: &[Symbol], parse: F) -> Option { + self.match_get_ref(path).map(parse) + } /// Check that this attribute matches the provided path. All attribute /// payload is ignored (i.e. no error if there is a payload). fn matches_path(&self, path: &[Symbol]) -> bool { - self.match_extract(path, |_| ()).is_some() + self.match_get_ref(path).is_some() } + + fn match_get_ref(&self, path: &[Symbol]) -> Option<&ast::AttrArgs>; } impl MetaItemMatch for ast::Attribute { - fn match_extract A>(&self, path: &[Symbol], parse: F) -> Option { + fn match_get_ref(&self, path: &[Symbol]) -> Option<&ast::AttrArgs> { match &self.kind { ast::AttrKind::Normal(normal) => match &normal.item { ast::AttrItem { @@ -81,7 +86,7 @@ impl MetaItemMatch for ast::Attribute { .zip(path) .all(|(seg, i)| seg.ident.name == *i) => { - Some(parse(args)) + Some(args) } _ => None, }, @@ -310,12 +315,17 @@ impl<'tcx> AsFnAndArgs<'tcx> for mir::Terminator<'tcx> { } } -#[derive(Debug)] +#[derive(Debug, Error)] pub enum AsFnAndArgsErr<'tcx> { + #[error("not a constant")] NotAConstant, + #[error("is not a function type: {0:?}")] NotFunctionType(ty::TyKind<'tcx>), + #[error("is not a `Val` constant: {0}")] NotValueLevelConstant(ty::Const<'tcx>), + #[error("terminator is not a `Call`")] NotAFunctionCall, + #[error("function instance could not be resolved")] InstanceResolutionErr, } @@ -475,9 +485,7 @@ impl<'hir> NodeExt<'hir> for hir::Node<'hir> { } } -/// Old version of [`places_read`](../fn.places_read.html) and -/// [`places_read_with_provenance`](../fn.places_read_with_provenance.html). -/// Should be considered deprecated. +/// Old version of [`places_read`], should be considered deprecated. pub fn extract_places<'tcx>( l: mir::Location, body: &mir::Body<'tcx>, @@ -776,10 +784,37 @@ impl IntoHirId for LocalDefId { } } -/// Creates an `Identifier` for this `HirId` +/// Get a reasonable, but not guaranteed unique name for this item pub fn identifier_for_item(tcx: TyCtxt, did: D) -> Identifier { + // TODO Make a generic version instead of just copying `unique_identifier_for_item` let did = did.into_def_id(tcx); let get_parent = || identifier_for_item(tcx, tcx.parent(did)); + Identifier::new_intern( + &tcx.opt_item_name(did) + .map(|n| n.to_string()) + .or_else(|| { + use hir::def::DefKind::*; + match tcx.def_kind(did) { + OpaqueTy => Some("Opaque".to_string()), + Closure => Some(format!("{}_closure", get_parent())), + Generator => Some(format!("{}_generator", get_parent())), + _ => None, + } + }) + .unwrap_or_else(|| { + panic!( + "Could not name {} {:?}", + tcx.def_path_debug_str(did), + tcx.def_kind(did) + ) + }), + ) +} + +/// Creates an `Identifier` for this `HirId` +pub fn unique_identifier_for_item(tcx: TyCtxt, did: D) -> Identifier { + let did = did.into_def_id(tcx); + let get_parent = || unique_identifier_for_item(tcx, tcx.parent(did)); Identifier::new_intern(&format!( "{}_{}", tcx.opt_item_name(did) @@ -802,28 +837,49 @@ pub fn identifier_for_item(tcx: TyCtxt, did: D) -> I )) } +#[derive(Error, Debug)] +pub enum BodyResolutionError { + #[error("not a function-like object")] + /// The provided id did not refer to a function-like object. + NotAFunction, + #[error("body not available")] + /// The provided id refers to an external entity and we have no access to + /// its body + External, +} + /// Extension trait for [`TyCtxt`] pub trait TyCtxtExt<'tcx> { - /// Resolve this [`BodyId`] to its actual body. Returns + /// Resolve this [`DefId`] to a body. Returns /// [`BodyWithBorrowckFacts`](crate::rust::rustc_borrowck::BodyWithBorrowckFacts), /// because it internally uses flowistry's body resolution - /// ([`flowistry::mir::borrowck_facts::get_body_with_borrowck_facts`]) which + /// ([`rustc_utils::mir::borrowck_facts::get_body_with_borrowck_facts`]) which /// memoizes its results so this is actually a cheap query. - fn body_for_body_id( + /// + /// Returns `None` if the id does not refer to a function or if its body is + /// unavailable. + fn body_for_def_id( self, - b: BodyId, - ) -> &'tcx rustc_utils::mir::borrowck_facts::CachedSimplifedBodyWithFacts<'tcx>; + def_id: DefId, + ) -> Result< + &'tcx rustc_utils::mir::borrowck_facts::CachedSimplifedBodyWithFacts<'tcx>, + BodyResolutionError, + >; } impl<'tcx> TyCtxtExt<'tcx> for TyCtxt<'tcx> { - fn body_for_body_id( + fn body_for_def_id( self, - b: BodyId, - ) -> &'tcx rustc_utils::mir::borrowck_facts::CachedSimplifedBodyWithFacts<'tcx> { - rustc_utils::mir::borrowck_facts::get_simplified_body_with_borrowck_facts( - self, - self.hir().body_owner_def_id(b), - ) + def_id: DefId, + ) -> Result< + &'tcx rustc_utils::mir::borrowck_facts::CachedSimplifedBodyWithFacts<'tcx>, + BodyResolutionError, + > { + let def_id = def_id.as_local().ok_or(BodyResolutionError::External)?; + if !self.def_kind(def_id).is_fn_like() { + return Err(BodyResolutionError::NotAFunction); + } + Ok(rustc_utils::mir::borrowck_facts::get_simplified_body_with_borrowck_facts(self, def_id)) } } @@ -1028,14 +1084,14 @@ impl IntoBodyId for DefId { } pub trait CallSiteExt { - fn new(loc: &GlobalLocation, function: DefId, tcx: TyCtxt<'_>) -> Self; + fn new(loc: &GlobalLocation, function: DefId) -> Self; } impl CallSiteExt for CallSite { - fn new(location: &GlobalLocation, function: DefId, tcx: TyCtxt<'_>) -> Self { + fn new(location: &GlobalLocation, function: DefId) -> Self { Self { location: *location, - function: identifier_for_item(tcx, function), + function, } } } @@ -1056,18 +1112,18 @@ pub fn data_source_from_global_location bool>( DataSource::Argument(loc.outermost_location().statement_index - 1) } else { let terminator = - tcx.body_for_body_id(dep_fun) + tcx.body_for_def_id(dep_fun) + .unwrap() .simplified_body() .maybe_stmt_at(dep_loc) .unwrap_or_else(|e| - panic!("Could not convert {loc} to data source with body {}. is at root: {}, is real: {}. Reason: {e:?}", body_name_pls(tcx, dep_fun), loc.is_at_root(), is_real_location) + panic!("Could not convert {loc} to data source with body {}. is at root: {}, is real: {}. Reason: {e:?}", body_name_pls(tcx, dep_fun.expect_local()), loc.is_at_root(), is_real_location) ) .right() .expect("not a terminator"); DataSource::FunctionCall(CallSite::new( &loc, terminator.as_fn_and_args(tcx).unwrap().0, - tcx, )) } } diff --git a/crates/paralegal-flow/tests/marker_tests.rs b/crates/paralegal-flow/tests/marker_tests.rs index 1b3663ede1..52efbb0e15 100644 --- a/crates/paralegal-flow/tests/marker_tests.rs +++ b/crates/paralegal-flow/tests/marker_tests.rs @@ -25,5 +25,5 @@ define_test!(use_wrapper: ctrl -> { let cs = ctrl.call_site(&uwf); assert!(ctrl.types_for(¶legal_flow::desc::DataSource::FunctionCall(cs.call_site().clone())) .expect("Type not found on method") - .iter().any(|t| t.as_str().strip_prefix("Wrapper").is_some())) + .iter().any(|t| ctrl.graph().desc.def_info[t].name.as_str() == "Wrapper")) }); diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index 891076cc62..850bc9f49e 100644 --- a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -5,7 +5,9 @@ use paralegal_spdg::{ MarkerAnnotation, MarkerRefinement, ProgramDescription, }; -use anyhow::{anyhow, ensure, Result}; +pub use paralegal_spdg::rustc_portable::DefId; + +use anyhow::{anyhow, bail, ensure, Result}; use indexical::ToIndex; use itertools::Itertools; @@ -18,8 +20,13 @@ pub use crate::diagnostics::DiagnosticMessage; /// User-defined PDG markers. pub type Marker = Identifier; -type MarkerIndex = HashMap>; -type FlowsTo = HashMap; +/// The type identifying a controller +pub type ControllerId = DefId; +/// The type identifying a function that is used in call sites. +pub type FunctionId = DefId; + +type MarkerIndex = HashMap>; +type FlowsTo = HashMap; /// Check the condition and emit a [`Context::error`] if it fails. #[macro_export] @@ -64,6 +71,7 @@ pub struct Context { desc: ProgramDescription, flows_to: FlowsTo, diagnostics: Diagnostics, + name_map: HashMap>, } impl Context { @@ -71,14 +79,68 @@ impl Context { /// /// This also precomputes some data structures like an index over markers. pub fn new(desc: ProgramDescription) -> Self { + let name_map = desc + .def_info + .iter() + .map(|(k, v)| (v.name, *k)) + .into_group_map(); Context { marker_to_ids: Self::build_index_on_markers(&desc), flows_to: Self::build_flows_to(&desc), desc, diagnostics: Default::default(), + name_map, + } + } + + /// Find a type, controller or function id by its name. + /// + /// Since many often share the same name this can fail with too many + /// candidates. To handle such cases use [`Self::find_by_path`] or + /// [`Self::find_all_by_name`]. + pub fn find_by_name(&self, name: impl AsRef) -> Result { + let name = name.as_ref(); + match self.find_all_by_name(name)? { + [one] => Ok(*one), + [] => bail!("Impossible, group cannot be empty ({name})"), + _other => bail!("Too many candidates for name '{name}'"), } } + /// Find all types, controllers and functions with this name. + pub fn find_all_by_name(&self, name: impl AsRef) -> Result<&[DefId]> { + let name = Identifier::new_intern(name.as_ref()); + self.name_map + .get(&name) + .ok_or_else(|| anyhow!("Did not know the name {name}")) + .map(Vec::as_slice) + } + + /// Find a type, controller or function with this path. + pub fn find_by_path(&self, path: impl AsRef<[Identifier]>) -> Result { + let slc = path.as_ref(); + let (name, path) = slc + .split_last() + .ok_or_else(|| anyhow!("Path must be at least of length 1"))?; + let matching = self + .name_map + .get(name) + .ok_or_else(|| anyhow!("Did not know the name {name}"))?; + for candidate in matching.iter() { + if self + .desc() + .def_info + .get(candidate) + .ok_or_else(|| anyhow!("Impossible"))? + .path + == path + { + return Ok(*candidate); + } + } + Err(anyhow!("Found no candidate matching the path.")) + } + /// Dispatch and drain all queued diagnostics, aborts the program if any of /// them demand failure. pub fn emit_diagnostics(&self, w: impl Write) -> Result<()> { @@ -122,7 +184,7 @@ impl Context { } /// Returns true if `src` has a data-flow to `sink` in the controller `ctrl_id` - pub fn flows_to(&self, ctrl_id: Identifier, src: &DataSource, sink: &DataSink) -> bool { + pub fn flows_to(&self, ctrl_id: ControllerId, src: &DataSource, sink: &DataSink) -> bool { let ctrl_flows = &self.flows_to[&ctrl_id]; ctrl_flows .flows_to @@ -134,7 +196,7 @@ impl Context { pub fn marked( &self, marker: Marker, - ) -> impl Iterator + '_ { + ) -> impl Iterator + '_ { self.marker_to_ids .get(&marker) .into_iter() @@ -180,7 +242,7 @@ impl Context { pub fn srcs_with_type<'a>( &self, c: &'a Ctrl, - t: Identifier, + t: DefId, ) -> impl Iterator + 'a { c.types .0 @@ -194,28 +256,18 @@ impl Context { } /// Returns all the [`Annotation::OType`]s for a controller `id`. - pub fn otypes(&self, id: Identifier) -> Vec { - let inner = || -> Option<_> { - self.desc() - .annotations - .get(&id)? - .0 - .iter() - .filter_map(|annot| match annot { - Annotation::OType(ids) => Some(ids.clone()), + pub fn otypes(&self, id: DefId) -> Vec { + self.desc() + .annotations + .get(&id) + .into_iter() + .flat_map(|(anns, _)| { + anns.iter().filter_map(|annot| match annot { + Annotation::OType(id) => Some(*id), _ => None, }) - .next() - }; - inner().unwrap_or_default() - } - - /// Returns true if `id` identifies a function with name `name`. - pub fn is_function(&self, id: Identifier, name: &str) -> bool { - match id.as_str().rsplit_once('_') { - Some((id_name, _)) => id_name == name, - None => false, - } + }) + .collect() } /// Enforce that on every path from the `starting_points` to `is_terminal` a @@ -231,7 +283,7 @@ impl Context { /// always return the same result for the same input. pub fn always_happens_before( &self, - ctrl: Identifier, + ctrl: ControllerId, starting_points: impl Iterator, mut is_checkpoint: impl FnMut(&DataSink) -> bool, mut is_terminal: impl FnMut(&DataSink) -> bool, @@ -248,7 +300,7 @@ impl Context { .desc() .controllers .get(&ctrl) - .ok_or_else(|| anyhow!("Controller {ctrl} not found"))? + .ok_or_else(|| anyhow!("Controller not found"))? .data_flow .0; @@ -300,6 +352,8 @@ pub struct AlwaysHappensBefore { } impl std::fmt::Display for AlwaysHappensBefore { + /// Format the results of this combinator, using the `def_info` to print + /// readable names instead of ids fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { num_reached, @@ -344,17 +398,15 @@ fn test_context() { let ctx = crate::test_utils::test_ctx(); let input = Marker::new_intern("input"); let sink = Marker::new_intern("sink"); - assert!(ctx - .marked(input) - .any(|(id, _)| id.as_str().starts_with("Foo"))); + assert!(ctx.marked(input).any(|(id, _)| ctx + .desc + .def_info + .get(id) + .map_or(false, |info| info.name.as_str().starts_with("Foo")))); let desc = ctx.desc(); - let controller = desc - .controllers - .keys() - .find(|id| ctx.is_function(**id, "controller")) - .unwrap(); - let ctrl = &desc.controllers[controller]; + let controller = ctx.find_by_name("controller").unwrap(); + let ctrl = &desc.controllers[&controller]; assert_eq!(ctx.marked_sinks(ctrl.data_sinks(), input).count(), 0); assert_eq!(ctx.marked_sinks(ctrl.data_sinks(), sink).count(), 2); @@ -394,7 +446,7 @@ fn test_happens_before() -> Result<()> { fn marked_sources( desc: &ProgramDescription, - ctrl_name: Identifier, + ctrl_name: DefId, marker: Marker, ) -> impl Iterator { desc.controllers[&ctrl_name] @@ -426,11 +478,7 @@ fn test_happens_before() -> Result<()> { }) } - let ctrl_name = *desc - .controllers - .keys() - .find(|id| ctx.is_function(**id, "happens_before_pass")) - .unwrap(); + let ctrl_name = ctx.find_by_name("happens_before_pass")?; let pass = ctx.always_happens_before( ctrl_name, @@ -442,11 +490,7 @@ fn test_happens_before() -> Result<()> { ensure!(pass.holds()); ensure!(!pass.is_vacuous(), "{pass}"); - let ctrl_name = *desc - .controllers - .keys() - .find(|id| ctx.is_function(**id, "happens_before_fail")) - .unwrap(); + let ctrl_name = ctx.find_by_name("happens_before_fail")?; let fail = ctx.always_happens_before( ctrl_name, diff --git a/crates/paralegal-policy/src/flows_to.rs b/crates/paralegal-policy/src/flows_to.rs index b45fa91ef7..a9aad9d22e 100644 --- a/crates/paralegal-policy/src/flows_to.rs +++ b/crates/paralegal-policy/src/flows_to.rs @@ -95,19 +95,18 @@ impl fmt::Debug for CtrlFlowsTo { #[test] fn test_flows_to() { + use paralegal_spdg::Identifier; let ctx = crate::test_utils::test_ctx(); - let controller = *ctx - .desc() - .controllers - .keys() - .find(|id| ctx.is_function(**id, "controller")) - .unwrap(); + let controller = ctx.find_by_name("controller").unwrap(); let src = DataSource::Argument(0); let get_sink = |name| { + let name = Identifier::new_intern(name); ctx.desc().controllers[&controller] .data_sinks() .find(|sink| match sink { - DataSink::Argument { function, .. } => ctx.is_function(function.function, name), + DataSink::Argument { function, .. } => { + ctx.desc().def_info[&function.function].name == name + } _ => false, }) .unwrap() diff --git a/crates/paralegal-policy/src/test_utils.rs b/crates/paralegal-policy/src/test_utils.rs index ac763c4305..f610dba7f6 100644 --- a/crates/paralegal-policy/src/test_utils.rs +++ b/crates/paralegal-policy/src/test_utils.rs @@ -7,7 +7,7 @@ static TEST_CTX: OnceLock = OnceLock::new(); pub fn test_ctx() -> &'static Context { TEST_CTX.get_or_init(|| { paralegal_flow::test_utils::run_paralegal_flow_with_flow_graph_dump("tests/test-crate"); - let desc = PreFrg::from_file_at("tests/test-crate").0; + let desc = PreFrg::from_file_at("tests/test-crate").desc; Context::new(desc) }) } diff --git a/crates/paralegal-spdg/src/global_location.rs b/crates/paralegal-spdg/src/global_location.rs index 83a9462c5d..7a3bfb5770 100644 --- a/crates/paralegal-spdg/src/global_location.rs +++ b/crates/paralegal-spdg/src/global_location.rs @@ -61,6 +61,7 @@ //! The innermost location is what you'd want to look up if you are wanting to //! see the actual statement or terminator that this location refers to. +#[cfg(feature = "rustc")] use crate::rustc_proxies; use internment::Intern; use serde::{Deserialize, Serialize}; @@ -73,15 +74,32 @@ use crate::rustc_portable::*; /// You will probably want to operate on the interned wrapper type [`GlobalLocation`]. #[derive(PartialEq, Eq, Hash, Debug, Clone, Copy, Serialize, Deserialize)] pub struct GlobalLocationS { - #[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::BodyId"))] + #[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::DefId"))] /// The id of the body in which this location is located. - pub function: BodyId, + pub function: DefId, #[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::Location"))] /// The location itself pub location: Location, } +impl std::cmp::PartialOrd for GlobalLocationS { + /// Compares in order crate, index and location, returning the first value + /// that is not `Some(Ordering::Equal)`. + /// + /// The implementation is a bit inefficient, because we're always running + /// all comparisons and only return after, but it makes it much easier to + /// read and cleaner, plus we can use `Ordering::then` and know that it + /// handles the chaining correctly. (In fact the earlier version of this + /// comparison was buggy precisely because of this.) + fn partial_cmp(&self, other: &Self) -> Option { + let krate_cmp = self.function.krate.partial_cmp(&other.function.krate)?; + let index_cmp = self.function.index.partial_cmp(&other.function.index)?; + let location_cmp = self.location.partial_cmp(&other.location)?; + Some(krate_cmp.then(index_cmp).then(location_cmp)) + } +} + impl GlobalLocationS { pub fn relativize(self, location: GlobalLocation) -> GlobalLocation { let mut location = location.to_owned(); @@ -103,7 +121,7 @@ impl RawGlobalLocation { } /// Get the `function` field of the underlying location. - pub fn outermost_function(&self) -> BodyId { + pub fn outermost_function(&self) -> DefId { self.outermost().function } @@ -120,7 +138,7 @@ impl RawGlobalLocation { self.innermost().location } - pub fn innermost_function(&self) -> BodyId { + pub fn innermost_function(&self) -> DefId { self.innermost().function } @@ -151,16 +169,7 @@ impl RawGlobalLocation { impl PartialOrd for RawGlobalLocation { fn partial_cmp(&self, other: &Self) -> Option { - for (slf, othr) in self.as_slice().iter().zip(other.as_slice().iter()) { - if slf.function != othr.function { - return slf.function.hir_id.partial_cmp(&othr.function.hir_id); - } - if slf.location == othr.location { - return slf.location.partial_cmp(&othr.location); - } - } - - self.as_slice().len().partial_cmp(&other.as_slice().len()) + self.as_slice().partial_cmp(other.as_slice()) } } @@ -249,7 +258,7 @@ impl GlobalLocation { GlobalLocation(Intern::from_ref(path)) } - pub fn single(location: Location, function: BodyId) -> Self { + pub fn single(location: Location, function: DefId) -> Self { Self::into_intern(RawGlobalLocation(vec![GlobalLocationS { location, function, diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index 73c1733666..2b20d4d7a5 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -32,14 +32,15 @@ use global_location::GlobalLocation; use indexical::define_index_type; use internment::Intern; use itertools::Itertools; +use rustc_portable::DefId; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::{borrow::Cow, fmt, hash::Hash, iter}; pub use crate::tiny_bitset::TinyBitSet; pub use std::collections::{HashMap, HashSet}; -pub type Endpoint = Identifier; -pub type TypeDescriptor = Identifier; +pub type Endpoint = DefId; +pub type TypeDescriptor = DefId; pub type Function = Identifier; /// Name of the file used for emitting the JSON serialized @@ -55,7 +56,7 @@ pub const FLOW_GRAPH_OUT_NAME: &str = "flow-graph.json"; #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Deserialize, Serialize, strum::EnumIs)] pub enum Annotation { Marker(MarkerAnnotation), - OType(Vec), + OType(#[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::DefId"))] TypeDescriptor), Exception(ExceptionAnnotation), } @@ -69,9 +70,9 @@ impl Annotation { } /// If this is an [`Annotation::OType`], returns the underlying [`TypeDescriptor`]. - pub fn as_otype(&self) -> Option<&[TypeDescriptor]> { + pub fn as_otype(&self) -> Option { match self { - Annotation::OType(t) => Some(t), + Annotation::OType(t) => Some(*t), _ => None, } } @@ -196,16 +197,64 @@ impl ObjectType { } } -pub type AnnotationMap = HashMap, ObjectType)>; +pub type AnnotationMap = HashMap, ObjectType)>; + +#[cfg(feature = "rustc")] +mod ser_defid_map { + use serde::{Deserialize, Serialize}; + + use crate::rustc_proxies; + + #[derive(Serialize, Deserialize)] + struct Helper(#[serde(with = "rustc_proxies::DefId")] super::DefId); + + pub fn serialize( + map: &super::HashMap, + serializer: S, + ) -> Result { + map.iter() + .map(|(k, v)| (Helper(*k), v)) + .collect::>() + .serialize(serializer) + } + + pub fn deserialize<'de, D: serde::Deserializer<'de>, V: serde::Deserialize<'de>>( + deserializer: D, + ) -> Result, D::Error> { + Ok(Vec::deserialize(deserializer)? + .into_iter() + .map(|(Helper(k), v)| (k, v)) + .collect()) + } +} + +#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] +pub struct DefInfo { + pub name: Identifier, + pub path: Vec, + pub kind: DefKind, +} + +#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] +pub enum DefKind { + Function, + Type, +} /// The annotated program dependence graph. #[derive(Serialize, Deserialize, Debug)] pub struct ProgramDescription { + #[cfg_attr(feature = "rustc", serde(with = "ser_defid_map"))] /// Mapping from function names to dependencies within the function. pub controllers: HashMap, + #[cfg_attr(feature = "rustc", serde(with = "ser_defid_map"))] /// Mapping from objects to annotations on those objects. pub annotations: AnnotationMap, + + #[cfg_attr(feature = "rustc", serde(with = "ser_defid_map"))] + /// Metadata about the `DefId`s + pub def_info: HashMap, } impl ProgramDescription { @@ -227,7 +276,7 @@ impl ProgramDescription { /// Gather all [`DataSource`]s that are mentioned in this program description. /// /// Essentially just `self.controllers.flat_map(|c| c.keys())` - pub fn all_sources_with_ctrl(&self) -> HashSet<(Identifier, &DataSource)> { + pub fn all_sources_with_ctrl(&self) -> HashSet<(DefId, &DataSource)> { self.controllers .iter() .flat_map(|(name, c)| { @@ -284,15 +333,15 @@ impl ProgramDescription { /// Gather all function identifiers that are mentioned in this program description. /// /// Essentially just `self.all_call_sites().map(|cs| cs.function)` - pub fn all_functions(&self) -> HashSet<&Identifier> { + pub fn all_functions(&self) -> HashSet { self.all_call_sites() .into_iter() - .map(|cs| &cs.function) + .map(|cs| cs.function) .chain( self.annotations .iter() .filter(|f| f.1 .1.as_function().is_some()) - .map(|f| f.0), + .map(|f| *f.0), ) .collect() } @@ -335,6 +384,20 @@ impl std::fmt::Display for Identifier { #[derive(Debug)] pub struct Relation(pub HashMap>); +impl std::ops::Deref for Relation { + type Target = HashMap>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::ops::DerefMut for Relation { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + impl Serialize for Relation { fn serialize(&self, serializer: S) -> Result where @@ -369,8 +432,9 @@ pub struct CallSite { /// The location of the call. pub location: GlobalLocation, - /// The name of the function being called. - pub function: Function, + #[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::DefId"))] + /// The id of the function being called. + pub function: DefId, } /// Create a hash for this object that is no longer than six hex digits @@ -408,15 +472,15 @@ fn hash_pls(t: T) -> u64 { hasher.finish() } -impl std::string::ToString for CallSite { - fn to_string(&self) -> String { - format!( - "cs_{}_{}", - self.function.as_str(), - ShortHash::new(self.location), - ) - } -} +// impl std::string::ToString for CallSite { +// fn to_string(&self) -> String { +// format!( +// "cs_{}_{}", +// self.function.as_str(), +// ShortHash::new(self.location), +// ) +// } +// } /// A representation of something that can emit data into the flow. /// @@ -481,6 +545,37 @@ define_index_type! { pub type CtrlTypes = Relation; +#[cfg(feature = "rustc")] +mod ser_ctrl_types { + use serde::{Deserialize, Serialize}; + + use crate::rustc_proxies; + + #[derive(Serialize, Deserialize)] + struct Helper(#[serde(with = "rustc_proxies::DefId")] super::DefId); + + pub fn serialize( + map: &super::CtrlTypes, + serializer: S, + ) -> Result { + map.iter() + .map(|(k, v)| (k.clone(), v.iter().copied().map(Helper).collect::>())) + .collect::>() + .serialize(serializer) + } + + pub fn deserialize<'de, D: serde::Deserializer<'de>>( + deserializer: D, + ) -> Result { + Ok(crate::Relation( + Vec::deserialize(deserializer)? + .into_iter() + .map(|(k, v): (_, Vec<_>)| (k, v.into_iter().map(|Helper(id)| id).collect())) + .collect(), + )) + } +} + /// Dependencies within a controller function. #[derive(Serialize, Deserialize, Debug)] pub struct Ctrl { @@ -490,6 +585,7 @@ pub struct Ctrl { /// Transitive control dependencies between sources and call sites. pub ctrl_flow: Relation, + #[cfg_attr(feature = "rustc", serde(with = "ser_ctrl_types"))] /// Annotations on types in a controller. /// /// Only types that have annotations are present in this map, meaning diff --git a/crates/paralegal-spdg/src/rustc_impls.rs b/crates/paralegal-spdg/src/rustc_impls.rs index 2515f41754..00502f3c4f 100644 --- a/crates/paralegal-spdg/src/rustc_impls.rs +++ b/crates/paralegal-spdg/src/rustc_impls.rs @@ -54,6 +54,16 @@ pub fn def_index_as_u32(i: &def_id::DefIndex) -> u32 { i.as_u32() } +pub fn crate_num_as_u32(num: &hir::def_id::CrateNum) -> u32 { + (*num).into() +} + +impl From for hir::def_id::CrateNum { + fn from(value: CrateNum) -> Self { + hir::def_id::CrateNum::from_u32(value.private) + } +} + impl From for def_id::DefIndex { fn from(proxy: DefIndex) -> def_id::DefIndex { def_id::DefIndex::from_u32(proxy.private) diff --git a/crates/paralegal-spdg/src/rustc_portable.rs b/crates/paralegal-spdg/src/rustc_portable.rs index c3a0083375..044b424866 100644 --- a/crates/paralegal-spdg/src/rustc_portable.rs +++ b/crates/paralegal-spdg/src/rustc_portable.rs @@ -17,9 +17,18 @@ cfg_if::cfg_if! { if #[cfg(feature = "rustc")] { use crate::rustc::{hir, mir, def_id}; - pub use mir::{Location, BasicBlock}; - pub use hir::{BodyId, ItemLocalId, hir_id::OwnerId, HirId}; - pub use def_id::{DefIndex, LocalDefId}; + // We are redefining these here as a type alias instead of just `pub + // use`, because the latter requires of consumers of this library to use + // the `rustc_private` feature, whereas it doesn't with type aliases. + pub type Location = mir::Location; + pub type BasicBlock = mir::BasicBlock; + pub type BodyId = hir::BodyId; + pub type ItemLocalId = hir::ItemLocalId; + pub type OwnerId = hir::hir_id::OwnerId; + pub type HirId = hir::HirId; + pub type DefIndex = def_id::DefIndex; + pub type LocalDefId = def_id::LocalDefId; + pub type DefId = def_id::DefId; } else { pub use crate::rustc_proxies::*; } diff --git a/crates/paralegal-spdg/src/rustc_proxies.rs b/crates/paralegal-spdg/src/rustc_proxies.rs index dc902acca8..c1ccf5acfa 100644 --- a/crates/paralegal-spdg/src/rustc_proxies.rs +++ b/crates/paralegal-spdg/src/rustc_proxies.rs @@ -73,7 +73,10 @@ proxy_index! { ItemLocalId("hir::ItemLocalId") from "item_local_id_as_u32"; /// Proxy for `def_id::DefIndex` - DefIndex("def_id::DefIndex") from "def_index_as_u32" + DefIndex("def_id::DefIndex") from "def_index_as_u32"; + + /// Proxy for [`hir::def_id::CrateNum`] + CrateNum("hir::def_id::CrateNum") from "crate_num_as_u32" } proxy_struct! { @@ -104,6 +107,13 @@ proxy_struct! { BodyId("hir::BodyId") { hir_id: hir::HirId => HirId, "HirId" } + + #[derive(Ord, PartialOrd)] + /// Proxy for [`def_id::DefId`] + DefId("def_id::DefId") { + index: def_id::DefIndex => DefIndex, "DefIndex", + krate: hir::def_id::CrateNum => CrateNum, "CrateNum" + } } impl HirId { diff --git a/props/websubmit/src/main.rs b/props/websubmit/src/main.rs index cf62ea4590..0ada36288c 100644 --- a/props/websubmit/src/main.rs +++ b/props/websubmit/src/main.rs @@ -3,7 +3,7 @@ extern crate anyhow; use anyhow::{anyhow, Result}; use std::sync::Arc; -use paralegal_policy::{assert_error, paralegal_spdg::Identifier, Context, Marker}; +use paralegal_policy::{assert_error, Context, DefId, Marker}; pub struct DeletionProp { cx: Arc, @@ -14,7 +14,7 @@ impl DeletionProp { DeletionProp { cx } } - fn flows_to_store(&self, t: Identifier) -> bool { + fn flows_to_store(&self, t: DefId) -> bool { let stores = Marker::new_intern("stores"); for (c_id, c) in &self.cx.desc().controllers { @@ -36,7 +36,7 @@ impl DeletionProp { false } - fn flows_to_deletion(&self, t: Identifier) -> bool { + fn flows_to_deletion(&self, t: DefId) -> bool { let deletes = Marker::new_intern("deletes"); let mut ots = self.cx.otypes(t);