diff --git a/.gitignore b/.gitignore index b0609954a5..9d2daec0dc 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ target/ /rust-dfpp-patch flow-graph.json +flow-graph.stat.json # Local cargo configuration .cargo/config.toml diff --git a/Cargo.lock b/Cargo.lock index fd384380c2..431e6ca473 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -454,7 +454,7 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flowistry" version = "0.5.41" -source = "git+https://github.com/brownsys/flowistry?rev=08c4ad9587b3251a8f7c64aa60be31404e6e04c0#08c4ad9587b3251a8f7c64aa60be31404e6e04c0" +source = "git+https://github.com/brownsys/flowistry?rev=57627ed24802cb76c745118bbc46f83f402a1e88#57627ed24802cb76c745118bbc46f83f402a1e88" dependencies = [ "anyhow", "cfg-if", @@ -926,6 +926,7 @@ dependencies = [ "rustc_utils 0.7.4-nightly-2023-08-25 (registry+https://github.com/rust-lang/crates.io-index)", "serde", "serde_bare", + "serde_json", "serial_test", "simple_logger", "strum", @@ -939,6 +940,7 @@ name = "paralegal-policy" version = "0.1.0" dependencies = [ "anyhow", + "autocfg", "bitvec", "colored 1.9.4", "indexical", diff --git a/Cargo.toml b/Cargo.toml index b3ef519232..7b57d72319 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ rustc_plugin = "=0.7.4-nightly-2023-08-25" [workspace.dependencies.flowistry] # path = "../flowistry/crates/flowistry" git = "https://github.com/brownsys/flowistry" -rev = "08c4ad9587b3251a8f7c64aa60be31404e6e04c0" +rev = "57627ed24802cb76c745118bbc46f83f402a1e88" default-features = false diff --git a/Makefile.toml b/Makefile.toml index 9825cf046c..905c0ed142 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -5,7 +5,7 @@ # - `fix-all` tries to fix all the formatting and code style issues. This is # most likely the command you want to run. # - `check-all` runs all the formatting and code-style checks we also run in CI -# This performs all teh same operations as `fix-ll` but complains instead of +# This performs all the same operations as `fix-ll` but complains instead of # solving them. # - `pdg-tests` which runs our current complete test suite # @@ -20,7 +20,7 @@ default_to_workspace = false [tasks.install] command = "cargo" -args = ["install", "--locked", "--path", "crates/paralegal-flow"] +args = ["install", "--locked", "-f", "--path", "crates/paralegal-flow"] [tasks.ci-tests] description = "The suite of tests we run in CI" @@ -35,7 +35,7 @@ description = """\ Attempt to perform any formatting and code structure related fixes. Note: This uses the clippy-fix command under the hood. This can introduce -breaking changes and so this tscript only passes `--allow-staged` to the +breaking changes and so this script only passes `--allow-staged` to the fix command. What this means for you is that you need to stage (or commit) your changes in git for the fix command to work. 
This is a precaution that allows you to inspect, test and potentially reject clippy's changes before diff --git a/crates/flowistry_pdg_construction/src/async_support.rs b/crates/flowistry_pdg_construction/src/async_support.rs index 6f39fed871..a639839b51 100644 --- a/crates/flowistry_pdg_construction/src/async_support.rs +++ b/crates/flowistry_pdg_construction/src/async_support.rs @@ -138,9 +138,6 @@ fn match_pin_box_dyn_ty(lang_items: &rustc_hir::LanguageItems, t: ty::Ty) -> boo let ty::TyKind::Dynamic(pred, _, ty::DynKind::Dyn) = t_a.boxed_ty().kind() else { return false; }; - if pred.len() != 2 { - return false; - } pred.iter().any(|p| { let ty::ExistentialPredicate::Trait(t) = p.skip_binder() else { return false; diff --git a/crates/flowistry_pdg_construction/src/body_cache.rs b/crates/flowistry_pdg_construction/src/body_cache.rs index 65acc36a14..bff59cb0ba 100644 --- a/crates/flowistry_pdg_construction/src/body_cache.rs +++ b/crates/flowistry_pdg_construction/src/body_cache.rs @@ -1,11 +1,16 @@ -use std::path::PathBuf; +use std::{ + cell::RefCell, + path::PathBuf, + time::{Duration, Instant}, +}; use flowistry::mir::FlowistryInput; use polonius_engine::FactTypes; use rustc_borrowck::consumers::{ConsumerOptions, RustcFacts}; +use rustc_hash::FxHashMap; use rustc_hir::{ - def_id::{CrateNum, DefId, LocalDefId, LOCAL_CRATE}, + def_id::{CrateNum, DefId, DefIndex, LocalDefId, LOCAL_CRATE}, intravisit::{self}, }; use rustc_macros::{Decodable, Encodable, TyDecodable, TyEncodable}; @@ -36,7 +41,7 @@ impl<'tcx> CachedBody<'tcx> { let mut body_with_facts = rustc_borrowck::consumers::get_body_with_borrowck_facts( tcx, local_def_id, - ConsumerOptions::PoloniusInputFacts, + ConsumerOptions::RegionInferenceContext, ); clean_undecodable_data_from_body(&mut body_with_facts.body); @@ -44,25 +49,32 @@ impl<'tcx> CachedBody<'tcx> { Self { body: body_with_facts.body, input_facts: FlowistryFacts { - subset_base: body_with_facts.input_facts.unwrap().subset_base, + subset_base: body_with_facts + .region_inference_context + .outlives_constraints() + .map(|constraint| (constraint.sup, constraint.sub)) + .collect(), }, } } } -impl<'tcx> FlowistryInput<'tcx> for &'tcx CachedBody<'tcx> { +impl<'tcx> FlowistryInput<'tcx, 'tcx> for &'tcx CachedBody<'tcx> { fn body(self) -> &'tcx Body<'tcx> { &self.body } fn input_facts_subset_base( self, - ) -> &'tcx [( - ::Origin, - ::Origin, - ::Point, - )] { - &self.input_facts.subset_base + ) -> Box< + dyn Iterator< + Item = ( + ::Origin, + ::Origin, + ), + > + 'tcx, + > { + Box::new(self.input_facts.subset_base.iter().copied()) } } @@ -73,19 +85,22 @@ pub struct FlowistryFacts { pub subset_base: Vec<( ::Origin, ::Origin, - ::Point, )>, } pub type LocationIndex = ::Point; +type BodyMap<'tcx> = FxHashMap>; + /// Allows loading bodies from previosly written artifacts. /// /// Ensure this cache outlives any flowistry analysis that is performed on the /// bodies it returns or risk UB. pub struct BodyCache<'tcx> { tcx: TyCtxt<'tcx>, - cache: Cache>, + cache: Cache>, + local_cache: Cache>, + timer: RefCell, } impl<'tcx> BodyCache<'tcx> { @@ -93,28 +108,47 @@ impl<'tcx> BodyCache<'tcx> { Self { tcx, cache: Default::default(), + local_cache: Default::default(), + timer: RefCell::new(Duration::ZERO), } } + pub fn timer(&self) -> Duration { + *self.timer.borrow() + } + /// Serve the body from the cache or read it from the disk. /// /// Returns `None` if the policy forbids loading from this crate. 
pub fn get(&self, key: DefId) -> &'tcx CachedBody<'tcx> { - let cbody = self.cache.get(key, |_| load_body_and_facts(self.tcx, key)); + let body = if let Some(local) = key.as_local() { + self.local_cache.get(local.local_def_index, |_| { + let start = Instant::now(); + let res = CachedBody::retrieve(self.tcx, local); + *self.timer.borrow_mut() += start.elapsed(); + res + }) + } else { + self.cache + .get(key.krate, |_| load_body_and_facts(self.tcx, key.krate)) + .get(&key.index) + .expect("Invariant broken, body for this is should exist") + }; + // SAFETY: Theoretically this struct may not outlive the body, but // to simplify lifetimes flowistry uses 'tcx anywhere. But if we // actually try to provide that we're risking race conditions // (because it needs global variables like MIR_BODIES). // // So until we fix flowistry's lifetimes this is good enough. - unsafe { std::mem::transmute(cbody) } + unsafe { std::mem::transmute(body) } } } /// A visitor to collect all bodies in the crate and write them to disk. struct DumpingVisitor<'tcx> { tcx: TyCtxt<'tcx>, - target_dir: PathBuf, + targets: Vec, } /// Some data in a [Body] is not cross-crate compatible. Usually because it @@ -150,20 +184,7 @@ impl<'tcx> intravisit::Visitor<'tcx> for DumpingVisitor<'tcx> { _: rustc_span::Span, local_def_id: rustc_hir::def_id::LocalDefId, ) { - let to_write = CachedBody::retrieve(self.tcx, local_def_id); - - let dir = &self.target_dir; - let path = dir.join( - self.tcx - .def_path(local_def_id.to_def_id()) - .to_filename_friendly_no_crate(), - ); - - if !dir.exists() { - std::fs::create_dir(dir).unwrap(); - } - - encode_to_file(self.tcx, path, &to_write); + self.targets.push(local_def_id); intravisit::walk_fn( self, @@ -179,14 +200,30 @@ impl<'tcx> intravisit::Visitor<'tcx> for DumpingVisitor<'tcx> { /// calculating the necessary borrowcheck facts to store for later points-to /// analysis. /// -/// Ensure this gets called early in the compiler before the unoptimmized mir +/// Ensure this gets called early in the compiler before the unoptimized mir /// bodies are stolen. 
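For orientation, here is a minimal, self-contained model of the two-level lookup that the reworked `BodyCache::get` above performs: bodies from the local crate are computed (and timed) one at a time, while a dependency's bodies are decoded as a single per-crate map on first access and then indexed per item. The `CrateId`, `ItemId` and `load_crate_artifact` names are illustrative stand-ins only; the real code uses rustc's `CrateNum`/`DefIndex`, the `Cache` type, and returns references rather than clones.

```rust
use std::cell::RefCell;
use std::collections::HashMap;
use std::time::{Duration, Instant};

// Hypothetical stand-ins for rustc's CrateNum / DefIndex / CachedBody.
type CrateId = u32;
type ItemId = u32;
type Body = String;

struct BodyCacheModel {
    /// Local bodies are computed one at a time; the time spent is accumulated.
    local: RefCell<HashMap<ItemId, Body>>,
    /// For dependencies the whole per-crate map is decoded once, then indexed.
    remote: RefCell<HashMap<CrateId, HashMap<ItemId, Body>>>,
    timer: RefCell<Duration>,
}

impl BodyCacheModel {
    fn get(&self, krate: CrateId, item: ItemId, is_local: bool) -> Body {
        if is_local {
            let start = Instant::now();
            let body = self
                .local
                .borrow_mut()
                .entry(item)
                .or_insert_with(|| format!("computed body of local item {item}"))
                .clone();
            *self.timer.borrow_mut() += start.elapsed();
            body
        } else {
            self.remote
                .borrow_mut()
                .entry(krate)
                .or_insert_with(|| load_crate_artifact(krate))
                .get(&item)
                .cloned()
                .expect("body for this item should exist in the decoded artifact")
        }
    }
}

// Placeholder for decoding a dependency's whole `BodyMap` artifact from disk.
fn load_crate_artifact(krate: CrateId) -> HashMap<ItemId, Body> {
    HashMap::from([(0, format!("decoded body 0 of crate {krate}"))])
}
```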
-pub fn dump_mir_and_borrowck_facts(tcx: TyCtxt) { +pub fn dump_mir_and_borrowck_facts<'tcx>(tcx: TyCtxt<'tcx>) -> (Duration, Duration) { let mut vis = DumpingVisitor { tcx, - target_dir: intermediate_out_dir(tcx, INTERMEDIATE_ARTIFACT_EXT), + targets: vec![], }; tcx.hir().visit_all_item_likes_in_crate(&mut vis); + + let tc_start = Instant::now(); + let bodies: BodyMap<'tcx> = vis + .targets + .iter() + .map(|local_def_id| { + let to_write = CachedBody::retrieve(tcx, *local_def_id); + + (local_def_id.local_def_index, to_write) + }) + .collect(); + let tc_time = tc_start.elapsed(); + let dump_time = Instant::now(); + let path = intermediate_out_dir(tcx, INTERMEDIATE_ARTIFACT_EXT); + encode_to_file(tcx, path, &bodies); + (tc_time, dump_time.elapsed()) } const INTERMEDIATE_ARTIFACT_EXT: &str = "bwbf"; @@ -206,16 +243,18 @@ pub fn local_or_remote_paths(krate: CrateNum, tcx: TyCtxt, ext: &str) -> Vec, def_id: DefId) -> CachedBody<'_> { - let paths = local_or_remote_paths(def_id.krate, tcx, INTERMEDIATE_ARTIFACT_EXT); +fn load_body_and_facts(tcx: TyCtxt<'_>, krate: CrateNum) -> BodyMap<'_> { + let paths = local_or_remote_paths(krate, tcx, INTERMEDIATE_ARTIFACT_EXT); for path in &paths { - let path = path.join(tcx.def_path(def_id).to_filename_friendly_no_crate()); - if let Ok(data) = decode_from_file(tcx, path) { - return data; - }; + if !path.exists() { + continue; + } + + let data = decode_from_file(tcx, path).unwrap(); + return data; } - panic!("No facts for {def_id:?} found at any path tried: {paths:?}"); + panic!("No facts for {krate:?} found at any path tried: {paths:?}"); } /// Create the name of the file in which to store intermediate artifacts. diff --git a/crates/flowistry_pdg_construction/src/calling_convention.rs b/crates/flowistry_pdg_construction/src/calling_convention.rs index dbb121b753..db1b17cbbb 100644 --- a/crates/flowistry_pdg_construction/src/calling_convention.rs +++ b/crates/flowistry_pdg_construction/src/calling_convention.rs @@ -9,7 +9,11 @@ use rustc_middle::{ ty::TyCtxt, }; -use crate::{async_support::AsyncInfo, local_analysis::CallKind, utils}; +use crate::{ + async_support::AsyncInfo, + local_analysis::CallKind, + utils::{self, ShimType}, +}; /// Describes how the formal parameters of a given function call relate to the /// actual parameters. @@ -21,7 +25,7 @@ pub enum CallingConvention<'tcx> { /// tuple that contains the actual argument to the call of the closure /// function. Indirect { - once_shim: bool, + shim: Option, closure_arg: Operand<'tcx>, tupled_arguments: Operand<'tcx>, }, @@ -72,8 +76,8 @@ impl<'tcx> CallingConvention<'tcx> { match kind { CallKind::AsyncPoll(poll) => CallingConvention::Async(poll.generator_data), CallKind::Direct => CallingConvention::Direct(args.into()), - CallKind::Indirect { once_shim } => CallingConvention::Indirect { - once_shim: *once_shim, + CallKind::Indirect { shim: once_shim } => CallingConvention::Indirect { + shim: *once_shim, closure_arg: args[0].clone(), tupled_arguments: args[1].clone(), }, @@ -184,13 +188,24 @@ impl<'a, 'tcx> PlaceTranslator<'a, 'tcx> { // Map closure captures to the first argument. // Map formal parameters to the second argument. 
CallingConvention::Indirect { - once_shim, + shim, closure_arg, tupled_arguments, } => { - if child.local.as_usize() == 1 { + // Accounting fot FnPtrShim + // + // The shim gets an extra first argument (the function pointer) + // but we replace it with the function iself which doesn't have + // that argument, so we need to adjust the indices + let local = if matches!(shim, Some(ShimType::FnPtr)) && child.local != RETURN_PLACE + { + child.local + 1 + } else { + child.local + }; + if local.as_usize() == 1 { // Accounting for shims - let next_idx = if *once_shim { + let next_idx = if matches!(shim, Some(ShimType::Once)) { // If this is a once shim then the signature of the // function and its call don't match fully. (We are // calling a closure that takes it's `self` by reference @@ -214,7 +229,7 @@ impl<'a, 'tcx> PlaceTranslator<'a, 'tcx> { } else { let tuple_arg = tupled_arguments.place()?; let _projection = child.projection.to_vec(); - let field = FieldIdx::from_usize(child.local.as_usize() - 2); + let field = FieldIdx::from_usize(local.as_usize() - 2); let field_ty = tuple_arg .ty(self.parent_body, self.tcx) .field_ty(self.tcx, field); diff --git a/crates/flowistry_pdg_construction/src/construct.rs b/crates/flowistry_pdg_construction/src/construct.rs index 7a3bb7ae7e..b6986e6cc7 100644 --- a/crates/flowistry_pdg_construction/src/construct.rs +++ b/crates/flowistry_pdg_construction/src/construct.rs @@ -41,7 +41,7 @@ pub struct MemoPdgConstructor<'tcx> { pub(crate) call_change_callback: Option + 'tcx>>, pub(crate) dump_mir: bool, pub(crate) async_info: Rc, - pub(crate) pdg_cache: PdgCache<'tcx>, + pub pdg_cache: PdgCache<'tcx>, pub(crate) body_cache: Rc>, } @@ -429,6 +429,17 @@ impl<'tcx> PartialGraph<'tcx> { ); } } + self.edges.extend( + constructor + .find_control_inputs(location) + .into_iter() + .flat_map(|(ctrl_src, edge)| { + child_graph + .nodes + .iter() + .map(move |dest| (ctrl_src, *dest, edge)) + }), + ); self.nodes.extend(child_graph.nodes); self.edges.extend(child_graph.edges); true diff --git a/crates/flowistry_pdg_construction/src/encoder.rs b/crates/flowistry_pdg_construction/src/encoder.rs index 3aa1e80a01..c270ab88f1 100644 --- a/crates/flowistry_pdg_construction/src/encoder.rs +++ b/crates/flowistry_pdg_construction/src/encoder.rs @@ -23,6 +23,7 @@ use std::fs::File; use std::io::{self, Read}; use std::path::Path; +use std::rc::Rc; use std::{num::NonZeroU64, path::PathBuf}; use rustc_const_eval::interpret::AllocId; @@ -33,7 +34,9 @@ use rustc_serialize::{ opaque::{FileEncoder, MemDecoder}, Decodable, Decoder, Encodable, Encoder, }; -use rustc_span::{BytePos, FileName, RealFileName, Span, SpanData, SyntaxContext, DUMMY_SP}; +use rustc_span::{ + BytePos, FileName, RealFileName, SourceFile, Span, SpanData, SyntaxContext, DUMMY_SP, +}; use rustc_type_ir::{TyDecoder, TyEncoder}; macro_rules! 
encoder_methods { @@ -50,6 +53,7 @@ pub struct ParalegalEncoder<'tcx> { file_encoder: FileEncoder, type_shorthands: FxHashMap, usize>, predicate_shorthands: FxHashMap, usize>, + filepath_shorthands: FxHashMap, } impl<'tcx> ParalegalEncoder<'tcx> { @@ -60,6 +64,7 @@ impl<'tcx> ParalegalEncoder<'tcx> { file_encoder: FileEncoder::new(path).unwrap(), type_shorthands: Default::default(), predicate_shorthands: Default::default(), + filepath_shorthands: Default::default(), } } @@ -137,6 +142,7 @@ pub struct ParalegalDecoder<'tcx, 'a> { tcx: TyCtxt<'tcx>, mem_decoder: MemDecoder<'a>, shorthand_map: FxHashMap>, + file_shorthands: FxHashMap>, } impl<'tcx, 'a> ParalegalDecoder<'tcx, 'a> { @@ -146,6 +152,7 @@ impl<'tcx, 'a> ParalegalDecoder<'tcx, 'a> { tcx, mem_decoder: MemDecoder::new(buf, 0), shorthand_map: Default::default(), + file_shorthands: Default::default(), } } } @@ -285,7 +292,7 @@ impl<'tcx> Encodable> for SpanData { if let FileName::Real(RealFileName::Remapped { local_path, .. }) = &mut name { local_path.take(); } - name.encode(s); + s.encode_file_name(&name); let lo = self.lo - source_file.start_pos; let len = self.hi - self.lo; @@ -294,6 +301,81 @@ impl<'tcx> Encodable> for SpanData { } } +impl<'tcx> ParalegalEncoder<'tcx> { + fn encode_file_name(&mut self, n: &FileName) { + if let Some(&idx) = self.filepath_shorthands.get(n) { + TAG_ENCODE_REMOTE.encode(self); + idx.encode(self); + } else { + TAG_ENCODE_LOCAL.encode(self); + self.filepath_shorthands + .insert(n.clone(), self.file_encoder.position()); + n.encode(self); + } + } +} + +impl<'tcx, 'a> ParalegalDecoder<'tcx, 'a> { + fn decode_file_name(&mut self, crate_num: CrateNum) -> Rc { + let tag = u8::decode(self); + let pos = if tag == TAG_ENCODE_REMOTE { + let index = usize::decode(self); + if let Some(cached) = self.file_shorthands.get(&index) { + return cached.clone(); + } + Some(index) + } else if tag == TAG_ENCODE_LOCAL { + None + } else { + panic!("Unexpected tag value {tag}"); + }; + let (index, file) = if let Some(idx) = pos { + ( + idx, + self.with_position(idx, |slf| slf.decode_filename_local(crate_num)), + ) + } else { + (self.position(), self.decode_filename_local(crate_num)) + }; + + self.file_shorthands.insert(index, file.clone()); + file + } + + fn decode_filename_local(&mut self, crate_num: CrateNum) -> Rc { + let file_name = FileName::decode(self); + let source_map = self.tcx.sess.source_map(); + let matching_source_files = source_map + .files() + .iter() + .filter(|f| { + f.cnum == crate_num && (file_name == f.name || matches!((&file_name, &f.name), (FileName::Real(r), FileName::Real(other)) if { + let before = path_in_real_path(r); + let after = path_in_real_path(other); + after.ends_with(before) + })) + }) + .cloned() + .collect::>(); + match matching_source_files.as_ref() { + [sf] => sf.clone(), + [] => match &file_name { + FileName::Real(RealFileName::LocalPath(local)) if source_map.file_exists(local) => { + source_map.load_file(local).unwrap() + } + _ => panic!("Could not load file {}", file_name.prefer_local()), + }, + other => { + let names = other.iter().map(|f| &f.name).collect::>(); + panic!("Too many matching file names for {file_name:?}: {names:?}") + } + } + } +} + +const TAG_ENCODE_REMOTE: u8 = 0; +const TAG_ENCODE_LOCAL: u8 = 1; + /// For now this does nothing, simply dropping the context. We don't use it /// anyway in our errors. 
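The `filepath_shorthands`/`file_shorthands` maps introduced above implement a back-reference scheme: the first occurrence of a file name is written inline behind `TAG_ENCODE_LOCAL` and its stream position is remembered, later occurrences only emit `TAG_ENCODE_REMOTE` plus that position, and the decoder caches whatever it finds at a position so each name is decoded at most once. A simplified sketch of the same idea over plain strings (the `Entry` stream is illustrative; the real implementation works on byte positions inside `FileEncoder`/`MemDecoder`):

```rust
use std::collections::HashMap;

/// A toy "stream" standing in for the byte-oriented FileEncoder/MemDecoder.
enum Entry {
    /// First occurrence: the value is stored inline (TAG_ENCODE_LOCAL).
    Inline(String),
    /// Later occurrences: a back-reference to the position of the inline
    /// entry (TAG_ENCODE_REMOTE).
    Backref(usize),
}

#[derive(Default)]
struct EncoderModel {
    stream: Vec<Entry>,
    shorthands: HashMap<String, usize>,
}

impl EncoderModel {
    fn encode_name(&mut self, name: &str) {
        if let Some(&pos) = self.shorthands.get(name) {
            self.stream.push(Entry::Backref(pos));
        } else {
            // Remember where the inline entry lives so later occurrences
            // can point back to it.
            self.shorthands.insert(name.to_owned(), self.stream.len());
            self.stream.push(Entry::Inline(name.to_owned()));
        }
    }
}

#[derive(Default)]
struct DecoderModel {
    cache: HashMap<usize, String>,
}

impl DecoderModel {
    fn decode_name(&mut self, stream: &[Entry], at: usize) -> String {
        // Resolve a back-reference to the position of the inline entry.
        let pos = match &stream[at] {
            Entry::Backref(pos) => *pos,
            Entry::Inline(_) => at,
        };
        if let Some(cached) = self.cache.get(&pos) {
            return cached.clone();
        }
        let Entry::Inline(value) = &stream[pos] else {
            unreachable!("a back-reference must point at an inline entry");
        };
        self.cache.insert(pos, value.clone());
        value.clone()
    }
}
```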
impl<'tcx> Encodable> for SyntaxContext { @@ -327,33 +409,7 @@ impl<'tcx, 'a> Decodable> for SpanData { } debug_assert_eq!(tag, TAG_VALID_SPAN_FULL); let crate_num = CrateNum::decode(d); - let file_name = FileName::decode(d); - let source_map = d.tcx.sess.source_map(); - let matching_source_files = source_map - .files() - .iter() - .filter(|f| { - f.cnum == crate_num && (file_name == f.name || matches!((&file_name, &f.name), (FileName::Real(r), FileName::Real(other)) if { - let before = path_in_real_path(r); - let after = path_in_real_path(other); - after.ends_with(before) - })) - }) - .cloned() - .collect::>(); - let source_file = match matching_source_files.as_ref() { - [sf] => sf.clone(), - [] => match &file_name { - FileName::Real(RealFileName::LocalPath(local)) if source_map.file_exists(local) => { - source_map.load_file(local).unwrap() - } - _ => panic!("Could not load file {}", file_name.prefer_local()), - }, - other => { - let names = other.iter().map(|f| &f.name).collect::>(); - panic!("Too many matching file names for {file_name:?}: {names:?}") - } - }; + let source_file = d.decode_file_name(crate_num); let lo = BytePos::decode(d); let len = BytePos::decode(d); let hi = lo + len; diff --git a/crates/flowistry_pdg_construction/src/local_analysis.rs b/crates/flowistry_pdg_construction/src/local_analysis.rs index 010c7a2595..9b0ccddf8e 100644 --- a/crates/flowistry_pdg_construction/src/local_analysis.rs +++ b/crates/flowistry_pdg_construction/src/local_analysis.rs @@ -1,4 +1,4 @@ -use std::{borrow::Cow, collections::HashSet, iter, rc::Rc}; +use std::{borrow::Cow, collections::HashSet, fmt::Display, iter, rc::Rc}; use flowistry::mir::{placeinfo::PlaceInfo, FlowistryInput}; use flowistry_pdg::{CallString, GlobalLocation, RichLocation}; @@ -14,10 +14,10 @@ use rustc_middle::{ visit::Visitor, AggregateKind, BasicBlock, Body, HasLocalDecls, Location, Operand, Place, PlaceElem, Rvalue, Statement, Terminator, TerminatorEdges, TerminatorKind, RETURN_PLACE, }, - ty::{GenericArgKind, GenericArgsRef, Instance, InstanceDef, TyCtxt, TyKind}, + ty::{GenericArgKind, GenericArgsRef, Instance, TyCtxt, TyKind}, }; use rustc_mir_dataflow::{self as df, fmt::DebugWithContext, Analysis}; -use rustc_span::Span; +use rustc_span::{DesugaringKind, Span}; use rustc_utils::{mir::control_dependencies::ControlDependencies, BodyExt, PlaceExt}; use crate::{ @@ -27,7 +27,7 @@ use crate::{ calling_convention::*, graph::{DepEdge, DepNode, PartialGraph, SourceUse, TargetUse}, mutation::{ModularMutationVisitor, Mutation, Time}, - utils::{self, is_async, is_virtual, try_monomorphize, type_as_fn}, + utils::{self, handle_shims, is_async, is_virtual, try_monomorphize, ShimType}, CallChangeCallback, CallChanges, CallInfo, InlineMissReason, MemoPdgConstructor, SkipCall, }; @@ -137,7 +137,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { let ctrl_loc = self.mono_body.terminator_loc(dep); let Terminator { kind: TerminatorKind::SwitchInt { discr, .. }, - .. + source_info, } = self.mono_body.basic_blocks[dep].terminator() else { if blocks_seen.insert(dep) { @@ -145,6 +145,15 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { } continue; }; + if matches!( + source_info.span.desugaring_kind(), + Some(DesugaringKind::Await) + ) { + // We are dealing with control flow that was introduced + // by the "await" state machine. We don't care about + // this sine it's possible semantic impact is negligible. 
+ continue; + } let Some(ctrl_place) = discr.place() else { continue; }; @@ -414,26 +423,15 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { return None; }; - let call_kind = if matches!(resolved_fn.def, InstanceDef::ClosureOnceShim { .. }) { - // Rustc has inserted a call to the shim that maps `Fn` and `FnMut` - // instances to an `FnOnce`. This shim has no body itself so we - // can't just inline, we must explicitly simulate it's effects by - // changing the target function and by setting the calling - // convention to that of a shim. - - // Because this is a well defined internal item we can make - // assumptions about its generic arguments. - let Some((func_a, _rest)) = generic_args.split_first() else { - unreachable!() - }; - let Some((func_t, g)) = type_as_fn(self.tcx(), func_a.expect_ty()) else { - unreachable!() + let call_kind = + if let Some((instance, shim_type)) = handle_shims(resolved_fn, self.tcx(), param_env) { + resolved_fn = instance; + CallKind::Indirect { + shim: Some(shim_type), + } + } else { + self.classify_call_kind(called_def_id, resolved_fn, &args, span) }; - resolved_fn = Instance::expect_resolve(tcx, param_env, func_t, g); - CallKind::Indirect { once_shim: true } - } else { - self.classify_call_kind(called_def_id, resolved_fn, &args, span) - }; let resolved_def_id = resolved_fn.def_id(); if log_enabled!(Level::Trace) && called_def_id != resolved_def_id { let (called, resolved) = (self.fmt_fn(called_def_id), self.fmt_fn(resolved_def_id)); @@ -444,19 +442,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { return Some(CallHandling::ApproxAsyncSM(handler)); }; - trace!( - " Handling call! with kind {}", - match &call_kind { - CallKind::Direct => "direct", - CallKind::Indirect { once_shim } => - if *once_shim { - "indirect (once shim)" - } else { - "indirect" - }, - CallKind::AsyncPoll { .. } => "async poll", - } - ); + trace!(" Handling call! with kind {}", call_kind); // Recursively generate the PDG for the child function. let cache_key = resolved_fn; @@ -720,7 +706,7 @@ impl<'tcx, 'a> LocalAnalysis<'tcx, 'a> { fn try_indirect_call_kind(&self, def_id: DefId) -> Option> { self.tcx() .is_closure(def_id) - .then_some(CallKind::Indirect { once_shim: false }) + .then_some(CallKind::Indirect { shim: None }) } fn terminator_visitor<'b: 'a>( @@ -815,12 +801,24 @@ pub enum CallKind<'tcx> { Indirect { /// The call takes place via a shim that implements `FnOnce` for a `Fn` /// or `FnMut` closure. - once_shim: bool, + shim: Option, }, /// A poll to an async function, like `f.await`. 
AsyncPoll(AsyncFnPollEnv<'tcx>), } +impl Display for CallKind<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Direct => f.write_str("direct")?, + Self::AsyncPoll(_) => f.write_str("async poll")?, + Self::Indirect { shim: None } => f.write_str("indirect")?, + Self::Indirect { shim: Some(shim) } => write!(f, "({} shim)", shim.as_ref())?, + } + Ok(()) + } +} + #[derive(strum::AsRefStr)] pub(crate) enum CallHandling<'tcx, 'a> { ApproxAsyncFn, diff --git a/crates/flowistry_pdg_construction/src/utils.rs b/crates/flowistry_pdg_construction/src/utils.rs index 3312f3cfe9..38a12bb2c7 100644 --- a/crates/flowistry_pdg_construction/src/utils.rs +++ b/crates/flowistry_pdg_construction/src/utils.rs @@ -13,8 +13,8 @@ use rustc_middle::{ StatementKind, Terminator, TerminatorKind, }, ty::{ - AssocItemContainer, Binder, EarlyBinder, GenericArg, GenericArgsRef, Instance, List, - ParamEnv, Region, Ty, TyCtxt, TyKind, + AssocItemContainer, Binder, EarlyBinder, GenericArg, GenericArgsRef, Instance, InstanceDef, + List, ParamEnv, Region, Ty, TyCtxt, TyKind, }, }; use rustc_span::{ErrorGuaranteed, Span}; @@ -340,3 +340,49 @@ pub fn manufacture_substs_for( }); tcx.mk_args_from_iter(types) } + +#[derive(Clone, Copy, Debug, strum::AsRefStr)] +#[strum(serialize_all = "kebab-case")] +pub enum ShimType { + Once, + FnPtr, +} + +pub fn handle_shims<'tcx>( + resolved_fn: Instance<'tcx>, + tcx: TyCtxt<'tcx>, + param_env: ParamEnv<'tcx>, +) -> Option<(Instance<'tcx>, ShimType)> { + match resolved_fn.def { + InstanceDef::ClosureOnceShim { .. } => { + // Rustc has inserted a call to the shim that maps `Fn` and `FnMut` + // instances to an `FnOnce`. This shim has no body itself so we + // can't just inline, we must explicitly simulate it's effects by + // changing the target function and by setting the calling + // convention to that of a shim. + + // Because this is a well defined internal item we can make + // assumptions about its generic arguments. + let Some((func_a, _rest)) = resolved_fn.args.split_first() else { + unreachable!() + }; + let Some((func_t, g)) = type_as_fn(tcx, func_a.expect_ty()) else { + unreachable!() + }; + let instance = Instance::expect_resolve(tcx, param_env, func_t, g); + Some((instance, ShimType::Once)) + } + InstanceDef::FnPtrShim { .. 
} => { + let Some((func_a, _rest)) = resolved_fn.args.split_first() else { + unreachable!() + }; + let Some((func_t, g)) = type_as_fn(tcx, func_a.expect_ty()) else { + unreachable!() + }; + let instance = Instance::expect_resolve(tcx, param_env, func_t, g); + + Some((instance, ShimType::FnPtr)) + } + _ => None, + } +} diff --git a/crates/paralegal-flow/Cargo.toml b/crates/paralegal-flow/Cargo.toml index 255f6079f9..fbae415946 100644 --- a/crates/paralegal-flow/Cargo.toml +++ b/crates/paralegal-flow/Cargo.toml @@ -46,6 +46,7 @@ anyhow = "1.0.72" thiserror = "1" serde_bare = "0.5.0" toml = "0.7" +serde_json = "1" #dot = "0.1" dot = { git = "https://github.com/JustusAdam/dot-rust", rev = "ff2b42ceda98c639c8ea3cbfc56b83d6e06e8106" } diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 059c80c2f6..b9d706da09 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -11,7 +11,7 @@ use flowistry_pdg_construction::{ body_cache::BodyCache, graph::{DepEdge, DepEdgeKind, DepGraph, DepNode}, is_async_trait_fn, match_async_trait_assign, - utils::{try_monomorphize, try_resolve_function, type_as_fn}, + utils::{handle_shims, try_monomorphize, try_resolve_function, type_as_fn}, }; use paralegal_spdg::{Node, SPDGStats}; @@ -71,11 +71,9 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { target: &'a FnToAnalyze, ) -> Result { let local_def_id = target.def_id; - let start = Instant::now(); - let (dep_graph, stats) = Self::create_flowistry_graph(generator, local_def_id)?; - generator - .stats - .record_timed(TimedStat::Flowistry, start.elapsed()); + let (dep_graph, stats) = generator.stats.measure(TimedStat::Flowistry, || { + Self::create_flowistry_graph(generator, local_def_id) + })?; if generator.opts.dbg().dump_flowistry_pdg() { dep_graph.generate_graphviz(format!( @@ -186,6 +184,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { else { return; }; + debug!("Assigning markers to {:?}", term.kind); let res = self.call_string_resolver.resolve(weight.at); let param_env = self.tcx().param_env(res.def_id()); let func = @@ -193,13 +192,22 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { .unwrap(); let (inst, args) = type_as_fn(self.tcx(), ty_of_const(func.constant().unwrap())).unwrap(); - let f = try_resolve_function( + let mres = try_resolve_function( self.tcx(), inst, self.tcx().param_env(leaf_loc.function), args, ) - .map_or(inst, |i| i.def_id()); + .map(|inst| handle_shims(inst, self.tcx(), param_env).map_or(inst, |t| t.0)); + + if mres.is_none() { + debug!("Could not resolve {inst:?} properly during marker assignment"); + } else { + debug!("Function monomorphized to {:?}", mres.unwrap().def_id()); + } + + let f = mres.map_or(inst, |i| i.def_id()); + self.known_def_ids.extend(Some(f)); // Question: Could a function with no input produce an diff --git a/crates/paralegal-flow/src/ana/inline_judge.rs b/crates/paralegal-flow/src/ana/inline_judge.rs index a6d8bac233..78964b6b96 100644 --- a/crates/paralegal-flow/src/ana/inline_judge.rs +++ b/crates/paralegal-flow/src/ana/inline_judge.rs @@ -1,4 +1,4 @@ -use std::rc::Rc; +use std::{fmt::Display, rc::Rc}; use flowistry_pdg_construction::{body_cache::BodyCache, CallInfo}; use paralegal_spdg::{utils::write_sep, Identifier}; @@ -42,6 +42,16 @@ pub enum InlineJudgement { AbstractViaType(&'static str), } +impl Display for InlineJudgement { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + 
f.write_str(self.as_ref())?; + if let Self::AbstractViaType(reason) = self { + write!(f, "({reason})")?; + } + Ok(()) + } +} + impl<'tcx> InlineJudge<'tcx> { pub fn new(tcx: TyCtxt<'tcx>, body_cache: Rc>, opts: &'static Args) -> Self { let included_crate_names = opts @@ -172,8 +182,8 @@ impl<'tcx> SafetyChecker<'tcx> { fn err(&self, s: &str, span: Span) { let sess = self.tcx.sess; let msg = format!( - "the call is not safe to abstract as demanded by '{}', because of: {s}", - self.reason + "the call to {:?} is not safe to abstract as demanded by '{}', because of: {s}", + self.resolved, self.reason ); if self.emit_err { let mut diagnostic = sess.struct_span_err(span, msg); diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index a34b7ed6c5..56acd979da 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -11,23 +11,28 @@ use crate::{ discover::FnToAnalyze, stats::{Stats, TimedStat}, utils::*, - HashMap, HashSet, LogLevelConfig, MarkerCtx, + DumpStats, HashMap, HashSet, LogLevelConfig, MarkerCtx, INTERMEDIATE_STAT_EXT, }; -use std::{rc::Rc, time::Instant}; +use std::{fs::File, io::BufReader, rc::Rc, time::Instant}; use anyhow::Result; use either::Either; use flowistry::mir::FlowistryInput; use flowistry_pdg_construction::{ - body_cache::BodyCache, calling_convention::CallingConvention, CallChangeCallback, CallChanges, - CallInfo, InlineMissReason, MemoPdgConstructor, SkipCall, + body_cache::{local_or_remote_paths, BodyCache}, + calling_convention::CallingConvention, + utils::is_async, + CallChangeCallback, CallChanges, CallInfo, InlineMissReason, MemoPdgConstructor, SkipCall, }; use inline_judge::InlineJudgement; use itertools::Itertools; use petgraph::visit::GraphBase; -use rustc_hir::{self as hir, def, def_id::DefId}; +use rustc_hir::{ + self as hir, def, + def_id::{DefId, LOCAL_CRATE}, +}; use rustc_middle::{ mir::{Location, Operand}, ty::{Instance, ParamEnv, TyCtxt}, @@ -38,6 +43,7 @@ mod graph_converter; mod inline_judge; use graph_converter::GraphConverter; +use std::time::Duration; pub use self::inline_judge::InlineJudge; @@ -105,7 +111,10 @@ impl<'tcx> SPDGGenerator<'tcx> { /// other setup necessary for the flow graph creation. /// /// Should only be called after the visit. 
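The timing changes in `graph_converter.rs` above rely on the new `Stats::measure` helper (defined in `stats.rs` further down): time a closure, record the elapsed duration under a label, and pass the closure's result through. A stripped-down model of that helper, with a string label standing in for `TimedStat`, purely for illustration:

```rust
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};

/// Simplified stand-in for the shared `Stats` handle: a map from a label to
/// accumulated wall-clock time.
#[derive(Clone, Default)]
struct StatsModel(Arc<Mutex<HashMap<&'static str, Duration>>>);

impl StatsModel {
    /// Time a closure, record the elapsed duration under `stat` and pass the
    /// closure's result through unchanged.
    fn measure<R>(&self, stat: &'static str, target: impl FnOnce() -> R) -> R {
        let start = Instant::now();
        let r = target();
        *self.0.lock().unwrap().entry(stat).or_default() += start.elapsed();
        r
    }
}

// Mirrors the call shape used in graph_converter.rs, where the fallible graph
// construction is wrapped and the `Result` is passed straight through.
fn build_graph(stats: &StatsModel) -> Result<u64, String> {
    stats.measure("flowistry", || Ok(21 * 2))
}
```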
- pub fn analyze(&mut self, targets: Vec) -> Result { + pub fn analyze( + &mut self, + targets: Vec, + ) -> Result<(ProgramDescription, AnalyzerStats)> { if let LogLevelConfig::Targeted(s) = self.opts.direct_debug() { assert!( targets.iter().any(|target| target.name().as_str() == s), @@ -150,17 +159,17 @@ impl<'tcx> SPDGGenerator<'tcx> { controllers: HashMap, mut known_def_ids: HashSet, _targets: &[FnToAnalyze], - ) -> ProgramDescription { + ) -> (ProgramDescription, AnalyzerStats) { let tcx = self.tcx; let instruction_info = self.collect_instruction_info(&controllers); - let inlined_functions = instruction_info + let called_functions = instruction_info .keys() .map(|l| l.function) .collect::>(); - known_def_ids.extend(&inlined_functions); + known_def_ids.extend(&called_functions); let type_info = self.collect_type_info(); known_def_ids.extend(type_info.keys()); @@ -169,29 +178,140 @@ impl<'tcx> SPDGGenerator<'tcx> { .map(|id| (*id, def_info_for_item(*id, self.marker_ctx(), tcx))) .collect(); - let dedup_locs = 0; - let dedup_functions = 0; - let seen_locs = 0; - let seen_functions = 0; - type_info_sanity_check(&controllers, &type_info); - ProgramDescription { - type_info, - instruction_info, - controllers, - def_info, - marker_annotation_count: self - .marker_ctx() + + let (stats, analyzed_spans) = self.collect_stats_and_analyzed_spans(); + + ( + ProgramDescription { + type_info, + instruction_info, + controllers, + def_info, + analyzed_spans, + }, + stats, + ) + } + + fn collect_stats_and_analyzed_spans(&self) -> (AnalyzerStats, AnalyzedSpans) { + let tcx = self.tcx; + + // In this we don't have to filter out async functions. They are already + // not in it. For the top-level (target) an async function immediately + // redirects to its closure and never inserts the parent DefId into the + // map. For called async functions we skip inlining and therefore also + // do not insert the DefId into the map. + let inlined_functions = self + .pdg_constructor + .pdg_cache + .borrow() + .keys() + .map(|k| k.def.def_id()) + .collect::>(); + + let mut seen_locs = 0; + let mut seen_functions = 0; + let mut pdg_locs = 0; + let mut pdg_functions = 0; + + let mctx = self.marker_ctx(); + + let analyzed_functions = mctx + .functions_seen() + .into_iter() + .map(|f| f.def_id()) + // Async functions always show up twice, once as the function + // itself, once as the generator. Here we filter out one of those + // (the function) + .filter(|d| !is_async(tcx, *d)) + .filter(|f| !mctx.is_marked(f)) + // It's annoying I have to do this merge here, but what the marker + // context sees doesn't contain the targets and not just that but + // also if those targets are async, then their closures are also not + // contained and lastly if we're not doing adaptive depth then we + // need to use the inlined functions anyway so this is just easier. + .chain(inlined_functions.iter().copied()) + .collect::>(); + + let analyzed_spans = analyzed_functions + .into_iter() + .map(|f| { + let body = self.pdg_constructor.body_for_def_id(f); + let span = body_span(tcx, body.body()); + let pspan = src_loc_for_span(span, tcx); + assert!( + pspan.start.line <= pspan.end.line, + "Weird span for {f:?}: {pspan:?}. 
It was created from {span:?}" + ); + let l = pspan.line_len(); + assert!(l < 5000, "Span for {f:?} is {l} lines long ({span:?})"); + + seen_locs += pspan.line_len(); + seen_functions += 1; + + let handling = if inlined_functions.contains(&f) { + pdg_locs += pspan.line_len(); + pdg_functions += 1; + FunctionHandling::PDG + } else { + FunctionHandling::Elided + }; + + (f, (pspan, handling)) + }) + .collect::(); + + let prior_stats = self.get_prior_stats(); + + // A few notes on these stats. We calculate most of them here, because + // this is where we easily have access to the information. For example + // the included crates, the paths where the intermediate stats are + // located and the marker annotations. + // + // However some pieces of information we don't have and that is how long + // *this* run of the analyzer took in total and how long serialziation + // took, but we do want to add that to the stats. As a workaround we set + // "self_time" and "serialization_time" to 0 and "total_time" to the + // accumulated intermediate analyis time. Then, after the serialization + // and whatever teardown rustc wants to do is finished we set + // "self_time" and increment "total_time". See lib.rs for that. + let stats = AnalyzerStats { + marker_annotation_count: mctx .all_annotations() .filter_map(|m| m.1.either(Annotation::as_marker, Some)) .count() as u32, rustc_time: self.stats.get_timed(TimedStat::Rustc), - dedup_locs, - dedup_functions, + pdg_functions, + pdg_locs, seen_functions, seen_locs, - analyzed_spans: Default::default(), - } + self_time: Duration::ZERO, + dep_time: prior_stats.total_time, + tycheck_time: prior_stats.tycheck_time + self.pdg_constructor.body_cache().timer(), + dump_time: prior_stats.dump_time, + serialization_time: Duration::ZERO, + }; + + (stats, analyzed_spans) + } + + fn get_prior_stats(&self) -> DumpStats { + self.judge + .included_crates() + .iter() + .copied() + .filter(|c| *c != LOCAL_CRATE) + .map(|c| { + let paths = local_or_remote_paths(c, self.tcx, INTERMEDIATE_STAT_EXT); + + let path = paths.iter().find(|p| p.exists()).unwrap_or_else(|| { + panic!("No stats path found for included crate {c:?}, searched {paths:?}") + }); + let rdr = BufReader::new(File::open(path).unwrap()); + serde_json::from_reader(rdr).unwrap() + }) + .fold(DumpStats::zero(), |s, o| s.add(&o)) } /// Create an [`InstructionInfo`] record for each [`GlobalLocation`] @@ -267,7 +387,7 @@ impl<'tcx> SPDGGenerator<'tcx> { .filter(|(id, _)| def_kind_for_item(*id, self.tcx).is_type()) .into_grouping_map() .fold_with( - |id, _| (format!("{id:?}"), vec![], vec![]), + |id, _| (*id, vec![], vec![]), |mut desc, _, ann| { match ann { Either::Right(MarkerAnnotation { refinement, marker }) @@ -275,7 +395,7 @@ impl<'tcx> SPDGGenerator<'tcx> { refinement, marker, })) => { - assert!(refinement.on_self()); + assert!(refinement.on_self(), "Cannot refine a marker on a type (tried assigning refinement {refinement} to {:?})", desc.0); desc.2.push(*marker) } Either::Left(Annotation::OType(id)) => desc.1.push(*id), @@ -285,11 +405,11 @@ impl<'tcx> SPDGGenerator<'tcx> { }, ) .into_iter() - .map(|(k, (rendering, otypes, markers))| { + .map(|(k, (_, otypes, markers))| { ( k, TypeDescription { - rendering, + rendering: format!("{k:?}"), otypes: otypes.into(), markers, }, @@ -513,7 +633,7 @@ impl Stub { } }; CallingConvention::Indirect { - once_shim: false, + shim: None, closure_arg: clj.clone(), // This is incorrect, but we only support // non-argument closures at the moment so this @@ -531,7 +651,11 @@ impl<'tcx> 
CallChangeCallback<'tcx> for MyCallback<'tcx> { fn on_inline(&self, info: CallInfo<'tcx, '_>) -> CallChanges<'tcx> { let changes = CallChanges::default(); - let skip = match self.judge.should_inline(&info) { + let judgement = self.judge.should_inline(&info); + + debug!("Judgement for {:?}: {judgement}", info.callee.def_id(),); + + let skip = match judgement { InlineJudgement::AbstractViaType(_) => SkipCall::Skip, InlineJudgement::UseStub(model) => { if let Ok((instance, calling_convention)) = model.apply( diff --git a/crates/paralegal-flow/src/ann/db.rs b/crates/paralegal-flow/src/ann/db.rs index 440f7aff70..a31c3553c3 100644 --- a/crates/paralegal-flow/src/ann/db.rs +++ b/crates/paralegal-flow/src/ann/db.rs @@ -24,7 +24,7 @@ use flowistry_pdg_construction::{ body_cache::{local_or_remote_paths, BodyCache}, determine_async, encoder::ParalegalDecoder, - utils::{is_virtual, try_monomorphize, try_resolve_function}, + utils::{handle_shims, is_virtual, try_monomorphize, try_resolve_function}, }; use paralegal_spdg::Identifier; @@ -43,7 +43,7 @@ use rustc_utils::cache::Cache; use std::{borrow::Cow, fs::File, io::Read, rc::Rc}; -use super::{MarkerMeta, MARKER_META_EXT}; +use super::{MarkerMeta, MarkerRefinement, MARKER_META_EXT}; type ExternalMarkers = HashMap>; @@ -241,6 +241,11 @@ impl<'tcx> MarkerCtx<'tcx> { return self.get_reachable_markers(async_fn).into(); } let expect_resolve = res.is_monomorphized(); + let variable_markers = mono_body + .local_decls + .iter() + .flat_map(|v| self.deep_type_markers(v.ty)) + .map(|(_, m)| *m); mono_body .basic_blocks .iter() @@ -251,6 +256,7 @@ impl<'tcx> MarkerCtx<'tcx> { expect_resolve, ) }) + .chain(variable_markers) .collect::>() .into_iter() .collect() @@ -289,7 +295,10 @@ impl<'tcx> MarkerCtx<'tcx> { ); return v.into_iter(); }; - MaybeMonomorphized::Monomorphized(instance) + MaybeMonomorphized::Monomorphized( + handle_shims(instance, self.tcx(), param_env) + .map_or(instance, |(shimmed, _)| shimmed), + ) } else { MaybeMonomorphized::Plain(def_id) }; @@ -639,11 +648,59 @@ fn load_annotations( .collect() } -type RawExternalMarkers = HashMap>; +#[derive(serde::Deserialize)] +struct ExternalAnnotationEntry { + marker: Option, + #[serde(default)] + markers: Vec, + #[serde(flatten)] + refinement: MarkerRefinement, + #[serde(default)] + refinements: Vec, +} + +impl ExternalAnnotationEntry { + fn flatten(&self) -> impl Iterator + '_ { + let refinement_iter = self + .refinements + .iter() + .chain(self.refinements.is_empty().then_some(&self.refinement)); + self.marker + .into_iter() + .chain(self.markers.iter().copied()) + .flat_map(move |marker| { + refinement_iter + .clone() + .map(move |refinement| MarkerAnnotation { + marker, + refinement: refinement.clone(), + }) + }) + } + + fn check_integrity(&self, tcx: TyCtxt, element: DefId) { + let merror = if self.marker.is_none() && self.markers.is_empty() { + Some("neither") + } else if self.marker.is_some() && !self.markers.is_empty() { + Some("both") + } else { + None + }; + if let Some(complaint) = merror { + tcx.sess.err(format!("External marker annotation should specify either a 'marker' or a 'markers' field, found {complaint} for {}", tcx.def_path_str(element))); + } + if !self.refinement.on_self() && !self.refinements.is_empty() { + tcx.sess.err(format!("External marker annotation should specify either a single refinement or the 'refinements' field, found both for {}", tcx.def_path_str(element))); + } + } +} + +type RawExternalMarkers = HashMap>; /// Given the TOML of external annotations we have parsed, 
resolve the paths /// (keys of the map) to [`DefId`]s. fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { + let relaxed = opts.relaxed(); if let Some(annotation_file) = opts.marker_control().external_annotations() { let from_toml: RawExternalMarkers = toml::from_str( &std::fs::read_to_string(annotation_file).unwrap_or_else(|_| { @@ -659,11 +716,16 @@ fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { .unwrap(); let new_map: ExternalMarkers = from_toml .iter() - .filter_map(|(path, marker)| { - Some(( - expect_resolve_string_to_def_id(tcx, path, opts.relaxed())?, - marker.clone(), - )) + .filter_map(|(path, entries)| { + let def_id = expect_resolve_string_to_def_id(tcx, path, relaxed)?; + let markers = entries + .iter() + .flat_map(|entry| { + entry.check_integrity(tcx, def_id); + entry.flatten() + }) + .collect(); + Some((def_id, markers)) }) .collect(); new_map diff --git a/crates/paralegal-flow/src/ann/mod.rs b/crates/paralegal-flow/src/ann/mod.rs index 0cc87f3288..fbb7013ee1 100644 --- a/crates/paralegal-flow/src/ann/mod.rs +++ b/crates/paralegal-flow/src/ann/mod.rs @@ -1,3 +1,5 @@ +use std::fmt::{Display, Write}; + use either::Either; use flowistry_pdg_construction::{body_cache::intermediate_out_dir, encoder::ParalegalEncoder}; use rustc_ast::Attribute; @@ -11,7 +13,9 @@ use rustc_middle::{hir::map::Map, ty::TyCtxt}; use rustc_serialize::Encodable; use serde::{Deserialize, Serialize}; -use paralegal_spdg::{rustc_proxies, tiny_bitset_pretty, Identifier, TinyBitSet, TypeId}; +use paralegal_spdg::{ + rustc_proxies, tiny_bitset_pretty, utils::write_sep, Identifier, TinyBitSet, TypeId, +}; pub mod db; pub mod parse; @@ -110,6 +114,28 @@ pub struct MarkerRefinement { on_return: bool, } +impl Display for MarkerRefinement { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if !self.on_argument.is_empty() { + f.write_str("argument = [")?; + write_sep( + f, + ", ", + self.on_argument.into_iter_set_in_domain(), + |elem, f| elem.fmt(f), + )?; + f.write_char(']')?; + if self.on_return { + f.write_str(" + ")?; + } + } + if self.on_return { + f.write_str("return")?; + } + Ok(()) + } +} + /// Disaggregated version of [`MarkerRefinement`]. Can be added to an existing /// refinement [`MarkerRefinement::merge_kind`]. #[derive(Clone, Deserialize, Serialize)] diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index 72e121de31..c63616de9b 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -114,7 +114,7 @@ impl<'tcx> CollectingVisitor<'tcx> { /// Driver function. Performs the data collection via visit, then calls /// [`Self::analyze`] to construct the Forge friendly description of all /// endpoints. 
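The new `ExternalAnnotationEntry::flatten` above expands one external-annotation entry, which may carry either a single `marker` or a `markers` list and either one inline refinement or a `refinements` list, into the cross product of individual annotations. A minimal model of that expansion with simplified field types (the names and types here are illustrative, not the actual serde format):

```rust
/// Simplified stand-ins for `Identifier` and `MarkerRefinement`.
#[derive(Clone, Debug)]
struct Refinement(&'static str);

#[derive(Debug)]
struct AnnotationModel {
    marker: &'static str,
    refinement: Refinement,
}

struct EntryModel {
    marker: Option<&'static str>,
    markers: Vec<&'static str>,
    /// The inline (flattened) refinement; used when `refinements` is empty.
    refinement: Refinement,
    refinements: Vec<Refinement>,
}

impl EntryModel {
    fn flatten(&self) -> Vec<AnnotationModel> {
        let refinements: Vec<&Refinement> = if self.refinements.is_empty() {
            vec![&self.refinement]
        } else {
            self.refinements.iter().collect()
        };
        // Every marker is combined with every refinement.
        self.marker
            .iter()
            .copied()
            .chain(self.markers.iter().copied())
            .flat_map(|marker| {
                refinements.iter().copied().map(move |refinement| AnnotationModel {
                    marker,
                    refinement: refinement.clone(),
                })
            })
            .collect()
    }
}

fn main() {
    let entry = EntryModel {
        marker: None,
        markers: vec!["sensitive", "stored"],
        refinement: Refinement("on self"),
        refinements: vec![Refinement("argument 0"), Refinement("return")],
    };
    // Two markers x two refinements = four individual annotations.
    assert_eq!(entry.flatten().len(), 4);
}
```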
- pub fn run(mut self) -> Result { + pub fn run(mut self) -> Result<(ProgramDescription, AnalyzerStats)> { let tcx = self.tcx; tcx.hir().visit_all_item_likes_in_crate(&mut self); let targets = std::mem::take(&mut self.functions_to_analyze); diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 498c238dc8..44203d876c 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -47,15 +47,22 @@ extern crate rustc_type_ir; use ann::dump_markers; use args::{ClapArgs, Debugger, LogLevelConfig}; -use desc::{utils::write_sep, ProgramDescription}; +use desc::utils::write_sep; -use flowistry_pdg_construction::body_cache::dump_mir_and_borrowck_facts; +use flowistry_pdg_construction::body_cache::{dump_mir_and_borrowck_facts, intermediate_out_dir}; use log::Level; +use paralegal_spdg::{AnalyzerStats, STAT_FILE_EXT}; use rustc_middle::ty::TyCtxt; use rustc_plugin::CrateFilter; pub use std::collections::{HashMap, HashSet}; -use std::{fmt::Display, time::Instant}; +use std::{ + fmt::Display, + fs::File, + io::BufWriter, + path::PathBuf, + time::{Duration, Instant}, +}; // This import is sort of special because it comes from the private rustc // dependencies and not from our `Cargo.toml`. @@ -117,58 +124,105 @@ struct ArgWrapper { args: ClapArgs, } -struct Callbacks { +struct Callbacks<'a> { opts: &'static Args, - start: Instant, stats: Stats, + rustc_second_timer: Option, + stat_ref: &'a mut Option, + output_location: &'a mut Option, + time: &'a mut DumpStats, } struct NoopCallbacks; impl rustc_driver::Callbacks for NoopCallbacks {} -impl Callbacks { - pub fn run(&self, tcx: TyCtxt) -> anyhow::Result { - tcx.sess.abort_if_errors(); - discover::CollectingVisitor::new(tcx, self.opts, self.stats.clone()).run() - } - - pub fn new(opts: &'static Args) -> Self { +impl<'a> Callbacks<'a> { + pub fn new( + opts: &'static Args, + stat_ref: &'a mut Option, + output_location: &'a mut Option, + time: &'a mut DumpStats, + ) -> Self { Self { opts, stats: Default::default(), - start: Instant::now(), + rustc_second_timer: None, + stat_ref, + output_location, + time, + } + } +} + +#[derive(serde::Deserialize, serde::Serialize)] +struct DumpStats { + dump_time: Duration, + total_time: Duration, + tycheck_time: Duration, +} + +impl DumpStats { + fn zero() -> Self { + Self { + dump_time: Duration::ZERO, + total_time: Duration::ZERO, + tycheck_time: Duration::ZERO, + } + } + + fn add(&self, other: &Self) -> Self { + Self { + total_time: self.total_time + other.total_time, + dump_time: self.dump_time + other.dump_time, + tycheck_time: self.tycheck_time + other.tycheck_time, } } } -struct DumpOnlyCallbacks; +struct DumpOnlyCallbacks<'a> { + time: &'a mut DumpStats, + output_location: &'a mut Option, +} + +const INTERMEDIATE_STAT_EXT: &str = "stats.json"; -impl rustc_driver::Callbacks for DumpOnlyCallbacks { +fn dump_mir_and_update_stats(tcx: TyCtxt, timer: &mut DumpStats) { + let (tycheck_time, dump_time) = dump_mir_and_borrowck_facts(tcx); + let dump_marker_start = Instant::now(); + dump_markers(tcx); + timer.dump_time = dump_marker_start.elapsed() + dump_time; + timer.tycheck_time = tycheck_time; +} + +impl<'a> rustc_driver::Callbacks for DumpOnlyCallbacks<'a> { fn after_expansion<'tcx>( &mut self, _compiler: &rustc_interface::interface::Compiler, queries: &'tcx rustc_interface::Queries<'tcx>, ) -> rustc_driver::Compilation { queries.global_ctxt().unwrap().enter(|tcx| { - dump_mir_and_borrowck_facts(tcx); - dump_markers(tcx); + dump_mir_and_update_stats(tcx, 
self.time); + assert!(self + .output_location + .replace(intermediate_out_dir(tcx, INTERMEDIATE_STAT_EXT)) + .is_none()); }); rustc_driver::Compilation::Continue } } -impl rustc_driver::Callbacks for Callbacks { +impl<'a> rustc_driver::Callbacks for Callbacks<'a> { fn after_expansion<'tcx>( &mut self, _compiler: &rustc_interface::interface::Compiler, queries: &'tcx rustc_interface::Queries<'tcx>, ) -> rustc_driver::Compilation { - queries.global_ctxt().unwrap().enter(|tcx| { - dump_mir_and_borrowck_facts(tcx); - dump_markers(tcx); - }); - rustc_driver::Compilation::Continue + self.stats + .record_timed(TimedStat::Rustc, self.stats.elapsed()); + let compilation = self.run_the_analyzer(queries); + self.rustc_second_timer = Some(Instant::now()); + compilation } // This used to run `after_parsing` but that now makes `tcx.crates()` empty @@ -180,15 +234,27 @@ impl rustc_driver::Callbacks for Callbacks { &mut self, _handler: &rustc_session::EarlyErrorHandler, _compiler: &rustc_interface::interface::Compiler, - queries: &'tcx rustc_interface::Queries<'tcx>, + _queries: &'tcx rustc_interface::Queries<'tcx>, ) -> rustc_driver::Compilation { self.stats - .record_timed(TimedStat::Rustc, self.start.elapsed()); - queries + .record_timed(TimedStat::Rustc, self.rustc_second_timer.unwrap().elapsed()); + rustc_driver::Compilation::Continue + } +} + +impl<'a> Callbacks<'a> { + fn run_the_analyzer<'tcx>( + &mut self, + queries: &'tcx rustc_interface::Queries<'tcx>, + ) -> rustc_driver::Compilation { + let abort = queries .global_ctxt() .unwrap() .enter(|tcx| { - let desc = self.run(tcx)?; + dump_markers(tcx); + tcx.sess.abort_if_errors(); + let (desc, mut stats) = + discover::CollectingVisitor::new(tcx, self.opts, self.stats.clone()).run()?; info!("All elems walked"); tcx.sess.abort_if_errors(); @@ -197,21 +263,34 @@ impl rustc_driver::Callbacks for Callbacks { paralegal_spdg::dot::dump(&desc, out).unwrap(); } - let ser = Instant::now(); - desc.canonical_write(self.opts.result_path()).unwrap(); - self.stats - .record_timed(TimedStat::Serialization, ser.elapsed()); + self.stats.measure(TimedStat::Serialization, || { + desc.canonical_write(self.opts.result_path()).unwrap() + }); + + stats.serialization_time = self.stats.get_timed(TimedStat::Serialization); println!("Analysis finished with timing: {}", self.stats); - anyhow::Ok(if self.opts.abort_after_analysis() { - debug!("Aborting"); - rustc_driver::Compilation::Stop - } else { - rustc_driver::Compilation::Continue - }) + assert!(self.stat_ref.replace(stats).is_none()); + + assert!(self + .output_location + .replace(intermediate_out_dir(tcx, INTERMEDIATE_STAT_EXT)) + .is_none()); + anyhow::Ok(self.opts.abort_after_analysis()) }) - .unwrap() + .unwrap(); + + if abort { + rustc_driver::Compilation::Stop + } else { + queries.global_ctxt().unwrap().enter(|tcx| { + self.stats.measure(TimedStat::MirEmission, || { + dump_mir_and_update_stats(tcx, self.time); + }) + }); + rustc_driver::Compilation::Continue + } } } @@ -238,14 +317,22 @@ fn add_to_rustflags(new: impl IntoIterator) -> Result<(), std::en Ok(()) } +#[derive(strum::AsRefStr, PartialEq, Eq)] enum CrateHandling { JustCompile, CompileAndDump, Analyze, } +struct CrateInfo { + name: Option, + handling: CrateHandling, + #[allow(dead_code)] + is_build_script: bool, +} + /// Also adds and additional features required by the Paralegal build config -fn how_to_handle_this_crate(plugin_args: &Args, compiler_args: &mut Vec) -> CrateHandling { +fn how_to_handle_this_crate(plugin_args: &Args, compiler_args: &mut Vec) 
-> CrateInfo { let crate_name = compiler_args .iter() .enumerate() @@ -265,8 +352,12 @@ fn how_to_handle_this_crate(plugin_args: &Args, compiler_args: &mut Vec) ); } - match &crate_name { - Some(krate) if krate == "build_script_build" => CrateHandling::JustCompile, + let is_build_script = matches!( + &crate_name, + Some(krate) if krate == "build_script_build" + ); + let handling = match &crate_name { + _ if is_build_script => CrateHandling::JustCompile, Some(krate) if matches!( plugin_args @@ -281,6 +372,12 @@ fn how_to_handle_this_crate(plugin_args: &Args, compiler_args: &mut Vec) CrateHandling::CompileAndDump } _ => CrateHandling::JustCompile, + }; + + CrateInfo { + name: crate_name, + handling, + is_build_script, } } @@ -385,48 +482,91 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { // `log::set_max_level`. //println!("compiling {compiler_args:?}"); - let mut callbacks = match how_to_handle_this_crate(&plugin_args, &mut compiler_args) { - CrateHandling::JustCompile => { - Box::new(NoopCallbacks) as Box - } - CrateHandling::CompileAndDump => { - compiler_args.extend(EXTRA_RUSTC_ARGS.iter().copied().map(ToString::to_string)); - Box::new(DumpOnlyCallbacks) - } - CrateHandling::Analyze => { - plugin_args.setup_logging(); - - let opts = Box::leak(Box::new(plugin_args)); - - const RERUN_VAR: &str = "RERUN_WITH_PROFILER"; - if let Ok(debugger) = std::env::var(RERUN_VAR) { - info!("Restarting with debugger '{debugger}'"); - let mut dsplit = debugger.split(' '); - let mut cmd = std::process::Command::new(dsplit.next().unwrap()); - cmd.args(dsplit) - .args(std::env::args()) - .env_remove(RERUN_VAR); - std::process::exit(cmd.status().unwrap().code().unwrap_or(0)); + let mut output_path_location = None; + let mut dump_stats = DumpStats::zero(); + let start = Instant::now(); + let mut stat_ref = None; + let out_path = plugin_args.result_path().to_owned(); + + let info = how_to_handle_this_crate(&plugin_args, &mut compiler_args); + println!( + "Handling crate {} as {}", + info.name.as_ref().map_or("unnamed", String::as_str), + info.handling.as_ref() + ); + let result = { + let mut callbacks = match info.handling { + CrateHandling::JustCompile => { + Box::new(NoopCallbacks) as Box } - - compiler_args.extend(EXTRA_RUSTC_ARGS.iter().copied().map(ToString::to_string)); - if opts.verbosity() >= Level::Debug { - compiler_args.push("-Ztrack-diagnostics".to_string()); + CrateHandling::CompileAndDump => { + compiler_args.extend(EXTRA_RUSTC_ARGS.iter().copied().map(ToString::to_string)); + Box::new(DumpOnlyCallbacks { + time: &mut dump_stats, + output_location: &mut output_path_location, + }) } - - if let Some(dbg) = opts.attach_to_debugger() { - dbg.attach() + CrateHandling::Analyze => { + plugin_args.setup_logging(); + + let opts = Box::leak(Box::new(plugin_args)); + + const RERUN_VAR: &str = "RERUN_WITH_PROFILER"; + if let Ok(debugger) = std::env::var(RERUN_VAR) { + info!("Restarting with debugger '{debugger}'"); + let mut dsplit = debugger.split(' '); + let mut cmd = std::process::Command::new(dsplit.next().unwrap()); + cmd.args(dsplit) + .args(std::env::args()) + .env_remove(RERUN_VAR); + std::process::exit(cmd.status().unwrap().code().unwrap_or(0)); + } + + compiler_args.extend(EXTRA_RUSTC_ARGS.iter().copied().map(ToString::to_string)); + if opts.verbosity() >= Level::Debug { + compiler_args.push("-Ztrack-diagnostics".to_string()); + } + + if let Some(dbg) = opts.attach_to_debugger() { + dbg.attach() + } + + debug!( + "Arguments: {}", + Print(|f| write_sep(f, " ", &compiler_args, Display::fmt)) + ); + 
Box::new(Callbacks::new( + opts, + &mut stat_ref, + &mut output_path_location, + &mut dump_stats, + )) } + }; - debug!( - "Arguments: {}", - Print(|f| write_sep(f, " ", &compiler_args, Display::fmt)) - ); - Box::new(Callbacks::new(opts)) - } + rustc_driver::RunCompiler::new(&compiler_args, callbacks.as_mut()).run() }; - - rustc_driver::RunCompiler::new(&compiler_args, callbacks.as_mut()).run() + if info.handling != CrateHandling::JustCompile { + let filepath = output_path_location + .take() + .expect("Output path should be set"); + dump_stats.total_time = start.elapsed(); + println!( + "Writing statistics for dependency to {}", + filepath.display() + ); + let out = BufWriter::new(File::create(filepath).unwrap()); + serde_json::to_writer(out, &dump_stats).unwrap(); + } + if info.handling == CrateHandling::Analyze { + let out = BufWriter::new(File::create(out_path.with_extension(STAT_FILE_EXT)).unwrap()); + let mut stat = stat_ref.take().expect("stats must have been set"); + let self_time = start.elapsed(); + // See ana/mod.rs for explanations as to these adjustments. + stat.self_time = self_time; + serde_json::to_writer(out, &stat).unwrap(); + } + result } } @@ -434,7 +574,6 @@ impl Debugger { fn attach(self) { use std::process::{id, Command}; use std::thread::sleep; - use std::time::Duration; match self { Debugger::CodeLldb => { diff --git a/crates/paralegal-flow/src/stats.rs b/crates/paralegal-flow/src/stats.rs index d88816bc74..a7ab4cd73b 100644 --- a/crates/paralegal-flow/src/stats.rs +++ b/crates/paralegal-flow/src/stats.rs @@ -1,7 +1,7 @@ use std::{ fmt::Display, sync::{Arc, Mutex}, - time::Duration, + time::{Duration, Instant}, }; use paralegal_spdg::utils::TruncatedHumanTime; @@ -20,11 +20,22 @@ pub enum TimedStat { Conversion, /// How long it took to serialize the SPDG Serialization, + /// How long it took to collect and dump the MIR + MirEmission, } -#[derive(Default)] struct StatsInner { timed: enum_map::EnumMap>, + started: Instant, +} + +impl Default for StatsInner { + fn default() -> Self { + Self { + started: std::time::Instant::now(), + timed: Default::default(), + } + } } impl StatsInner { @@ -48,6 +59,17 @@ impl Stats { pub fn get_timed(&self, stat: TimedStat) -> Duration { self.0.lock().unwrap().timed[stat].unwrap_or(Duration::ZERO) } + + pub fn elapsed(&self) -> Duration { + self.0.lock().unwrap().started.elapsed() + } + + pub fn measure(&self, stat: TimedStat, target: impl FnOnce() -> R) -> R { + let start = Instant::now(); + let r = target(); + self.record_timed(stat, start.elapsed()); + r + } } impl Default for Stats { @@ -64,6 +86,11 @@ impl Display for Stats { write!(f, "{}: {} ", s.as_ref(), TruncatedHumanTime::from(dur))?; } } + write!( + f, + "current runtime: {}", + TruncatedHumanTime::from(borrow.started.elapsed()) + )?; Ok(()) } } diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index 91ce1b4e57..b512f6d7e4 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -4,13 +4,14 @@ extern crate rustc_hir as hir; extern crate rustc_middle; extern crate rustc_span; -use flowistry_pdg_construction::body_cache::dump_mir_and_borrowck_facts; use hir::def_id::DefId; use rustc_interface::interface; use crate::{ ann::dump_markers, desc::{Identifier, ProgramDescription}, + discover, + stats::Stats, utils::Print, HashSet, EXTRA_RUSTC_ARGS, }; @@ -22,7 +23,7 @@ use std::{ }; use paralegal_spdg::{ - traverse::{generic_flows_to, EdgeSelection}, + traverse::{generic_flows_to, 
generic_influencers, EdgeSelection}, utils::write_sep, DefInfo, EdgeInfo, Endpoint, Node, TypeId, SPDG, }; @@ -32,7 +33,6 @@ use itertools::Itertools; use petgraph::visit::{Control, Data, DfsEvent, EdgeRef, FilterEdge, GraphBase, IntoEdges}; use petgraph::visit::{IntoNeighbors, IntoNodeReferences}; use petgraph::visit::{NodeRef as _, Visitable}; -use petgraph::Direction; use std::path::Path; lazy_static! { @@ -189,6 +189,7 @@ macro_rules! define_flow_test_template { pub struct InlineTestBuilder { ctrl_name: String, input: String, + extra_args: Vec, } impl InlineTestBuilder { @@ -205,6 +206,7 @@ impl InlineTestBuilder { Self { input: input.into(), ctrl_name: "crate::main".into(), + extra_args: Default::default(), } } @@ -215,6 +217,11 @@ impl InlineTestBuilder { self } + pub fn with_extra_args(&mut self, args: impl IntoIterator) -> &mut Self { + self.extra_args.extend(args); + self + } + pub fn expect_fail_compile(&self) { let reached = Once::new(); let res = self.run(|_| reached.call_once(|| ())); @@ -241,11 +248,12 @@ impl InlineTestBuilder { args: crate::ClapArgs, } - let args = crate::Args::try_from( - TopLevelArgs::parse_from(["".into(), "--analyze".into(), self.ctrl_name.to_string()]) - .args, - ) - .unwrap(); + let args = ["".into(), "--analyze".into(), self.ctrl_name.to_string()] + .into_iter() + .chain(self.extra_args.iter().cloned()) + .collect::>(); + + let args = crate::Args::try_from(TopLevelArgs::parse_from(args).args).unwrap(); args.setup_logging(); @@ -253,11 +261,11 @@ impl InlineTestBuilder { .with_args(EXTRA_RUSTC_ARGS.iter().copied().map(ToOwned::to_owned)) .with_query_override(None) .compile(move |result| { - dump_mir_and_borrowck_facts(result.tcx); + let args: &'static _ = Box::leak(Box::new(args)); dump_markers(result.tcx); let tcx = result.tcx; - let memo = crate::Callbacks::new(Box::leak(Box::new(args))); - let pdg = memo.run(tcx).unwrap(); + let memo = discover::CollectingVisitor::new(tcx, args, Stats::default()); + let (pdg, _) = memo.run().unwrap(); let graph = PreFrg::from_description(pdg); f(graph) }) @@ -733,7 +741,7 @@ pub trait FlowsTo { /// All edges are data, except the last one. This is meant to convey /// a "direct" control flow influence. fn influences_ctrl(&self, other: &impl FlowsTo) -> bool { - influences_ctrl_impl(self, other, EdgeSelection::Both) + influences_ctrl_impl(self, other, EdgeSelection::Data) } /// A special case of a path between `self` and `other`. @@ -793,23 +801,19 @@ fn influences_ctrl_impl( return false; } - let nodes = other - .nodes() - .iter() - .flat_map(|n| { - slf.spdg() - .graph - .edges_directed(*n, Direction::Incoming) - .filter(|e| e.weight().kind.is_control()) - .map(|e| e.source()) - }) - .collect::>(); + let ctrl_influencing = generic_influencers( + &slf.spdg().graph, + other.nodes().iter().copied(), + EdgeSelection::Control, + ); + generic_flows_to( slf.nodes().iter().copied(), edge_selection, slf.spdg(), - nodes, + dbg!(ctrl_influencing), ) + .is_some() } fn is_neighbor_impl( @@ -853,6 +857,7 @@ fn flows_to_impl( slf.spdg(), other.nodes().iter().copied(), ) + .is_some() } fn always_happens_before_impl( diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index 9e41a7d410..cf38d73e09 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -36,22 +36,51 @@ pub use print::*; /// sadly, the `Span` value attached to a body directly refers only to the /// `#[tracing::instrument]` macro call. 
This function instead reconstitutes the
 /// span from the collection of spans on each statement.
-pub fn body_span(body: &mir::Body<'_>) -> RustSpan {
-    let combined = body
-        .basic_blocks
-        .iter()
-        .flat_map(|bbdat| {
-            bbdat
-                .statements
-                .iter()
-                .map(|s| s.source_info.span.source_callsite())
-                .chain([bbdat.terminator().source_info.span])
+pub fn body_span<'tcx>(tcx: TyCtxt<'tcx>, body: &mir::Body<'tcx>) -> RustSpan {
+    let source_map = tcx.sess.source_map();
+    let body_span = body.span;
+
+    // Get an iterator over the (meaningful) spans in this body. We will use
+    // this twice, hence this lambda.
+    let mk_span_iter = || {
+        body.basic_blocks
+            .iter()
+            .flat_map(|bbdat| {
+                bbdat
+                    .statements
+                    .iter()
+                    .map(|s| s.source_info.span)
+                    .chain([bbdat.terminator().source_info.span])
+            })
+            // If this comes from a macro, go to the call site
+            .map(|s| s.source_callsite())
+            // Discard anything not meaningful
+            .filter(|s| !s.is_dummy() || !s.is_empty())
+    };
+
+    // Probe into the contents of the function. If any span lies within the
+    // range of the function span, this is probably not a
+    // `#[tracing::instrument]` kind of expansion and we can just use the body span.
+    let can_use_body_span = mk_span_iter().all(|sp| sp.from_expansion() || body_span.contains(sp));
+
+    if can_use_body_span {
+        return body_span;
+    }
+
+    // This is the slow path, where we need to combine multiple spans and check
+    // that they are in a reasonable range, so we generally avoid this.
+
+    let outer_source_file_idx = source_map.lookup_source_file_idx(body_span.data().lo);
+
+    mk_span_iter()
+        // Here we get rid of any spans that don't lie in our source file.
+        // Apparently this can happen these days in macro expansions.
+        .filter(|span| {
+            let file_idx = source_map.lookup_source_file_idx(span.data().lo);
+            file_idx == outer_source_file_idx
         })
-        .map(|s| s.source_callsite())
-        .filter(|s| !s.is_dummy() || !s.is_empty())
         .reduce(RustSpan::to)
-        .unwrap();
-    combined
+        .unwrap_or(body_span)
 }
 
 /// This is meant as an extension trait for `ast::Attribute`.
The main method of diff --git a/crates/paralegal-flow/tests/adaptive-inlining.rs b/crates/paralegal-flow/tests/adaptive-inlining.rs new file mode 100644 index 0000000000..83e27809c8 --- /dev/null +++ b/crates/paralegal-flow/tests/adaptive-inlining.rs @@ -0,0 +1,45 @@ +#![feature(rustc_private)] + +use paralegal_flow::test_utils::*; +use paralegal_spdg::Identifier; + +fn marking_function_found(ctrl: CtrlRef<'_>) { + let fns = ctrl.marked(Identifier::new_intern("source")); + assert!(!fns.is_empty()) +} + +#[test] +fn marking_closure_does_not_inline() { + InlineTestBuilder::new(stringify!( + #[paralegal_flow::marker(source, return)] + fn marking_function() -> usize {} + + fn call_the_closure(f: impl FnOnce() -> R) -> R { + f() + } + + fn main() { + call_the_closure(|| marking_function()); + } + )) + .with_extra_args(["--adaptive-depth".to_string()]) + .check_ctrl(marking_function_found); +} + +#[test] +fn marking_fn_ptr_does_not_inline() { + InlineTestBuilder::new(stringify!( + #[paralegal_flow::marker(source, return)] + fn marking_function() -> usize {} + + fn call_the_closure(f: impl FnOnce() -> R) -> R { + f() + } + + fn main() { + call_the_closure(marking_function); + } + )) + .with_extra_args(["--adaptive-depth".to_string()]) + .check_ctrl(marking_function_found); +} diff --git a/crates/paralegal-flow/tests/async-tests/Cargo.lock b/crates/paralegal-flow/tests/async-tests/Cargo.lock index 79096caab8..622b228382 100644 --- a/crates/paralegal-flow/tests/async-tests/Cargo.lock +++ b/crates/paralegal-flow/tests/async-tests/Cargo.lock @@ -6,7 +6,20 @@ version = 3 name = "async-tests" version = "0.1.0" dependencies = [ + "async-trait", "paralegal", + "tracing", +] + +[[package]] +name = "async-trait" +version = "0.1.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -15,6 +28,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + [[package]] name = "paralegal" version = "0.1.0" @@ -24,6 +43,12 @@ dependencies = [ "quote", ] +[[package]] +name = "pin-project-lite" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" + [[package]] name = "proc-macro2" version = "1.0.78" @@ -42,6 +67,48 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "syn" +version = "2.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +dependencies = [ + "proc-macro2", + "quote", + "syn", 
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
+dependencies = [
+ "once_cell",
+]
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.12"
diff --git a/crates/paralegal-flow/tests/async-tests/Cargo.toml b/crates/paralegal-flow/tests/async-tests/Cargo.toml
index 3b60e7ee29..12a2ea5a56 100644
--- a/crates/paralegal-flow/tests/async-tests/Cargo.toml
+++ b/crates/paralegal-flow/tests/async-tests/Cargo.toml
@@ -8,3 +8,5 @@ edition = "2021"
 [dependencies]
 #async-std = "1"
 paralegal = { path = "../../../paralegal" }
+tracing = { version = "0.1", features = ["attributes"] }
+async-trait = "0.1"
diff --git a/crates/paralegal-flow/tests/async-tests/src/main.rs b/crates/paralegal-flow/tests/async-tests/src/main.rs
index be9906a1be..15f5caa594 100644
--- a/crates/paralegal-flow/tests/async-tests/src/main.rs
+++ b/crates/paralegal-flow/tests/async-tests/src/main.rs
@@ -1,6 +1,8 @@
 // extern crate async_std;
 // use async_std::prelude::*;
 
+use async_trait::async_trait;
+
 #[paralegal::marker(source)]
 fn get_user_data() -> UserData {
     return UserData {
@@ -220,4 +222,119 @@ async fn async_return_from_async() -> usize {
     id_fun(some_input()).await
 }
 
+#[paralegal::marker(is_check, return)]
+async fn perform_check(data: usize) -> Result<(), ()> {
+    if data != 0 {
+        Ok(())
+    } else {
+        Err(())
+    }
+}
+
+#[paralegal::marker(is_sensitive1, return)]
+async fn sensitive_action1(data: usize) {}
+
+#[paralegal::marker(is_sensitive2, return)]
+async fn sensitive_action2(data: usize) {}
+
+#[paralegal::analyze]
+#[tracing::instrument(skip_all)]
+async fn control_flow_overtaint_tracing(condition: bool, data: usize) -> Result<(), ()> {
+    if condition {
+        perform_check(data).await?;
+
+        sensitive_action1(data).await;
+    } else {
+        sensitive_action2(data).await;
+    }
+    Ok(())
+}
+
+#[paralegal::marker(is_check, return)]
+fn perform_check_unasync(data: usize) -> Result<(), ()> {
+    if data != 0 {
+        Ok(())
+    } else {
+        Err(())
+    }
+}
+
+#[paralegal::marker(is_sensitive1, return)]
+fn sensitive_action1_unasync(data: usize) {}
+
+#[paralegal::marker(is_sensitive2, return)]
+fn sensitive_action2_unasync(data: usize) {}
+
+#[paralegal::analyze]
+#[tracing::instrument(skip_all)]
+fn control_flow_overtaint_tracing_unasync(condition: bool, data: usize) -> Result<(), ()> {
+    if condition {
+        perform_check_unasync(data)?;
+
+        sensitive_action1_unasync(data);
+    } else {
+        sensitive_action2_unasync(data);
+    }
+    Ok(())
+}
+
+#[async_trait]
+trait ControlFlowOvertaintAsyncTrait {
+    async fn control_flow_overtaint_async_trait(
+        &self,
+        condition: bool,
+        data: usize,
+    ) -> Result<(), ()>;
+}
+
+struct ControlFlowOvertaintAsyncTraitTestStruct;
+
+#[async_trait]
+impl ControlFlowOvertaintAsyncTrait for ControlFlowOvertaintAsyncTraitTestStruct {
+    #[paralegal::analyze]
+    async fn control_flow_overtaint_async_trait(
+        &self,
+        condition: bool,
+        data: usize,
+    ) -> Result<(), ()> {
+        if condition {
+            perform_check(data).await?;
+
+            sensitive_action1(data).await;
+        } else {
+            sensitive_action2(data).await;
+        }
+        Ok(())
+    }
+}
+
+#[async_trait]
+trait ControlFlowOvertaintAsyncTraitTracing {
+    async fn control_flow_overtaint_async_trait_tracing(
+        &self,
+        condition: bool,
+        data: usize,
+    ) -> Result<(), ()>;
+}
+
+#[async_trait]
+impl ControlFlowOvertaintAsyncTraitTracing for ControlFlowOvertaintAsyncTraitTestStruct {
+    #[paralegal::analyze]
+    #[tracing::instrument(skip_all)]
+    async fn
control_flow_overtaint_async_trait_tracing( + &self, + condition: bool, + data: usize, + ) -> Result<(), ()> { + if condition { + perform_check(data).await?; + + sensitive_action1(data).await; + } else { + sensitive_action2(data).await; + } + Ok(()) + } +} + fn main() {} diff --git a/crates/paralegal-flow/tests/async_tests.rs b/crates/paralegal-flow/tests/async_tests.rs index 8bccdb96a1..cab577afb6 100644 --- a/crates/paralegal-flow/tests/async_tests.rs +++ b/crates/paralegal-flow/tests/async_tests.rs @@ -534,3 +534,64 @@ fn field_precision_at_future_consumption() { // assert!(sinks.flows_to_any(&ctrl.marked(Identifier::new_intern("sink")))); }) } + +fn control_flow_overtaint_check(ctrl: CtrlRef<'_>) { + let checks = ctrl.marked(Identifier::new_intern("is_check")); + let sensitive_1 = ctrl.marked(Identifier::new_intern("is_sensitive1")); + let sensitive_2 = ctrl.marked(Identifier::new_intern("is_sensitive2")); + + assert!(!checks.is_empty()); + assert!(!sensitive_1.is_empty()); + assert!(!sensitive_2.is_empty()); + + assert!(checks.influences_ctrl(&sensitive_1)); + assert!(!checks.influences_ctrl(&sensitive_2)); +} + +#[test] +fn control_flow_overtaint() { + InlineTestBuilder::new(stringify!( + #[paralegal_flow::marker(is_check, return)] + async fn perform_check(data: usize) -> Result<(), ()> { + if data != 0 { + Ok(()) + } else { + Err(()) + } + } + + #[paralegal_flow::marker(is_sensitive1, return)] + async fn sensitive_action1(data: usize) {} + + #[paralegal_flow::marker(is_sensitive2, return)] + async fn sensitive_action2(data: usize) {} + + async fn main(condition: bool, data: usize) -> Result<(), ()> { + if condition { + perform_check(data).await?; + + sensitive_action1(data).await; + } else { + sensitive_action2(data).await; + } + Ok(()) + } + )) + .check_ctrl(control_flow_overtaint_check); +} + +define_test!(control_flow_overtaint_tracing: graph -> { + control_flow_overtaint_check(graph) +}); + +define_test!(control_flow_overtaint_tracing_unasync: graph -> { + control_flow_overtaint_check(graph) +}); + +define_test!(control_flow_overtaint_async_trait: graph -> { + control_flow_overtaint_check(graph) +}); + +define_test!(control_flow_overtaint_async_trait_tracing: graph -> { + control_flow_overtaint_check(graph) +}); diff --git a/crates/paralegal-flow/tests/control-flow-tests/src/main.rs b/crates/paralegal-flow/tests/control-flow-tests/src/main.rs index 5b8d9ac83c..8180e2732b 100644 --- a/crates/paralegal-flow/tests/control-flow-tests/src/main.rs +++ b/crates/paralegal-flow/tests/control-flow-tests/src/main.rs @@ -36,16 +36,6 @@ fn process_nested_if() { } } } - -#[paralegal::analyze] -fn process_if_multiple_statements() { - let mut user_data = get_user_data(); - if check_user_data(&user_data) { - modify(&mut user_data); - send_user_data(&user_data); - } -} - #[paralegal::analyze] fn process_if_not_function_call() { let x = get_x(); diff --git a/crates/paralegal-flow/tests/control_flow_tests.rs b/crates/paralegal-flow/tests/control_flow_tests.rs index e74e54b516..913aa23b64 100644 --- a/crates/paralegal-flow/tests/control_flow_tests.rs +++ b/crates/paralegal-flow/tests/control_flow_tests.rs @@ -79,23 +79,60 @@ define_test!(process_nested_if : graph -> { assert!(check2.output().influences_next_control(&send.input())); }); -define_test!(process_if_multiple_statements : graph -> { - let get_fn = graph.function("get_user_data"); - let get = graph.call_site(&get_fn); - let check_fn = graph.function("check_user_data"); - let check = graph.call_site(&check_fn); - let modify_fn = 
graph.function("modify"); - let modify = graph.call_site(&modify_fn); - let send_fn = graph.function("send_user_data"); - let send = graph.call_site(&send_fn); - - assert!(get.output().flows_to_data(&check.input())); - assert!(get.output().flows_to_data(&modify.input())); - assert!(modify.output().flows_to_data(&send.input())); - assert!(check.output().influences_next_control(&modify.input())); - assert!(check.output().influences_next_control(&send.input())); - assert!(!modify.output().influences_next_control(&send.input())); -}); +#[test] +fn process_if_multiple_statements() { + InlineTestBuilder::new(stringify!( + #[paralegal_flow::marker(sensitive)] + struct UserData { + pub data: Vec, + } + #[paralegal_flow::marker{ sink, arguments = [0] }] + fn send_user_data(_user_data: &UserData) {} + + #[paralegal_flow::marker{checks, arguments = [0]}] + fn check_user_data(user_data: &UserData) -> bool { + for i in &user_data.data { + if i < &0 { + return false; + } + } + return true; + } + #[paralegal_flow::marker{ noinline, return }] + fn modify(_user_data: &mut UserData) {} + + #[paralegal_flow::marker{source, return}] + fn get_user_data() -> UserData { + return UserData { + data: vec![1, 2, 3], + }; + } + fn main() { + let mut user_data = get_user_data(); + if check_user_data(&user_data) { + modify(&mut user_data); + send_user_data(&user_data); + } + } + )) + .check_ctrl(|graph| { + let get_fn = graph.function("get_user_data"); + let get = graph.call_site(&get_fn); + let check_fn = graph.function("check_user_data"); + let check = graph.call_site(&check_fn); + let modify_fn = graph.function("modify"); + let modify = graph.call_site(&modify_fn); + let send_fn = graph.function("send_user_data"); + let send = graph.call_site(&send_fn); + + assert!(get.output().flows_to_data(&check.input())); + assert!(get.output().flows_to_data(&modify.input())); + assert!(modify.output().flows_to_data(&send.input())); + assert!(check.output().influences_next_control(&modify.input())); + assert!(check.output().influences_next_control(&send.input())); + assert!(!modify.output().influences_next_control(&send.input())); + }); +} define_test!(process_if_not_function_call : graph -> { let getx_fn = graph.function("get_x"); diff --git a/crates/paralegal-flow/tests/non_transitive_graph_tests.rs b/crates/paralegal-flow/tests/non_transitive_graph_tests.rs index 79b7e57ae3..82358b8f4c 100644 --- a/crates/paralegal-flow/tests/non_transitive_graph_tests.rs +++ b/crates/paralegal-flow/tests/non_transitive_graph_tests.rs @@ -209,7 +209,10 @@ define_test!(control_flow_tracking_for_non_fn_compound_conditions: graph -> { assert!(other_cond.output().influences_ctrl(&read.input())); }); -define_test!(control_flow_tracking_for_compound_cond_with_fun: graph -> { +define_test!(control_flow_tracking_for_compound_cond_with_fun + skip + "https://github.com/brownsys/paralegal/issues/168" + : graph -> { let cond_input_fn = graph.function("source"); let cond_input = graph.call_site(&cond_input_fn); let other_cond_fn = graph.function("input"); @@ -233,7 +236,10 @@ define_test!(control_flow_tracking_for_compound_cond_with_fun: graph -> { // assert!(!cond_input.output().is_neighbor_ctrl(&read.input())); }); -define_test!(and_desugaring_similar_pattern: graph -> { +define_test!(and_desugaring_similar_pattern + skip + "This was for testing async desugaring (https://github.com/brownsys/paralegal/issues/168) but actually this shouldn't work." 
+ : graph -> { let cond_input_fn = graph.function("input"); let cond_input = graph.call_site(&cond_input_fn); let other_cond_fn = graph.function("source"); diff --git a/crates/paralegal-flow/tests/rebuttal-tests.rs b/crates/paralegal-flow/tests/rebuttal-tests.rs index ff21c489d1..7134d75acc 100644 --- a/crates/paralegal-flow/tests/rebuttal-tests.rs +++ b/crates/paralegal-flow/tests/rebuttal-tests.rs @@ -98,6 +98,11 @@ const EXISTENTIAL_MISS: &str = stringify!( #[test] fn plume_policy_exists_quantifier() { + let test_builder = |code| { + let mut builder = InlineTestBuilder::new(code); + builder.with_extra_args(["--relaxed".to_string()]); + builder + }; let policy = |expect_success, msg| { move |ctrl: CtrlRef<'_>| { let result = policy(ctrl, |srcs, snks| srcs.flows_to_data(snks)); @@ -110,14 +115,19 @@ fn plume_policy_exists_quantifier() { } } }; - InlineTestBuilder::new(SIMPLE_BUG).check_ctrl(policy(false, "simple bug")); - InlineTestBuilder::new(CORRECT).check_ctrl(policy(true, "correct")); - InlineTestBuilder::new(EXISTENTIAL_MISS).check_ctrl(policy(true, "existential fail")); - InlineTestBuilder::new(FORALL_FAIL).check_ctrl(policy(true, "Forall false-positive")); + test_builder(SIMPLE_BUG).check_ctrl(policy(false, "simple bug")); + test_builder(CORRECT).check_ctrl(policy(true, "correct")); + test_builder(EXISTENTIAL_MISS).check_ctrl(policy(true, "existential fail")); + test_builder(FORALL_FAIL).check_ctrl(policy(true, "Forall false-positive")); } #[test] fn plume_policy_forall_quantifier() { + let test_builder = |code| { + let mut builder = InlineTestBuilder::new(code); + builder.with_extra_args(["--relaxed".to_string()]); + builder + }; let policy = |expect_success, msg| { move |ctrl: CtrlRef<'_>| { let result = policy(ctrl, |srcs, snks| { @@ -132,8 +142,8 @@ fn plume_policy_forall_quantifier() { } } }; - InlineTestBuilder::new(SIMPLE_BUG).check_ctrl(policy(false, "bug")); - InlineTestBuilder::new(CORRECT).check_ctrl(policy(true, "correct")); - InlineTestBuilder::new(EXISTENTIAL_MISS).check_ctrl(policy(false, "existential bug")); - InlineTestBuilder::new(FORALL_FAIL).check_ctrl(policy(false, "forall false-positive")); + test_builder(SIMPLE_BUG).check_ctrl(policy(false, "bug")); + test_builder(CORRECT).check_ctrl(policy(true, "correct")); + test_builder(EXISTENTIAL_MISS).check_ctrl(policy(false, "existential bug")); + test_builder(FORALL_FAIL).check_ctrl(policy(false, "forall false-positive")); } diff --git a/crates/paralegal-flow/tests/stub-tests.rs b/crates/paralegal-flow/tests/stub-tests.rs index ab5577a225..6fd7b54cfe 100644 --- a/crates/paralegal-flow/tests/stub-tests.rs +++ b/crates/paralegal-flow/tests/stub-tests.rs @@ -23,7 +23,7 @@ macro_rules! 
define_test { }; } -define_test!(thread_spawn: graph -> { +fn check_source_pass_target(graph: CtrlRef<'_>) { let src = graph.marked(Identifier::new_intern("source")); let pass = graph.marked(Identifier::new_intern("pass")); let target = graph.marked(Identifier::new_intern("target")); @@ -34,19 +34,34 @@ define_test!(thread_spawn: graph -> { assert!(src.flows_to_data(&pass)); assert!(pass.flows_to_data(&target)); +} + +define_test!(thread_spawn: graph -> { + check_source_pass_target(graph); +}); + +define_test!(marked_thread_spawn: graph -> { + simple_source_target_flow(graph); }); define_test!(async_spawn: graph -> { - let src = graph.marked(Identifier::new_intern("source")); - let pass = graph.marked(Identifier::new_intern("pass")); - let target = graph.marked(Identifier::new_intern("target")); + check_source_pass_target(graph); +}); - assert!(!src.is_empty()); - assert!(!pass.is_empty()); - assert!(!target.is_empty()); +define_test!(marked_async_spawn: graph -> { + simple_source_target_flow(graph); +}); - assert!(src.flows_to_data(&pass)); - assert!(pass.flows_to_data(&target)); +define_test!(blocking_with_marker: graph -> { + simple_source_target_flow(graph); +}); + +define_test!(marked_blocking_like: graph -> { + simple_source_target_flow(graph); +}); + +define_test!(test_blocking_like: graph -> { + simple_source_target_flow(graph); }); fn simple_source_target_flow(graph: CtrlRef<'_>) { diff --git a/crates/paralegal-flow/tests/stub-tests/Cargo.lock b/crates/paralegal-flow/tests/stub-tests/Cargo.lock index 88de3e83e3..faa5700b6e 100644 --- a/crates/paralegal-flow/tests/stub-tests/Cargo.lock +++ b/crates/paralegal-flow/tests/stub-tests/Cargo.lock @@ -445,15 +445,6 @@ dependencies = [ "miniz_oxide 0.8.0", ] -[[package]] -name = "flow-model-tests" -version = "0.1.0" -dependencies = [ - "actix-web", - "paralegal", - "tokio", -] - [[package]] name = "fnv" version = "1.0.7" @@ -1021,6 +1012,15 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "stub-tests" +version = "0.1.0" +dependencies = [ + "actix-web", + "paralegal", + "tokio", +] + [[package]] name = "syn" version = "2.0.76" diff --git a/crates/paralegal-flow/tests/stub-tests/src/main.rs b/crates/paralegal-flow/tests/stub-tests/src/main.rs index edff823519..696bde9bc6 100644 --- a/crates/paralegal-flow/tests/stub-tests/src/main.rs +++ b/crates/paralegal-flow/tests/stub-tests/src/main.rs @@ -19,6 +19,49 @@ fn thread_spawn() { target(next); } +#[allow(dead_code)] +#[paralegal::analyze] +fn marked_thread_spawn() { + let next = std::thread::spawn(source).join().unwrap(); + target(next); +} + +#[allow(dead_code)] +#[paralegal::analyze] +fn marked_blocking_like(to_close_over: &str) { + let next = blocking_like(to_close_over, second_source); + target(next); +} + +fn contains_source(_: T) -> usize { + source() +} + +#[allow(dead_code)] +#[paralegal::analyze] +fn marked_blocking_like_2(to_close_over: &str) { + let next = blocking_like(to_close_over, contains_source); + target(next); +} + +#[allow(dead_code)] +#[paralegal::analyze] +fn test_blocking_like(to_close_over: &str) { + let next = blocking_like(to_close_over, |_| second_source(0_usize)); + target(next); +} + +pub fn blocking_like(pool: &str, f: F) -> T +where + F: FnOnce(usize) -> T + 'static + Send, + T: 'static + Send, +{ + let pool = pool.parse().unwrap(); + let res = std::thread::spawn(move || (f)(pool)).join().unwrap(); + + res +} + fn main() {} #[allow(dead_code)] @@ -29,6 +72,18 @@ async fn async_spawn() { target(next); } +#[paralegal::marker(source, return)] +async fn 
async_source() -> usize { + 0 +} + +#[allow(dead_code)] +#[paralegal::analyze] +async fn marked_async_spawn() { + let next = tokio::spawn(async_source()).await.unwrap(); + target(next); +} + fn to_block() -> Result { Ok(source()) } @@ -58,6 +113,17 @@ where res } +#[paralegal::marker(source, return)] +fn second_source(_: T) -> usize { + 0 +} + +#[allow(dead_code)] +#[paralegal::analyze] +async fn blocking_with_marker(to_close_over: &str) { + target(blocking(to_close_over, second_source).await) +} + #[allow(dead_code)] #[paralegal::analyze] async fn test_blocking_with_let_bound_closure(to_close_over: &str) { diff --git a/crates/paralegal-policy/Cargo.toml b/crates/paralegal-policy/Cargo.toml index 7092e27843..a7a0d30486 100644 --- a/crates/paralegal-policy/Cargo.toml +++ b/crates/paralegal-policy/Cargo.toml @@ -23,3 +23,9 @@ indexmap = "2.2.6" [dev-dependencies] paralegal-flow = { path = "../paralegal-flow", features = ["test"] } rand = "0.8.5" + +[build-dependencies] +autocfg = "1" + +[features] +post-rust-175 = [] diff --git a/crates/paralegal-policy/build.rs b/crates/paralegal-policy/build.rs index 0c1063561b..a34a592f0b 100644 --- a/crates/paralegal-policy/build.rs +++ b/crates/paralegal-policy/build.rs @@ -20,4 +20,8 @@ fn main() { let rustup_lib = get_rustup_lib_path(); println!("cargo:rustc-link-search=native={}", rustup_lib.display()); } + + let autocfg = autocfg::AutoCfg::new().unwrap(); + + autocfg.emit_rustc_version(1, 75); } diff --git a/crates/paralegal-policy/src/algo/ahb.rs b/crates/paralegal-policy/src/algo/ahb.rs index ad8c32f8ff..863f79666e 100644 --- a/crates/paralegal-policy/src/algo/ahb.rs +++ b/crates/paralegal-policy/src/algo/ahb.rs @@ -20,17 +20,18 @@ use crate::{ }; use crate::{Diagnostics, NodeExt}; -/// Statistics about the result of running [`crate::Context::always_happens_before`] -/// that are useful to understand how the property failed. +/// Statistics about the result of running +/// [`crate::RootContext::always_happens_before`] that are useful to understand +/// how the property failed. /// /// The [`std::fmt::Display`] implementation presents the information in human /// readable form. /// /// Note: Both the number of seen paths and the number of violation paths are /// approximations. This is because the traversal terminates when it reaches a -/// node that was already seen. However it is guaranteed that if there -/// are any violating paths, then the number of reaching paths reported in this -/// struct is at least one (e.g. [`Self::holds`] is sound). +/// node that was already seen. However it is guaranteed that if there are any +/// violating paths, then the number of reaching paths reported in this struct +/// is at least one (e.g. [`Self::holds`] is sound). /// /// The stable API of this struct is [`Self::holds`], [`Self::assert_holds`] and /// [`Self::is_vacuous`]. Otherwise the information in this struct and its @@ -118,7 +119,7 @@ impl AlwaysHappensBefore { } } -impl crate::Context { +impl crate::RootContext { /// Enforce that on every data flow path from the `starting_points` to `is_terminal` a /// node satisfying `is_checkpoint` is passed. 
/// diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index d836c4beda..214c456788 100644 --- a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -5,19 +5,19 @@ use std::time::{Duration, Instant}; use std::vec; use std::{io::Write, process::exit, sync::Arc}; -use paralegal_spdg::rustc_portable::defid_as_local; pub use paralegal_spdg::rustc_portable::{DefId, LocalDefId}; -use paralegal_spdg::traverse::{generic_flows_to, EdgeSelection}; +use paralegal_spdg::traverse::{ + generic_flows_to, generic_influencees, generic_influencers, EdgeSelection, +}; use paralegal_spdg::{ - CallString, DefKind, DisplayNode, Endpoint, GlobalNode, HashMap, HashSet, Identifier, - InstructionInfo, IntoIterGlobalNodes, Node as SPDGNode, NodeCluster, NodeInfo, - ProgramDescription, SPDGImpl, Span, TypeId, SPDG, + CallString, DefKind, DisplayNode, Endpoint, FunctionHandling, GlobalNode, HashMap, HashSet, + Identifier, InstructionInfo, IntoIterGlobalNodes, NodeCluster, NodeInfo, ProgramDescription, + Span, TypeId, SPDG, }; use anyhow::{anyhow, bail, Result}; use itertools::{Either, Itertools}; -use petgraph::prelude::Bfs; -use petgraph::visit::{EdgeFiltered, EdgeRef, IntoNeighborsDirected, Reversed, Topo, Walker}; +use petgraph::visit::{EdgeRef, IntoNeighborsDirected, Reversed, Topo, Walker}; use petgraph::Direction::Outgoing; use petgraph::{Direction, Incoming}; @@ -58,28 +58,8 @@ impl MarkerTargets { } } -use petgraph::visit::{GraphRef, IntoNeighbors, Visitable}; - use self::private::Sealed; -fn bfs_iter< - G: IntoNeighbors + GraphRef + Visitable::Map>, ->( - g: G, - controller_id: Endpoint, - start: impl IntoIterator, -) -> impl Iterator { - let mut discovered = g.visit_map(); - let stack: std::collections::VecDeque = - start.into_iter().collect(); - for n in stack.iter() { - petgraph::visit::VisitMap::visit(&mut discovered, *n); - } - let bfs = Bfs { stack, discovered }; - let walker_iter = Walker::iter(bfs, g); - walker_iter.map(move |inner| GlobalNode::from_local_node(controller_id, inner)) -} - /// Interface for defining policies. /// /// Holds a PDG ([`Self::desc`]) and defines basic queries like @@ -101,7 +81,7 @@ fn bfs_iter< /// [`super::GraphLocation::with_context`] this will be done automatically for /// you. #[derive(Debug)] -pub struct Context { +pub struct RootContext { marker_to_ids: MarkerIndex, desc: ProgramDescription, flows_to: Option, @@ -119,7 +99,7 @@ pub struct ContextStats { pub precomputation: Duration, } -impl Context { +impl RootContext { /// Construct a [`Context`] from a [`ProgramDescription`]. /// /// This also precomputes some data structures like an index over markers. @@ -328,9 +308,13 @@ impl Context { #[deprecated = "Use NodeQueries::flows_to instead"] /// Returns whether a node flows to a node through the configured edge type. /// - /// Nodes do not flow to themselves. CallArgument nodes do flow to their respective CallSites. + /// Nodes do not flow to themselves. CallArgument nodes do flow to their + /// respective CallSites. /// - /// If you use flows_to with [`EdgeSelection::Control`], you might want to consider using [`Context::has_ctrl_influence`], which additionally considers intermediate nodes which the src node has data flow to and has ctrl influence on the sink. 
+ /// If you use flows_to with [`EdgeSelection::Control`], you might want to + /// consider using [`RootContext::has_ctrl_influence`], which additionally + /// considers intermediate nodes which the src node has data flow to and has + /// ctrl influence on the sink. pub fn flows_to( &self, src: impl IntoIterGlobalNodes, @@ -340,24 +324,6 @@ impl Context { src.flows_to(sink, self, edge_type) } - /// All nodes that have this marker through a type - pub fn nodes_marked_via_type(&self, marker: Marker) -> impl Iterator + '_ { - self.marked_type(marker).iter().copied().flat_map(|t| { - self.all_controllers().flat_map(move |(cid, c)| { - c.type_assigns - .iter() - .filter(move |(_, tys)| tys.0.contains(&t)) - .map(move |(n, _)| GlobalNode::from_local_node(cid, *n)) - }) - }) - } - - /// All nodes with this marker, be that via type or directly - pub fn nodes_marked_any_way(&self, marker: Marker) -> impl Iterator + '_ { - self.marked_nodes(marker) - .chain(self.nodes_marked_via_type(marker)) - } - /// Find the node that represents the `index`th argument of the controller /// `ctrl_id`. /// @@ -405,16 +371,6 @@ impl Context { src.influencees(self, edge_type).into_iter() } - /// Returns an iterator over all objects marked with `marker`. - pub fn marked_nodes(&self, marker: Marker) -> impl Iterator + '_ { - self.report_marker_if_absent(marker); - self.marker_to_ids - .get(&marker) - .into_iter() - .flat_map(|t| &t.nodes) - .copied() - } - #[deprecated = "Use NodeExt::types instead"] /// Get the type(s) of a Node. pub fn get_node_types(&self, node: GlobalNode) -> &[DefId] { @@ -616,11 +572,14 @@ impl Context { &self, mut out: impl Write, include_signatures: bool, + include_elided_code: bool, ) -> std::io::Result<()> { let ordered_span_set = self .desc .analyzed_spans .values() + .filter(|(_, h)| include_elided_code || matches!(h, FunctionHandling::PDG)) + .map(|v| &v.0) .zip(std::iter::repeat(true)) .chain( include_signatures @@ -628,11 +587,7 @@ impl Context { self.desc .def_info .iter() - .filter(|(did, _)| { - !matches!(defid_as_local(**did), Some(local) - if self.desc.analyzed_spans.contains_key(&local) - ) - }) + .filter(|(did, _)| self.desc.analyzed_spans.contains_key(did)) .map(|(_, i)| (&i.src_info, matches!(i.kind, DefKind::Type))) }) .into_iter() @@ -662,13 +617,58 @@ impl Context { } } +/// Actions that behave differently depending on the context +pub trait Context { + /// Get the root context object + fn root(&self) -> &RootContext; + + /// Returns an iterator over all objects marked with `marker`. 
+ fn marked_nodes(&self, marker: Marker) -> impl Iterator + '_; + + /// All nodes with this marker, be that via type or directly + fn nodes_marked_any_way(&self, marker: Marker) -> impl Iterator + '_; + /// All nodes that have this marker through a type + fn nodes_marked_via_type(&self, marker: Marker) -> impl Iterator + '_; +} + +impl Context for RootContext { + fn root(&self) -> &RootContext { + self + } + + fn nodes_marked_any_way(&self, marker: Marker) -> impl Iterator + '_ { + self.marked_nodes(marker) + .chain(self.nodes_marked_via_type(marker)) + } + + fn nodes_marked_via_type(&self, marker: Marker) -> impl Iterator + '_ { + self.marked_type(marker).iter().copied().flat_map(|t| { + self.all_controllers().flat_map(move |(cid, c)| { + c.type_assigns + .iter() + .filter(move |(_, tys)| tys.0.contains(&t)) + .map(move |(n, _)| GlobalNode::from_local_node(cid, *n)) + }) + }) + } + + fn marked_nodes(&self, marker: Marker) -> impl Iterator + '_ { + self.report_marker_if_absent(marker); + self.marker_to_ids + .get(&marker) + .into_iter() + .flat_map(|t| &t.nodes) + .copied() + } +} + /// Context queries conveniently accessible on nodes pub trait NodeQueries<'a>: IntoIterGlobalNodes where Self::Iter: 'a, { /// Get other nodes at the same instruction - fn siblings(self, ctx: &Context) -> NodeCluster { + fn siblings(self, ctx: &RootContext) -> NodeCluster { NodeCluster::new( self.controller_id(), self.iter_global_nodes() @@ -687,13 +687,17 @@ where /// Returns whether a node flows to a node through the configured edge type. /// - /// Nodes do not flow to themselves. CallArgument nodes do flow to their respective CallSites. + /// Nodes do not flow to themselves. CallArgument nodes do flow to their + /// respective CallSites. /// - /// If you use flows_to with [`EdgeSelection::Control`], you might want to consider using [`Context::has_ctrl_influence`], which additionally considers intermediate nodes which the src node has data flow to and has ctrl influence on the sink. + /// If you use flows_to with [`EdgeSelection::Control`], you might want to + /// consider using [`RootContext::has_ctrl_influence`], which additionally + /// considers intermediate nodes which the src node has data flow to and has + /// ctrl influence on the sink. fn flows_to( self, sink: impl IntoIterGlobalNodes, - ctx: &Context, + ctx: &RootContext, edge_type: EdgeSelection, ) -> bool { let cf_id = self.controller_id(); @@ -716,10 +720,53 @@ where &ctx.desc.controllers[&cf_id], sink.iter_nodes(), ) + .is_some() + } + + /// Returns the sink node that is reached + /// + /// Nodes do not flow to themselves. CallArgument nodes do flow to their + /// respective CallSites. + /// + /// If you use flows_to with [`EdgeSelection::Control`], you might want to + /// consider using [`RootContext::has_ctrl_influence`], which additionally + /// considers intermediate nodes which the src node has data flow to and has + /// ctrl influence on the sink. 
+ fn find_flow( + self, + sink: impl IntoIterGlobalNodes, + ctx: &RootContext, + edge_type: EdgeSelection, + ) -> Option { + let cf_id = self.controller_id(); + if sink.controller_id() != cf_id { + return None; + } + + if let Some(index) = ctx.flows_to.as_ref() { + if edge_type.is_data() { + let flows_to = &index[&cf_id]; + return self.iter_nodes().find_map(|src| { + sink.iter_nodes() + .find(|sink| flows_to.data_flows_to[src.index()][sink.index()]) + .map(|n| GlobalNode::from_local_node(cf_id, n)) + }); + } + } + generic_flows_to( + self.iter_nodes(), + edge_type, + &ctx.desc.controllers[&cf_id], + sink.iter_nodes(), + ) + .map(|n| GlobalNode::from_local_node(cf_id, n)) } /// Call sites that consume this node directly. E.g. the outgoing edges. - fn consuming_call_sites(self, ctx: &'a Context) -> Box + 'a> { + fn consuming_call_sites( + self, + ctx: &'a RootContext, + ) -> Box + 'a> { let ctrl = &ctx.desc.controllers[&self.controller_id()]; Box::new(self.iter_nodes().flat_map(move |local| { @@ -735,7 +782,7 @@ where fn has_ctrl_influence( self, target: impl IntoIterGlobalNodes + Sized + Copy, - ctx: &Context, + ctx: &RootContext, ) -> bool { self.flows_to(target, ctx, EdgeSelection::Control) || NodeCluster::try_from_iter(self.influencees(ctx, EdgeSelection::Data)) @@ -746,32 +793,19 @@ where /// Returns iterator over all Nodes that influence the given sink Node. /// /// Does not return the input node. A CallSite sink will return all of the associated CallArgument nodes. - fn influencers(self, ctx: &Context, edge_type: EdgeSelection) -> Vec { - use petgraph::visit::*; + fn influencers(self, ctx: &RootContext, edge_type: EdgeSelection) -> Vec { let cf_id = self.controller_id(); let nodes = self.iter_nodes(); - - let reversed_graph = Reversed(&ctx.desc.controllers[&cf_id].graph); - - match edge_type { - EdgeSelection::Data => { - let edges_filtered = - EdgeFiltered::from_fn(reversed_graph, |e| e.weight().is_data()); - bfs_iter(&edges_filtered, cf_id, nodes).collect::>() - } - EdgeSelection::Control => { - let edges_filtered = - EdgeFiltered::from_fn(reversed_graph, |e| e.weight().is_control()); - bfs_iter(&edges_filtered, cf_id, nodes).collect::>() - } - EdgeSelection::Both => bfs_iter(reversed_graph, cf_id, nodes).collect::>(), - } + generic_influencers(&ctx.desc.controllers[&cf_id].graph, nodes, edge_type) + .into_iter() + .map(|n| GlobalNode::from_local_node(cf_id, n)) + .collect() } /// Returns iterator over all Nodes that are influenced by the given src Node. /// /// Does not return the input node. A CallArgument src will return the associated CallSite. 
- fn influencees(self, ctx: &Context, edge_type: EdgeSelection) -> Vec { + fn influencees(self, ctx: &RootContext, edge_type: EdgeSelection) -> Vec { let cf_id = self.controller_id(); let graph = &ctx.desc.controllers[&cf_id].graph; @@ -788,18 +822,10 @@ where .collect::>(); } } - - match edge_type { - EdgeSelection::Data => { - let edges_filtered = EdgeFiltered::from_fn(graph, |e| e.weight().is_data()); - bfs_iter(&edges_filtered, cf_id, self.iter_nodes()).collect::>() - } - EdgeSelection::Both => bfs_iter(graph, cf_id, self.iter_nodes()).collect::>(), - EdgeSelection::Control => { - let edges_filtered = EdgeFiltered::from_fn(graph, |e| e.weight().is_control()); - bfs_iter(&edges_filtered, cf_id, self.iter_nodes()).collect::>() - } - } + generic_influencees(graph, self.iter_nodes(), edge_type) + .into_iter() + .map(move |n| GlobalNode::from_local_node(cf_id, n)) + .collect() } } @@ -814,70 +840,71 @@ mod private { /// Extension trait with queries for single nodes pub trait NodeExt: private::Sealed { /// Returns true if this node has the provided type - fn has_type(self, t: TypeId, ctx: &Context) -> bool; + fn has_type(self, t: TypeId, ctx: &RootContext) -> bool; /// Find the call string for the statement or function that produced this node. - fn associated_call_site(self, ctx: &Context) -> CallString; + fn associated_call_site(self, ctx: &RootContext) -> CallString; /// Get the type(s) of a Node. - fn types(self, ctx: &Context) -> &[TypeId]; + fn types(self, ctx: &RootContext) -> &[TypeId]; /// Returns a DisplayNode for the given Node - fn describe(self, ctx: &Context) -> DisplayNode; + fn describe(self, ctx: &RootContext) -> DisplayNode; /// Retrieve metadata about a node. - fn info(self, ctx: &Context) -> &NodeInfo; + fn info(self, ctx: &RootContext) -> &NodeInfo; /// Retrieve metadata about the instruction executed by a specific node. - fn instruction(self, ctx: &Context) -> &InstructionInfo; + fn instruction(self, ctx: &RootContext) -> &InstructionInfo; /// Return the immediate successors of this node - fn successors(self, ctx: &Context) -> Box + '_>; + fn successors(self, ctx: &RootContext) -> Box + '_>; /// Return the immediate predecessors of this node - fn predecessors(self, ctx: &Context) -> Box + '_>; + fn predecessors(self, ctx: &RootContext) -> Box + '_>; /// Get the span of a node - fn get_location(self, ctx: &Context) -> &Span; + fn get_location(self, ctx: &RootContext) -> &Span; /// Returns whether this Node has the marker applied to it directly or via its type. fn has_marker(self, ctx: C, marker: Marker) -> bool; /// The shortest path between this and a target node + #[deprecated = "This function is known to be buggy at the moment. 
Only use for debugging, don't rely on it for policy correctness."] fn shortest_path( self, to: GlobalNode, - ctx: &Context, + ctx: &RootContext, edge_selection: EdgeSelection, ) -> Option>; } impl NodeExt for GlobalNode { - fn has_type(self, t: TypeId, ctx: &Context) -> bool { + fn has_type(self, t: TypeId, ctx: &RootContext) -> bool { ctx.desc().controllers[&self.controller_id()] .type_assigns .get(&self.local_node()) .map_or(false, |tys| tys.0.contains(&t)) } - fn associated_call_site(self, ctx: &Context) -> CallString { + fn associated_call_site(self, ctx: &RootContext) -> CallString { ctx.desc.controllers[&self.controller_id()] .node_info(self.local_node()) .at } - fn types(self, ctx: &Context) -> &[TypeId] { + fn types(self, ctx: &RootContext) -> &[TypeId] { ctx.desc.controllers[&self.controller_id()] .type_assigns .get(&self.local_node()) .map_or(&[], |v| v.0.as_ref()) } - fn describe(self, ctx: &Context) -> DisplayNode { + fn describe(self, ctx: &RootContext) -> DisplayNode { DisplayNode::pretty( self.local_node(), &ctx.desc.controllers[&self.controller_id()], ) } - fn info(self, ctx: &Context) -> &NodeInfo { + fn info(self, ctx: &RootContext) -> &NodeInfo { ctx.desc.controllers[&self.controller_id()].node_info(self.local_node()) } - fn instruction(self, ctx: &Context) -> &InstructionInfo { + fn instruction(self, ctx: &RootContext) -> &InstructionInfo { &ctx.desc.instruction_info[&self.info(ctx).at.leaf()] } - fn successors(self, ctx: &Context) -> Box + '_> { + fn successors(self, ctx: &RootContext) -> Box + '_> { Box::new( ctx.desc.controllers[&self.controller_id()] .graph @@ -886,7 +913,7 @@ impl NodeExt for GlobalNode { ) } - fn predecessors(self, ctx: &Context) -> Box + '_> { + fn predecessors(self, ctx: &RootContext) -> Box + '_> { Box::new( ctx.desc.controllers[&self.controller_id()] .graph @@ -894,7 +921,7 @@ impl NodeExt for GlobalNode { .map(move |n| GlobalNode::from_local_node(self.controller_id(), n)), ) } - fn get_location(self, ctx: &Context) -> &Span { + fn get_location(self, ctx: &RootContext) -> &Span { &self.info(ctx).span } @@ -914,7 +941,7 @@ impl NodeExt for GlobalNode { fn shortest_path( self, to: Self, - ctx: &Context, + ctx: &RootContext, edge_selection: EdgeSelection, ) -> Option> { let g = if self.controller_id() != to.controller_id() { @@ -951,18 +978,18 @@ impl NodeExt for GlobalNode { impl Sealed for &'_ T {} impl NodeExt for &'_ T { - fn has_type(self, t: TypeId, ctx: &Context) -> bool { + fn has_type(self, t: TypeId, ctx: &RootContext) -> bool { (*self).has_type(t, ctx) } - fn info(self, ctx: &Context) -> &NodeInfo { + fn info(self, ctx: &RootContext) -> &NodeInfo { (*self).info(ctx) } - fn types(self, ctx: &Context) -> &[TypeId] { + fn types(self, ctx: &RootContext) -> &[TypeId] { (*self).types(ctx) } - fn describe(self, ctx: &Context) -> DisplayNode { + fn describe(self, ctx: &RootContext) -> DisplayNode { (*self).describe(ctx) } @@ -970,32 +997,33 @@ impl NodeExt for &'_ T { (*self).has_marker(ctx, marker) } - fn successors(self, ctx: &Context) -> Box + '_> { + fn successors(self, ctx: &RootContext) -> Box + '_> { (*self).successors(ctx) } - fn instruction(self, ctx: &Context) -> &InstructionInfo { + fn instruction(self, ctx: &RootContext) -> &InstructionInfo { (*self).instruction(ctx) } - fn get_location(self, ctx: &Context) -> &Span { + fn get_location(self, ctx: &RootContext) -> &Span { (*self).get_location(ctx) } - fn predecessors(self, ctx: &Context) -> Box + '_> { + fn predecessors(self, ctx: &RootContext) -> Box + '_> { 
(*self).predecessors(ctx) } + #[allow(deprecated)] fn shortest_path( self, to: GlobalNode, - ctx: &Context, + ctx: &RootContext, edge_selection: EdgeSelection, ) -> Option> { (*self).shortest_path(to, ctx, edge_selection) } - fn associated_call_site(self, ctx: &Context) -> CallString { + fn associated_call_site(self, ctx: &RootContext) -> CallString { (*self).associated_call_site(ctx) } } @@ -1005,7 +1033,7 @@ pub struct DisplayDef<'a> { /// DefId to display. pub def_id: DefId, /// Context for the DefId. - pub ctx: &'a Context, + pub ctx: &'a RootContext, } impl<'a> std::fmt::Display for DisplayDef<'a> { diff --git a/crates/paralegal-policy/src/diagnostics.rs b/crates/paralegal-policy/src/diagnostics.rs index 4e068dd789..bf8222fa91 100644 --- a/crates/paralegal-policy/src/diagnostics.rs +++ b/crates/paralegal-policy/src/diagnostics.rs @@ -9,7 +9,7 @@ //! This manifests for instance in [`Diagnostics::error`]. This function records //! a severe error that should fail the policy but it does not exit the program. //! Instead the message is recorded and emitted later, for instance by -//! [`Context::emit_diagnostics`]. +//! [`RootContext::emit_diagnostics`]. //! //! ## Emitting Messages //! @@ -25,31 +25,31 @@ //! condition, message)`. `ctx` here is anything that implements //! [`Diagnostics`]. //! -//! [`Diagnostics`] is implemented directly by [`Context`] so you can use +//! [`Diagnostics`] is implemented directly by [`RootContext`] so you can use //! `ctx.error()` or `ctx.warning()`. You can also call it on scoped contexts //! (see below). //! //! ## Scoping messages //! //! You may however add additional contextual information about which policy or -//! combinator is currently executing. [`Context::named_policy`] returns a +//! combinator is currently executing. [`RootContext::named_policy`] returns a //! wrapper that can be used the same way that you use [`Context`], but when //! [`error`][Diagnostics::error] or [`warning`][Diagnostics::warning] is called //! it also appends the name of the policy to you specified. //! -//! Similarly you can use [`Context::named_combinator`] or +//! Similarly you can use [`RootContext::named_combinator`] or //! [`PolicyContext::named_combinator`] to add context about a named combinator. //! //! ## Intended Workflow //! //! ```no_run //! use paralegal_policy::{ -//! Context, assert_error, assert_warning, +//! Context, RootContext, assert_error, assert_warning, //! paralegal_spdg::Identifier //! }; //! use std::sync::Arc; //! -//! fn my_check(ctx: Arc) { +//! fn my_check(ctx: Arc) { //! ctx.named_policy(Identifier::new_intern("cannot escape"), |ctx| { //! let result_1 = ctx.clone().named_combinator( //! Identifier::new_intern("collect something"), @@ -84,7 +84,7 @@ //! named `ctx`, to avoid using the outer context by accident. The methods off //! outer contexts are always available on the inner ones. //! -//! Note that some methods, like [`Context::always_happens_before`] add a named +//! Note that some methods, like [`RootContext::always_happens_before`] add a named //! combinator context by themselves when you use their //! [`report`][crate::algo::ahb::AlwaysHappensBefore::report] functions. 
@@ -95,9 +95,9 @@ use indexmap::IndexMap; use std::rc::Rc; use std::{io::Write, sync::Arc}; -use paralegal_spdg::{Endpoint, GlobalNode, Identifier, Span, SpanCoord, SPDG}; +use paralegal_spdg::{DisplayPath, Endpoint, GlobalNode, Identifier, Span, SpanCoord, SPDG}; -use crate::{Context, NodeExt}; +use crate::{Context, NodeExt, RootContext}; /// Check the condition and emit a [`Diagnostics::error`] if it fails. #[macro_export] @@ -451,7 +451,7 @@ pub trait HasDiagnosticsBase { fn record(&self, diagnostic: Diagnostic); /// Access to [`Context`], usually also available via [`std::ops::Deref`]. - fn as_ctx(&self) -> &Context; + fn as_ctx(&self) -> &RootContext; } impl HasDiagnosticsBase for Arc { @@ -460,13 +460,13 @@ impl HasDiagnosticsBase for Arc { t.record(diagnostic) } - fn as_ctx(&self) -> &Context { + fn as_ctx(&self) -> &RootContext { self.as_ref().as_ctx() } } impl HasDiagnosticsBase for &'_ T { - fn as_ctx(&self) -> &Context { + fn as_ctx(&self) -> &RootContext { (*self).as_ctx() } @@ -476,7 +476,7 @@ impl HasDiagnosticsBase for &'_ T { } impl HasDiagnosticsBase for Rc { - fn as_ctx(&self) -> &Context { + fn as_ctx(&self) -> &RootContext { (**self).as_ctx() } @@ -683,7 +683,7 @@ pub trait Diagnostics: HasDiagnosticsBase { } } -fn highlighted_node_span(ctx: &Context, node: GlobalNode) -> HighlightedSpan { +fn highlighted_node_span(ctx: &RootContext, node: GlobalNode) -> HighlightedSpan { let node_span = node.get_location(ctx); let stmt_span = &ctx.instruction_at_node(node).span; if stmt_span.contains(node_span) { @@ -758,7 +758,7 @@ pub struct PolicyContext { } impl std::ops::Deref for PolicyContext { - type Target = Context; + type Target = RootContext; fn deref(&self) -> &Self::Target { self.as_ctx() } @@ -809,7 +809,7 @@ impl HasDiagnosticsBase for PolicyContext { self.inner.record(diagnostic) } - fn as_ctx(&self) -> &Context { + fn as_ctx(&self) -> &RootContext { self.inner.as_ctx() } } @@ -828,7 +828,7 @@ pub struct ControllerContext { } impl std::ops::Deref for ControllerContext { - type Target = Context; + type Target = RootContext; fn deref(&self) -> &Self::Target { self.as_ctx() } @@ -893,18 +893,46 @@ impl ControllerContext { impl HasDiagnosticsBase for ControllerContext { fn record(&self, mut diagnostic: Diagnostic) { - let name = self.as_ctx().desc().controllers[&self.id].name; - diagnostic - .context - .push(Identifier::new_intern(&format!("[controller: {}]", name))); + let ctrl = &self.as_ctx().desc().controllers[&self.id]; + diagnostic.context.push(Identifier::new_intern(&format!( + "[controller: {}]", + DisplayPath::from(&ctrl.path) + ))); self.inner.record(diagnostic) } - fn as_ctx(&self) -> &Context { + fn as_ctx(&self) -> &RootContext { self.inner.as_ctx() } } +impl Context for ControllerContext { + fn root(&self) -> &RootContext { + self.as_ctx() + } + + fn marked_nodes(&self, marker: crate::Marker) -> impl Iterator + '_ { + self.root() + .marked_nodes(marker) + .filter(|n| n.controller_id() == self.id) + } + + fn nodes_marked_via_type( + &self, + marker: crate::Marker, + ) -> impl Iterator + '_ { + self.root() + .nodes_marked_via_type(marker) + .filter(|n| n.controller_id() == self.id) + } + + fn nodes_marked_any_way(&self, marker: crate::Marker) -> impl Iterator + '_ { + self.root() + .nodes_marked_any_way(marker) + .filter(|n| n.controller_id() == self.id) + } +} + /// A context for combinators. /// /// You may call any method and access any field defined on [`Context`]. 
In
@@ -919,7 +947,7 @@ pub struct CombinatorContext {
 }
 
 impl std::ops::Deref for CombinatorContext {
-    type Target = Context;
+    type Target = RootContext;
     fn deref(&self) -> &Self::Target {
         self.as_ctx()
     }
@@ -952,12 +980,12 @@ impl HasDiagnosticsBase for CombinatorContext {
         self.inner.record(diagnostic)
     }
 
-    fn as_ctx(&self) -> &Context {
+    fn as_ctx(&self) -> &RootContext {
         self.inner.as_ctx()
     }
 }
 
-impl Context {
+impl RootContext {
     /// Add a policy to the diagnostic context.
     ///
     /// See the [module level documentation][self] for more information on
@@ -1034,13 +1062,13 @@ impl DiagnosticsRecorder {
     }
 }
 
-impl HasDiagnosticsBase for Context {
+impl HasDiagnosticsBase for RootContext {
     /// Record a diagnostic message.
     fn record(&self, diagnostic: Diagnostic) {
         self.diagnostics.0.lock().unwrap().insert(diagnostic, ());
    }
 
-    fn as_ctx(&self) -> &Context {
+    fn as_ctx(&self) -> &RootContext {
         self
     }
 }
diff --git a/crates/paralegal-policy/src/lib.rs b/crates/paralegal-policy/src/lib.rs
index be8729c0a8..b8a557da7f 100644
--- a/crates/paralegal-policy/src/lib.rs
+++ b/crates/paralegal-policy/src/lib.rs
@@ -37,7 +37,7 @@
 //! 3. [`.with_context()`](GraphLocation::with_context) reads and parses the
 //!    graph file, then invokes the provided closure with a [`Context`]. After
 //!    the closure returns it automatically invokes
-//!    [`Context::emit_diagnostics`].
+//!    [`RootContext::emit_diagnostics`].
 //!
 //! For information about how to specify policies see the [`Context`] struct.
 //!
@@ -48,12 +48,14 @@
 //! checker implementation.
 #![warn(missing_docs)]
+#![cfg_attr(not(rustc_1_75), feature(return_position_impl_trait_in_trait))]
 extern crate core;
 
 use anyhow::{ensure, Result};
 pub use paralegal_spdg;
 use paralegal_spdg::utils::TruncatedHumanTime;
+use paralegal_spdg::STAT_FILE_EXT;
 pub use paralegal_spdg::{
     traverse::EdgeSelection, GlobalNode, IntoIterGlobalNodes, ProgramDescription,
 };
@@ -203,6 +205,12 @@ impl GraphLocation {
         }
     }
 
+    /// Get the path where the analyzer wrote statistics to. Use this path
+    /// with [paralegal_spdg::AnalyzerStats::canonical_read] to read them.
+    pub fn stats_path(&self) -> PathBuf {
+        self.path.with_extension(STAT_FILE_EXT)
+    }
+
     /// Use a completely custom path (directory and file name).
     pub fn custom(path: PathBuf) -> Self {
         Self {
             path,
@@ -222,7 +230,7 @@ impl GraphLocation {
     /// if they were severe enough.
     pub fn with_context(
         &self,
-        prop: impl FnOnce(Arc) -> Result,
+        prop: impl FnOnce(Arc) -> Result,
     ) -> Result> {
         self.with_context_configured(Default::default(), prop)
     }
@@ -234,7 +242,7 @@ impl GraphLocation {
     pub fn with_context_configured(
         &self,
         config: Config,
-        prop: impl FnOnce(Arc) -> Result,
+        prop: impl FnOnce(Arc) -> Result,
     ) -> Result> {
         let ctx = Arc::new(self.build_context(config)?);
         assert_warning!(
@@ -263,12 +271,12 @@ impl GraphLocation {
     ///
     /// Prefer using [`Self::with_context`] which takes care of emitting any
     /// diagnostic messages after the property is done.
- pub fn build_context(&self, config: Config) -> Result { + pub fn build_context(&self, config: Config) -> Result { let _ = simple_logger::init_with_env(); let deser_started = Instant::now(); let desc = ProgramDescription::canonical_read(&self.path)?; - let mut ctx = Context::new(desc, config); + let mut ctx = RootContext::new(desc, config); ctx.stats.pdg_construction = self.construction_time; ctx.stats.deserialization = Some(deser_started.elapsed()); Ok(ctx) diff --git a/crates/paralegal-policy/src/test_utils.rs b/crates/paralegal-policy/src/test_utils.rs index 9f322d6c4a..71e224aa7e 100644 --- a/crates/paralegal-policy/src/test_utils.rs +++ b/crates/paralegal-policy/src/test_utils.rs @@ -1,4 +1,4 @@ -use crate::Context; +use crate::RootContext; use paralegal_flow::test_utils::PreFrg; use paralegal_spdg::Endpoint; use paralegal_spdg::IntoIterGlobalNodes; @@ -7,20 +7,20 @@ use paralegal_spdg::{Identifier, InstructionKind, Node as SPDGNode, SPDG}; use std::sync::Arc; use std::sync::OnceLock; -static TEST_CTX: OnceLock> = OnceLock::new(); +static TEST_CTX: OnceLock> = OnceLock::new(); -pub fn test_ctx() -> Arc { +pub fn test_ctx() -> Arc { TEST_CTX .get_or_init(|| { paralegal_flow::test_utils::run_paralegal_flow_with_flow_graph_dump("tests/test-crate"); let desc = PreFrg::from_file_at("tests/test-crate").desc; - Arc::new(Context::new(desc, Default::default())) + Arc::new(RootContext::new(desc, Default::default())) }) .clone() } pub fn get_callsite_or_datasink_node<'a>( - ctx: &'a Context, + ctx: &'a RootContext, controller: Endpoint, name: &'a str, ) -> NodeCluster { @@ -29,7 +29,11 @@ pub fn get_callsite_or_datasink_node<'a>( .unwrap() } -pub fn get_callsite_node<'a>(ctx: &'a Context, controller: Endpoint, name: &'a str) -> NodeCluster { +pub fn get_callsite_node<'a>( + ctx: &'a RootContext, + controller: Endpoint, + name: &'a str, +) -> NodeCluster { let name = Identifier::new_intern(name); let ctrl = &ctx.desc().controllers[&controller]; let inner = ctrl @@ -39,7 +43,7 @@ pub fn get_callsite_node<'a>(ctx: &'a Context, controller: Endpoint, name: &'a s } fn is_at_function_call_with_name( - ctx: &Context, + ctx: &RootContext, ctrl: &SPDG, name: Identifier, node: SPDGNode, @@ -53,7 +57,7 @@ fn is_at_function_call_with_name( ) } -pub fn get_sink_node<'a>(ctx: &'a Context, controller: Endpoint, name: &'a str) -> NodeCluster { +pub fn get_sink_node<'a>(ctx: &'a RootContext, controller: Endpoint, name: &'a str) -> NodeCluster { let name = Identifier::new_intern(name); let ctrl = &ctx.desc().controllers[&controller]; let inner = ctrl diff --git a/crates/paralegal-policy/tests/adaptive-depth.rs b/crates/paralegal-policy/tests/adaptive-depth.rs index 104501dfea..e7903fd5f6 100644 --- a/crates/paralegal-policy/tests/adaptive-depth.rs +++ b/crates/paralegal-policy/tests/adaptive-depth.rs @@ -1,6 +1,6 @@ use anyhow::Result; use helpers::Test; -use paralegal_policy::{assert_error, EdgeSelection}; +use paralegal_policy::{assert_error, Context, EdgeSelection}; use paralegal_spdg::Identifier; mod helpers; diff --git a/crates/paralegal-policy/tests/atomic.rs b/crates/paralegal-policy/tests/atomic.rs index a27d8cadb7..20ca22a762 100644 --- a/crates/paralegal-policy/tests/atomic.rs +++ b/crates/paralegal-policy/tests/atomic.rs @@ -5,7 +5,9 @@ use std::{collections::HashSet, sync::Arc}; use helpers::Test; use anyhow::Result; -use paralegal_policy::{assert_error, Context, Diagnostics as _, EdgeSelection, NodeExt as _}; +use paralegal_policy::{ + assert_error, Context, Diagnostics as _, EdgeSelection, NodeExt 
as _, RootContext, +}; use paralegal_spdg::{GlobalNode, Identifier, NodeCluster, SourceUse}; use petgraph::Outgoing; @@ -19,10 +21,10 @@ macro_rules! marker { } trait NodeExt: Sized { - fn is_argument(self, ctx: &Context, num: u8) -> bool; + fn is_argument(self, ctx: &RootContext, num: u8) -> bool; } impl NodeExt for GlobalNode { - fn is_argument(self, ctx: &Context, num: u8) -> bool { + fn is_argument(self, ctx: &RootContext, num: u8) -> bool { ctx.desc().controllers[&self.controller_id()] .graph .edges_directed(self.local_node(), Outgoing) @@ -134,7 +136,7 @@ fn not_influenced_by_commit() -> Result<()> { test.run(atomic_policy) } -fn atomic_policy(ctx: Arc) -> Result<()> { +fn atomic_policy(ctx: Arc) -> Result<()> { let mut any_sink_reached = false; let check_rights = marker!(check_rights); for ctx in ctx.controller_contexts() { diff --git a/crates/paralegal-policy/tests/box.rs b/crates/paralegal-policy/tests/box.rs index ed77f9639b..b27dd86e24 100644 --- a/crates/paralegal-policy/tests/box.rs +++ b/crates/paralegal-policy/tests/box.rs @@ -2,12 +2,12 @@ use std::sync::Arc; use anyhow::Result; use helpers::Test; -use paralegal_policy::{algo::ahb, assert_error, Context}; +use paralegal_policy::{algo::ahb, assert_error, Context, RootContext}; use paralegal_spdg::{HashSet, Identifier}; mod helpers; -fn simple_policy(ctx: Arc) -> Result<()> { +fn simple_policy(ctx: Arc) -> Result<()> { let sources = ctx .nodes_marked_any_way(Identifier::new_intern("source")) .collect::>(); diff --git a/crates/paralegal-policy/tests/contile.rs b/crates/paralegal-policy/tests/contile.rs index a7209b4d9d..2354347b45 100644 --- a/crates/paralegal-policy/tests/contile.rs +++ b/crates/paralegal-policy/tests/contile.rs @@ -2,14 +2,14 @@ use std::sync::Arc; use anyhow::{Ok, Result}; use helpers::Test; -use paralegal_policy::{Context, Diagnostics, EdgeSelection, NodeExt}; +use paralegal_policy::{Context, Diagnostics, EdgeSelection, NodeExt, RootContext}; use paralegal_spdg::Identifier; mod helpers; const CODE: &str = include_str!("raw-code/contile.rs"); -fn policy(ctx: Arc) -> Result<()> { +fn policy(ctx: Arc) -> Result<()> { let m_sink = Identifier::new_intern("sink"); let m_sensitive = Identifier::new_intern("sensitive"); let m_send = Identifier::new_intern("metrics_server"); diff --git a/crates/paralegal-policy/tests/debug-ctrl-influence.rs b/crates/paralegal-policy/tests/debug-ctrl-influence.rs index 89caf49b11..4eb1024ea8 100644 --- a/crates/paralegal-policy/tests/debug-ctrl-influence.rs +++ b/crates/paralegal-policy/tests/debug-ctrl-influence.rs @@ -1,7 +1,8 @@ mod helpers; use helpers::{Result, Test}; -use paralegal_policy::{loc, Diagnostics, Marker}; +use paralegal_policy::{assert_error, loc, Context, Diagnostics, Marker, NodeQueries}; +use paralegal_spdg::Identifier; macro_rules! marker { ($id:ident) => { @@ -109,3 +110,174 @@ fn has_ctrl_flow_influence() -> Result<()> { Ok(()) }) } + +#[test] +fn lemmy_ctrl_influence() -> Result<()> { + Test::new(stringify!( + #[paralegal::marker(check, return)] + async fn check_safe(data: usize) -> Result<(), ()> { + Ok(()) + } + + #[paralegal::marker(sensitive, return)] + async fn sensitive_action(data: usize) {} + + #[paralegal::analyze] + async fn main(data: usize) -> Result<(), ()> { + check_safe(data).await?; + + sensitive_action(data).await; + Ok(()) + } + ))? 
+ .run(|ctx| { + let mut sensitive = ctx + .marked_nodes(Identifier::new_intern("sensitive")) + .peekable(); + let checks = ctx + .marked_nodes(Identifier::new_intern("check")) + .collect::>(); + + assert!(sensitive.peek().is_some()); + assert!(!checks.is_empty()); + + assert_error!( + ctx, + sensitive.all(|sens| { checks.iter().any(|c| c.has_ctrl_influence(sens, &ctx)) }) + ); + Ok(()) + }) +} + +#[test] +fn lemmy_blocking_ctrl_influence() -> Result<()> { + Test::new(stringify!( + #[paralegal::marker(check, return)] + async fn check_safe(data: usize) -> Result<(), ()> { + Ok(()) + } + + #[paralegal::marker(sensitive, return)] + fn sensitive_action(data: usize) {} + + async fn blocking(f: impl FnOnce() -> ()) { + f() + } + + #[paralegal::analyze] + async fn main(data: usize) -> Result<(), ()> { + check_safe(data).await?; + + blocking(|| { + sensitive_action(data); + }) + .await; + Ok(()) + } + ))? + .run(|ctx| { + let mut sensitive = ctx + .marked_nodes(Identifier::new_intern("sensitive")) + .peekable(); + let checks = ctx + .marked_nodes(Identifier::new_intern("check")) + .collect::>(); + + assert!(sensitive.peek().is_some()); + assert!(!checks.is_empty()); + + assert_error!( + ctx, + sensitive.all(|sens| { checks.iter().any(|c| c.has_ctrl_influence(sens, &ctx)) }) + ); + Ok(()) + }) +} + +#[test] +fn nested_ctrl_influence() -> Result<()> { + Test::new(stringify!( + #[paralegal::marker(check, return)] + async fn check_safe(data: usize) -> Result<(), ()> { + Ok(()) + } + + #[paralegal::marker(sensitive, return)] + fn sensitive_action(data: usize) {} + + async fn blocking(data: usize) { + sensitive_action(data) + } + + #[paralegal::analyze] + async fn main(data: usize) -> Result<(), ()> { + check_safe(data).await?; + + blocking(data).await; + Ok(()) + } + ))? + .run(|ctx| { + let mut sensitive = ctx + .marked_nodes(Identifier::new_intern("sensitive")) + .peekable(); + let checks = ctx + .marked_nodes(Identifier::new_intern("check")) + .collect::>(); + + assert!(sensitive.peek().is_some()); + assert!(!checks.is_empty()); + + assert_error!( + ctx, + sensitive.all(|sens| { checks.iter().any(|c| c.has_ctrl_influence(sens, &ctx)) }) + ); + Ok(()) + }) +} + +#[test] +fn double_nested_ctrl_influence() -> Result<()> { + Test::new(stringify!( + #[paralegal::marker(check, return)] + fn check_safe(data: usize) -> Result<(), ()> { + Ok(()) + } + + #[paralegal::marker(sensitive, return)] + fn sensitive_action(data: usize) {} + + fn blocking(data: usize) { + wrap(data) + } + + fn wrap(data: usize) { + sensitive_action(data) + } + + #[paralegal::analyze] + fn main(data: usize) -> Result<(), ()> { + check_safe(data)?; + + blocking(data); + Ok(()) + } + ))? 
+ .run(|ctx| { + let mut sensitive = ctx + .marked_nodes(Identifier::new_intern("sensitive")) + .peekable(); + let checks = ctx + .marked_nodes(Identifier::new_intern("check")) + .collect::>(); + + assert!(sensitive.peek().is_some()); + assert!(!checks.is_empty()); + + assert_error!( + ctx, + sensitive.all(|sens| { checks.iter().any(|c| c.has_ctrl_influence(sens, &ctx)) }) + ); + Ok(()) + }) +} diff --git a/crates/paralegal-policy/tests/depth-manip-tests.rs b/crates/paralegal-policy/tests/depth-manip-tests.rs index b7d85c7f5e..30e5d31dfa 100644 --- a/crates/paralegal-policy/tests/depth-manip-tests.rs +++ b/crates/paralegal-policy/tests/depth-manip-tests.rs @@ -2,7 +2,7 @@ mod helpers; use anyhow::Result; use helpers::Test; -use paralegal_policy::{assert_error, EdgeSelection}; +use paralegal_policy::{assert_error, Context, EdgeSelection}; use paralegal_spdg::Identifier; #[test] diff --git a/crates/paralegal-policy/tests/entrypoint-generics.rs b/crates/paralegal-policy/tests/entrypoint-generics.rs index 16fd155dc0..a0b80863c6 100644 --- a/crates/paralegal-policy/tests/entrypoint-generics.rs +++ b/crates/paralegal-policy/tests/entrypoint-generics.rs @@ -2,12 +2,12 @@ use std::sync::Arc; use anyhow::Result; use helpers::Test; -use paralegal_policy::{assert_error, Context}; +use paralegal_policy::{assert_error, Context, RootContext}; use paralegal_spdg::Identifier; mod helpers; -fn simple_policy(ctx: Arc) -> Result<()> { +fn simple_policy(ctx: Arc) -> Result<()> { let sources = ctx .nodes_marked_any_way(Identifier::new_intern("source")) .collect::>(); diff --git a/crates/paralegal-policy/tests/freedit.rs b/crates/paralegal-policy/tests/freedit.rs index 1674ecfdff..309ef22684 100644 --- a/crates/paralegal-policy/tests/freedit.rs +++ b/crates/paralegal-policy/tests/freedit.rs @@ -2,7 +2,7 @@ mod helpers; use anyhow::Result; use helpers::Test; -use paralegal_policy::{assert_error, Diagnostics, EdgeSelection, NodeExt}; +use paralegal_policy::{assert_error, Context, Diagnostics, EdgeSelection, NodeExt}; use paralegal_spdg::Identifier; #[test] diff --git a/crates/paralegal-policy/tests/helpers/mod.rs b/crates/paralegal-policy/tests/helpers/mod.rs index 94bb018be1..b3eba63a7f 100644 --- a/crates/paralegal-policy/tests/helpers/mod.rs +++ b/crates/paralegal-policy/tests/helpers/mod.rs @@ -11,7 +11,7 @@ use std::{ use anyhow::anyhow; pub use anyhow::{ensure, Result}; -use paralegal_policy::{Context, GraphLocation}; +use paralegal_policy::{GraphLocation, RootContext}; lazy_static::lazy_static! 
{ static ref TOOL_BUILT: PathBuf = { @@ -180,7 +180,7 @@ impl Test { } #[allow(dead_code)] - pub fn run(self, test_function: impl FnOnce(Arc) -> Result<()>) -> Result<()> { + pub fn run(self, test_function: impl FnOnce(Arc) -> Result<()>) -> Result<()> { self.try_compile()?; let ret = GraphLocation::std(&self.tempdir) .with_context_configured(self.context_config, test_function)?; diff --git a/crates/paralegal-policy/tests/lemmy.rs b/crates/paralegal-policy/tests/lemmy.rs index 2e99cf7cff..d8cddbaebf 100644 --- a/crates/paralegal-policy/tests/lemmy.rs +++ b/crates/paralegal-policy/tests/lemmy.rs @@ -5,6 +5,7 @@ use std::{collections::hash_map::RandomState, sync::Arc}; use helpers::{Result, Test}; use paralegal_policy::{ assert_error, assert_warning, Context, Diagnostics, EdgeSelection, NodeExt, NodeQueries, + RootContext, }; use paralegal_spdg::{GlobalNode, Identifier}; @@ -38,7 +39,7 @@ const ASYNC_TRAIT_CODE: &str = stringify!( async fn save(u: usize) {} ); -fn async_trait_policy(ctx: Arc) -> Result<()> { +fn async_trait_policy(ctx: Arc) -> Result<()> { let sinks = ctx .marked_nodes(Identifier::new_intern("sink")) .collect::>(); @@ -105,7 +106,7 @@ const CALLING_ASYNC_TRAIT_CODE: &str = stringify!( } ); -fn calling_async_trait_policy(ctx: Arc) -> Result<()> { +fn calling_async_trait_policy(ctx: Arc) -> Result<()> { let sources = Vec::from_iter(ctx.marked_nodes(Identifier::new_intern("source"))); let sinks = Vec::from_iter(ctx.marked_nodes(Identifier::new_intern("sink"))); assert_error!(ctx, !sources.is_empty()); diff --git a/crates/paralegal-policy/tests/markers.rs b/crates/paralegal-policy/tests/markers.rs index 01a643f9bb..b7f5e96b4a 100644 --- a/crates/paralegal-policy/tests/markers.rs +++ b/crates/paralegal-policy/tests/markers.rs @@ -1,12 +1,12 @@ use anyhow::Result; use helpers::Test; -use paralegal_policy::{assert_error, Context, Diagnostics, EdgeSelection, NodeExt}; +use paralegal_policy::{assert_error, Context, Diagnostics, EdgeSelection, NodeExt, RootContext}; use paralegal_spdg::{GlobalNode, Identifier}; use std::sync::Arc; mod helpers; -fn policy(ctx: Arc) -> Result<()> { +fn policy(ctx: Arc) -> Result<()> { let m_dangerous = Identifier::new_intern("dangerous"); let m_sink = Identifier::new_intern("sink"); let d_tys = ctx.marked_type(m_dangerous); diff --git a/crates/paralegal-policy/tests/misc_async.rs b/crates/paralegal-policy/tests/misc_async.rs index 7ea37cabc5..5ad26cb74f 100644 --- a/crates/paralegal-policy/tests/misc_async.rs +++ b/crates/paralegal-policy/tests/misc_async.rs @@ -1,6 +1,6 @@ use anyhow::Result; use helpers::Test; -use paralegal_policy::{assert_error, EdgeSelection}; +use paralegal_policy::{assert_error, Context, EdgeSelection}; use paralegal_spdg::Identifier; mod helpers; diff --git a/crates/paralegal-policy/tests/websubmit.rs b/crates/paralegal-policy/tests/websubmit.rs index 80109f4b7a..9d939edfb4 100644 --- a/crates/paralegal-policy/tests/websubmit.rs +++ b/crates/paralegal-policy/tests/websubmit.rs @@ -1,8 +1,10 @@ mod helpers; use helpers::{Result, Test}; -use paralegal_policy::{algo::ahb, loc, paralegal_spdg, Diagnostics, Marker, NodeExt, NodeQueries}; -use paralegal_spdg::traverse::EdgeSelection; +use paralegal_policy::{ + algo::ahb, loc, paralegal_spdg, Context, Diagnostics, Marker, NodeExt, NodeQueries, +}; +use paralegal_spdg::{traverse::EdgeSelection, IntoIterGlobalNodes}; macro_rules! 
marker { ($id:ident) => { Marker::new_intern(stringify!($id)) @@ -138,6 +140,7 @@ fn email_send_overtaint() -> Result<()> { // scopes for the store let store_scopes = cx .influencers(&sink_callsite, EdgeSelection::Data) + .chain(sink_callsite.iter_global_nodes()) .filter(|n| n.has_marker(&cx, marker!(scopes))) .collect::>(); if store_scopes.is_empty() { diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index 16c9eb4eb7..201e6e344a 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -64,6 +64,9 @@ pub type Function = Identifier; /// [`ProgramDescription`]. pub const FLOW_GRAPH_OUT_NAME: &str = "flow-graph.o"; +/// Extension for output files containing statistics of the analyzer run. +pub const STAT_FILE_EXT: &str = "stat.json"; + #[allow(dead_code)] mod ser_localdefid_map { use serde::{Deserialize, Serialize}; @@ -321,6 +324,19 @@ pub type TypeInfoMap = HashMap; /// Endpoints with their SPDGs pub type ControllerMap = HashMap; +#[doc(hidden)] +/// How was a given function handled by the analyzer +#[derive(Serialize, Deserialize, Debug)] +pub enum FunctionHandling { + /// A PDG was generated + PDG, + /// The function was determined not to assign markers and was therefore elided. + Elided, +} + +#[doc(hidden)] +pub type AnalyzedSpans = HashMap; + /// The annotated program dependence graph. #[derive(Serialize, Deserialize, Debug)] pub struct ProgramDescription { @@ -343,15 +359,34 @@ pub struct ProgramDescription { #[cfg_attr(feature = "rustc", serde(with = "ser_defid_map"))] /// Metadata about the `DefId`s pub def_info: HashMap, + #[doc(hidden)] + #[cfg_attr(not(feature = "rustc"), serde(with = "serde_map_via_vec"))] + #[cfg_attr(feature = "rustc", serde(with = "ser_defid_map"))] + pub analyzed_spans: AnalyzedSpans, +} + +/// Statistics about a single run of paralegal-flow +#[derive(Serialize, Deserialize, Debug)] +pub struct AnalyzerStats { /// How many marker annotations were found pub marker_annotation_count: u32, + /// Total time used for the last analyzer run + pub self_time: Duration, + /// Time spent dumping MIR + pub dump_time: Duration, + /// How long the Rust typechecker took + pub tycheck_time: Duration, + /// How long we spent on dependencies + pub dep_time: Duration, /// How long rustc ran before our plugin executed pub rustc_time: Duration, + /// How long it took to serialize the graphs + pub serialization_time: Duration, /// The number of functions we produced a PDG for - pub dedup_functions: u32, /// The lines of code corresponding to the functions from - /// [`Self::dedup_functions`]. - pub dedup_locs: u32, + pub pdg_functions: u32, + /// The lines of code corresponding to the functions from + /// [`Self::pdg_functions`]. + pub pdg_locs: u32, /// The number of functions we produced PDGs for or we inspected to check /// for markers. pub seen_functions: u32, @@ -359,9 +394,6 @@ pub struct ProgramDescription { /// [`Self::seen_functions`]. This is the sum of all /// `analyzed_locs` of the controllers but deduplicated.
pub seen_locs: u32, - #[doc(hidden)] - #[serde(with = "ser_localdefid_map")] - pub analyzed_spans: HashMap, } /// Metadata about a type diff --git a/crates/paralegal-spdg/src/ser.rs b/crates/paralegal-spdg/src/ser.rs index 9ec25f57b4..8586f37b4d 100644 --- a/crates/paralegal-spdg/src/ser.rs +++ b/crates/paralegal-spdg/src/ser.rs @@ -5,11 +5,11 @@ use anyhow::{Context, Ok, Result}; use cfg_if::cfg_if; use std::{ fs::File, - io::{Read, Write}, + io::{BufReader, BufWriter, Read, Write}, path::Path, }; -use crate::ProgramDescription; +use crate::{AnalyzerStats, ProgramDescription}; cfg_if! { if #[cfg(feature = "binenc")] { @@ -31,7 +31,7 @@ impl ProgramDescription { /// Write `self` using the configured serialization format pub fn canonical_write(&self, path: impl AsRef) -> Result<()> { let path = path.as_ref(); - let mut out_file = File::create(path)?; + let mut out_file = BufWriter::new(File::create(path)?); out_file.write_all(&ser_magic().to_le_bytes())?; cfg_if! { if #[cfg(feature = "binenc")] { @@ -60,7 +60,7 @@ impl ProgramDescription { /// Read `self` using the configured serialization format pub fn canonical_read(path: impl AsRef) -> Result { let path = path.as_ref(); - let mut in_file = File::open(path)?; + let mut in_file = BufReader::new(File::open(path)?); let magic = { let mut buf = [0u8; 8]; in_file.read_exact(&mut buf).context("Reading magic")?; @@ -73,11 +73,11 @@ impl ProgramDescription { cfg_if! { if #[cfg(feature = "binenc")] { let read = bincode::deserialize_from( - &in_file, + in_file, ); } else { let read = serde_json::from_reader( - &in_file, + in_file, ); } }; @@ -91,3 +91,17 @@ impl ProgramDescription { }) } } + +impl AnalyzerStats { + /// Read the stats from a file using the default encoding (json) + pub fn canonical_read(path: impl AsRef) -> Result { + let reader = BufReader::new(File::open(path.as_ref())?); + Ok(serde_json::from_reader(reader)?) + } + + /// Write the stats to a file using the default encoding (json) + pub fn canonical_write(&self, path: impl AsRef) -> Result<()> { + let file = BufWriter::new(File::create(path)?); + Ok(serde_json::to_writer(file, self)?) 
+ } +} diff --git a/crates/paralegal-spdg/src/traverse.rs b/crates/paralegal-spdg/src/traverse.rs index bdac5ab82a..6fffd52f34 100644 --- a/crates/paralegal-spdg/src/traverse.rs +++ b/crates/paralegal-spdg/src/traverse.rs @@ -2,9 +2,12 @@ use std::collections::HashSet; -use petgraph::visit::{Control, Data, DfsEvent, EdgeFiltered, EdgeRef, IntoEdgeReferences}; +use petgraph::visit::{ + Bfs, Control, Data, DfsEvent, EdgeFiltered, EdgeRef, IntoEdgeReferences, IntoEdges, + IntoEdgesDirected, IntoNeighbors, VisitMap, Visitable, Walker, WalkerIter, +}; -use crate::{EdgeInfo, EdgeKind, Node}; +use crate::{EdgeInfo, EdgeKind, Node, SPDGImpl}; use super::SPDG; @@ -69,18 +72,136 @@ pub fn generic_flows_to( edge_selection: EdgeSelection, spdg: &SPDG, other: impl IntoIterator, -) -> bool { +) -> Option { let targets = other.into_iter().collect::>(); let mut from = from.into_iter().peekable(); if from.peek().is_none() || targets.is_empty() { - return false; + return None; } let graph = edge_selection.filter_graph(&spdg.graph); let result = petgraph::visit::depth_first_search(&graph, from, |event| match event { - DfsEvent::Discover(d, _) if targets.contains(&d) => Control::Break(()), + DfsEvent::Discover(d, _) if targets.contains(&d) => Control::Break(d), _ => Control::Continue, }); - matches!(result, Control::Break(())) + match result { + Control::Break(r) => Some(r), + _ => None, + } +} + +/// The current policy for this iterator is that it does not return the start +/// nodes *unless* there is a cycle and a start node is reachable that way. +fn bfs_iter::Map>>( + g: G, + start: impl IntoIterator, +) -> WalkerIter::Map>, G> { + let mut discovered = g.visit_map(); + let mut stack: std::collections::VecDeque = Default::default(); + // Prime the stack with the successors of the input nodes; pushing the + // inputs themselves would make the iterator return them.
+ for n in start { + for next in g.neighbors(n) { + if discovered.visit(next) { + stack.push_back(next); + } + } + } + let bfs = Bfs { stack, discovered }; + Walker::iter(bfs, g) +} + +#[cfg(test)] +mod test { + use petgraph::graph::DiGraph; + + use super::bfs_iter; + + #[test] + fn iter_sees_nested() { + let mut g = DiGraph::<(), ()>::new(); + let a = g.add_node(()); + let b = g.add_node(()); + let c = g.add_node(()); + let d = g.add_node(()); + + g.add_edge(a, b, ()); + g.add_edge(b, c, ()); + + let seen = bfs_iter(&g, [a]).collect::>(); + assert!(seen.contains(&b)); + assert!(seen.contains(&c)); + assert!(!seen.contains(&d)); + assert!(!seen.contains(&a)); + } + + #[test] + fn iter_sees_cycle() { + let mut g = DiGraph::<(), ()>::new(); + let a = g.add_node(()); + let b = g.add_node(()); + let c = g.add_node(()); + + g.add_edge(a, b, ()); + g.add_edge(b, c, ()); + g.add_edge(c, a, ()); + + let seen = bfs_iter(&g, [a]).collect::>(); + assert!(seen.contains(&b)); + assert!(seen.contains(&c)); + assert!(seen.contains(&a)); + } +} + +/// Base function for implementing influencers +pub fn generic_influencers< + G: IntoEdgesDirected + + Visitable::Map> + + Data, +>( + g: G, + nodes: impl IntoIterator, + edge_selection: EdgeSelection, +) -> Vec { + use petgraph::visit::*; + + let reversed_graph = Reversed(g); + + match edge_selection { + EdgeSelection::Data => { + let edges_filtered = EdgeFiltered::from_fn(reversed_graph, |e| e.weight().is_data()); + bfs_iter(&edges_filtered, nodes).collect::>() + } + EdgeSelection::Control => { + let edges_filtered = EdgeFiltered::from_fn(reversed_graph, |e| e.weight().is_control()); + bfs_iter(&edges_filtered, nodes).collect::>() + } + EdgeSelection::Both => bfs_iter(reversed_graph, nodes).collect::>(), + } +} + +/// Base function for implementing influencees +pub fn generic_influencees< + G: IntoEdges + + Visitable::Map> + + Data, +>( + g: G, + nodes: impl IntoIterator, + edge_selection: EdgeSelection, +) -> Vec { + use petgraph::visit::*; + + match edge_selection { + EdgeSelection::Data => { + let edges_filtered = EdgeFiltered::from_fn(g, |e| e.weight().is_data()); + bfs_iter(&edges_filtered, nodes).collect::>() + } + EdgeSelection::Control => { + let edges_filtered = EdgeFiltered::from_fn(g, |e| e.weight().is_control()); + bfs_iter(&edges_filtered, nodes).collect::>() + } + EdgeSelection::Both => bfs_iter(g, nodes).collect::>(), + } } diff --git a/guide/deletion-policy/Cargo.lock b/guide/deletion-policy/Cargo.lock index 8487126b5e..b7707e8acb 100644 --- a/guide/deletion-policy/Cargo.lock +++ b/guide/deletion-policy/Cargo.lock @@ -434,6 +434,7 @@ name = "paralegal-policy" version = "0.1.0" dependencies = [ "anyhow", + "autocfg", "bitvec", "colored 1.9.4", "indexical", diff --git a/guide/deletion-policy/src/main.rs b/guide/deletion-policy/src/main.rs index 02a112e244..59b6410dfa 100644 --- a/guide/deletion-policy/src/main.rs +++ b/guide/deletion-policy/src/main.rs @@ -1,10 +1,10 @@ use anyhow::Result; use paralegal_policy::{ - assert_error, paralegal_spdg::traverse::EdgeSelection, Context, Marker, NodeExt, + assert_error, paralegal_spdg::traverse::EdgeSelection, Marker, NodeExt, RootContext, }; use std::sync::Arc; -fn dummy_policy(_ctx: Arc) -> Result<()> { +fn dummy_policy(_ctx: Arc) -> Result<()> { println!("Graph loaded."); Ok(()) } @@ -20,7 +20,7 @@ fn main() -> Result<()> { } #[allow(dead_code)] -fn deletion_policy(ctx: Arc) -> Result<()> { +fn deletion_policy(ctx: Arc) -> Result<()> { let user_data_types = 
ctx.marked_type(Marker::new_intern("user_data")); let found = ctx.all_controllers().any(|(deleter_id, _ignored)| {