From ff2601d31a4588dea3523925f097888f192102a4 Mon Sep 17 00:00:00 2001 From: Justus Adam Date: Thu, 22 Feb 2024 09:01:24 -0500 Subject: [PATCH] Documentation, refactoring and convenience (#123) ## What Changed? Moves the annotations into `paralegal-flow` Provides basic documentation for `paralegal-spdg` Now requires documentation for `paralegal-spdg` in CI Adds convenience functions to `Context` ## Why Does It Need To? Adds convenience methods to `Context` to improve ergonomics of the policies. Adds missing documentation to the spdg crate and now also enables the missing documentation warning, which makes it an error in CI. The annotations are moved because they aren't in the SPDG anyway and thus not accessible to consumers in this form. ## Checklist - [x] Above description has been filled out so that upon squash merge we have a good record of what changed. - [x] New functions, methods, types are documented. Old documentation is updated if necessary - [x] Documentation in Notion has been updated - [x] Tests for new behaviors are provided - [x] New test suites (if any) have been added to the CI tests (in `.github/workflows/rust.yml`) either as compiler test or integration test. *Or* justification for their omission from CI has been provided in this PR description. 
--- Cargo.lock | 1 + crates/paralegal-flow/Cargo.toml | 1 + crates/paralegal-flow/src/ana/mod.rs | 1 + crates/paralegal-flow/src/ann/mod.rs | 138 ++++++++++ .../src/{ann_parse.rs => ann/parse.rs} | 6 +- crates/paralegal-flow/src/lib.rs | 2 +- crates/paralegal-flow/src/marker_db.rs | 18 +- crates/paralegal-policy/src/context.rs | 34 ++- crates/paralegal-policy/src/lib.rs | 4 +- crates/paralegal-spdg/src/dot.rs | 5 + crates/paralegal-spdg/src/lib.rs | 250 +++++++----------- crates/paralegal-spdg/src/tiny_bitset.rs | 4 + crates/paralegal-spdg/src/traverse.rs | 12 + crates/paralegal-spdg/src/utils.rs | 9 + 14 files changed, 316 insertions(+), 169 deletions(-) create mode 100644 crates/paralegal-flow/src/ann/mod.rs rename crates/paralegal-flow/src/{ann_parse.rs => ann/parse.rs} (98%) diff --git a/Cargo.lock b/Cargo.lock index c3c7711de2..fe90d3666b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -810,6 +810,7 @@ dependencies = [ "serde_json", "serial_test", "simple_logger", + "strum", "thiserror", "toml", "trait_enum", diff --git a/crates/paralegal-flow/Cargo.toml b/crates/paralegal-flow/Cargo.toml index 99fec275ea..d8d2f67bdd 100644 --- a/crates/paralegal-flow/Cargo.toml +++ b/crates/paralegal-flow/Cargo.toml @@ -40,6 +40,7 @@ num-derive = "0.4" num-traits = "0.2" petgraph = { workspace = true } humantime = "2" +strum = { version = "0.25", features = ["derive"] } #dot = "0.1" diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 105a416240..2a945c0b5f 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -5,6 +5,7 @@ //! [`analyze`](SPDGGenerator::analyze). 
use crate::{ + ann::{Annotation, MarkerAnnotation}, desc::*, rust::{hir::def, *}, utils::*, diff --git a/crates/paralegal-flow/src/ann/mod.rs b/crates/paralegal-flow/src/ann/mod.rs new file mode 100644 index 0000000000..93f0678b42 --- /dev/null +++ b/crates/paralegal-flow/src/ann/mod.rs @@ -0,0 +1,138 @@ +use serde::{Deserialize, Serialize}; + +use paralegal_spdg::{rustc_proxies, tiny_bitset_pretty, Identifier, TinyBitSet, TypeId}; + +pub mod parse; + +/// Types of annotations we support. +/// +/// Usually you'd expect one of those annotation types in any given situation. +/// For convenience the match methods [`Self::as_marker`], [`Self::as_otype`] +/// and [`Self::as_exception`] are provided. These are particularly useful in +/// conjunction with e.g. [`Iterator::filter_map`] +#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Deserialize, Serialize, strum::EnumIs)] +pub enum Annotation { + Marker(MarkerAnnotation), + OType(#[serde(with = "rustc_proxies::DefId")] TypeId), + Exception(ExceptionAnnotation), +} + +impl Annotation { + /// If this is an [`Annotation::Marker`], returns the underlying [`MarkerAnnotation`]. + pub fn as_marker(&self) -> Option<&MarkerAnnotation> { + match self { + Annotation::Marker(l) => Some(l), + _ => None, + } + } + + /// If this is an [`Annotation::OType`], returns the underlying [`TypeId`]. + pub fn as_otype(&self) -> Option { + match self { + Annotation::OType(t) => Some(*t), + _ => None, + } + } + + /// If this is an [`Annotation::Exception`], returns the underlying [`ExceptionAnnotation`]. + pub fn as_exception(&self) -> Option<&ExceptionAnnotation> { + match self { + Annotation::Exception(e) => Some(e), + _ => None, + } + } +} + +pub type VerificationHash = u128; + +#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Serialize, Deserialize)] +pub struct ExceptionAnnotation { + /// The value of the verification hash we found in the annotation. Is `None` + /// if there was no verification hash in the annotation. 
+ pub verification_hash: Option, +} + +/// A marker annotation and its refinements. +#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Serialize, Deserialize)] +pub struct MarkerAnnotation { + /// The (unchanged) name of the marker as provided by the user + pub marker: Identifier, + #[serde(flatten)] + pub refinement: MarkerRefinement, +} + +fn const_false() -> bool { + false +} + +/// Refinements in the marker targeting. The default (no refinement provided) is +/// `on_argument == vec![]` and `on_return == false`, which is also what is +/// returned from [`Self::empty`]. +#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Deserialize, Serialize)] +pub struct MarkerRefinement { + #[serde(default, with = "tiny_bitset_pretty")] + on_argument: TinyBitSet, + #[serde(default = "const_false")] + on_return: bool, +} + +/// Disaggregated version of [`MarkerRefinement`]. Can be added to an existing +/// refinement [`MarkerRefinement::merge_kind`]. +#[derive(Clone, Deserialize, Serialize)] +pub enum MarkerRefinementKind { + Argument(#[serde(with = "tiny_bitset_pretty")] TinyBitSet), + Return, +} + +impl MarkerRefinement { + /// The default, empty aggregate refinement `Self { on_argument: vec![], + /// on_return: false }` + pub fn empty() -> Self { + Self { + on_argument: Default::default(), + on_return: false, + } + } + + /// Merge the aggregate refinement with another discovered refinement and + /// check that they do not overwrite each other. 
+ pub fn merge_kind(mut self, k: MarkerRefinementKind) -> Result { + match k { + MarkerRefinementKind::Argument(a) => { + if self.on_argument.is_empty() { + self.on_argument = a; + Ok(self) + } else { + Err(format!( + "Double argument annotation {:?} and {a:?}", + self.on_argument + )) + } + } + MarkerRefinementKind::Return => { + if !self.on_return { + self.on_return = true; + Ok(self) + } else { + Err("Double on-return annotation".to_string()) + } + } + } + } + + /// Get the refinements on arguments + pub fn on_argument(&self) -> TinyBitSet { + self.on_argument + } + + /// Is this refinement targeting the return value? + pub fn on_return(&self) -> bool { + self.on_return + } + + /// True if this refinement is empty, i.e. the annotation is targeting the + /// item itself. + pub fn on_self(&self) -> bool { + self.on_argument.is_empty() && !self.on_return + } +} diff --git a/crates/paralegal-flow/src/ann_parse.rs b/crates/paralegal-flow/src/ann/parse.rs similarity index 98% rename from crates/paralegal-flow/src/ann_parse.rs rename to crates/paralegal-flow/src/ann/parse.rs index 6f199578bb..7f809fa3d3 100644 --- a/crates/paralegal-flow/src/ann_parse.rs +++ b/crates/paralegal-flow/src/ann/parse.rs @@ -8,9 +8,11 @@ //! it gives us boundaries for parsers that lets us (re)combine them, but also //! that we get features that are annoying to implement (such as backtracking) //! for free. 
+use super::{ + ExceptionAnnotation, MarkerAnnotation, MarkerRefinement, MarkerRefinementKind, VerificationHash, +}; use crate::{ consts, - desc::{ExceptionAnnotation, MarkerAnnotation, MarkerRefinement, MarkerRefinementKind}, rust::*, utils, utils::{write_sep, Print, TinyBitSet}, @@ -250,7 +252,7 @@ pub(crate) fn match_exception(ann: &rustc_ast::AttrArgs) -> ExceptionAnnotation assert_token(TokenKind::Eq), )), lit(token::LitKind::Str, |s| { - crate::desc::VerificationHash::from_str_radix(s, 16) + VerificationHash::from_str_radix(s, 16) .map_err(|e: std::num::ParseIntError| e.to_string()) }), ))(i)?; diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index d720418aed..14f3aead07 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -79,7 +79,7 @@ pub use either::Either; pub use rustc_span::Symbol; pub mod ana; -pub mod ann_parse; +pub mod ann; mod args; pub mod dbg; mod discover; diff --git a/crates/paralegal-flow/src/marker_db.rs b/crates/paralegal-flow/src/marker_db.rs index 0edca6327c..3b4cce8859 100644 --- a/crates/paralegal-flow/src/marker_db.rs +++ b/crates/paralegal-flow/src/marker_db.rs @@ -11,9 +11,9 @@ //! All interactions happen through the central database object: [`MarkerCtx`]. use crate::{ + ann::{Annotation, MarkerAnnotation}, args::{Args, MarkerControl}, consts, - desc::{Annotation, MarkerAnnotation}, hir::def::DefKind, mir, ty, utils::{ @@ -303,24 +303,22 @@ impl<'tcx> MarkerDatabase<'tcx> { /// Retrieve and parse the local annotations for this item. 
pub fn retrieve_local_annotations_for(&mut self, def_id: LocalDefId) { + use crate::ann::parse::{ann_match_fn, match_exception, otype_ann_match}; + let tcx = self.tcx; let hir = tcx.hir(); let id = def_id.force_into_hir_id(tcx); let mut sink_matches = vec![]; for a in hir.attrs(id) { if let Some(i) = a.match_get_ref(&consts::MARKER_MARKER) { - sink_matches.push(Annotation::Marker(crate::ann_parse::ann_match_fn(i))); + sink_matches.push(Annotation::Marker(ann_match_fn(i))); } else if let Some(i) = a.match_get_ref(&consts::LABEL_MARKER) { warn!("The `paralegal_flow::label` annotation is deprecated, use `paralegal_flow::marker` instead"); - sink_matches.push(Annotation::Marker(crate::ann_parse::ann_match_fn(i))) + sink_matches.push(Annotation::Marker(ann_match_fn(i))) } else if let Some(i) = a.match_get_ref(&consts::OTYPE_MARKER) { - sink_matches.extend( - crate::ann_parse::otype_ann_match(i, tcx) - .into_iter() - .map(Annotation::OType), - ); + sink_matches.extend(otype_ann_match(i, tcx).into_iter().map(Annotation::OType)); } else if let Some(i) = a.match_get_ref(&consts::EXCEPTION_MARKER) { - sink_matches.push(Annotation::Exception(crate::ann_parse::match_exception(i))); + sink_matches.push(Annotation::Exception(match_exception(i))); } } if !sink_matches.is_empty() { @@ -332,7 +330,7 @@ impl<'tcx> MarkerDatabase<'tcx> { } } -type RawExternalMarkers = HashMap>; +type RawExternalMarkers = HashMap>; /// Given the TOML of external annotations we have parsed, resolve the paths /// (keys of the map) to [`DefId`]s. 
diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index 68b9e83698..9395eb29f1 100644 --- a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -3,8 +3,9 @@ use std::{io::Write, process::exit, sync::Arc}; pub use paralegal_spdg::rustc_portable::{DefId, LocalDefId}; use paralegal_spdg::traverse::{generic_flows_to, EdgeSelection}; use paralegal_spdg::{ - CallString, DisplayNode, Endpoint, GlobalNode, HashMap, Identifier, IntoIterGlobalNodes, - Node as SPDGNode, NodeCluster, ProgramDescription, SPDGImpl, TypeId, SPDG, + CallString, DisplayNode, Endpoint, GlobalNode, HashMap, Identifier, InstructionInfo, + IntoIterGlobalNodes, Node as SPDGNode, NodeCluster, NodeInfo, ProgramDescription, SPDGImpl, + TypeId, SPDG, }; use anyhow::{anyhow, bail, ensure, Result}; @@ -71,7 +72,7 @@ fn bfs_iter< } let bfs = Bfs { stack, discovered }; let walker_iter = Walker::iter(bfs, g); - walker_iter.map(move |inner| GlobalNode::unsafe_new(controller_id, inner.index())) + walker_iter.map(move |inner| GlobalNode::from_local_node(controller_id, inner)) } /// Interface for defining policies. @@ -587,6 +588,33 @@ impl Context { ) } + /// Retrieve metadata about a node. + pub fn node_info(&self, node: GlobalNode) -> &NodeInfo { + self.desc.controllers[&node.controller_id()].node_info(node.local_node()) + } + + /// Retrieve metadata about the instruction executed by a specific node. 
+ pub fn instruction_at_node(&self, node: GlobalNode) -> &InstructionInfo { + let node_info = self.node_info(node); + &self.desc.instruction_info[&node_info.at.leaf()] + } + + /// Return the immediate successors of this node + pub fn successors(&self, node: GlobalNode) -> impl Iterator + '_ { + self.desc.controllers[&node.controller_id()] + .graph + .neighbors(node.local_node()) + .map(move |n| GlobalNode::from_local_node(node.controller_id(), n)) + } + + /// Return the immediate predecessors of this node + pub fn predecessors(&self, node: GlobalNode) -> impl Iterator + '_ { + self.desc.controllers[&node.controller_id()] + .graph + .neighbors_directed(node.local_node(), petgraph::Direction::Incoming) + .map(move |n| GlobalNode::from_local_node(node.controller_id(), n)) + } + #[cfg(test)] pub fn nth_successors( &self, diff --git a/crates/paralegal-policy/src/lib.rs b/crates/paralegal-policy/src/lib.rs index 5888a3cc17..63bebef12c 100644 --- a/crates/paralegal-policy/src/lib.rs +++ b/crates/paralegal-policy/src/lib.rs @@ -53,7 +53,9 @@ extern crate core; use anyhow::{ensure, Result}; pub use paralegal_spdg; -use paralegal_spdg::ProgramDescription; +pub use paralegal_spdg::{ + traverse::EdgeSelection, GlobalNode, IntoIterGlobalNodes, ProgramDescription, +}; use std::{ fs::File, path::{Path, PathBuf}, diff --git a/crates/paralegal-spdg/src/dot.rs b/crates/paralegal-spdg/src/dot.rs index 4e2bd8ebcd..bb12950e2c 100644 --- a/crates/paralegal-spdg/src/dot.rs +++ b/crates/paralegal-spdg/src/dot.rs @@ -1,3 +1,5 @@ +//! 
Display SPDGs as dot graphs + use crate::{GlobalEdge, InstructionInfo, Node, ProgramDescription}; use dot::{CompassPoint, Edges, Id, LabelText, Nodes}; use flowistry_pdg::rustc_portable::LocalDefId; @@ -186,10 +188,12 @@ impl<'a, 'd> dot::Labeller<'a, CallString, GlobalEdge> for DotPrintableProgramDe } } +/// Dump all SPDGs in a single dot expression pub fn dump(spdg: &ProgramDescription, out: W) -> std::io::Result<()> { dump_for_selection(spdg, out, |_| true) } +/// Dump the SPDG for one select controller in dot format pub fn dump_for_controller( spdg: &ProgramDescription, out: impl std::io::Write, @@ -210,6 +214,7 @@ pub fn dump_for_controller( Ok(()) } +/// Dump a selection of controllers into a dot expression. pub fn dump_for_selection( spdg: &ProgramDescription, mut out: impl std::io::Write, diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index ff9269ef31..2585e0eed9 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -6,6 +6,7 @@ //! [`rustc_proxies`] module for all Rustc types within the PDG. #![cfg_attr(feature = "rustc", feature(rustc_private))] +#![warn(missing_docs)] #[cfg(feature = "rustc")] pub(crate) mod rustc { @@ -35,6 +36,7 @@ use std::{fmt, hash::Hash}; use utils::serde_map_via_vec; +pub use crate::tiny_bitset::pretty as tiny_bitset_pretty; pub use crate::tiny_bitset::TinyBitSet; use flowistry_pdg::rustc_portable::LocalDefId; use petgraph::graph::{EdgeIndex, EdgeReference, NodeIndex}; @@ -43,164 +45,17 @@ use petgraph::visit::IntoNodeIdentifiers; pub use std::collections::{HashMap, HashSet}; use std::fmt::{Display, Formatter}; +/// The types of identifiers that identify an entrypoint pub type Endpoint = LocalDefId; +/// Identifiers for types pub type TypeId = DefId; +/// Identifiers for functions pub type Function = Identifier; /// Name of the file used for emitting the JSON serialized /// [`ProgramDescription`]. 
pub const FLOW_GRAPH_OUT_NAME: &str = "flow-graph.json"; -/// Types of annotations we support. -/// -/// Usually you'd expect one of those annotation types in any given situation. -/// For convenience the match methods [`Self::as_marker`], [`Self::as_otype`] -/// and [`Self::as_exception`] are provided. These are particularly useful in -/// conjunction with e.g. [`Iterator::filter_map`] -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Deserialize, Serialize, strum::EnumIs)] -pub enum Annotation { - Marker(MarkerAnnotation), - OType(#[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::DefId"))] TypeId), - Exception(ExceptionAnnotation), -} - -impl Annotation { - /// If this is an [`Annotation::Marker`], returns the underlying [`MarkerAnnotation`]. - pub fn as_marker(&self) -> Option<&MarkerAnnotation> { - match self { - Annotation::Marker(l) => Some(l), - _ => None, - } - } - - /// If this is an [`Annotation::OType`], returns the underlying [`TypeId`]. - pub fn as_otype(&self) -> Option { - match self { - Annotation::OType(t) => Some(*t), - _ => None, - } - } - - /// If this is an [`Annotation::Exception`], returns the underlying [`ExceptionAnnotation`]. - pub fn as_exception(&self) -> Option<&ExceptionAnnotation> { - match self { - Annotation::Exception(e) => Some(e), - _ => None, - } - } -} - -pub type VerificationHash = u128; - -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Serialize, Deserialize)] -pub struct ExceptionAnnotation { - /// The value of the verification hash we found in the annotation. Is `None` - /// if there was no verification hash in the annotation. - pub verification_hash: Option, -} - -/// A marker annotation and its refinements. 
-#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Serialize, Deserialize)] -pub struct MarkerAnnotation { - /// The (unchanged) name of the marker as provided by the user - pub marker: Identifier, - #[serde(flatten)] - pub refinement: MarkerRefinement, -} - -fn const_false() -> bool { - false -} - -/// Refinements in the marker targeting. The default (no refinement provided) is -/// `on_argument == vec![]` and `on_return == false`, which is also what is -/// returned from [`Self::empty`]. -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Deserialize, Serialize)] -pub struct MarkerRefinement { - #[serde(default, with = "crate::tiny_bitset::pretty")] - on_argument: TinyBitSet, - #[serde(default = "const_false")] - on_return: bool, -} - -/// Disaggregated version of [`MarkerRefinement`]. Can be added to an existing -/// refinement [`MarkerRefinement::merge_kind`]. -#[derive(Clone, Deserialize, Serialize)] -pub enum MarkerRefinementKind { - Argument(#[serde(with = "crate::tiny_bitset::pretty")] TinyBitSet), - Return, -} - -impl MarkerRefinement { - /// The default, empty aggregate refinement `Self { on_argument: vec![], - /// on_return: false }` - pub fn empty() -> Self { - Self { - on_argument: Default::default(), - on_return: false, - } - } - - /// Merge the aggregate refinement with another discovered refinement and - /// check that they do not overwrite each other. 
- pub fn merge_kind(mut self, k: MarkerRefinementKind) -> Result { - match k { - MarkerRefinementKind::Argument(a) => { - if self.on_argument.is_empty() { - self.on_argument = a; - Ok(self) - } else { - Err(format!( - "Double argument annotation {:?} and {a:?}", - self.on_argument - )) - } - } - MarkerRefinementKind::Return => { - if !self.on_return { - self.on_return = true; - Ok(self) - } else { - Err("Double on-return annotation".to_string()) - } - } - } - } - - /// Get the refinements on arguments - pub fn on_argument(&self) -> TinyBitSet { - self.on_argument - } - - /// Is this refinement targeting the return value? - pub fn on_return(&self) -> bool { - self.on_return - } - - /// True if this refinement is empty, i.e. the annotation is targeting the - /// item itself. - pub fn on_self(&self) -> bool { - self.on_argument.is_empty() && !self.on_return - } -} - -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Serialize, Deserialize, strum::EnumIs)] -pub enum ObjectType { - Function(usize), - Type, - Other, -} - -impl ObjectType { - /// If this is [`Self::Function`], then return the payload. - pub fn as_function(&self) -> Option { - match self { - ObjectType::Function(f) => Some(*f), - _ => None, - } - } -} - #[allow(dead_code)] mod ser_localdefid_map { use serde::{Deserialize, Serialize}; @@ -277,31 +132,45 @@ pub struct DefInfo { Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Debug, strum::EnumIs, strum::AsRefStr, )] pub enum DefKind { + /// A regular function object Fn, + /// The function corresponding to a generator Generator, + /// The function corresponding to a closure Closure, + /// A type Type, } +/// Metadata on a function call. #[derive(Debug, Clone, Copy, Serialize, Deserialize, Eq, Ord, PartialOrd, PartialEq)] pub struct FunctionCallInfo { + /// Has this call been inlined pub is_inlined: bool, + /// What is the ID of the item that was called here. 
#[cfg_attr(feature = "rustc", serde(with = "rustc_proxies::DefId"))] pub id: DefId, } +/// The type of instructions we may encounter #[derive( Debug, Clone, Copy, Serialize, Deserialize, Eq, Ord, PartialOrd, PartialEq, strum::EnumIs, )] pub enum InstructionInfo { + /// Some type of statement Statement, + /// A function call FunctionCall(FunctionCallInfo), + /// A basic block terminator, usually switchInt Terminator, + /// The beginning of a function Start, + /// The merged exit points of a function Return, } impl InstructionInfo { + /// If this identifies a function call, return the information inside. pub fn as_function_call(self) -> Option { match self { InstructionInfo::FunctionCall(d) => Some(d), @@ -313,15 +182,18 @@ impl InstructionInfo { /// The annotated program dependence graph. #[derive(Serialize, Deserialize, Debug)] pub struct ProgramDescription { + /// Entry points we analyzed and their PDGs #[cfg_attr(feature = "rustc", serde(with = "ser_localdefid_map"))] #[cfg_attr(not(feature = "rustc"), serde(with = "serde_map_via_vec"))] - /// Mapping from function names to dependencies within the function. pub controllers: HashMap, + /// Metadata about types #[cfg_attr(not(feature = "rustc"), serde(with = "serde_map_via_vec"))] #[cfg_attr(feature = "rustc", serde(with = "ser_defid_map"))] pub type_info: HashMap, + /// Metadata about the instructions that are executed at all program + /// locations we know about. #[serde(with = "serde_map_via_vec")] pub instruction_info: HashMap, @@ -331,11 +203,15 @@ pub struct ProgramDescription { pub def_info: HashMap, } +/// Metadata about a type #[derive(Serialize, Deserialize, Debug)] pub struct TypeDescription { + /// How rustc would debug print this type pub rendering: String, + /// Aliases #[cfg_attr(feature = "rustc", serde(with = "ser_defid_vec"))] pub otypes: Vec, + /// Attached markers. Guaranteed not to be empty. 
pub markers: Vec, } @@ -375,7 +251,7 @@ impl ProgramDescription { .iter() .flat_map(|(name, c)| { c.all_sources() - .map(|ds| GlobalNode::unsafe_new(*name, ds.index())) + .map(|ds| GlobalNode::from_local_node(*name, ds)) }) .collect() } @@ -401,6 +277,7 @@ impl ProgramDescription { pub struct Identifier(Intern); impl Identifier { + /// Intern a new identifier from a rustc [`rustc::span::Symbol`] #[cfg(feature = "rustc")] pub fn new(s: rustc::span::Symbol) -> Self { Self::new_intern(s.as_str()) @@ -440,6 +317,7 @@ impl Display for Identifier { pub struct ShortHash(u64); impl ShortHash { + /// Constructor pub fn new(t: T) -> Self { // Six digits in hex Self(hash_pls(t) % 0x1_000_000) @@ -466,6 +344,7 @@ pub fn hash_pls(t: T) -> u64 { hasher.finish() } +/// Return type of [`IntoIterGlobalNodes::iter_global_nodes`]. pub struct GlobalNodeIter { controller_id: LocalDefId, iter: I::Iter, @@ -481,12 +360,25 @@ impl Iterator for GlobalNodeIter { } } +/// This lets us be agnostic whether a primitive (such as `flows_to`) is called +/// with a [`GlobalNode`] or `&NodeCluster`. +/// +/// Note that while [`GlobalNode`] implements this trait [`NodeCluster`] *does +/// not do so directly*, but it's reference `&NodeCluster` does. pub trait IntoIterGlobalNodes: Sized + Copy { + /// The iterator returned by [`Self::iter_nodes`] type Iter: Iterator; + + /// iterate over the local nodes fn iter_nodes(self) -> Self::Iter; + /// The controller id all of these nodes are located in. fn controller_id(self) -> LocalDefId; + /// Iterate all nodes as globally identified one's. + /// + /// The invariant of this iterator is that all `controller_id()`s of the + /// nodes in the iterator is the same as `self.controller_id()`. 
fn iter_global_nodes(self) -> GlobalNodeIter { GlobalNodeIter { controller_id: self.controller_id(), @@ -507,13 +399,16 @@ pub trait IntoIterGlobalNodes: Sized + Copy { )) } + /// Collect the iterator into a cluster fn to_local_cluster(self) -> NodeCluster { NodeCluster::new(self.controller_id(), self.iter_nodes()) } } +/// Local nodes in an [`SPDGImpl`] pub type Node = NodeIndex; +/// A globally identified node in an SPDG #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] pub struct GlobalNode { node: Node, @@ -530,6 +425,10 @@ impl GlobalNode { } } + /// Create a new globally identified node by pairing a node local to a + /// particular SPDG with it's controller id. + /// + /// Meant for internal use only. pub fn from_local_node(ctrl_id: LocalDefId, node: Node) -> Self { GlobalNode { controller_id: ctrl_id, @@ -537,10 +436,12 @@ impl GlobalNode { } } + /// The local node in the SPDG pub fn local_node(self) -> Node { self.node } + /// The identifier for the SPDG this node is contained in pub fn controller_id(self) -> LocalDefId { self.controller_id } @@ -557,12 +458,18 @@ impl IntoIterGlobalNodes for GlobalNode { } } +/// A globally identified set of nodes that are all located in the same +/// controller. +/// +/// Sometimes it is more convenient to think about such a group instead of +/// individual [`GlobalNode`]s #[derive(Debug, Hash, Clone)] pub struct NodeCluster { controller_id: LocalDefId, nodes: Box<[Node]>, } +/// Iterate over a node cluster but yielding [`GlobalNode`]s pub struct NodeClusterIter<'a> { inner: std::slice::Iter<'a, Node>, } @@ -588,6 +495,7 @@ impl<'a> IntoIterGlobalNodes for &'a NodeCluster { } impl NodeCluster { + /// Create a new cluster. This for internal use. 
pub fn new(controller_id: LocalDefId, nodes: impl IntoIterator) -> Self { Self { controller_id, @@ -595,15 +503,18 @@ impl NodeCluster { } } + /// Controller that these nodes belong to pub fn controller_id(&self) -> LocalDefId { self.controller_id } + /// Nodes in this cluster pub fn nodes(&self) -> &[Node] { &self.nodes } } +/// The global version of an edge that is tied to some specific entrypoint #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] pub struct GlobalEdge { index: EdgeIndex, @@ -611,15 +522,20 @@ pub struct GlobalEdge { } impl GlobalEdge { + /// The id of the controller that this edge is located in pub fn controller_id(self) -> LocalDefId { self.controller_id } } +/// Node metadata in the [`SPDGImpl`] #[derive(Clone, Debug, Serialize, Deserialize)] pub struct NodeInfo { + /// Location of the node in the call stack pub at: CallString, + /// The debug print of the `mir::Place` that this node represents pub description: String, + /// Additional information of how this node is used in the source. pub kind: NodeKind, } @@ -629,12 +545,19 @@ impl Display for NodeInfo { } } +/// Additional information about what a given node may represent #[derive(Clone, Debug, Serialize, Deserialize, Copy, strum::EnumIs)] pub enum NodeKind { + /// The node is (part of) a formal parameter of a function (0-indexed). e.g. + /// in `fn foo(x: usize)` `x` would be a `FormalParameter(0)`. FormalParameter(u8), + /// Formal return of a function, e.g. `x` in `return x`; FormalReturn, + /// Parameter given to a function at the call site, e.g. `x` in `foo(x)`. ActualParameter(TinyBitSet), + /// Return value received from a call, e.g. 
`x` in `let x = foo(...);` ActualReturn, + /// Any other kind of node Unspecified, } @@ -654,9 +577,12 @@ impl Display for NodeKind { } } +/// Metadata for an edge in the [`SPDGImpl`] #[derive(Clone, Debug, Serialize, Deserialize)] pub struct EdgeInfo { + /// What type of edge it is pub kind: EdgeKind, + /// Where in the program this edge arises from pub at: CallString, } @@ -667,40 +593,56 @@ impl Display for EdgeInfo { } impl EdgeInfo { + /// Same as `self.kind.is_data()` pub fn is_data(&self) -> bool { matches!(self.kind, EdgeKind::Data) } + /// Same as `self.kind.is_control()` pub fn is_control(&self) -> bool { matches!(self.kind, EdgeKind::Control) } } +/// The type of an edge #[derive( Clone, Debug, Copy, Eq, PartialEq, Deserialize, Serialize, strum::EnumIs, strum::Display, )] pub enum EdgeKind { + /// The target can read data created by the source Data, + /// The source controls the execution of the target Control, } +/// The graph portion of an [`SPDG`] pub type SPDGImpl = petgraph::Graph; +/// A semantic PDG, e.g. a graph plus marker annotations #[derive(Clone, Serialize, Deserialize, Debug)] pub struct SPDG { + /// The identifier of the entry point to this computation pub name: Identifier, + /// The PDG pub graph: SPDGImpl, + /// Nodes to which markers are assigned. pub markers: HashMap>, + /// The nodes that represent arguments to the entrypoint pub arguments: Vec, /// If the return is `()` or `!` then this is `None` pub return_: Option, + /// Stores the assignment of relevant (e.g. marked) types to nodes. Node + /// that this contains multiple types for a single node, because it hold + /// top-level types and subtypes that may be marked. pub type_assigns: HashMap, } +/// Holds [`TypeId`]s that were assigned to a node. 
#[derive(Clone, Serialize, Deserialize, Debug, Default)] pub struct Types(#[cfg_attr(feature = "rustc", serde(with = "ser_defid_vec"))] pub Vec); impl SPDG { + /// Retrieve metadata for this node pub fn node_info(&self, node: Node) -> &NodeInfo { self.graph.node_weight(node).unwrap() } @@ -714,6 +656,7 @@ impl SPDG { .unique() } + /// An iterator over all edges in this graph. pub fn edges(&self) -> impl Iterator> + '_ { self.graph.edge_references() } @@ -723,6 +666,7 @@ impl SPDG { self.graph.node_identifiers().map(Into::into) } + /// Dump this graph in dot format. pub fn dump_dot(&self, mut out: impl std::io::Write) -> std::io::Result<()> { use petgraph::dot::Dot; let dot = Dot::with_config(&self.graph, &[]); @@ -740,6 +684,7 @@ pub struct DisplayNode<'a> { } impl<'a> DisplayNode<'a> { + /// Render the node in extended format pub fn pretty(node: NodeIndex, graph: &'a SPDG) -> Self { Self { node, @@ -748,6 +693,7 @@ impl<'a> DisplayNode<'a> { } } + /// Render the node in simple format pub fn simple(node: NodeIndex, graph: &'a SPDG) -> Self { Self { node, diff --git a/crates/paralegal-spdg/src/tiny_bitset.rs b/crates/paralegal-spdg/src/tiny_bitset.rs index 503e1dca88..8fd3b2eca2 100644 --- a/crates/paralegal-spdg/src/tiny_bitset.rs +++ b/crates/paralegal-spdg/src/tiny_bitset.rs @@ -161,9 +161,12 @@ impl Display for DisplayTinyBitSet { } } +/// Serialization that is readable. Serializes the set as a list of integers +/// (that are set to one). 
pub mod pretty { use super::TinyBitSet; + /// See [module level documentation][self] pub fn deserialize<'de, D>(deserializer: D) -> Result where D: serde::Deserializer<'de>, @@ -172,6 +175,7 @@ pub mod pretty { .map(|v| v.into_iter().collect()) } + /// See [module level documentation][self] pub fn serialize(set: &TinyBitSet, serializer: S) -> Result where S: serde::Serializer, diff --git a/crates/paralegal-spdg/src/traverse.rs b/crates/paralegal-spdg/src/traverse.rs index 5b0eb6c515..bdac5ab82a 100644 --- a/crates/paralegal-spdg/src/traverse.rs +++ b/crates/paralegal-spdg/src/traverse.rs @@ -1,3 +1,5 @@ +//! Utilities for traversing an SPDG + use std::collections::HashSet; use petgraph::visit::{Control, Data, DfsEvent, EdgeFiltered, EdgeRef, IntoEdgeReferences}; @@ -6,21 +8,28 @@ use crate::{EdgeInfo, EdgeKind, Node}; use super::SPDG; +/// Which type of edges should be considered for a given traversal #[derive(Clone, Copy, Eq, PartialEq, strum::EnumIs)] pub enum EdgeSelection { + /// Consider only edges with [`crate::EdgeKind::Data`] Data, + /// Consider only edges with [`crate::EdgeKind::Control`] Control, + /// Consider both data and control flow edges in any combination Both, } impl EdgeSelection { + /// Does this selection admit edges of type [`crate::EdgeKind::Control`] pub fn use_control(self) -> bool { matches!(self, EdgeSelection::Control | EdgeSelection::Both) } + /// Does this selection admit edges of type [`crate::EdgeKind::Data`] pub fn use_data(self) -> bool { matches!(self, EdgeSelection::Data | EdgeSelection::Both) } + /// Is this edge kind admissible? pub fn conforms(self, kind: EdgeKind) -> bool { matches!( (self, kind), @@ -30,6 +39,7 @@ impl EdgeSelection { ) } + /// Create a graph adaptor that implements this edge selection. 
pub fn filter_graph>( self, g: G, @@ -52,6 +62,8 @@ impl EdgeSelection { } } +/// A primitive that queries whether we can reach from one set of nodes to +/// another pub fn generic_flows_to( from: impl IntoIterator, edge_selection: EdgeSelection, diff --git a/crates/paralegal-spdg/src/utils.rs b/crates/paralegal-spdg/src/utils.rs index a0c0115232..3a2fdff055 100644 --- a/crates/paralegal-spdg/src/utils.rs +++ b/crates/paralegal-spdg/src/utils.rs @@ -1,6 +1,10 @@ +//! Utility functions and structs + use std::fmt; use std::fmt::{Display, Formatter, Write}; +/// Write all elements from `it` into the formatter `fmt` using `f`, separating +/// them with `sep` pub fn write_sep< E, I: IntoIterator, @@ -23,10 +27,15 @@ pub fn write_sep< Ok(()) } +/// Has a [`Display`] implementation if the elements of the iterator inside have +/// one. This will render them surrounded by `[` brackets and separated by `, ` +/// comma and space +#[derive(Clone)] pub struct DisplayList { iter: I, } +/// Display this iterator as a list pub fn display_list(iter: I) -> DisplayList { DisplayList { iter } }