Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Laziness for Markers #89

Merged
merged 4 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 25 additions & 17 deletions crates/paralegal-flow/src/ana/mod.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
//! Data-and control flow analyzer and inliner.
//!
//!
//! Analysis starts with the construction of [`SPDGGenerator`] from a
//! [`CollectingVisitor`](crate::discover::CollectingVisitor) and then calling
//! [`analyze`](SPDGGenerator::analyze).

use std::borrow::Cow;

use crate::{
dbg, desc::*, ir::*, rust::*, utils::*, Either, HashMap, HashSet, LogLevelConfig, Symbol,
dbg, desc::*, ir::*, rust::*, utils::*, Either, HashMap, HashSet, LogLevelConfig, MarkerCtx,
Symbol,
};

use hir::def_id::DefId;
use mir::Location;

use anyhow::Result;

use super::discover::{CallSiteAnnotations, CollectingVisitor, FnToAnalyze};
use super::discover::{CallSiteAnnotations, FnToAnalyze};

pub mod algebra;
pub mod df;
Expand All @@ -23,15 +26,22 @@ pub mod non_transitive_aliases;
pub type SerializableInlinedGraph<L> =
petgraph::graphmap::GraphMap<regal::SimpleLocation<L>, inline::Edge, petgraph::Directed>;

impl<'tcx> CollectingVisitor<'tcx> {
/// Driver function. Performs the data collection via visit, then calls
/// [`Self::analyze`] to construct the Forge friendly description of all
/// endpoints.
pub fn run(mut self) -> Result<ProgramDescription> {
let tcx = self.tcx;
tcx.hir().visit_all_item_likes_in_crate(&mut self);
//println!("{:?}\n{:?}\n{:?}", self.marked_sinks, self.marked_sources, self.functions_to_analyze);
self.analyze()
/// Read-only database of information the analysis needs.
///
/// [`Self::analyze`] serves as the main entrypoint to SPDG generation.
pub struct SPDGGenerator<'tcx> {
JustusAdam marked this conversation as resolved.
Show resolved Hide resolved
pub marker_ctx: MarkerCtx<'tcx>,
pub opts: &'static crate::Args,
pub tcx: TyCtxt<'tcx>,
}

impl<'tcx> SPDGGenerator<'tcx> {
pub fn new(marker_ctx: MarkerCtx<'tcx>, opts: &'static crate::Args, tcx: TyCtxt<'tcx>) -> Self {
Self {
marker_ctx,
opts,
tcx,
}
}

/// Perform the analysis for one `#[paralegal_flow::analyze]` annotated function and
Expand All @@ -40,7 +50,7 @@ impl<'tcx> CollectingVisitor<'tcx> {
&self,
//_hash_verifications: &mut HashVerifications,
call_site_annotations: &mut CallSiteAnnotations,
target: FnToAnalyze,
target: &FnToAnalyze,
inliner: &inline::Inliner<'tcx>,
) -> anyhow::Result<(Endpoint, Ctrl)> {
let mut flows = Ctrl::default();
Expand Down Expand Up @@ -241,9 +251,7 @@ impl<'tcx> CollectingVisitor<'tcx> {
/// other setup necessary for the flow graph creation.
///
/// Should only be called after the visit.
fn analyze(mut self) -> Result<ProgramDescription> {
let mut targets = std::mem::take(&mut self.functions_to_analyze);

pub fn analyze(&self, targets: &[FnToAnalyze]) -> Result<ProgramDescription> {
if let LogLevelConfig::Targeted(s) = self.opts.debug() {
assert!(
targets.iter().any(|target| target.name().as_str() == s),
Expand All @@ -266,7 +274,7 @@ impl<'tcx> CollectingVisitor<'tcx> {
let mut call_site_annotations: CallSiteAnnotations = HashMap::new();
let result = //crate::sah::HashVerifications::with(|hash_verifications| {
targets
.drain(..)
.iter()
.map(|desc| {
let target_name = desc.name();
with_reset_level_if_target(self.opts, target_name, || {
Expand Down
9 changes: 0 additions & 9 deletions crates/paralegal-flow/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,11 +371,6 @@ impl ModelCtrl {
/// Arguments which control marker assignment and discovery
#[derive(serde::Serialize, serde::Deserialize, clap::Args)]
pub struct MarkerControl {
/// Eagerly load markers for local items. This guarantees that all markers
/// on local items are emitted in the model, even if those items are never
/// used in the flow.
#[clap(long, env = "PARALEGAL_EAGER_LOCAL_MARKERS")]
eager_local_markers: bool,
/// Don't mark the outputs of local functions if they are of a marked type.
///
/// Be aware that disabling this can cause unsoundness as inline
Expand All @@ -386,10 +381,6 @@ pub struct MarkerControl {
}

impl MarkerControl {
pub fn lazy_local_markers(&self) -> bool {
!self.eager_local_markers
}

pub fn local_function_type_marking(&self) -> bool {
!self.no_local_function_type_marking
}
Expand Down
46 changes: 31 additions & 15 deletions crates/paralegal-flow/src/discover.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
//! MIR visitor ([`CollectingVisitor`]) that discovers and stores all items of interest.
//! MIR visitor ([`CollectingVisitor`]) that populates the [`MarkerDatabase`]
//! and discovers functions marked for analysis.
//!
//! Items of interest is anything annotated with one of the `paralegal_flow::`
//! annotations.
use crate::{consts, desc::*, rust::*, utils::*, HashMap, MarkerCtx};
//! Essentially this discovers all local `paralegal_flow::*` annotations.
use crate::{
ana::SPDGGenerator, consts, desc::*, marker_db::MarkerDatabase, rust::*, utils::*, HashMap,
};

use hir::{
def_id::DefId,
Expand All @@ -12,6 +14,8 @@ use hir::{
use rustc_middle::hir::nested_filter::OnlyBodies;
use rustc_span::{symbol::Ident, Span, Symbol};

use anyhow::Result;

/// Values of this type can be matched against Rust attributes
pub type AttrMatchT = Vec<Symbol>;

Expand All @@ -21,7 +25,6 @@ pub type AttrMatchT = Vec<Symbol>;
/// the function `DefId`
pub type CallSiteAnnotations = HashMap<DefId, Vec<Annotation>>;

#[allow(clippy::type_complexity)]
/// This visitor traverses the items in the analyzed crate to discover
/// annotations and analysis targets and store them in this struct. After the
/// discovery phase [`Self::analyze`] is used to drive the
Expand All @@ -36,7 +39,7 @@ pub struct CollectingVisitor<'tcx> {
/// later perform the analysis
pub functions_to_analyze: Vec<FnToAnalyze>,

pub marker_ctx: MarkerCtx<'tcx>,
pub marker_ctx: MarkerDatabase<'tcx>,
}

/// A function we will be targeting to analyze with
Expand All @@ -54,19 +57,31 @@ impl FnToAnalyze {
}

impl<'tcx> CollectingVisitor<'tcx> {
pub(crate) fn new(
tcx: TyCtxt<'tcx>,
opts: &'static crate::Args,
marker_ctx: MarkerCtx<'tcx>,
) -> Self {
pub(crate) fn new(tcx: TyCtxt<'tcx>, opts: &'static crate::Args) -> Self {
Self {
tcx,
opts,
functions_to_analyze: vec![],
marker_ctx,
marker_ctx: MarkerDatabase::init(tcx, opts),
}
}

/// After running the discovery with `visit_all_item_likes_in_crate`, create
/// the read-only [`SPDGGenerator`] upon which the analysis will run.
fn into_generator(self) -> SPDGGenerator<'tcx> {
SPDGGenerator::new(self.marker_ctx.into(), self.opts, self.tcx)
}

/// Driver function. Performs the data collection via visit, then calls
/// [`Self::analyze`] to construct the Forge friendly description of all
/// endpoints.
pub fn run(mut self) -> Result<ProgramDescription> {
let tcx = self.tcx;
tcx.hir().visit_all_item_likes_in_crate(&mut self);
let targets = std::mem::take(&mut self.functions_to_analyze);
self.into_generator().analyze(&targets)
}

/// Does the function named by this id have the `paralegal_flow::analyze` annotation
fn should_analyze_function(&self, ident: LocalDefId) -> bool {
self.tcx
Expand All @@ -84,10 +99,11 @@ impl<'tcx> intravisit::Visitor<'tcx> for CollectingVisitor<'tcx> {
self.tcx.hir()
}

/// Check if this id has annotations and if so register them in the marker database.
fn visit_id(&mut self, hir_id: rustc_hir::HirId) {
if !self.opts.marker_control().lazy_local_markers()
&& let Some(owner_id) = hir_id.as_owner() {
self.marker_ctx.is_locally_marked(owner_id.def_id);
if let Some(owner_id) = hir_id.as_owner() {
self.marker_ctx
.retrieve_local_annotations_for(owner_id.def_id);
}
}

Expand Down
3 changes: 1 addition & 2 deletions crates/paralegal-flow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,7 @@ impl rustc_driver::Callbacks for Callbacks {
.global_ctxt()
.unwrap()
.enter(|tcx| {
let marker_ctx = MarkerCtx::new(tcx, self.opts);
let desc = discover::CollectingVisitor::new(tcx, self.opts, marker_ctx).run()?;
let desc = discover::CollectingVisitor::new(tcx, self.opts).run()?;
if self.opts.dbg().dump_serialized_flow_graph() {
serde_json::to_writer(
&mut std::fs::OpenOptions::new()
Expand Down
102 changes: 51 additions & 51 deletions crates/paralegal-flow/src/marker_db.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
//! Central repository for information about markers (and annotations).
//!
//! The database ([`MarkerDatabase`]) is initialized with
//! ([`init`](`MarkerDatabase::init`)) and populated with local markers by
//! [`CollectingVisitor`](crate::discover::CollectingVisitor) by calls to
//! ([`retrieve_local_annotations_for`](MarkerDatabase::retrieve_local_annotations_for)).
//! Then it's transformed into a read-only [`MarkerCtx`] via [`From::from`]. The
//! [`MarkerCtx`] is a cheap pointer to the database and responsible for
//! answering queries about markers as the main analysis runs.
//!
//! All interactions happen through the central database object: [`MarkerCtx`].

use crate::{
Expand All @@ -13,7 +21,7 @@ use crate::{
},
DefId, HashMap, LocalDefId, TyCtxt,
};
use rustc_utils::cache::{Cache, CopyCache};
use rustc_utils::cache::CopyCache;

use std::rc::Rc;

Expand All @@ -32,14 +40,13 @@ type ExternalMarkers = HashMap<DefId, Vec<MarkerAnnotation>>;
#[derive(Clone)]
pub struct MarkerCtx<'tcx>(Rc<MarkerDatabase<'tcx>>);

impl<'tcx> MarkerCtx<'tcx> {
/// Constructs a new marker database.
///
/// This also loads any external annotations, as specified in the `args`.
pub fn new(tcx: TyCtxt<'tcx>, args: &'static Args) -> Self {
Self(Rc::new(MarkerDatabase::init(tcx, args)))
impl<'tcx> From<MarkerDatabase<'tcx>> for MarkerCtx<'tcx> {
fn from(value: MarkerDatabase<'tcx>) -> Self {
Self(Rc::new(value))
}
}

impl<'tcx> MarkerCtx<'tcx> {
#[inline]
fn tcx(&self) -> TyCtxt<'tcx> {
self.0.tcx
Expand All @@ -57,8 +64,7 @@ impl<'tcx> MarkerCtx<'tcx> {
pub fn local_annotations(&self, def_id: LocalDefId) -> &[Annotation] {
self.db()
.local_annotations
.get(def_id, |_| self.retrieve_local_annotations_for(def_id))
.as_ref()
.get(&def_id)
.map_or(&[], |o| o.as_slice())
}

Expand Down Expand Up @@ -113,9 +119,8 @@ impl<'tcx> MarkerCtx<'tcx> {
pub fn local_annotations_found(&self) -> Vec<(LocalDefId, &[Annotation])> {
self.db()
.local_annotations
.items()
.into_iter()
.filter_map(|(k, v)| Some((k, (v.as_ref()?.as_slice()))))
.iter()
.map(|(k, v)| (*k, (v.as_slice())))
.collect()
}

Expand Down Expand Up @@ -188,35 +193,6 @@ impl<'tcx> MarkerCtx<'tcx> {
false
}

/// Retrieve and parse the local annotations for this item.
fn retrieve_local_annotations_for(&self, def_id: LocalDefId) -> LocalAnnotations {
let tcx = self.tcx();
let hir = tcx.hir();
let id = def_id.force_into_hir_id(tcx);
let mut sink_matches = vec![];
for a in hir.attrs(id) {
if let Some(i) = a.match_get_ref(&consts::MARKER_MARKER) {
sink_matches.push(Annotation::Marker(crate::ann_parse::ann_match_fn(i)));
} else if let Some(i) = a.match_get_ref(&consts::LABEL_MARKER) {
warn!("The `paralegal_flow::label` annotation is deprecated, use `paralegal_flow::marker` instead");
sink_matches.push(Annotation::Marker(crate::ann_parse::ann_match_fn(i)))
} else if let Some(i) = a.match_get_ref(&consts::OTYPE_MARKER) {
sink_matches.extend(
crate::ann_parse::otype_ann_match(i, tcx)
.into_iter()
.map(Annotation::OType),
);
} else if let Some(i) = a.match_get_ref(&consts::EXCEPTION_MARKER) {
sink_matches.push(Annotation::Exception(crate::ann_parse::match_exception(i)));
}
}
if sink_matches.is_empty() {
return None;
}

Some(Box::new(sink_matches))
}

/// All the markers applied to this type and its subtypes.
///
/// Returns `(ann, (ty, did))` tuples which are the marker annotation `ann`,
Expand Down Expand Up @@ -272,18 +248,12 @@ impl<'tcx> MarkerCtx<'tcx> {
)
}
}

/// We expect most local items won't have annotations. This structure is much
/// smaller (8 bytes) than without the `Box` (24 Bytes).
#[allow(clippy::box_collection)]
type LocalAnnotations = Option<Box<Vec<Annotation>>>;

/// The structure inside of [`MarkerCtx`].
struct MarkerDatabase<'tcx> {
pub struct MarkerDatabase<'tcx> {
tcx: TyCtxt<'tcx>,
/// Cache for parsed local annotations. They are created with
/// [`MarkerCtx::retrieve_local_annotations_for`].
local_annotations: Cache<LocalDefId, LocalAnnotations>,
local_annotations: HashMap<LocalDefId, Vec<Annotation>>,
external_annotations: ExternalMarkers,
/// Cache whether markers are reachable transitively.
marker_reachable_cache: CopyCache<DefId, bool>,
Expand All @@ -293,15 +263,45 @@ struct MarkerDatabase<'tcx> {

impl<'tcx> MarkerDatabase<'tcx> {
/// Construct a new database, loading external markers.
fn init(tcx: TyCtxt<'tcx>, args: &'static Args) -> Self {
pub fn init(tcx: TyCtxt<'tcx>, args: &'static Args) -> Self {
Self {
tcx,
local_annotations: Cache::default(),
local_annotations: HashMap::default(),
external_annotations: resolve_external_markers(args, tcx),
marker_reachable_cache: Default::default(),
config: args.marker_control(),
}
}

/// Retrieve and parse the local annotations for this item.
pub fn retrieve_local_annotations_for(&mut self, def_id: LocalDefId) {
let tcx = self.tcx;
let hir = tcx.hir();
let id = def_id.force_into_hir_id(tcx);
let mut sink_matches = vec![];
for a in hir.attrs(id) {
if let Some(i) = a.match_get_ref(&consts::MARKER_MARKER) {
sink_matches.push(Annotation::Marker(crate::ann_parse::ann_match_fn(i)));
} else if let Some(i) = a.match_get_ref(&consts::LABEL_MARKER) {
warn!("The `paralegal_flow::label` annotation is deprecated, use `paralegal_flow::marker` instead");
sink_matches.push(Annotation::Marker(crate::ann_parse::ann_match_fn(i)))
} else if let Some(i) = a.match_get_ref(&consts::OTYPE_MARKER) {
sink_matches.extend(
crate::ann_parse::otype_ann_match(i, tcx)
.into_iter()
.map(Annotation::OType),
);
} else if let Some(i) = a.match_get_ref(&consts::EXCEPTION_MARKER) {
sink_matches.push(Annotation::Exception(crate::ann_parse::match_exception(i)));
}
}
if !sink_matches.is_empty() {
assert!(self
.local_annotations
.insert(def_id, sink_matches)
.is_none());
}
}
}

type RawExternalMarkers = HashMap<String, Vec<crate::desc::MarkerAnnotation>>;
Expand Down
7 changes: 2 additions & 5 deletions guide/deletion-policy/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,8 @@ fn dummy_policy(ctx: Arc<Context>) -> Result<()> {
fn main() -> Result<()> {
let dir = "../file-db-example/";
let mut cmd = paralegal_policy::SPDGGenCommand::global();
cmd.get_command().args([
"--external-annotations",
"external-annotations.toml",
"--eager-local-markers",
]);
cmd.get_command()
.args(["--external-annotations", "external-annotations.toml"]);
cmd.run(dir)?.with_context(dummy_policy)?;
println!("Policy successful");
Ok(())
Expand Down
Loading