diff --git a/.gitignore b/.gitignore index 223c98004c..50e88d317c 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,9 @@ tests/xilinx/cocotb/**/hdl sim_build/ results.xml +# Ignore .fud2 cache +.fud2/ + !cider-dap/calyxDebug/package.json !cider-dap/calyxDebug/tsconfig.json diff --git a/Cargo.lock b/Cargo.lock index 17bf05397b..e190670591 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -49,6 +49,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anyhow" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" + [[package]] name = "argh" version = "0.1.12" @@ -86,6 +92,15 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" +[[package]] +name = "atomic" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994" +dependencies = [ + "bytemuck", +] + [[package]] name = "atty" version = "0.2.14" @@ -169,6 +184,12 @@ version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +[[package]] +name = "bytemuck" +version = "1.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2490600f404f2b94c167e31d3ed1d5f3c225a0f3b80230053b3e0b7b962bd9" + [[package]] name = "byteorder" version = "1.4.3" @@ -300,6 +321,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "camino" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" + [[package]] name = "cast" version = "0.3.0" @@ -396,6 +423,12 @@ dependencies = [ "libc", ] +[[package]] +name = "cranelift-entity" +version = "0.103.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44e3ee532fc4776c69bcedf7e62f9632cbb3f35776fa9a525cdade3195baa3f7" + [[package]] name = "criterion" version = "0.3.6" @@ -701,6 +734,21 @@ dependencies = [ "str-buf", ] +[[package]] +name = "fake" +version = "0.6.1" +dependencies = [ + "anyhow", + "argh", + "camino", + "cranelift-entity", + "env_logger", + "figment", + "log", + "pathdiff", + "serde", +] + [[package]] name = "fastrand" version = "2.0.0" @@ -718,6 +766,19 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "figment" +version = "0.10.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b6e5bc7bd59d60d0d45a6ccab6cf0f4ce28698fb4e81e750ddf229c9b824026" +dependencies = [ + "atomic", + "serde", + "toml", + "uncased", + "version_check 0.9.4", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -742,6 +803,14 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "fud2" +version = "0.6.1" +dependencies = [ + "anyhow", + "fake", +] + [[package]] name = "funty" version = "2.0.0" @@ -1155,6 +1224,15 @@ version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +[[package]] +name = "pathdiff" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" +dependencies = [ + "camino", +] + [[package]] name = "pest" version = "2.7.2" @@ -1598,6 +1676,15 @@ dependencies = [ "serde", ] 
+[[package]] +name = "serde_spanned" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" +dependencies = [ + "serde", +] + [[package]] name = "serde_with" version = "1.14.0" @@ -1905,6 +1992,40 @@ dependencies = [ "serde_json", ] +[[package]] +name = "toml" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a195ec8c9da26928f773888e0742ca3ca1040c6cd859c919c9f59c1954ab35" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03" +dependencies = [ + "indexmap 2.0.0", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + [[package]] name = "typed-arena" version = "2.0.2" @@ -1929,6 +2050,15 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" +[[package]] +name = "uncased" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697" +dependencies = [ + "version_check 0.9.4", +] + [[package]] name = "unicode-ident" version = "1.0.11" @@ -2172,6 +2302,15 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "winnow" +version = "0.5.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1931d78a9c73861da0134f453bb1f790ce49b2e30eba8410b4b79bac72b46a2d" +dependencies = [ + "memchr", +] + [[package]] name = "wyz" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index ca029447d2..189d19015a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,8 @@ members = [ "web/rust", "tools/data_gen", "cider-dap", + "fud2", + "fud2/fake", ] exclude = ["site"] @@ -43,6 +45,7 @@ pest = "2" pest_derive = "2" pest_consume = "1" argh = "0.1" +anyhow = "1" calyx-utils = { path = "calyx-utils", version = "0.6.1" } calyx-ir = { path = "calyx-ir", version = "0.6.1" } calyx-frontend = { path = "calyx-frontend", version = "0.6.1" } @@ -54,6 +57,12 @@ version = "0.6" default-features = false features = ["matrix_graph"] +[workspace.dependencies.env_logger] +version = "0.9.0" +features = ["termcolor", "atty"] +default-features = false + + # =========== Package configuration =========== [package] @@ -89,6 +98,7 @@ itertools.workspace = true log.workspace = true serde.workspace = true argh.workspace = true +env_logger.workspace = true calyx-utils.workspace = true calyx-ir.workspace = true @@ -99,11 +109,6 @@ calyx-opt.workspace = true workspace = true features = ["mlir", "resources", "xilinx"] -[dependencies.env_logger] -version = "0.9.0" -features = ["termcolor", "atty"] -default-features = false - [profile.release] lto = "thin" diff --git a/calyx-backend/src/firrtl.rs b/calyx-backend/src/firrtl.rs index cc03abec2e..dabccc39f6 100644 --- a/calyx-backend/src/firrtl.rs +++ b/calyx-backend/src/firrtl.rs @@ -36,30 +36,8 @@ impl 
Backend for FirrtlBackend { fn emit(ctx: &ir::Context, file: &mut OutputFile) -> CalyxResult<()> { let out = &mut file.get_write(); writeln!(out, "circuit {}:", ctx.entrypoint)?; - // Pass to output any necessary extmodule statements (for primitive calls) - let mut extmodule_set: HashSet = HashSet::new(); - for comp in &ctx.components { - for cell in comp.cells.iter() { - let cell_borrowed = cell.as_ref().borrow(); - if let ir::CellType::Primitive { - name, - param_binding, - .. - } = &cell_borrowed.prototype - { - let curr_module_name = - get_primitive_module_name(name, param_binding); - if extmodule_set.insert(curr_module_name.clone()) { - emit_primitive_extmodule( - cell.borrow().ports(), - &curr_module_name, - name, - param_binding, - out, - )?; - } - }; - } + if ctx.bc.emit_primitive_extmodules { + emit_extmodules(ctx, out)?; } for comp in ctx.components.iter() { emit_component(comp, out)? @@ -68,6 +46,37 @@ impl Backend for FirrtlBackend { } } +fn emit_extmodules( + ctx: &ir::Context, + out: &mut F, +) -> Result<(), calyx_utils::Error> { + let mut extmodule_set: HashSet = HashSet::new(); + for comp in &ctx.components { + for cell in comp.cells.iter() { + let cell_borrowed = cell.as_ref().borrow(); + if let ir::CellType::Primitive { + name, + param_binding, + .. + } = &cell_borrowed.prototype + { + let curr_module_name = + get_primitive_module_name(name, param_binding); + if extmodule_set.insert(curr_module_name.clone()) { + emit_primitive_extmodule( + cell.borrow().ports(), + &curr_module_name, + name, + param_binding, + out, + )?; + } + }; + } + } + Ok(()) +} + // TODO: Ask about the other backend configurations in verilog.rs and see if I need any of it fn emit_component( comp: &ir::Component, diff --git a/calyx-frontend/src/common.rs b/calyx-frontend/src/common.rs index 78dbe363a5..5f595459f0 100644 --- a/calyx-frontend/src/common.rs +++ b/calyx-frontend/src/common.rs @@ -45,7 +45,7 @@ impl Primitive { ) -> CalyxResult<(SmallVec<[(Id, u64); 5]>, Vec>)> { if self.params.len() != parameters.len() { let msg = format!( - "Invalid parameter binding for primitive `{}`. Requires {} parameters but provided with {}.", + "primitive `{}` requires {} parameters but instantiation provides {} parameters", self.name.clone(), self.params.len(), parameters.len(), diff --git a/calyx-ir/src/builder.rs b/calyx-ir/src/builder.rs index 547533e538..bb65e6513b 100644 --- a/calyx-ir/src/builder.rs +++ b/calyx-ir/src/builder.rs @@ -2,6 +2,7 @@ //! representation. use crate::{self as ir, LibrarySignatures, Nothing, RRC, WRC}; use calyx_frontend::BoolAttr; +use calyx_utils::CalyxResult; use std::{cmp, rc::Rc}; use super::{CellType, PortDef}; @@ -213,15 +214,28 @@ impl<'a> Builder<'a> { primitive: Prim, param_values: &[u64], ) -> RRC + where + Pre: Into + ToString + Clone, + Prim: Into, + { + self.try_add_primitive(prefix, primitive, param_values) + .expect("failed to add primitive:") + } + + /// Result variant of [[ir::Builder::add_primitive()]]. 
+ pub fn try_add_primitive( + &mut self, + prefix: Pre, + primitive: Prim, + param_values: &[u64], + ) -> CalyxResult> where Pre: Into + ToString + Clone, Prim: Into, { let prim_id = primitive.into(); let prim = &self.lib.get_primitive(prim_id); - let (param_binding, ports) = prim - .resolve(param_values) - .expect("Failed to add primitive."); + let (param_binding, ports) = prim.resolve(param_values)?; let name = self.component.generate_name(prefix); let cell = Self::cell_from_signature( @@ -238,7 +252,7 @@ impl<'a> Builder<'a> { cell.borrow_mut().add_attribute(BoolAttr::Generated, 1); } self.component.cells.add(Rc::clone(&cell)); - cell + Ok(cell) } /// Add a component instance to this component using its name and port diff --git a/calyx-ir/src/context.rs b/calyx-ir/src/context.rs index 13df7eac8c..cc00e1b86c 100644 --- a/calyx-ir/src/context.rs +++ b/calyx-ir/src/context.rs @@ -13,6 +13,9 @@ pub struct BackendConf { pub enable_verification: bool, /// Use flat (ANF) assignments for guards instead of deep expression trees. pub flat_assign: bool, + /// [FIRRTL backend only] Emit extmodule declarations for primtives + /// for use with SystemVerilog implementations + pub emit_primitive_extmodules: bool, } /// The IR Context that represents an entire Calyx program with all of its diff --git a/calyx-ir/src/from_ast.rs b/calyx-ir/src/from_ast.rs index 619c5ab318..7479045c55 100644 --- a/calyx-ir/src/from_ast.rs +++ b/calyx-ir/src/from_ast.rs @@ -269,7 +269,7 @@ fn build_component( // required information. comp.cells .into_iter() - .for_each(|cell| add_cell(cell, sig_ctx, &mut builder)); + .try_for_each(|cell| add_cell(cell, sig_ctx, &mut builder))?; comp.groups .into_iter() @@ -301,15 +301,17 @@ fn build_component( ///////////////// Cell Construction ///////////////////////// -fn add_cell(cell: ast::Cell, sig_ctx: &SigCtx, builder: &mut Builder) { +fn add_cell( + cell: ast::Cell, + sig_ctx: &SigCtx, + builder: &mut Builder, +) -> CalyxResult<()> { let proto_name = cell.prototype.name; let res = if sig_ctx.lib.find_primitive(proto_name).is_some() { - let c = builder.add_primitive( - cell.name, - proto_name, - &cell.prototype.params, - ); + let c = builder + .try_add_primitive(cell.name, proto_name, &cell.prototype.params) + .map_err(|e| e.with_pos(&cell.attributes))?; c.borrow_mut().set_reference(cell.reference); c } else { @@ -328,6 +330,8 @@ fn add_cell(cell: ast::Cell, sig_ctx: &SigCtx, builder: &mut Builder) { // Add attributes to the built cell res.borrow_mut().attributes = cell.attributes; + + Ok(()) } ///////////////// Group Construction ///////////////////////// diff --git a/calyx-opt/src/analysis/compute_static.rs b/calyx-opt/src/analysis/compute_static.rs index 67102da2a5..0eb965e49f 100644 --- a/calyx-opt/src/analysis/compute_static.rs +++ b/calyx-opt/src/analysis/compute_static.rs @@ -24,7 +24,8 @@ where /// **Ensures**: All sub-programs of the type will also be updated. fn update_static(&mut self, extra: &Self::Info) -> Option { if let Some(time) = self.compute_static(extra) { - self.get_mut_attributes().insert(ir::NumAttr::Static, time); + self.get_mut_attributes() + .insert(ir::NumAttr::PromoteStatic, time); Some(time) } else { None @@ -56,30 +57,24 @@ impl WithStatic for ir::Control { } } -impl WithStatic for ir::StaticEnable { - type Info = (); - fn compute_static(&mut self, _: &Self::Info) -> Option { - // Attempt to get the latency from the attribute on the enable first, or - // failing that, from the group. 
- Some(self.group.borrow().get_latency()) - } -} - impl WithStatic for ir::Enable { type Info = (); fn compute_static(&mut self, _: &Self::Info) -> Option { // Attempt to get the latency from the attribute on the enable first, or // failing that, from the group. - self.attributes - .get(ir::NumAttr::Static) - .or_else(|| self.group.borrow().attributes.get(ir::NumAttr::Static)) + self.attributes.get(ir::NumAttr::PromoteStatic).or_else(|| { + self.group + .borrow() + .attributes + .get(ir::NumAttr::PromoteStatic) + }) } } impl WithStatic for ir::Invoke { type Info = CompTime; fn compute_static(&mut self, extra: &Self::Info) -> Option { - self.attributes.get(ir::NumAttr::Static).or_else(|| { + self.attributes.get(ir::NumAttr::PromoteStatic).or_else(|| { let comp = self.comp.borrow().type_name()?; extra.get(&comp).cloned() }) @@ -89,36 +84,47 @@ impl WithStatic for ir::Invoke { impl WithStatic for ir::Seq { type Info = CompTime; fn compute_static(&mut self, extra: &Self::Info) -> Option { - let mut sum = 0; - for stmt in &mut self.stmts { - sum += stmt.update_static(extra)?; - } - Some(sum) + // Go through each stmt in the seq, and try to calculate the latency. + self.stmts.iter_mut().fold(Some(0), |acc, stmt| { + match (acc, stmt.update_static(extra)) { + (Some(cur_latency), Some(stmt_latency)) => { + Some(cur_latency + stmt_latency) + } + (_, _) => None, + } + }) } } impl WithStatic for ir::Par { type Info = CompTime; fn compute_static(&mut self, extra: &Self::Info) -> Option { - let mut max = 0; - for stmt in &mut self.stmts { - max = std::cmp::max(max, stmt.update_static(extra)?); - } - Some(max) + // Go through each stmt in the par, and try to calculate the latency. + self.stmts.iter_mut().fold(Some(0), |acc, stmt| { + match (acc, stmt.update_static(extra)) { + (Some(cur_latency), Some(stmt_latency)) => { + Some(std::cmp::max(cur_latency, stmt_latency)) + } + (_, _) => None, + } + }) } } impl WithStatic for ir::If { type Info = CompTime; fn compute_static(&mut self, extra: &Self::Info) -> Option { - let t = self.tbranch.update_static(extra)?; - let f = self.fbranch.update_static(extra)?; // Cannot compute latency information for `if`-`with` + let t_latency = self.tbranch.update_static(extra); + let f_latency = self.fbranch.update_static(extra); if self.cond.is_some() { log::debug!("Cannot compute latency for while-with"); return None; } - Some(std::cmp::max(t, f)) + match (t_latency, f_latency) { + (Some(t), Some(f)) => Some(std::cmp::max(t, f)), + (_, _) => None, + } } } diff --git a/calyx-opt/src/analysis/inference_analysis.rs b/calyx-opt/src/analysis/inference_analysis.rs new file mode 100644 index 0000000000..a7704946c4 --- /dev/null +++ b/calyx-opt/src/analysis/inference_analysis.rs @@ -0,0 +1,527 @@ +use crate::analysis::{ + compute_static::WithStatic, GraphAnalysis, ReadWriteSet, +}; +use calyx_ir::{self as ir, GetAttributes, RRC}; +use ir::CellType; +use itertools::Itertools; +use std::collections::{HashMap, HashSet}; + +/// Struct to store information about the go-done interfaces defined by a primitive. +/// There is no default implementation because it will almost certainly be very +/// unhelpful: you will want to use `from_ctx`. 
+#[derive(Debug)]
+pub struct GoDone {
+    ports: Vec<(ir::Id, ir::Id, u64)>,
+}
+
+impl GoDone {
+    pub fn new(ports: Vec<(ir::Id, ir::Id, u64)>) -> Self {
+        Self { ports }
+    }
+
+    /// Returns true if this is a @go port
+    pub fn is_go(&self, name: &ir::Id) -> bool {
+        self.ports.iter().any(|(go, _, _)| name == go)
+    }
+
+    /// Returns true if this is a @done port
+    pub fn is_done(&self, name: &ir::Id) -> bool {
+        self.ports.iter().any(|(_, done, _)| name == done)
+    }
+
+    /// Returns the latency associated with the provided @go port if present
+    pub fn get_latency(&self, go_port: &ir::Id) -> Option<u64> {
+        self.ports.iter().find_map(|(go, _, lat)| {
+            if go == go_port {
+                Some(*lat)
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Iterate over the defined ports
+    pub fn iter(&self) -> impl Iterator<Item = &(ir::Id, ir::Id, u64)> {
+        self.ports.iter()
+    }
+
+    /// Returns true if no go/done ports are defined
+    pub fn is_empty(&self) -> bool {
+        self.ports.is_empty()
+    }
+
+    /// Returns the number of defined go/done port pairs
+    pub fn len(&self) -> usize {
+        self.ports.len()
+    }
+
+    /// Returns a reference to the underlying (go, done, latency) tuples
+    pub fn get_ports(&self) -> &Vec<(ir::Id, ir::Id, u64)> {
+        &self.ports
+    }
+}
+
+impl From<&ir::Primitive> for GoDone {
+    fn from(prim: &ir::Primitive) -> Self {
+        let done_ports: HashMap<_, _> = prim
+            .find_all_with_attr(ir::NumAttr::Done)
+            .map(|pd| (pd.attributes.get(ir::NumAttr::Done), pd.name()))
+            .collect();
+
+        let go_ports = prim
+            .find_all_with_attr(ir::NumAttr::Go)
+            .filter_map(|pd| {
+                pd.attributes.get(ir::NumAttr::Static).and_then(|st| {
+                    done_ports
+                        .get(&pd.attributes.get(ir::NumAttr::Go))
+                        .map(|done_port| (pd.name(), *done_port, st))
+                })
+            })
+            .collect_vec();
+        GoDone::new(go_ports)
+    }
+}
+
+impl From<&ir::Cell> for GoDone {
+    fn from(cell: &ir::Cell) -> Self {
+        let done_ports: HashMap<_, _> = cell
+            .find_all_with_attr(ir::NumAttr::Done)
+            .map(|pr| {
+                let port = pr.borrow();
+                (port.attributes.get(ir::NumAttr::Done), port.name)
+            })
+            .collect();
+
+        let go_ports = cell
+            .find_all_with_attr(ir::NumAttr::Go)
+            .filter_map(|pr| {
+                let port = pr.borrow();
+                port.attributes.get(ir::NumAttr::Static).and_then(|st| {
+                    done_ports
+                        .get(&port.attributes.get(ir::NumAttr::Go))
+                        .map(|done_port| (port.name, *done_port, st))
+                })
+            })
+            .collect_vec();
+        GoDone::new(go_ports)
+    }
+}
+
+#[derive(Debug)]
+/// Default implementation is not provided, since it is almost certainly more
+/// helpful to use `from_ctx` instead.
+pub struct InferenceAnalysis {
+    /// component name -> vec<(go signal, done signal, latency)>
+    pub latency_data: HashMap<ir::Id, GoDone>,
+    /// Maps static component names to their latencies; only components with a
+    /// single go port can appear here. (This is a subset of the information
+    /// given by `latency_data` and is helpful for inferring invokes. Perhaps
+    /// someday we should get rid of it and keep only one field.)
+    pub static_component_latencies: HashMap<ir::Id, u64>,
+
+    updated_components: HashSet<ir::Id>,
+}
+
+impl InferenceAnalysis {
+    /// Builds an `InferenceAnalysis` from a context. Looks at all primitives
+    /// and component signatures to get latency information.
+    pub fn from_ctx(ctx: &ir::Context) -> Self {
+        let mut latency_data = HashMap::new();
+        let mut static_component_latencies = HashMap::new();
+        // Construct latency_data for each primitive
+        for prim in ctx.lib.signatures() {
+            let prim_go_done = GoDone::from(prim);
+            if prim_go_done.len() == 1 {
+                static_component_latencies
+                    .insert(prim.name, prim_go_done.get_ports()[0].2);
+            }
+            latency_data.insert(prim.name, GoDone::from(prim));
+        }
+        for comp in &ctx.components {
+            let comp_sig = comp.signature.borrow();
+
+            let done_ports: HashMap<_, _> = comp_sig
+                .find_all_with_attr(ir::NumAttr::Done)
+                .map(|pd| {
+                    let pd_ref = pd.borrow();
+                    (pd_ref.attributes.get(ir::NumAttr::Done), pd_ref.name)
+                })
+                .collect();
+
+            let go_ports = comp_sig
+                .find_all_with_attr(ir::NumAttr::Go)
+                .filter_map(|pd| {
+                    let pd_ref = pd.borrow();
+                    pd_ref.attributes.get(ir::NumAttr::Static).and_then(|st| {
+                        done_ports
+                            .get(&pd_ref.attributes.get(ir::NumAttr::Go))
+                            .map(|done_port| (pd_ref.name, *done_port, st))
+                    })
+                })
+                .collect_vec();
+
+            let go_done_comp = GoDone::new(go_ports);
+
+            if go_done_comp.len() == 1 {
+                static_component_latencies
+                    .insert(comp.name, go_done_comp.get_ports()[0].2);
+            }
+            latency_data.insert(comp.name, go_done_comp);
+        }
+        InferenceAnalysis {
+            latency_data,
+            static_component_latencies,
+            updated_components: HashSet::new(),
+        }
+    }
+
+    /// Adds (or overwrites) a component's latency information, given its name,
+    /// latency, and `GoDone` interface.
+    pub fn add_component(
+        &mut self,
+        (comp_name, latency, go_done): (ir::Id, u64, GoDone),
+    ) {
+        self.latency_data.insert(comp_name, go_done);
+        self.static_component_latencies.insert(comp_name, latency);
+    }
+
+    /// Removes the component's latency information and marks it as updated,
+    /// so that components that use it can later have their timing fixed up.
+    pub fn remove_component(&mut self, comp_name: ir::Id) {
+        self.updated_components.insert(comp_name);
+        self.latency_data.remove(&comp_name);
+        self.static_component_latencies.remove(&comp_name);
+    }
+
+    /// Updates the component, given a component name and a new latency.
+    /// Note that this expects that the component is already accounted for
+    /// in `self.latency_data` and `self.static_component_latencies`.
+    pub fn adjust_component(
+        &mut self,
+        (comp_name, adjusted_latency): (ir::Id, u64),
+    ) {
+        self.updated_components.insert(comp_name);
+        self.latency_data.entry(comp_name).and_modify(|go_done| {
+            for (_, _, cur_latency) in &mut go_done.ports {
+                // Update the stored latency for each go/done pair on this component.
+ *cur_latency = adjusted_latency; + } + }); + self.static_component_latencies + .insert(comp_name, adjusted_latency); + } + + /// Return true if the edge (`src`, `dst`) meet one these criteria, and false otherwise: + /// - `src` is an "out" port of a constant, and `dst` is a "go" port + /// - `src` is a "done" port, and `dst` is a "go" port + /// - `src` is a "done" port, and `dst` is the "done" port of a group + fn mem_wrt_dep_graph(&self, src: &ir::Port, dst: &ir::Port) -> bool { + match (&src.parent, &dst.parent) { + ( + ir::PortParent::Cell(src_cell_wrf), + ir::PortParent::Cell(dst_cell_wrf), + ) => { + let src_rf = src_cell_wrf.upgrade(); + let src_cell = src_rf.borrow(); + let dst_rf = dst_cell_wrf.upgrade(); + let dst_cell = dst_rf.borrow(); + if let (Some(s_name), Some(d_name)) = + (src_cell.type_name(), dst_cell.type_name()) + { + let data_src = self.latency_data.get(&s_name); + let data_dst = self.latency_data.get(&d_name); + if let (Some(dst_ports), Some(src_ports)) = + (data_dst, data_src) + { + return src_ports.is_done(&src.name) + && dst_ports.is_go(&dst.name); + } + } + + // A constant writes to a cell: to be added to the graph, the cell needs to be a "done" port. + if let (Some(d_name), ir::CellType::Constant { .. }) = + (dst_cell.type_name(), &src_cell.prototype) + { + if let Some(ports) = self.latency_data.get(&d_name) { + return ports.is_go(&dst.name); + } + } + + false + } + + // Something is written to a group: to be added to the graph, this needs to be a "done" port. + (_, ir::PortParent::Group(_)) => dst.name == "done", + + // If we encounter anything else, no need to add it to the graph. + _ => false, + } + } + + /// Return a Vec of edges (`a`, `b`), where `a` is a "go" port and `b` + /// is a "done" port, and `a` and `b` have the same parent cell. + fn find_go_done_edges( + &self, + group: &ir::Group, + ) -> Vec<(RRC, RRC)> { + let rw_set = ReadWriteSet::uses(group.assignments.iter()); + let mut go_done_edges: Vec<(RRC, RRC)> = Vec::new(); + + for cell_ref in rw_set { + let cell = cell_ref.borrow(); + if let Some(ports) = + cell.type_name().and_then(|c| self.latency_data.get(&c)) + { + go_done_edges.extend( + ports + .iter() + .map(|(go, done, _)| (cell.get(go), cell.get(done))), + ) + } + } + go_done_edges + } + + /// Returns true if `port` is a "done" port, and we know the latency data + /// about `port`, or is a constant. + fn is_done_port_or_const(&self, port: &ir::Port) -> bool { + if let ir::PortParent::Cell(cwrf) = &port.parent { + let cr = cwrf.upgrade(); + let cell = cr.borrow(); + if let ir::CellType::Constant { val, .. } = &cell.prototype { + if *val > 0 { + return true; + } + } else if let Some(ports) = + cell.type_name().and_then(|c| self.latency_data.get(&c)) + { + return ports.is_done(&port.name); + } + } + false + } + + /// Returns true if `graph` contains writes to "done" ports + /// that could have dynamic latencies, false otherwise. 
+ fn contains_dyn_writes(&self, graph: &GraphAnalysis) -> bool { + for port in &graph.ports() { + match &port.borrow().parent { + ir::PortParent::Cell(cell_wrf) => { + let cr = cell_wrf.upgrade(); + let cell = cr.borrow(); + if let Some(ports) = + cell.type_name().and_then(|c| self.latency_data.get(&c)) + { + let name = &port.borrow().name; + if ports.is_go(name) { + for write_port in graph.writes_to(&port.borrow()) { + if !self + .is_done_port_or_const(&write_port.borrow()) + { + log::debug!( + "`{}` is not a done port", + write_port.borrow().canonical(), + ); + return true; + } + } + } + } + } + ir::PortParent::Group(_) => { + if port.borrow().name == "done" { + for write_port in graph.writes_to(&port.borrow()) { + if !self.is_done_port_or_const(&write_port.borrow()) + { + log::debug!( + "`{}` is not a done port", + write_port.borrow().canonical(), + ); + return true; + } + } + } + } + + ir::PortParent::StaticGroup(_) => // done ports of static groups should clearly NOT have static latencies + panic!("Have not decided how to handle static groups in infer-static-timing"), + } + } + false + } + + /// Returns true if `graph` contains any nodes with degree > 1. + fn contains_node_deg_gt_one(graph: &GraphAnalysis) -> bool { + for port in graph.ports() { + if graph.writes_to(&port.borrow()).count() > 1 { + return true; + } + } + false + } + + /// Attempts to infer the number of cycles starting when + /// `group[go]` is high, and port is high. If inference is + /// not possible, returns None. + fn infer_latency(&self, group: &ir::Group) -> Option { + // Creates a write dependency graph, which contains an edge (`a`, `b`) if: + // - `a` is a "done" port, and writes to `b`, which is a "go" port + // - `a` is a "done" port, and writes to `b`, which is the "done" port of this group + // - `a` is an "out" port, and is a constant, and writes to `b`, a "go" port + // - `a` is a "go" port, and `b` is a "done" port, and `a` and `b` share a parent cell + // Nodes that are not part of any edges that meet these criteria are excluded. + // + // For example, this group: + // ``` + // group g1 { + // a.in = 32'd1; + // a.write_en = 1'd1; + // g1[done] = a.done; + // } + // ``` + // corresponds to this graph: + // ``` + // constant(1) -> a.write_en + // a.write_en -> a.done + // a.done -> g1[done] + // ``` + log::debug!("Checking group `{}`", group.name()); + let graph_unprocessed = GraphAnalysis::from(group); + if self.contains_dyn_writes(&graph_unprocessed) { + log::debug!("FAIL: contains dynamic writes"); + return None; + } + + let go_done_edges = self.find_go_done_edges(group); + let graph = graph_unprocessed + .edge_induced_subgraph(|src, dst| self.mem_wrt_dep_graph(src, dst)) + .add_edges(&go_done_edges) + .remove_isolated_vertices(); + + // Give up if a port has multiple writes to it. + if Self::contains_node_deg_gt_one(&graph) { + log::debug!("FAIL: Group contains multiple writes"); + return None; + } + + let mut tsort = graph.toposort(); + let start = tsort.next()?; + let finish = tsort.last()?; + + let paths = graph.paths(&start.borrow(), &finish.borrow()); + // If there are no paths, give up. + if paths.is_empty() { + log::debug!("FAIL: No path between @go and @done port"); + return None; + } + let first_path = paths.get(0).unwrap(); + + // Sum the latencies of each primitive along the path. 
+ let mut latency_sum = 0; + for port in first_path { + if let ir::PortParent::Cell(cwrf) = &port.borrow().parent { + let cr = cwrf.upgrade(); + let cell = cr.borrow(); + if let Some(ports) = + cell.type_name().and_then(|c| self.latency_data.get(&c)) + { + if let Some(latency) = + ports.get_latency(&port.borrow().name) + { + latency_sum += latency; + } + } + } + } + + log::debug!("SUCCESS: Latency = {}", latency_sum); + Some(latency_sum) + } + + /// Returns Some(latency) if a control statement has a latency, because + /// it is static or is has the @promotable attribute + pub fn get_possible_latency(c: &ir::Control) -> Option { + match c { + ir::Control::Static(sc) => Some(sc.get_latency()), + _ => c.get_attribute(ir::NumAttr::PromoteStatic), + } + } + + /// Removes the @promotable attribute from the control program. + /// Recursively visits the children of the control. + pub fn remove_promotable_attribute(c: &mut ir::Control) { + c.get_mut_attributes().remove(ir::NumAttr::PromoteStatic); + match c { + ir::Control::Empty(_) + | ir::Control::Invoke(_) + | ir::Control::Enable(_) + | ir::Control::Static(_) => (), + ir::Control::While(ir::While { body, .. }) + | ir::Control::Repeat(ir::Repeat { body, .. }) => { + Self::remove_promotable_attribute(body); + } + ir::Control::If(ir::If { + tbranch, fbranch, .. + }) => { + Self::remove_promotable_attribute(tbranch); + Self::remove_promotable_attribute(fbranch); + } + ir::Control::Seq(ir::Seq { stmts, .. }) + | ir::Control::Par(ir::Par { stmts, .. }) => { + for stmt in stmts { + Self::remove_promotable_attribute(stmt); + } + } + } + } + + /// "Fixes Up" the component. In particular: + /// 1. Removes @promotable annotations for any groups that write to any + /// `updated_components`. + /// 2. Try to re-infer groups' latencies. + /// 3. Removes all @promotable annotation from the control program. + /// 4. Re-infers the @promotable annotations for any groups or control. + /// Note that this only fixes up the component's ``internals''. + /// It does *not* fix the component's signature. + pub fn fixup_timing(&self, comp: &mut ir::Component) { + // Removing @promotable annotations for any groups that write to an updated_component, + // then try to re-infer the latency. + for group in comp.groups.iter() { + // This checks any group that writes to the component: + // We can probably switch this to any group that writes to the component's + // `go` port to be more precise analysis. + if ReadWriteSet::write_set(group.borrow_mut().assignments.iter()) + .any(|cell| match cell.borrow().prototype { + CellType::Component { name } => { + self.updated_components.contains(&name) + } + _ => false, + }) + { + // Remove attribute from group. + group + .borrow_mut() + .attributes + .remove(ir::NumAttr::PromoteStatic); + } + } + + for group in &mut comp.groups.iter() { + // Immediately try to re-infer the latency of the group. + let latency_result = self.infer_latency(&group.borrow()); + if let Some(latency) = latency_result { + group + .borrow_mut() + .attributes + .insert(ir::NumAttr::PromoteStatic, latency); + } + } + + // Removing @promotable annotations for the control flow, then trying + // to re-infer them. 
+ Self::remove_promotable_attribute(&mut comp.control.borrow_mut()); + comp.control + .borrow_mut() + .update_static(&self.static_component_latencies); + } +} diff --git a/calyx-opt/src/analysis/mod.rs b/calyx-opt/src/analysis/mod.rs index 2c0afeb748..3e78bea5e4 100644 --- a/calyx-opt/src/analysis/mod.rs +++ b/calyx-opt/src/analysis/mod.rs @@ -11,6 +11,7 @@ mod dataflow_order; mod domination_analysis; mod graph; mod graph_coloring; +mod inference_analysis; mod live_range_analysis; mod port_interface; pub mod reaching_defns; @@ -29,6 +30,8 @@ pub use dataflow_order::DataflowOrder; pub use domination_analysis::DominatorMap; pub use graph::GraphAnalysis; pub use graph_coloring::GraphColoring; +pub use inference_analysis::GoDone; +pub use inference_analysis::InferenceAnalysis; pub use live_range_analysis::LiveRangeAnalysis; pub use port_interface::PortInterface; pub use read_write_set::ReadWriteSet; diff --git a/calyx-opt/src/default_passes.rs b/calyx-opt/src/default_passes.rs index 2528692c39..0f5fe52d79 100644 --- a/calyx-opt/src/default_passes.rs +++ b/calyx-opt/src/default_passes.rs @@ -8,9 +8,9 @@ use crate::passes::{ Externalize, GoInsertion, GroupToInvoke, GroupToSeq, HoleInliner, InferShare, LowerGuards, MergeAssign, Papercut, ParToSeq, RegisterUnsharing, RemoveIds, ResetInsertion, ScheduleCompaction, - SimplifyStaticGuards, SimplifyWithControl, StaticInliner, StaticPromotion, - SynthesisPapercut, TopDownCompileControl, UnrollBounded, WellFormed, - WireInliner, WrapMain, + SimplifyStaticGuards, SimplifyWithControl, StaticInference, StaticInliner, + StaticPromotion, SynthesisPapercut, TopDownCompileControl, UnrollBounded, + WellFormed, WireInliner, WrapMain, }; use crate::traversal::Named; use crate::{pass_manager::PassManager, register_alias}; @@ -38,6 +38,7 @@ impl PassManager { pm.register_pass::()?; pm.register_pass::()?; pm.register_pass::()?; + pm.register_pass::()?; pm.register_pass::()?; pm.register_pass::()?; pm.register_pass::()?; @@ -96,6 +97,7 @@ impl PassManager { SimplifyWithControl, // Must run before compile-invoke CompileInvoke, // creates dead comb groups AttributePromotion, + StaticInference, StaticPromotion, ScheduleCompaction, CompileRepeat, diff --git a/calyx-opt/src/passes/mod.rs b/calyx-opt/src/passes/mod.rs index cba2e17ea2..932e37b5f4 100644 --- a/calyx-opt/src/passes/mod.rs +++ b/calyx-opt/src/passes/mod.rs @@ -30,6 +30,7 @@ mod remove_ids; mod reset_insertion; mod schedule_compaction; mod simplify_static_guards; +mod static_inference; mod static_inliner; mod static_promotion; mod sync; @@ -78,6 +79,7 @@ pub use reset_insertion::ResetInsertion; pub use schedule_compaction::ScheduleCompaction; pub use simplify_static_guards::SimplifyStaticGuards; pub use simplify_with_control::SimplifyWithControl; +pub use static_inference::StaticInference; pub use static_inliner::StaticInliner; pub use static_promotion::StaticPromotion; pub use sync::CompileSync; diff --git a/calyx-opt/src/passes/static_inference.rs b/calyx-opt/src/passes/static_inference.rs new file mode 100644 index 0000000000..0c1b54d61d --- /dev/null +++ b/calyx-opt/src/passes/static_inference.rs @@ -0,0 +1,106 @@ +use crate::analysis::{GoDone, InferenceAnalysis}; +use crate::traversal::{ + Action, ConstructVisitor, Named, Order, VisResult, Visitor, +}; +use calyx_ir::{self as ir, LibrarySignatures}; +use calyx_utils::CalyxResult; +use itertools::Itertools; + +/// Infer "promote_static" (potentially to be renamed @promotable) annotation +/// for groups and control. +/// Inference occurs whenever possible. 
+pub struct StaticInference { + /// Takes static information. + inference_analysis: InferenceAnalysis, +} + +// Override constructor to build latency_data information from the primitives +// library. +impl ConstructVisitor for StaticInference { + fn from(ctx: &ir::Context) -> CalyxResult { + Ok(StaticInference { + inference_analysis: InferenceAnalysis::from_ctx(ctx), + }) + } + + // This pass shared information between components + fn clear_data(&mut self) {} +} + +impl Named for StaticInference { + fn name() -> &'static str { + "static-inference" + } + + fn description() -> &'static str { + "infer when dynamic control programs are promotable" + } +} + +impl Visitor for StaticInference { + // Require post order traversal of components to ensure `invoke` nodes + // get timing information for components. + fn iteration_order() -> Order { + Order::Post + } + + fn finish( + &mut self, + comp: &mut ir::Component, + _lib: &LibrarySignatures, + _comps: &[ir::Component], + ) -> VisResult { + if comp.name != "main" { + // If the entire component's control is promotable. + if let Some(val) = + InferenceAnalysis::get_possible_latency(&comp.control.borrow()) + { + let comp_sig = comp.signature.borrow(); + let mut go_ports: Vec<_> = + comp_sig.find_all_with_attr(ir::NumAttr::Go).collect(); + // Insert @static attribute on the go ports. + for go_port in &mut go_ports { + go_port + .borrow_mut() + .attributes + .insert(ir::NumAttr::Static, val); + } + let mut done_ports: Vec<_> = + comp_sig.find_all_with_attr(ir::NumAttr::Done).collect(); + // Update `latency_data`. + go_ports.sort_by_key(|port| { + port.borrow().attributes.get(ir::NumAttr::Go).unwrap() + }); + done_ports.sort_by_key(|port| { + port.borrow().attributes.get(ir::NumAttr::Done).unwrap() + }); + let zipped: Vec<_> = + go_ports.iter().zip(done_ports.iter()).collect(); + let go_done_ports = zipped + .into_iter() + .map(|(go_port, done_port)| { + (go_port.borrow().name, done_port.borrow().name, val) + }) + .collect_vec(); + self.inference_analysis.add_component(( + comp.name, + val, + GoDone::new(go_done_ports), + )); + } + } + Ok(Action::Continue) + } + + fn start( + &mut self, + comp: &mut ir::Component, + _sigs: &LibrarySignatures, + _comps: &[ir::Component], + ) -> VisResult { + // ``Fix up the timing'', but with the updated_components argument as + // and empty HashMap. This just performs inference. + self.inference_analysis.fixup_timing(comp); + Ok(Action::Continue) + } +} diff --git a/calyx-opt/src/passes/static_promotion.rs b/calyx-opt/src/passes/static_promotion.rs index 750b5e9ad1..0fffcd8599 100644 --- a/calyx-opt/src/passes/static_promotion.rs +++ b/calyx-opt/src/passes/static_promotion.rs @@ -1,10 +1,10 @@ -use crate::analysis::{GraphAnalysis, ReadWriteSet}; +use crate::analysis::InferenceAnalysis; use crate::traversal::{ Action, ConstructVisitor, Named, Order, ParseVal, PassOpt, VisResult, Visitor, }; -use calyx_ir::{self as ir, LibrarySignatures, RRC}; -use calyx_utils::{CalyxResult, Error}; +use calyx_ir::{self as ir, LibrarySignatures}; +use calyx_utils::CalyxResult; use ir::GetAttributes; use itertools::Itertools; use std::collections::HashMap; @@ -15,132 +15,24 @@ const APPROX_ENABLE_SIZE: u64 = 1; const APPROX_IF_SIZE: u64 = 3; const APPROX_WHILE_REPEAT_SIZE: u64 = 3; -/// Struct to store information about the go-done interfaces defined by a primitive. 
-#[derive(Default, Debug)] -struct GoDone { - ports: Vec<(ir::Id, ir::Id, u64)>, -} - -impl GoDone { - pub fn new(ports: Vec<(ir::Id, ir::Id, u64)>) -> Self { - Self { ports } - } - - /// Returns true if this is @go port - pub fn is_go(&self, name: &ir::Id) -> bool { - self.ports.iter().any(|(go, _, _)| name == go) - } - - /// Returns true if this is a @done port - pub fn is_done(&self, name: &ir::Id) -> bool { - self.ports.iter().any(|(_, done, _)| name == done) - } - - /// Returns the latency associated with the provided @go port if present - pub fn get_latency(&self, go_port: &ir::Id) -> Option { - self.ports.iter().find_map(|(go, _, lat)| { - if go == go_port { - Some(*lat) - } else { - None - } - }) - } - - /// Iterate over the defined ports - pub fn iter(&self) -> impl Iterator { - self.ports.iter() - } -} - -impl From<&ir::Primitive> for GoDone { - fn from(prim: &ir::Primitive) -> Self { - let done_ports: HashMap<_, _> = prim - .find_all_with_attr(ir::NumAttr::Done) - .map(|pd| (pd.attributes.get(ir::NumAttr::Done), pd.name())) - .collect(); - - let go_ports = prim - .find_all_with_attr(ir::NumAttr::Go) - .filter_map(|pd| { - pd.attributes.get(ir::NumAttr::Static).and_then(|st| { - done_ports - .get(&pd.attributes.get(ir::NumAttr::Go)) - .map(|done_port| (pd.name(), *done_port, st)) - }) - }) - .collect_vec(); - GoDone::new(go_ports) - } -} - -impl From<&ir::Cell> for GoDone { - fn from(cell: &ir::Cell) -> Self { - let done_ports: HashMap<_, _> = cell - .find_all_with_attr(ir::NumAttr::Done) - .map(|pr| { - let port = pr.borrow(); - (port.attributes.get(ir::NumAttr::Done), port.name) - }) - .collect(); - - let go_ports = cell - .find_all_with_attr(ir::NumAttr::Go) - .filter_map(|pr| { - let port = pr.borrow(); - port.attributes.get(ir::NumAttr::Static).and_then(|st| { - done_ports - .get(&port.attributes.get(ir::NumAttr::Go)) - .map(|done_port| (port.name, *done_port, st)) - }) - }) - .collect_vec(); - GoDone::new(go_ports) - } -} - -/// Infer "promote_static" annotation for groups and promote control to static when -/// (conservatively) possible. +/// Promote control to static when (conservatively) possible, using @promote_static +/// annotations from `infer_static`. +/// +/// Promotion occurs the following policies: +/// 1. ``Threshold'': How large the island must be. We have three const +/// defined as heuristics to measure approximately how big each control program +/// is. It must be larger than that threshold. +/// 2. ``Cycle limit": The maximum number of cycles the island can be when we +/// promote it. +/// 3. ``If Diff Limit": The maximum difference in latency between if statments +/// that we can tolerate to promote it. /// -/// Promotion follows the current policies: -/// 1. if multiple groups enables aligned inside a seq are marked with the "promote_static" -/// attribute, then promote all promotable enables to static enables, meanwhile, -/// wrap them into a static seq -/// for example: -/// ``` -/// seq { -/// a1; -/// @promote_static a2; @promote_static a3; } -/// ``` -/// becomes -/// ``` -/// seq { -/// a1; -/// static seq {a2; a3;}} -/// ``` -/// 2. if all control statements under seq are either static statements or group enables -/// with `promote_static` annotation, then promote all group enables and turn -/// seq into static seq -/// 3. Under a par control op, all group enables marked with `promote_static` will be promoted. -/// all control statements that are either static or group enables with `promote_static` annotation -/// are wrapped inside a static par. 
-/// ``` -/// par {@promote_static a1; a2; @promote_static a3;} -/// ``` -/// becomes -/// ``` -/// par { -/// static par { a1; a3; } -/// a2; -/// } -/// ``` pub struct StaticPromotion { - /// component name -> vec<(go signal, done signal, latency)> - latency_data: HashMap, + /// An InferenceAnalysis object so that we can re-infer the latencies of + /// certain components. + inference_analysis: InferenceAnalysis, /// dynamic group Id -> promoted static group Id static_group_name: HashMap, - /// Maps static component names to their latencies - static_component_latencies: HashMap, /// Threshold for promotion threshold: u64, /// Threshold for difference in latency for if statements @@ -153,38 +45,10 @@ pub struct StaticPromotion { // library. impl ConstructVisitor for StaticPromotion { fn from(ctx: &ir::Context) -> CalyxResult { - let mut latency_data = HashMap::new(); - //let mut comp_latency = HashMap::new(); - // Construct latency_data for each primitive - for prim in ctx.lib.signatures() { - let done_ports: HashMap<_, _> = prim - .find_all_with_attr(ir::NumAttr::Done) - .map(|pd| (pd.attributes.get(ir::NumAttr::Done), pd.name())) - .collect(); - - let go_ports = prim - .find_all_with_attr(ir::NumAttr::Go) - .filter_map(|pd| { - pd.attributes.get(ir::NumAttr::Static).and_then(|st| { - done_ports - .get(&pd.attributes.get(ir::NumAttr::Go)) - .map(|done_port| (pd.name(), *done_port, st)) - }) - }) - .collect_vec(); - - // If this primitive has exactly one (go, done, static) pair, we - // can infer the latency of its invokes. - if go_ports.len() == 1 { - //comp_latency.insert(prim.name, go_ports[0].2); - } - latency_data.insert(prim.name, GoDone::new(go_ports)); - } let opts = Self::get_opts(ctx); Ok(StaticPromotion { - latency_data, + inference_analysis: InferenceAnalysis::from_ctx(ctx), static_group_name: HashMap::new(), - static_component_latencies: HashMap::new(), threshold: opts["threshold"].pos_num().unwrap(), if_diff_limit: opts["if-diff-limit"].pos_num(), cycle_limit: opts["cycle-limit"].pos_num(), @@ -231,233 +95,6 @@ impl Named for StaticPromotion { } impl StaticPromotion { - /// Return true if the edge (`src`, `dst`) meet one these criteria, and false otherwise: - /// - `src` is an "out" port of a constant, and `dst` is a "go" port - /// - `src` is a "done" port, and `dst` is a "go" port - /// - `src` is a "done" port, and `dst` is the "done" port of a group - fn mem_wrt_dep_graph(&self, src: &ir::Port, dst: &ir::Port) -> bool { - match (&src.parent, &dst.parent) { - ( - ir::PortParent::Cell(src_cell_wrf), - ir::PortParent::Cell(dst_cell_wrf), - ) => { - let src_rf = src_cell_wrf.upgrade(); - let src_cell = src_rf.borrow(); - let dst_rf = dst_cell_wrf.upgrade(); - let dst_cell = dst_rf.borrow(); - if let (Some(s_name), Some(d_name)) = - (src_cell.type_name(), dst_cell.type_name()) - { - let data_src = self.latency_data.get(&s_name); - let data_dst = self.latency_data.get(&d_name); - if let (Some(dst_ports), Some(src_ports)) = - (data_dst, data_src) - { - return src_ports.is_done(&src.name) - && dst_ports.is_go(&dst.name); - } - } - - // A constant writes to a cell: to be added to the graph, the cell needs to be a "done" port. - if let (Some(d_name), ir::CellType::Constant { .. }) = - (dst_cell.type_name(), &src_cell.prototype) - { - if let Some(ports) = self.latency_data.get(&d_name) { - return ports.is_go(&dst.name); - } - } - - false - } - - // Something is written to a group: to be added to the graph, this needs to be a "done" port. 
- (_, ir::PortParent::Group(_)) => dst.name == "done", - - // If we encounter anything else, no need to add it to the graph. - _ => false, - } - } - - /// Return a Vec of edges (`a`, `b`), where `a` is a "go" port and `b` - /// is a "done" port, and `a` and `b` have the same parent cell. - fn find_go_done_edges( - &self, - group: &ir::Group, - ) -> Vec<(RRC, RRC)> { - let rw_set = ReadWriteSet::uses(group.assignments.iter()); - let mut go_done_edges: Vec<(RRC, RRC)> = Vec::new(); - - for cell_ref in rw_set { - let cell = cell_ref.borrow(); - if let Some(ports) = - cell.type_name().and_then(|c| self.latency_data.get(&c)) - { - go_done_edges.extend( - ports - .iter() - .map(|(go, done, _)| (cell.get(go), cell.get(done))), - ) - } - } - go_done_edges - } - - /// Returns true if `port` is a "done" port, and we know the latency data - /// about `port`, or is a constant. - fn is_done_port_or_const(&self, port: &ir::Port) -> bool { - if let ir::PortParent::Cell(cwrf) = &port.parent { - let cr = cwrf.upgrade(); - let cell = cr.borrow(); - if let ir::CellType::Constant { val, .. } = &cell.prototype { - if *val > 0 { - return true; - } - } else if let Some(ports) = - cell.type_name().and_then(|c| self.latency_data.get(&c)) - { - return ports.is_done(&port.name); - } - } - false - } - - /// Returns true if `graph` contains writes to "done" ports - /// that could have dynamic latencies, false otherwise. - fn contains_dyn_writes(&self, graph: &GraphAnalysis) -> bool { - for port in &graph.ports() { - match &port.borrow().parent { - ir::PortParent::Cell(cell_wrf) => { - let cr = cell_wrf.upgrade(); - let cell = cr.borrow(); - if let Some(ports) = - cell.type_name().and_then(|c| self.latency_data.get(&c)) - { - let name = &port.borrow().name; - if ports.is_go(name) { - for write_port in graph.writes_to(&port.borrow()) { - if !self - .is_done_port_or_const(&write_port.borrow()) - { - log::debug!( - "`{}` is not a done port", - write_port.borrow().canonical(), - ); - return true; - } - } - } - } - } - ir::PortParent::Group(_) => { - if port.borrow().name == "done" { - for write_port in graph.writes_to(&port.borrow()) { - if !self.is_done_port_or_const(&write_port.borrow()) - { - log::debug!( - "`{}` is not a done port", - write_port.borrow().canonical(), - ); - return true; - } - } - } - } - - ir::PortParent::StaticGroup(_) => // done ports of static groups should clearly NOT have static latencies - panic!("Have not decided how to handle static groups in infer-static-timing"), - } - } - false - } - - /// Returns true if `graph` contains any nodes with degree > 1. - fn contains_node_deg_gt_one(graph: &GraphAnalysis) -> bool { - for port in graph.ports() { - if graph.writes_to(&port.borrow()).count() > 1 { - return true; - } - } - false - } - - /// Attempts to infer the number of cycles starting when - /// `group[go]` is high, and port is high. If inference is - /// not possible, returns None. - fn infer_latency(&self, group: &ir::Group) -> Option { - // Creates a write dependency graph, which contains an edge (`a`, `b`) if: - // - `a` is a "done" port, and writes to `b`, which is a "go" port - // - `a` is a "done" port, and writes to `b`, which is the "done" port of this group - // - `a` is an "out" port, and is a constant, and writes to `b`, a "go" port - // - `a` is a "go" port, and `b` is a "done" port, and `a` and `b` share a parent cell - // Nodes that are not part of any edges that meet these criteria are excluded. 
- // - // For example, this group: - // ``` - // group g1 { - // a.in = 32'd1; - // a.write_en = 1'd1; - // g1[done] = a.done; - // } - // ``` - // corresponds to this graph: - // ``` - // constant(1) -> a.write_en - // a.write_en -> a.done - // a.done -> g1[done] - // ``` - log::debug!("Checking group `{}`", group.name()); - let graph_unprocessed = GraphAnalysis::from(group); - if self.contains_dyn_writes(&graph_unprocessed) { - log::debug!("FAIL: contains dynamic writes"); - return None; - } - - let go_done_edges = self.find_go_done_edges(group); - let graph = graph_unprocessed - .edge_induced_subgraph(|src, dst| self.mem_wrt_dep_graph(src, dst)) - .add_edges(&go_done_edges) - .remove_isolated_vertices(); - - // Give up if a port has multiple writes to it. - if Self::contains_node_deg_gt_one(&graph) { - log::debug!("FAIL: Group contains multiple writes"); - return None; - } - - let mut tsort = graph.toposort(); - let start = tsort.next()?; - let finish = tsort.last()?; - - let paths = graph.paths(&start.borrow(), &finish.borrow()); - // If there are no paths, give up. - if paths.is_empty() { - log::debug!("FAIL: No path between @go and @done port"); - return None; - } - let first_path = paths.get(0).unwrap(); - - // Sum the latencies of each primitive along the path. - let mut latency_sum = 0; - for port in first_path { - if let ir::PortParent::Cell(cwrf) = &port.borrow().parent { - let cr = cwrf.upgrade(); - let cell = cr.borrow(); - if let Some(ports) = - cell.type_name().and_then(|c| self.latency_data.get(&c)) - { - if let Some(latency) = - ports.get_latency(&port.borrow().name) - { - latency_sum += latency; - } - } - } - } - - log::debug!("SUCCESS: Latency = {}", latency_sum); - Some(latency_sum) - } - /// Gets the inferred latency, which should either be from being a static /// control operator or the promote_static attribute. /// Will raise an error if neither of these is true. @@ -512,17 +149,71 @@ impl StaticPromotion { self.static_group_name .insert(group.borrow().name(), sg.borrow().name()); for assignment in group.borrow().assignments.iter() { + // Don't need to include assignment to done hole. 
if !(assignment.dst.borrow().is_hole() && assignment.dst.borrow().name == "done") { - let static_s = ir::Assignment::from(assignment.clone()); - sg.borrow_mut().assignments.push(static_s); + sg.borrow_mut() + .assignments + .push(ir::Assignment::from(assignment.clone())); } } Rc::clone(&sg) } } + // Converts dynamic enable to static + fn convert_enable_to_static( + &mut self, + s: &mut ir::Enable, + builder: &mut ir::Builder, + ) -> ir::StaticControl { + s.attributes.remove(ir::NumAttr::PromoteStatic); + ir::StaticControl::Enable(ir::StaticEnable { + // upgrading group to static group + group: self.construct_static_group( + builder, + Rc::clone(&s.group), + s.group + .borrow() + .get_attributes() + .unwrap() + .get(ir::NumAttr::PromoteStatic) + .unwrap(), + ), + attributes: std::mem::take(&mut s.attributes), + }) + } + + // Converts dynamic invoke to static + fn convert_invoke_to_static( + &mut self, + s: &mut ir::Invoke, + ) -> ir::StaticControl { + assert!( + s.comb_group.is_none(), + "Shouldn't Promote to Static if there is a Comb Group", + ); + s.attributes.remove(ir::NumAttr::PromoteStatic); + let latency = *self.inference_analysis.static_component_latencies.get( + &s.comp.borrow().type_name().unwrap_or_else(|| { + unreachable!( + "Already checked that comp is component" + ) + }), + ).unwrap_or_else(|| unreachable!("Called convert_to_static for static invoke that does not have a static component")); + let s_inv = ir::StaticInvoke { + comp: Rc::clone(&s.comp), + inputs: std::mem::take(&mut s.inputs), + outputs: std::mem::take(&mut s.outputs), + latency, + attributes: std::mem::take(&mut s.attributes), + ref_cells: std::mem::take(&mut s.ref_cells), + comb_group: std::mem::take(&mut s.comb_group), + }; + ir::StaticControl::Invoke(s_inv) + } + /// Converts control to static control. /// Control must already be static or have the `promote_static` attribute. fn convert_to_static( @@ -542,25 +233,7 @@ impl StaticPromotion { let inferred_latency = Self::get_inferred_latency(c); match c { ir::Control::Empty(_) => ir::StaticControl::empty(), - ir::Control::Enable(ir::Enable { group, attributes }) => { - // Removing the `promote_static` attribute bc we don't need it anymore. 
- attributes.remove(ir::NumAttr::PromoteStatic); - let enable = ir::StaticControl::Enable(ir::StaticEnable { - // upgrading group to static group - group: self.construct_static_group( - builder, - Rc::clone(group), - group - .borrow() - .get_attributes() - .unwrap() - .get(ir::NumAttr::PromoteStatic) - .unwrap(), - ), - attributes: std::mem::take(attributes), - }); - enable - } + ir::Control::Enable(s) => self.convert_enable_to_static(s, builder), ir::Control::Seq(ir::Seq { stmts, attributes }) => { // Removing the `promote_static` attribute bc we don't need it anymore attributes.remove(ir::NumAttr::PromoteStatic); @@ -655,37 +328,7 @@ impl StaticPromotion { ) } ir::Control::Static(_) => c.take_static_control(), - ir::Control::Invoke(ir::Invoke { - comp, - inputs, - outputs, - attributes, - comb_group, - ref_cells, - }) => { - assert!( - comb_group.is_none(), - "Shouldn't Promote to Static if there is a Comb Group", - ); - attributes.remove(ir::NumAttr::PromoteStatic); - Self::check_latencies_match(self.static_component_latencies.get( - &comp.borrow().type_name().unwrap_or_else(|| { - unreachable!( - "Already checked that comp is component" - ) - }), - ).unwrap_or_else(|| unreachable!("Called convert_to_static for static invoke that does not have a static component")).get(), inferred_latency); - let s_inv = ir::StaticInvoke { - comp: Rc::clone(comp), - inputs: std::mem::take(inputs), - outputs: std::mem::take(outputs), - latency: inferred_latency, - attributes: std::mem::take(attributes), - ref_cells: std::mem::take(ref_cells), - comb_group: std::mem::take(comb_group), - }; - ir::StaticControl::Invoke(s_inv) - } + ir::Control::Invoke(s) => self.convert_invoke_to_static(s), } } @@ -727,7 +370,8 @@ impl StaticPromotion { // static control appears as one big group to the dynamic FSM 1 } - ir::Control::Invoke(_) => 1, + // Invokes are same size as enables. + ir::Control::Invoke(_) => APPROX_ENABLE_SIZE, } } @@ -737,27 +381,35 @@ impl StaticPromotion { v.iter().map(Self::approx_size).sum() } - /// First checks if the vec of control statements satsifies the threshold - /// and cycle count threshold - /// (That is, whether the combined approx_size of the static_vec is greater) - /// than the threshold and cycle count is less than cycle limit). - /// If so, converts vec of control to a static seq, and returns a vec containing - /// the static seq. - /// Otherwise, just returns the vec without changing it. - fn convert_vec_seq_if_sat( + /// Converts the control_vec (i..e, the stmts of the seq) using heuristics. 
+ fn promote_vec_seq_heuristic( &mut self, builder: &mut ir::Builder, - control_vec: Vec, + mut control_vec: Vec, ) -> Vec { - if Self::approx_control_vec_size(&control_vec) <= self.threshold - || !self.within_cycle_limit( - control_vec.iter().map(Self::get_inferred_latency).sum(), - ) + if control_vec.is_empty() { + // Base case + return vec![]; + } else if control_vec.len() == 1 { + return vec![control_vec.pop().unwrap()]; + } else if Self::approx_control_vec_size(&control_vec) <= self.threshold { - // Return unchanged vec + // Too small to be promoted, return as is return control_vec; + } else if !self.within_cycle_limit( + control_vec.iter().map(Self::get_inferred_latency).sum(), + ) { + // Too large, try to break up + let right = control_vec.split_off(control_vec.len() / 2); + dbg!(control_vec.len()); + dbg!(right.len()); + let mut left_res = + self.promote_vec_seq_heuristic(builder, control_vec); + let right_res = self.promote_vec_seq_heuristic(builder, right); + left_res.extend(right_res); + return left_res; } - // Convert vec to static seq + // Correct size, convert the entire vec let s_seq_stmts = self.convert_vec_to_static(builder, control_vec); let latency = s_seq_stmts.iter().map(|sc| sc.get_latency()).sum(); let mut sseq = @@ -772,24 +424,41 @@ impl StaticPromotion { /// If so, converts vec of control to a static par, and returns a vec containing /// the static par. /// Otherwise, just returns the vec without changing it. - fn convert_vec_par_if_sat( + fn promote_vec_par_heuristic( &mut self, builder: &mut ir::Builder, - control_vec: Vec, + mut control_vec: Vec, ) -> Vec { - if Self::approx_control_vec_size(&control_vec) <= self.threshold - || !self.within_cycle_limit( - control_vec - .iter() - .map(Self::get_inferred_latency) - .max() - .unwrap_or_else(|| unreachable!("Non Empty Par Block")), - ) + if control_vec.is_empty() { + // Base case + return vec![]; + } else if control_vec.len() == 1 { + return vec![control_vec.pop().unwrap()]; + } else if Self::approx_control_vec_size(&control_vec) <= self.threshold { - // Return unchanged vec + // Too small to be promoted, return as is return control_vec; + } else if !self.within_cycle_limit( + control_vec + .iter() + .map(Self::get_inferred_latency) + .max() + .unwrap_or_else(|| unreachable!("Empty Par Block")), + ) { + // Too large to be promoted, take out largest thread and try to promote rest. + // Can safely unwrap bc we already checked for an empty vector. 
+ let (index, _) = control_vec + .iter() + .enumerate() + .max_by_key(|&(_, c)| Self::approx_size(c)) + .unwrap(); + // Pop the largest element from the vector + let largest_thread = control_vec.remove(index); + let mut left = self.promote_vec_par_heuristic(builder, control_vec); + left.push(largest_thread); + return left; } - // Convert vec to static seq + // Convert vec to static par let s_par_stmts = self.convert_vec_to_static(builder, control_vec); let latency = s_par_stmts .iter() @@ -815,102 +484,82 @@ impl Visitor for StaticPromotion { _lib: &LibrarySignatures, _comps: &[ir::Component], ) -> VisResult { - if comp.name != "main" && comp.control.borrow().is_static() { - if let Some(lat) = comp.control.borrow().get_latency() { - if !comp.is_static() { - comp.attributes.insert(ir::BoolAttr::Promoted, 1); - } - comp.latency = Some(NonZeroU64::new(lat).unwrap()); - let comp_sig = comp.signature.borrow(); - let mut done_ports: Vec<_> = - comp_sig.find_all_with_attr(ir::NumAttr::Done).collect(); - let mut go_ports: Vec<_> = - comp_sig.find_all_with_attr(ir::NumAttr::Go).collect(); - if done_ports.len() == 1 && go_ports.len() == 1 { - let go_done = GoDone::new(vec![( - go_ports.pop().unwrap().borrow().name, - done_ports.pop().unwrap().borrow().name, - lat, - )]); - self.latency_data.insert(comp.name, go_done); + if comp.name != "main" { + let comp_sig = comp.signature.borrow(); + let go_ports = + comp_sig.find_all_with_attr(ir::NumAttr::Go).collect_vec(); + if go_ports.iter().any(|go_port| { + go_port.borrow_mut().attributes.has(ir::NumAttr::Static) + }) { + if comp.control.borrow().is_static() { + // We ended up promoting it + if !comp.is_static() { + // Need this attribute for a weird, in-between state. + // It has a known latency but also produces a done signal. + comp.attributes.insert(ir::BoolAttr::Promoted, 1); + } + // This makes the component appear as a static component. + comp.latency = Some( + NonZeroU64::new( + comp.control.borrow().get_latency().unwrap(), + ) + .unwrap(), + ); + } else { + // We decided not to promote, so we need to update data structures + // and remove @static attribute from the signature. + + // Updating `static_info`. + self.inference_analysis.remove_component(comp.name); + // Removing `@static` from the go ports. + for go_port in go_ports { + go_port + .borrow_mut() + .attributes + .remove(ir::NumAttr::Static); + } } - } - } - if comp.is_static() { - self.static_component_latencies - .insert(comp.name, comp.latency.unwrap()); + }; } + // Remove @promotable (i.e., @promote_static) attribute from control. + // Probably not necessary, since we'll ignore it anyways, but makes for + // cleaner code. + InferenceAnalysis::remove_promotable_attribute( + &mut comp.control.borrow_mut(), + ); Ok(Action::Continue) } fn start( &mut self, comp: &mut ir::Component, - sigs: &LibrarySignatures, - _comps: &[ir::Component], - ) -> VisResult { - let builder = ir::Builder::new(comp, sigs); - let mut latency_result: Option; - for group in builder.component.get_groups() { - if let Some(latency) = self.infer_latency(&group.borrow()) { - let grp = group.borrow(); - if let Some(curr_lat) = grp.attributes.get(ir::NumAttr::Static) - { - // Inferred latency is not the same as the provided latency annotation. 
- if curr_lat != latency { - let msg1 = format!("Annotated latency: {}", curr_lat); - let msg2 = format!("Inferred latency: {}", latency); - let msg = format!( - "Invalid \"static\" latency annotation for group {}.\n{}\n{}", - grp.name(), - msg1, - msg2 - ); - return Err(Error::malformed_structure(msg) - .with_pos(&grp.attributes)); - } - } - latency_result = Some(latency); - } else { - latency_result = None; - } - - if let Some(latency) = latency_result { - group - .borrow_mut() - .attributes - .insert(ir::NumAttr::PromoteStatic, latency); - } - } - Ok(Action::Continue) - } - - fn empty( - &mut self, - s: &mut ir::Empty, - _comp: &mut ir::Component, _sigs: &LibrarySignatures, _comps: &[ir::Component], ) -> VisResult { - s.attributes.insert(ir::NumAttr::PromoteStatic, 0); + // Re-infer static timing based on the components we have updated in + // this pass. + self.inference_analysis.fixup_timing(comp); Ok(Action::Continue) } fn enable( &mut self, s: &mut ir::Enable, - _comp: &mut ir::Component, - _sigs: &LibrarySignatures, + comp: &mut ir::Component, + sigs: &LibrarySignatures, _comps: &[ir::Component], ) -> VisResult { - if let Some(latency) = s - .group - .borrow() - .get_attributes() - .unwrap() - .get(ir::NumAttr::PromoteStatic) - { - s.attributes.insert(ir::NumAttr::PromoteStatic, latency); + let mut builder = ir::Builder::new(comp, sigs); + if let Some(latency) = s.attributes.get(ir::NumAttr::PromoteStatic) { + // Convert to static if enable is + // within cycle limit and size is above threshold. + if self.within_cycle_limit(latency) + && (APPROX_ENABLE_SIZE > self.threshold) + { + return Ok(Action::change(ir::Control::Static( + self.convert_enable_to_static(s, &mut builder), + ))); + } } Ok(Action::Continue) } @@ -922,14 +571,14 @@ impl Visitor for StaticPromotion { _sigs: &LibrarySignatures, _comps: &[ir::Component], ) -> VisResult { - // Shouldn't promote to static invoke if we have a comb group - if s.comp.borrow().is_component() { - if let Some(latency) = self - .static_component_latencies - .get(&s.comp.borrow().type_name().unwrap()) + if let Some(latency) = s.attributes.get(ir::NumAttr::PromoteStatic) { + // Convert to static if within cycle limit and size is above threshold. + if self.within_cycle_limit(latency) + && (APPROX_ENABLE_SIZE > self.threshold) { - s.attributes - .insert(ir::NumAttr::PromoteStatic, latency.get()); + return Ok(Action::change(ir::Control::Static( + self.convert_invoke_to_static(s), + ))); } } Ok(Action::Continue) @@ -943,6 +592,33 @@ impl Visitor for StaticPromotion { _comps: &[ir::Component], ) -> VisResult { let mut builder = ir::Builder::new(comp, sigs); + // Checking if entire seq is promotable + if let Some(latency) = s.attributes.get(ir::NumAttr::PromoteStatic) { + // If seq is too small to promote, then continue without doing anything. + if Self::approx_control_vec_size(&s.stmts) <= self.threshold { + return Ok(Action::Continue); + } else if self.within_cycle_limit(latency) { + // We promote entire seq. + let mut sseq = ir::Control::Static(ir::StaticControl::seq( + self.convert_vec_to_static( + &mut builder, + std::mem::take(&mut s.stmts), + ), + latency, + )); + sseq.get_mut_attributes() + .insert(ir::NumAttr::Compactable, 1); + return Ok(Action::change(sseq)); + } + } + // The seq either a) takes too many cylces to promote entirely or + // b) has dynamic stmts in it. Either way, the solution is to + // break it up into smaller static seqs. + // We know that this seq will *never* be promoted. 
Therefore, we can + // safely replace it with a standard `seq` that does not have an `@promotable` + // attribute. This temporarily messes up its parents' `@promotable` + // attribute, but this is fine since we know its parent will never try + // to promote it. let old_stmts = std::mem::take(&mut s.stmts); let mut new_stmts: Vec = Vec::new(); let mut cur_vec: Vec = Vec::new(); @@ -950,9 +626,9 @@ impl Visitor for StaticPromotion { if Self::can_be_promoted(&stmt) { cur_vec.push(stmt); } else { - // Accumualte cur_vec into a static seq if it meets threshold + // Use heuristics to decide how to promote this cur_vec of promotable stmts. let possibly_promoted_stmts = - self.convert_vec_seq_if_sat(&mut builder, cur_vec); + self.promote_vec_seq_heuristic(&mut builder, cur_vec); new_stmts.extend(possibly_promoted_stmts); // Add the current (non-promotable) stmt new_stmts.push(stmt); @@ -960,43 +636,7 @@ impl Visitor for StaticPromotion { cur_vec = Vec::new(); } } - if new_stmts.is_empty() { - // The entire seq can be promoted - let approx_size: u64 = cur_vec.iter().map(Self::approx_size).sum(); - if approx_size > self.threshold - && self.within_cycle_limit( - cur_vec.iter().map(Self::get_inferred_latency).sum(), - ) - { - // Promote entire seq to a static seq - let s_seq_stmts = - self.convert_vec_to_static(&mut builder, cur_vec); - let latency = - s_seq_stmts.iter().map(|sc| sc.get_latency()).sum(); - let mut sseq = ir::Control::Static(ir::StaticControl::seq( - s_seq_stmts, - latency, - )); - sseq.get_mut_attributes() - .insert(ir::NumAttr::Compactable, 1); - return Ok(Action::change(sseq)); - } else { - // Doesn't meet threshold. - // Add attribute to seq so parent might promote it. - let inferred_latency = - cur_vec.iter().map(Self::get_inferred_latency).sum(); - s.attributes - .insert(ir::NumAttr::PromoteStatic, inferred_latency); - s.stmts = cur_vec; - return Ok(Action::Continue); - } - } - // Entire seq is not static, so we're only promoting the last - // bit of it if possible. - let possibly_promoted_stmts = - self.convert_vec_seq_if_sat(&mut builder, cur_vec); - new_stmts.extend(possibly_promoted_stmts); - + new_stmts.extend(self.promote_vec_seq_heuristic(&mut builder, cur_vec)); let new_seq = ir::Control::Seq(ir::Seq { stmts: new_stmts, attributes: ir::Attributes::default(), @@ -1012,53 +652,37 @@ impl Visitor for StaticPromotion { _comps: &[ir::Component], ) -> VisResult { let mut builder = ir::Builder::new(comp, sigs); - let mut new_stmts: Vec = Vec::new(); - // Split the par into static and dynamic stmts - let (s_stmts, d_stmts): (Vec, Vec) = - s.stmts.drain(..).partition(|s| { - s.is_static() - || s.get_attributes().has(ir::NumAttr::PromoteStatic) - }); - if d_stmts.is_empty() { - // Entire par block can be promoted to static - if Self::approx_control_vec_size(&s_stmts) > self.threshold - && self.within_cycle_limit( - s_stmts - .iter() - .map(Self::get_inferred_latency) - .max() - .unwrap_or_else(|| unreachable!("Empty Par Block")), - ) - { - // Promote entire par block to static - let static_par_stmts = - self.convert_vec_to_static(&mut builder, s_stmts); - let latency = static_par_stmts - .iter() - .map(|sc| sc.get_latency()) - .max() - .unwrap_or_else(|| unreachable!("empty par block")); - return Ok(Action::change(ir::Control::Static( - ir::StaticControl::par(static_par_stmts, latency), - ))); - } else { - // Doesn't meet threshold, but add promotion attribute since - // parent might want to promote it. 
- let inferred_latency = s_stmts - .iter() - .map(Self::get_inferred_latency) - .max() - .unwrap_or_else(|| unreachable!("empty par block")); - s.get_mut_attributes() - .insert(ir::NumAttr::PromoteStatic, inferred_latency); - s.stmts = s_stmts; + // Check if entire par is promotable + if let Some(latency) = s.attributes.get(ir::NumAttr::PromoteStatic) { + let approx_size: u64 = s.stmts.iter().map(Self::approx_size).sum(); + if approx_size <= self.threshold { + // Par is too small to promote, continue. return Ok(Action::Continue); + } else if self.within_cycle_limit(latency) { + // Promote entire par + let spar = ir::Control::Static(ir::StaticControl::par( + self.convert_vec_to_static( + &mut builder, + std::mem::take(&mut s.stmts), + ), + latency, + )); + return Ok(Action::change(spar)); } } - // Otherwise just promote the par threads that we can into a static par - let s_stmts_possibly_promoted = - self.convert_vec_par_if_sat(&mut builder, s_stmts); - new_stmts.extend(s_stmts_possibly_promoted); + let mut new_stmts: Vec = Vec::new(); + // The par either a) takes too many cylces to promote entirely or + // b) has dynamic stmts in it. Either way, the solution is to + // break it up. + // Split the par into static and dynamic stmts, and use heuristics + // to choose whether to promote the static ones. This replacement will + // not have a `@promotable` attribute. + // This temporarily messes up its parents' `@promotable` + // attribute, but this is fine since we know its parent will never try + // to promote it. + let (s_stmts, d_stmts): (Vec, Vec) = + s.stmts.drain(..).partition(Self::can_be_promoted); + new_stmts.extend(self.promote_vec_par_heuristic(&mut builder, s_stmts)); new_stmts.extend(d_stmts); let new_par = ir::Control::Par(ir::Par { stmts: new_stmts, @@ -1075,17 +699,10 @@ impl Visitor for StaticPromotion { _comps: &[ir::Component], ) -> VisResult { let mut builder = ir::Builder::new(comp, sigs); - if Self::can_be_promoted(&s.tbranch) - && (Self::can_be_promoted(&s.fbranch)) - { - // Both branches can be promoted + if let Some(latency) = s.attributes.get(ir::NumAttr::PromoteStatic) { let approx_size_if = Self::approx_size(&s.tbranch) + Self::approx_size(&s.fbranch) + APPROX_IF_SIZE; - let latency = std::cmp::max( - Self::get_inferred_latency(&s.tbranch), - Self::get_inferred_latency(&s.fbranch), - ); let branch_diff = Self::get_inferred_latency(&s.tbranch) .abs_diff(Self::get_inferred_latency(&s.fbranch)); if approx_size_if > self.threshold @@ -1105,15 +722,14 @@ impl Visitor for StaticPromotion { latency, ), ))); - } else { - // Doesn't meet size threshold, so attach attribute - // so parent might be able to promote it. - let inferred_max_latency = std::cmp::max( - Self::get_inferred_latency(&s.tbranch), - Self::get_inferred_latency(&s.fbranch), - ); - s.get_mut_attributes() - .insert(ir::NumAttr::PromoteStatic, inferred_max_latency) + } + // If this takes too many cycles, then we will + // never promote this if statement, meaning we will never promote any + // of its parents. We can therefore safely remove the `@promotable` attribute. + // This isn't strictly necessary, but it is helpful for parent control + // programs applying heuristics. 
+ if !(self.within_cycle_limit(latency)) { + s.attributes.remove(ir::NumAttr::PromoteStatic); } } Ok(Action::Continue) @@ -1128,35 +744,35 @@ impl Visitor for StaticPromotion { _comps: &[ir::Component], ) -> VisResult { let mut builder = ir::Builder::new(comp, sigs); - // First check that while loop is bounded - if let Some(num_repeats) = s.get_attributes().get(ir::NumAttr::Bound) { - // Then check that body is static/promotable - if Self::can_be_promoted(&s.body) { - let approx_size = - Self::approx_size(&s.body) + APPROX_WHILE_REPEAT_SIZE; - let latency = Self::get_inferred_latency(&s.body) * num_repeats; - // Then check that it reaches the threshold - if approx_size > self.threshold - && self.within_cycle_limit(latency) - { - // Turn repeat into static repeat - let sc = self.convert_to_static(&mut s.body, &mut builder); - let static_repeat = ir::StaticControl::repeat( - num_repeats, - latency, - Box::new(sc), - ); - return Ok(Action::Change(Box::new(ir::Control::Static( - static_repeat, - )))); - } else { - // Attach static_promote attribute since parent control may - // want to promote - s.attributes.insert( - ir::NumAttr::PromoteStatic, - num_repeats * Self::get_inferred_latency(&s.body), - ) - } + // First check that while loop is promotable + if let Some(latency) = s.attributes.get(ir::NumAttr::PromoteStatic) { + let approx_size = + Self::approx_size(&s.body) + APPROX_WHILE_REPEAT_SIZE; + // Then check that it fits the heuristics + if approx_size > self.threshold && self.within_cycle_limit(latency) + { + // Turn repeat into static repeat + let sc = self.convert_to_static(&mut s.body, &mut builder); + let static_repeat = ir::StaticControl::repeat( + s.attributes.get(ir::NumAttr::Bound).unwrap_or_else(|| { + unreachable!( + "Unbounded loop has has @promotable attribute" + ) + }), + latency, + Box::new(sc), + ); + return Ok(Action::Change(Box::new(ir::Control::Static( + static_repeat, + )))); + } + // If this takes too many cycles, then we will + // never promote this if statement, meaning we will never promote any + // of its parents. We can therefore safely remove the `@promotable` attribute. + // This isn't strictly necessary, but it is helpful for parent control + // programs applying heuristics. + if !(self.within_cycle_limit(latency)) { + s.attributes.remove(ir::NumAttr::PromoteStatic); } } Ok(Action::Continue) @@ -1171,11 +787,9 @@ impl Visitor for StaticPromotion { _comps: &[ir::Component], ) -> VisResult { let mut builder = ir::Builder::new(comp, sigs); - if Self::can_be_promoted(&s.body) { - // Body can be promoted + if let Some(latency) = s.attributes.get(ir::NumAttr::PromoteStatic) { let approx_size = Self::approx_size(&s.body) + APPROX_WHILE_REPEAT_SIZE; - let latency = Self::get_inferred_latency(&s.body) * s.num_repeats; if approx_size > self.threshold && self.within_cycle_limit(latency) { // Meets size threshold, so turn repeat into static repeat @@ -1188,14 +802,14 @@ impl Visitor for StaticPromotion { return Ok(Action::Change(Box::new(ir::Control::Static( static_repeat, )))); - } else { - // Doesn't meet threshold. - // Attach static_promote attribute since parent control may - // want to promote - s.attributes.insert( - ir::NumAttr::PromoteStatic, - s.num_repeats * Self::get_inferred_latency(&s.body), - ) + } + // If this takes too many cycles, then we will + // never promote this if statement, meaning we will never promote any + // of its parents. We can therefore safely remove the `@promotable` attribute. 
+ // This isn't strictly necessary, but it is helpful for parent control + // programs applying heuristics. + if !(self.within_cycle_limit(latency)) { + s.attributes.remove(ir::NumAttr::PromoteStatic); } } Ok(Action::Continue) } diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index aa7899c04c..e10bfa6d16 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -14,7 +14,7 @@ # Running Calyx Programs -- [`fud`: The Calyx Driver](./running-calyx/fud/index.md) +- [fud: The Calyx Driver](./running-calyx/fud/index.md) - [Examples](./running-calyx/fud/examples.md) - [Xilinx Tools](./running-calyx/fud/xilinx.md) - [AXI Generation](./running-calyx/fud/axi-gen.md) @@ -22,6 +22,7 @@ - [Multiple Paths](./running-calyx/fud/multiple-paths.md) - [CIRCT](./running-calyx/fud/circt.md) - [Resource Estimation](./running-calyx/fud/resource-estimation.md) +- [fud2: Experimental Driver](./running-calyx/fud2.md) - [Interfacing with Calyx RTL](./running-calyx/interfacing.md) - [The Calyx Interpreter](./running-calyx/interpreter.md) diff --git a/docs/running-calyx/fud2.md b/docs/running-calyx/fud2.md new file mode 100644 index 0000000000..d847962a20 --- /dev/null +++ b/docs/running-calyx/fud2.md @@ -0,0 +1,78 @@ +# fud2: An Experimental Successor to fud + +[fud][] is the compiler driver tool for orchestrating the Calyx ecosystem. +fud2 is an experiment in building a new driver that works like fud but adds some fundamental new capabilities and resolves some underlying problems. + +"Original" fud is still the right tool for almost all jobs; fud2 is in an experimental phase and does not support everything fud can do. +Someday, fud2 may supplant fud, but it needs more work before it is ready to do that. +Until then, fud remains your first choice for all your build-related needs. + +[fud]: ./fud/index.md + +## Set Up + +fud2 is a Rust tool, so you can build it along with everything else in this monorepo with `cargo build`. +You might then want to do something like ``ln -s `pwd`/target/debug/fud2 ~/.local/bin`` for easy access to the `fud2` binary. + +fud2 depends on [Ninja][]. +Install it using your OS package manager or by downloading a binary. + +Create a configuration file at `~/.config/fud2.toml`, using the path to your checkout of the Calyx git repository: + +```toml +rsrc = ".../calyx/fud2/rsrc" + +[calyx] +base = ".../calyx" +``` + +Now you're ready to use fud2. + +[ninja]: https://ninja-build.org + +## General Use + +You can see complete command-line documentation with `fud2 --help`. +But generally, you want to do something like this: + + $ fud2 <input> -o <output> + +For example, use this to compile a Calyx program to Verilog: + + $ fud2 foo.futil -o bar.sv + +fud2 tries to automatically guess the input and output formats using filename extensions. +If that doesn't work, you can choose for it with `--from <state>` and `--to <state>`; +for example, this is a more explicit version of the above: + + $ fud2 foo.futil -o bar.sv --from calyx --to verilog + +You can also omit the input and output filenames to instead use stdin and stdout. +In that case, both `--from` and `--to` are required. +So here's yet another way to do the same thing: + + $ fud2 --from calyx --to verilog < foo.futil > bar.sv + +This is handy if you just want to print the result of a build to the console: + + $ fud2 foo.futil --to verilog + +Some operations use other configuration options, which can come from either your `fud2.toml` or the command line. +Use `--set key=value` to override any such option.
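+
+For example, supposing one of your operations reads a configuration key named `sim.flags` (a hypothetical key, used here purely for illustration), you could override it for a single build like this:
+
+    $ fud2 foo.futil -o bar.sv --set sim.flags=--trace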
+ +## Advanced Options + +Here are some options you might need: + +* By default, fud2 runs the build in a directory called `.fud2` within the working directory. It automatically deletes this directory when the build is done. + * It can be useful to keep this build directory around for debugging or as a "cache" for future builds. Use `--keep` to prevent fud2 from deleting the build directory. + * You can also tell fud2 to use a different build directory with `--dir`. If you give it an existing directory, it will never be deleted, even without `--keep`. (Only "fresh" build directories are automatically cleaned up.) +* If you don't like the operation path that fud2 selected for your build, you can control it with `--through <op>`. fud2 will search the operation graph for a path that contains that op. You can provide this option multiple times; fud2 will look for paths that contain *all* these operations, in order. +* You can choose one of several modes with `-m <mode>`: + * `run`: Actually execute a build. The default. + * `gen`: Generate the Ninja build file in the build directory, but don't actually run the build. The default `run` mode is therefore approximately like doing `fud2 -m gen && ninja -C .fud2`. + * `emit`: Just print the Ninja build file to stdout. The `gen` mode is therefore approximately `fud2 -m emit > .fud2/build.ninja`. + * `plan`: Print a brief description of the plan, i.e., the sequence of operations that the build would run. + * `dot`: Print a [GraphViz][] depiction of the plan. Try `fud2 -m dot | dot -Tpdf > graph.pdf` and take a look. + +[graphviz]: https://graphviz.org diff --git a/fud2/Cargo.toml b/fud2/Cargo.toml new file mode 100644 index 0000000000..31e4c39a3a --- /dev/null +++ b/fud2/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "fud2" +version.workspace = true +edition.workspace = true + +[dependencies] +fake = { path = "fake" } +anyhow.workspace = true diff --git a/fud2/fake/Cargo.toml b/fud2/fake/Cargo.toml new file mode 100644 index 0000000000..021d7cd286 --- /dev/null +++ b/fud2/fake/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "fake" +version.workspace = true +edition.workspace = true + +[dependencies] +argh.workspace = true +cranelift-entity = "0.103.0" +serde.workspace = true +figment = { version = "0.10.12", features = ["toml"] } +pathdiff = { version = "0.2.1", features = ["camino"] } +camino = "1.1.6" +anyhow.workspace = true +log.workspace = true +env_logger.workspace = true diff --git a/fud2/fake/src/cli.rs b/fud2/fake/src/cli.rs new file mode 100644 index 0000000000..1af8660f1c --- /dev/null +++ b/fud2/fake/src/cli.rs @@ -0,0 +1,239 @@ +use crate::config; +use crate::driver::{Driver, Request, StateRef}; +use crate::run::Run; +use anyhow::{anyhow, bail}; +use argh::FromArgs; +use camino::{Utf8Path, Utf8PathBuf}; +use std::fmt::Display; +use std::str::FromStr; + +enum Mode { + EmitNinja, + ShowPlan, + ShowDot, + Generate, + Run, +} + +impl FromStr for Mode { + type Err = String; + + fn from_str(s: &str) -> std::result::Result<Self, Self::Err> { + match s { + "emit" => Ok(Mode::EmitNinja), + "plan" => Ok(Mode::ShowPlan), + "gen" => Ok(Mode::Generate), + "run" => Ok(Mode::Run), + "dot" => Ok(Mode::ShowDot), + _ => Err("unknown mode".to_string()), + } + } +} + +impl Display for Mode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Mode::EmitNinja => write!(f, "emit"), + Mode::ShowPlan => write!(f, "plan"), + Mode::Generate => write!(f, "gen"), + Mode::Run => write!(f, "run"), + Mode::ShowDot => write!(f, "dot"), + } + } +} + +/// edit the
configuration file +#[derive(FromArgs, PartialEq, Debug)] +#[argh(subcommand, name = "edit-config")] +pub struct EditConfig { + /// the editor to use + #[argh(option, short = 'e')] + pub editor: Option, +} + +/// supported subcommands +#[derive(FromArgs, PartialEq, Debug)] +#[argh(subcommand)] +pub enum Subcommand { + /// edit the configuration file + EditConfig(EditConfig), +} + +#[derive(FromArgs)] +/// A generic compiler driver. +struct FakeArgs { + #[argh(subcommand)] + pub sub: Option, + + /// the input file + #[argh(positional)] + input: Option, + + /// the output file + #[argh(option, short = 'o')] + output: Option, + + /// the state to start from + #[argh(option)] + from: Option, + + /// the state to produce + #[argh(option)] + to: Option, + + /// execution mode (run, plan, emit, gen, dot) + #[argh(option, short = 'm', default = "Mode::Run")] + mode: Mode, + + /// working directory for the build + #[argh(option)] + dir: Option, + + /// in run mode, keep the temporary directory + #[argh(switch)] + keep: Option, + + /// set a configuration variable (key=value) + #[argh(option, short = 's')] + set: Vec, + + /// route the conversion through a specific operation + #[argh(option)] + through: Vec, + + /// verbose ouput + #[argh(switch, short = 'v')] + verbose: Option, + + /// log level for debugging fud internal + #[argh(option, long = "log", default = "log::LevelFilter::Warn")] + pub log_level: log::LevelFilter, +} + +fn from_state(driver: &Driver, args: &FakeArgs) -> anyhow::Result { + match &args.from { + Some(name) => driver + .get_state(name) + .ok_or(anyhow!("unknown --from state")), + None => match args.input { + Some(ref input) => driver + .guess_state(input) + .ok_or(anyhow!("could not infer input state")), + None => bail!("specify an input file or use --from"), + }, + } +} + +fn to_state(driver: &Driver, args: &FakeArgs) -> anyhow::Result { + match &args.to { + Some(name) => { + driver.get_state(name).ok_or(anyhow!("unknown --to state")) + } + None => match &args.output { + Some(out) => driver + .guess_state(out) + .ok_or(anyhow!("could not infer output state")), + None => Err(anyhow!("specify an output file or use --to")), + }, + } +} + +fn get_request(driver: &Driver, args: &FakeArgs) -> anyhow::Result { + // The default working directory (if not specified) depends on the mode. + let default_workdir = driver.default_workdir(); + let workdir = args.dir.as_deref().unwrap_or_else(|| match args.mode { + Mode::Generate | Mode::Run => default_workdir.as_ref(), + _ => Utf8Path::new("."), + }); + + // Find all the operations to route through. + let through: Result, _> = args + .through + .iter() + .map(|s| { + driver + .get_op(s) + .ok_or(anyhow!("unknown --through op {}", s)) + }) + .collect(); + + Ok(Request { + start_file: args.input.clone(), + start_state: from_state(driver, args)?, + end_file: args.output.clone(), + end_state: to_state(driver, args)?, + through: through?, + workdir: workdir.into(), + }) +} + +pub fn cli(driver: &Driver) -> anyhow::Result<()> { + let args: FakeArgs = argh::from_env(); + + // enable tracing + env_logger::Builder::new() + .format_timestamp(None) + .filter_level(args.log_level) + .target(env_logger::Target::Stderr) + .init(); + + // edit the configuration file + if let Some(Subcommand::EditConfig(EditConfig { editor })) = args.sub { + let editor = + if let Some(e) = editor.or_else(|| std::env::var("EDITOR").ok()) { + e + } else { + bail!("$EDITOR not specified. 
Use -e") + }; + let config_path = config::config_path(&driver.name); + log::info!("Editing config at {}", config_path.display()); + let status = std::process::Command::new(editor) + .arg(config_path) + .status() + .expect("failed to execute editor"); + if !status.success() { + bail!("editor exited with status {}", status); + } + return Ok(()); + } + + // Make a plan. + let req = get_request(driver, &args)?; + let workdir = req.workdir.clone(); + let plan = driver.plan(req).ok_or(anyhow!("could not find path"))?; + + // Configure. + let mut run = Run::new(driver, plan); + + // Override some global config options. + if let Some(keep) = args.keep { + run.global_config.keep_build_dir = keep; + } + if let Some(verbose) = args.verbose { + run.global_config.verbose = verbose; + } + + // Use `--set` arguments to override configuration values. + for set in args.set { + let mut parts = set.splitn(2, '='); + let key = parts.next().unwrap(); + let value = parts + .next() + .ok_or(anyhow!("--set arguments must be in key=value form"))?; + let dict = figment::util::nest(key, value.into()); + run.config_data = run + .config_data + .merge(figment::providers::Serialized::defaults(dict)); + } + + // Execute. + match args.mode { + Mode::ShowPlan => run.show(), + Mode::ShowDot => run.show_dot(), + Mode::EmitNinja => run.emit_to_stdout()?, + Mode::Generate => run.emit_to_dir(&workdir)?, + Mode::Run => run.emit_and_run(&workdir)?, + } + + Ok(()) +} diff --git a/fud2/fake/src/config.rs b/fud2/fake/src/config.rs new file mode 100644 index 0000000000..3e90d83ceb --- /dev/null +++ b/fud2/fake/src/config.rs @@ -0,0 +1,49 @@ +use figment::{ + providers::{Format, Serialized, Toml}, + Figment, +}; +use serde::{Deserialize, Serialize}; +use std::{env, path::Path}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct GlobalConfig { + /// The `ninja` command to execute in `run` mode. + pub ninja: String, + + /// Never delete the temporary directory used to execute ninja in `run` mode. + pub keep_build_dir: bool, + + /// Enable verbose output. + pub verbose: bool, +} + +impl Default for GlobalConfig { + fn default() -> Self { + Self { + ninja: "ninja".to_string(), + keep_build_dir: false, + verbose: false, + } + } +} + +/// Location of the configuration file +pub(crate) fn config_path(name: &str) -> std::path::PathBuf { + // The configuration is usually at `~/.config/driver_name.toml`. + let config_base = env::var("XDG_CONFIG_HOME").unwrap_or_else(|_| { + let home = env::var("HOME").expect("$HOME not set"); + home + "/.config" + }); + let config_path = Path::new(&config_base).join(name).with_extension("toml"); + log::info!("Loading config from {}", config_path.display()); + config_path +} + +/// Load configuration data from the standard config file location. +pub(crate) fn load_config(name: &str) -> Figment { + let config_path = config_path(name); + + // Use our defaults, overridden by the TOML config file. + Figment::from(Serialized::defaults(GlobalConfig::default())) + .merge(Toml::file(config_path)) +} diff --git a/fud2/fake/src/driver.rs b/fud2/fake/src/driver.rs new file mode 100644 index 0000000000..ee4945909d --- /dev/null +++ b/fud2/fake/src/driver.rs @@ -0,0 +1,400 @@ +use crate::run; +use camino::{Utf8Path, Utf8PathBuf}; +use cranelift_entity::{entity_impl, PrimaryMap, SecondaryMap}; +use pathdiff::diff_utf8_paths; + +/// A State is a type of file that Operations produce or consume. +pub struct State { + /// The name of the state, for the UI. 
+ pub name: String, + + /// The file extensions that this state can be represented by. + /// + /// The first extension in the list is used when generating a new filename for the state. If + /// the list is empty, this is a "pseudo-state" that doesn't correspond to an actual file. + /// Pseudo-states can only be final outputs; they are appropraite for representing actions that + /// interact directly with the user, for example. + pub extensions: Vec, +} + +/// A reference to a State. +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct StateRef(u32); +entity_impl!(StateRef, "state"); + +/// An Operation transforms files from one State to another. +pub struct Operation { + pub name: String, + pub input: StateRef, + pub output: StateRef, + pub setups: Vec, + pub emit: Box, +} + +/// A reference to an Operation. +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct OpRef(u32); +entity_impl!(OpRef, "op"); + +/// A Setup runs at configuration time and produces Ninja machinery for Operations. +pub struct Setup { + pub name: String, + pub emit: Box, +} + +/// A reference to a Setup. +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct SetupRef(u32); +entity_impl!(SetupRef, "setup"); + +impl State { + /// Check whether a filename extension indicates this state. + fn ext_matches(&self, ext: &str) -> bool { + self.extensions.iter().any(|e| e == ext) + } + + /// Is this a "pseudo-state": doesn't correspond to an actual file, and must be an output state? + fn is_pseudo(&self) -> bool { + self.extensions.is_empty() + } +} + +/// Get a version of `path` that works when the working directory is `base`. This is +/// opportunistically a relative path, but we can always fall back to an absolute path to make sure +/// the path still works. +pub fn relative_path(path: &Utf8Path, base: &Utf8Path) -> Utf8PathBuf { + match diff_utf8_paths(path, base) { + Some(p) => p, + None => path + .canonicalize_utf8() + .expect("could not get absolute path"), + } +} + +#[derive(PartialEq)] +enum Destination { + State(StateRef), + Op(OpRef), +} + +/// A Driver encapsulates a set of States and the Operations that can transform between them. It +/// contains all the machinery to perform builds in a given ecosystem. +pub struct Driver { + pub name: String, + pub setups: PrimaryMap, + pub states: PrimaryMap, + pub ops: PrimaryMap, +} + +impl Driver { + /// Find a chain of Operations from the `start` state to the `end`, which may be a state or the + /// final operation in the chain. + fn find_path_segment( + &self, + start: StateRef, + end: Destination, + ) -> Option> { + // Our start state is the input. + let mut visited = SecondaryMap::::new(); + visited[start] = true; + + // Build the incoming edges for each vertex. + let mut breadcrumbs = SecondaryMap::>::new(); + + // Breadth-first search. + let mut state_queue: Vec = vec![start]; + while !state_queue.is_empty() { + let cur_state = state_queue.remove(0); + + // Finish when we reach the goal vertex. + if end == Destination::State(cur_state) { + break; + } + + // Traverse any edge from the current state to an unvisited state. + for (op_ref, op) in self.ops.iter() { + if op.input == cur_state && !visited[op.output] { + state_queue.push(op.output); + visited[op.output] = true; + breadcrumbs[op.output] = Some(op_ref); + } + + // Finish when we reach the goal edge. + if end == Destination::Op(op_ref) { + break; + } + } + } + + // Traverse the breadcrumbs backward to build up the path back from output to input. 
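+        // If a state on the way back has no recorded incoming op, the goal was never
+        // reached by the search and there is no path.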
+ let mut op_path: Vec = vec![]; + let mut cur_state = match end { + Destination::State(state) => state, + Destination::Op(op) => { + op_path.push(op); + self.ops[op].input + } + }; + while cur_state != start { + match breadcrumbs[cur_state] { + Some(op) => { + op_path.push(op); + cur_state = self.ops[op].input; + } + None => return None, + } + } + op_path.reverse(); + + Some(op_path) + } + + /// Find a chain of operations from the `start` state to the `end` state, passing through each + /// `through` operation in order. + pub fn find_path( + &self, + start: StateRef, + end: StateRef, + through: &[OpRef], + ) -> Option> { + let mut cur_state = start; + let mut op_path: Vec = vec![]; + + // Build path segments through each through required operation. + for op in through { + let segment = + self.find_path_segment(cur_state, Destination::Op(*op))?; + op_path.extend(segment); + cur_state = self.ops[*op].output; + } + + // Build the final path segment to the destination state. + let segment = + self.find_path_segment(cur_state, Destination::State(end))?; + op_path.extend(segment); + + Some(op_path) + } + + /// Generate a filename with an extension appropriate for the given State. + fn gen_name(&self, stem: &str, state: StateRef) -> Utf8PathBuf { + let state = &self.states[state]; + if state.is_pseudo() { + Utf8PathBuf::from(format!("_pseudo_{}", state.name)) + } else { + // TODO avoid collisions in case we reuse extensions... + Utf8PathBuf::from(stem).with_extension(&state.extensions[0]) + } + } + + pub fn plan(&self, req: Request) -> Option { + // Find a path through the states. + let path = + self.find_path(req.start_state, req.end_state, &req.through)?; + + let mut steps: Vec<(OpRef, Utf8PathBuf)> = vec![]; + + // Get the initial input filename and the stem to use to generate all intermediate filenames. + let (stdin, start_file) = match req.start_file { + Some(path) => (false, relative_path(&path, &req.workdir)), + None => (true, "stdin".into()), + }; + let stem = start_file.file_stem().unwrap(); + + // Generate filenames for each step. + steps.extend(path.into_iter().map(|op| { + let filename = self.gen_name(stem, self.ops[op].output); + (op, filename) + })); + + // If we have a specified output filename, use that instead of the generated one. + let stdout = if let Some(end_file) = req.end_file { + // TODO Can we just avoid generating the unused filename in the first place? + let last_step = steps.last_mut().expect("no steps"); + last_step.1 = relative_path(&end_file, &req.workdir); + false + } else { + // Print to stdout if the last state is a real (non-pseudo) state. + !self.states[req.end_state].is_pseudo() + }; + + Some(Plan { + start: start_file, + steps, + workdir: req.workdir, + stdin, + stdout, + }) + } + + pub fn guess_state(&self, path: &Utf8Path) -> Option { + let ext = path.extension()?; + self.states + .iter() + .find(|(_, state_data)| state_data.ext_matches(ext)) + .map(|(state, _)| state) + } + + pub fn get_state(&self, name: &str) -> Option { + self.states + .iter() + .find(|(_, state_data)| state_data.name == name) + .map(|(state, _)| state) + } + + pub fn get_op(&self, name: &str) -> Option { + self.ops + .iter() + .find(|(_, op_data)| op_data.name == name) + .map(|(op, _)| op) + } + + /// The working directory to use when running a build. 
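+    /// This is a hidden directory named after the driver: for example, `.fud2` for a driver named `fud2`.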
+ pub fn default_workdir(&self) -> Utf8PathBuf { + format!(".{}", &self.name).into() + } +} + +pub struct DriverBuilder { + name: String, + setups: PrimaryMap, + states: PrimaryMap, + ops: PrimaryMap, +} + +impl DriverBuilder { + pub fn new(name: &str) -> Self { + Self { + name: name.to_string(), + setups: Default::default(), + states: Default::default(), + ops: Default::default(), + } + } + + pub fn state(&mut self, name: &str, extensions: &[&str]) -> StateRef { + self.states.push(State { + name: name.to_string(), + extensions: extensions.iter().map(|s| s.to_string()).collect(), + }) + } + + fn add_op( + &mut self, + name: &str, + setups: &[SetupRef], + input: StateRef, + output: StateRef, + emit: T, + ) -> OpRef { + self.ops.push(Operation { + name: name.into(), + setups: setups.into(), + input, + output, + emit: Box::new(emit), + }) + } + + pub fn add_setup( + &mut self, + name: &str, + emit: T, + ) -> SetupRef { + self.setups.push(Setup { + name: name.into(), + emit: Box::new(emit), + }) + } + + pub fn setup(&mut self, name: &str, func: run::EmitSetupFn) -> SetupRef { + self.add_setup(name, func) + } + + pub fn op( + &mut self, + name: &str, + setups: &[SetupRef], + input: StateRef, + output: StateRef, + build: run::EmitBuildFn, + ) -> OpRef { + self.add_op(name, setups, input, output, build) + } + + pub fn rule( + &mut self, + setups: &[SetupRef], + input: StateRef, + output: StateRef, + rule_name: &str, + ) -> OpRef { + self.add_op( + rule_name, + setups, + input, + output, + run::EmitRuleBuild { + rule_name: rule_name.to_string(), + }, + ) + } + + pub fn build(self) -> Driver { + Driver { + name: self.name, + setups: self.setups, + states: self.states, + ops: self.ops, + } + } +} + +/// A request to the Driver directing it what to build. +#[derive(Debug)] +pub struct Request { + /// The input format. + pub start_state: StateRef, + + /// The output format to produce. + pub end_state: StateRef, + + /// The filename to read the input from, or None to read from stdin. + pub start_file: Option, + + /// The filename to write the output to, or None to print to stdout. + pub end_file: Option, + + /// A sequence of operators to route the conversion through. + pub through: Vec, + + /// The working directory for the build. + pub workdir: Utf8PathBuf, +} + +#[derive(Debug)] +pub struct Plan { + /// The input to the first step. + pub start: Utf8PathBuf, + + /// The chain of operations to run and each step's output file. + pub steps: Vec<(OpRef, Utf8PathBuf)>, + + /// The directory that the build will happen in. + pub workdir: Utf8PathBuf, + + /// Read the first input from stdin. + pub stdin: bool, + + /// Write the final output to stdout. 
+ pub stdout: bool, +} + +impl Plan { + pub fn end(&self) -> &Utf8Path { + match self.steps.last() { + Some((_, path)) => path, + None => &self.start, + } + } +} diff --git a/fud2/fake/src/lib.rs b/fud2/fake/src/lib.rs new file mode 100644 index 0000000000..2e2faa0a17 --- /dev/null +++ b/fud2/fake/src/lib.rs @@ -0,0 +1,6 @@ +pub mod cli; +pub mod config; +pub mod driver; +pub mod run; + +pub use driver::{Driver, DriverBuilder}; diff --git a/fud2/fake/src/run.rs b/fud2/fake/src/run.rs new file mode 100644 index 0000000000..780c62f53e --- /dev/null +++ b/fud2/fake/src/run.rs @@ -0,0 +1,391 @@ +use crate::config; +use crate::driver::{relative_path, Driver, OpRef, Plan, SetupRef, StateRef}; +use camino::{Utf8Path, Utf8PathBuf}; +use std::collections::{HashMap, HashSet}; +use std::io::Write; +use std::process::Command; + +/// An error that arises while emitting the Ninja file. +#[derive(Debug)] +pub enum EmitError { + Io(std::io::Error), + MissingConfig(String), +} + +impl From for EmitError { + fn from(e: std::io::Error) -> Self { + Self::Io(e) + } +} + +impl std::fmt::Display for EmitError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self { + EmitError::Io(e) => write!(f, "{}", e), + EmitError::MissingConfig(s) => { + write!(f, "missing required config key: {}", s) + } + } + } +} + +impl std::error::Error for EmitError {} + +pub type EmitResult = std::result::Result<(), EmitError>; + +/// Code to emit a Ninja `build` command. +pub trait EmitBuild { + fn build( + &self, + emitter: &mut Emitter, + input: &str, + output: &str, + ) -> EmitResult; +} + +pub type EmitBuildFn = fn(&mut Emitter, &str, &str) -> EmitResult; + +impl EmitBuild for EmitBuildFn { + fn build( + &self, + emitter: &mut Emitter, + input: &str, + output: &str, + ) -> EmitResult { + self(emitter, input, output) + } +} + +// TODO make this unnecessary... +/// A simple `build` emitter that just runs a Ninja rule. +pub struct EmitRuleBuild { + pub rule_name: String, +} + +impl EmitBuild for EmitRuleBuild { + fn build( + &self, + emitter: &mut Emitter, + input: &str, + output: &str, + ) -> EmitResult { + emitter.build(&self.rule_name, input, output)?; + Ok(()) + } +} + +/// Code to emit Ninja code at the setup stage. +pub trait EmitSetup { + fn setup(&self, emitter: &mut Emitter) -> EmitResult; +} + +pub type EmitSetupFn = fn(&mut Emitter) -> EmitResult; + +impl EmitSetup for EmitSetupFn { + fn setup(&self, emitter: &mut Emitter) -> EmitResult { + self(emitter) + } +} + +pub struct Run<'a> { + pub driver: &'a Driver, + pub plan: Plan, + pub config_data: figment::Figment, + pub global_config: config::GlobalConfig, +} + +impl<'a> Run<'a> { + pub fn new(driver: &'a Driver, plan: Plan) -> Self { + let config_data = config::load_config(&driver.name); + let global_config: config::GlobalConfig = + config_data.extract().expect("failed to load config"); + Self { + driver, + plan, + config_data, + global_config, + } + } + + /// Just print the plan for debugging purposes. + pub fn show(self) { + if self.plan.stdin { + println!("(stdin) -> {}", self.plan.start); + } else { + println!("start: {}", self.plan.start); + } + for (op, file) in self.plan.steps { + println!("{}: {} -> {}", op, self.driver.ops[op].name, file); + } + if self.plan.stdout { + println!("-> (stdout)"); + } + } + + /// Print a GraphViz representation of the plan. 
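+    /// States and ops that appear in the plan are drawn with a heavier outline; planned
+    /// states are also filled and labeled with their associated filenames.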
+ pub fn show_dot(self) { + println!("digraph plan {{"); + println!(" rankdir=LR;"); + println!(" node[shape=box];"); + + // Record the states and ops that are actually used in the plan. + let mut states: HashMap = HashMap::new(); + let mut ops: HashSet = HashSet::new(); + let first_op = self.plan.steps[0].0; + states.insert( + self.driver.ops[first_op].input, + self.plan.start.to_string(), + ); + for (op, file) in &self.plan.steps { + states.insert(self.driver.ops[*op].output, file.to_string()); + ops.insert(*op); + } + + // Show all states. + for (state_ref, state) in self.driver.states.iter() { + print!(" {} [", state_ref); + if let Some(filename) = states.get(&state_ref) { + print!( + "label=\"{}\n{}\" penwidth=3 fillcolor=gray style=filled", + state.name, filename + ); + } else { + print!("label=\"{}\"", state.name); + } + println!("];"); + } + + // Show all operations. + for (op_ref, op) in self.driver.ops.iter() { + print!(" {} -> {} [label=\"{}\"", op.input, op.output, op.name); + if ops.contains(&op_ref) { + print!(" penwidth=3"); + } + println!("];"); + } + + println!("}}"); + } + + /// Print the `build.ninja` file to stdout. + pub fn emit_to_stdout(&self) -> EmitResult { + self.emit(std::io::stdout()) + } + + /// Ensure that a directory exists and write `build.ninja` inside it. + pub fn emit_to_dir(&self, dir: &Utf8Path) -> EmitResult { + std::fs::create_dir_all(dir)?; + let ninja_path = dir.join("build.ninja"); + let ninja_file = std::fs::File::create(ninja_path)?; + + self.emit(ninja_file) + } + + /// Emit `build.ninja` to a temporary directory and then actually execute ninja. + pub fn emit_and_run(&self, dir: &Utf8Path) -> EmitResult { + // Emit the Ninja file. + let stale_dir = dir.exists(); + self.emit_to_dir(dir)?; + + // Capture stdin. + if self.plan.stdin { + let stdin_file = std::fs::File::create( + self.plan.workdir.join(&self.plan.start), + )?; + std::io::copy( + &mut std::io::stdin(), + &mut std::io::BufWriter::new(stdin_file), + )?; + } + + // Run `ninja` in the working directory. + let mut cmd = Command::new(&self.global_config.ninja); + cmd.current_dir(dir); + if self.plan.stdout && !self.global_config.verbose { + // When we're printing to stdout, suppress Ninja's output by default. + cmd.stdout(std::process::Stdio::null()); + } + cmd.status()?; + + // Emit stdout. + if self.plan.stdout { + let stdout_file = + std::fs::File::open(self.plan.workdir.join(self.plan.end()))?; + std::io::copy( + &mut std::io::BufReader::new(stdout_file), + &mut std::io::stdout(), + )?; + } + + // Remove the temporary directory unless it already existed at the start *or* the user specified `--keep`. + if !self.global_config.keep_build_dir && !stale_dir { + std::fs::remove_dir_all(dir)?; + } + + Ok(()) + } + + fn emit(&self, out: T) -> EmitResult { + let mut emitter = Emitter::new( + out, + self.config_data.clone(), + self.plan.workdir.clone(), + ); + + // Emit the setup for each operation used in the plan, only once. + let mut done_setups = HashSet::::new(); + for (op, _) in &self.plan.steps { + for setup in &self.driver.ops[*op].setups { + if done_setups.insert(*setup) { + let setup = &self.driver.setups[*setup]; + writeln!(emitter.out, "# {}", setup.name)?; + setup.emit.setup(&mut emitter)?; + writeln!(emitter.out)?; + } + } + } + + // Emit the build commands for each step in the plan. 
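+        // Each step's output file is the next step's input, so thread `last_file` through the loop.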
+ emitter.comment("build targets")?; + let mut last_file = &self.plan.start; + for (op, out_file) in &self.plan.steps { + let op = &self.driver.ops[*op]; + op.emit.build( + &mut emitter, + last_file.as_str(), + out_file.as_str(), + )?; + last_file = out_file; + } + writeln!(emitter.out)?; + + // Mark the last file as the default target. + writeln!(emitter.out, "default {}", last_file)?; + + Ok(()) + } +} + +pub struct Emitter { + pub out: Box, + pub config_data: figment::Figment, + pub workdir: Utf8PathBuf, +} + +impl Emitter { + fn new( + out: T, + config_data: figment::Figment, + workdir: Utf8PathBuf, + ) -> Self { + Self { + out: Box::new(out), + config_data, + workdir, + } + } + + /// Fetch a configuration value, or panic if it's missing. + pub fn config_val(&self, key: &str) -> Result { + self.config_data + .extract_inner::(key) + .map_err(|_| EmitError::MissingConfig(key.to_string())) + } + + /// Fetch a configuration value, using a default if it's missing. + pub fn config_or(&self, key: &str, default: &str) -> String { + self.config_data + .extract_inner::(key) + .unwrap_or_else(|_| default.into()) + } + + /// Emit a Ninja variable declaration for `name` based on the configured value for `key`. + pub fn config_var(&mut self, name: &str, key: &str) -> EmitResult { + self.var(name, &self.config_val(key)?)?; + Ok(()) + } + + /// Emit a Ninja variable declaration for `name` based on the configured value for `key`, or a + /// default value if it's missing. + pub fn config_var_or( + &mut self, + name: &str, + key: &str, + default: &str, + ) -> std::io::Result<()> { + self.var(name, &self.config_or(key, default)) + } + + /// Emit a Ninja variable declaration. + pub fn var(&mut self, name: &str, value: &str) -> std::io::Result<()> { + writeln!(self.out, "{} = {}", name, value) + } + + /// Emit a Ninja rule definition. + pub fn rule(&mut self, name: &str, command: &str) -> std::io::Result<()> { + writeln!(self.out, "rule {}", name)?; + writeln!(self.out, " command = {}", command) + } + + /// Emit a simple Ninja build command with one dependency. + pub fn build( + &mut self, + rule: &str, + input: &str, + output: &str, + ) -> std::io::Result<()> { + self.build_cmd(&[output], rule, &[input], &[]) + } + + /// Emit a Ninja build command. + pub fn build_cmd( + &mut self, + targets: &[&str], + rule: &str, + deps: &[&str], + implicit_deps: &[&str], + ) -> std::io::Result<()> { + write!(self.out, "build")?; + for target in targets { + write!(self.out, " {}", target)?; + } + write!(self.out, ": {}", rule)?; + for dep in deps { + write!(self.out, " {}", dep)?; + } + if !implicit_deps.is_empty() { + write!(self.out, " |")?; + for dep in implicit_deps { + write!(self.out, " {}", dep)?; + } + } + writeln!(self.out)?; + Ok(()) + } + + /// Emit a Ninja comment. + pub fn comment(&mut self, text: &str) -> std::io::Result<()> { + writeln!(self.out, "# {}", text)?; + Ok(()) + } + + /// Add a file to the build directory. + pub fn add_file(&self, name: &str, contents: &[u8]) -> std::io::Result<()> { + let path = self.workdir.join(name); + std::fs::write(path, contents)?; + Ok(()) + } + + /// Get a path to an external file. The input `path` may be relative to our original + /// invocation; we make it relative to the build directory so it can safely be used in the + /// Ninja file. + pub fn external_path(&self, path: &Utf8Path) -> Utf8PathBuf { + relative_path(path, &self.workdir) + } + + /// Add a variable parameter to a rule or build command. 
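+    /// Ninja scopes the indented `name = value` binding to the `rule` or `build` declaration
+    /// emitted immediately before it.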
+ pub fn arg(&mut self, name: &str, value: &str) -> std::io::Result<()> { + writeln!(self.out, " {} = {}", name, value)?; + Ok(()) + } +} diff --git a/fud2/rsrc/gen_xo.tcl b/fud2/rsrc/gen_xo.tcl new file mode 100644 index 0000000000..dbe328b6d8 --- /dev/null +++ b/fud2/rsrc/gen_xo.tcl @@ -0,0 +1,46 @@ +if { $::argc < 1 } { + #puts "ERROR: Program \"$::argv0\" requires 1 argument!\n" + puts "ERROR: Executable name unspecified\n" + puts "Usage: $::argv0 $::argv \n" + exit +} + +# Define a process that pops an element off of the list +proc lvarpop {upVar {index 0}} { + upvar $upVar list; + if {![info exists list]} { return "-1" } + set top [lindex $list $index]; + set list [concat [lrange $list 0 [expr $index - 1]] [lrange $list [expr $index +1] end]] + return $top; +} + +set xoname [lindex $::argv 0] +set path_to_packaged "./packaged_kernel" + +# Make a temporary Vivado project. +create_project -force kernel_pack "./tmp_kernel_pack" + +# Add all Verilog files in the current working directory. +add_files -norecurse [glob *.v *.sv] + +# I don't really understand any of this. +ipx::package_project -root_dir $path_to_packaged -vendor capra.cs.cornell.edu -library RTLKernel -taxonomy /KernelIP -import_files -set_current false +ipx::unload_core $path_to_packaged/component.xml +ipx::edit_ip_in_project -upgrade true -name tmp_edit_project -directory $path_to_packaged $path_to_packaged/component.xml +set_property sdx_kernel true [ipx::current_core] +set_property sdx_kernel_type rtl [ipx::current_core] + +# Declare bus interfaces. +ipx::associate_bus_interfaces -busif s_axi_control -clock ap_clk [ipx::current_core] +lvarpop argv +foreach busname $argv { + ipx::associate_bus_interfaces -busif $busname -clock ap_clk [ipx::current_core] +} + +# Close & save the temporary project. +ipx::update_checksums [ipx::current_core] +ipx::save_core [ipx::current_core] +close_project -delete + +# Package the project as an .xo file. +package_xo -xo_path ${xoname} -kernel_name Toplevel -ip_directory ${path_to_packaged} -kernel_xml ./kernel.xml diff --git a/fud2/rsrc/get-ports.py b/fud2/rsrc/get-ports.py new file mode 100644 index 0000000000..1302c69c60 --- /dev/null +++ b/fud2/rsrc/get-ports.py @@ -0,0 +1,12 @@ +import xml.etree.ElementTree as ET +import sys + + +def get_ports(kernel_xml): + tree = ET.parse(kernel_xml) + for port in tree.findall(".//port[@mode='master']"): + yield port.attrib["name"] + + +if __name__ == "__main__": + print(' '.join(get_ports(sys.argv[1]))) diff --git a/fud2/rsrc/interp-dat.py b/fud2/rsrc/interp-dat.py new file mode 100644 index 0000000000..c783a08a55 --- /dev/null +++ b/fud2/rsrc/interp-dat.py @@ -0,0 +1,43 @@ +import simplejson +import sys +import pathlib +from fud.stages.interpreter import convert_to_json, parse_from_json + + +def data2interp(in_file): + """Convert a fud-style JSON data file to Cider-ready JSON. + + The output file is hard-coded to be `data.json`. + """ + round_float_to_fixed = True + with open(in_file) as f: + convert_to_json( + '.', + simplejson.load(f, use_decimal=True), + round_float_to_fixed, + ) + + +def interp2data(in_file, orig_file): + """Convert the Cider's output JSON to fud-style JSON. + + Print the result to stdout. 
+ """ + with open(in_file) as f: + out = parse_from_json(f, pathlib.Path(orig_file)) + simplejson.dump( + out, + sys.stdout, + indent=2, + sort_keys=True, + use_decimal=True, + ) + + +if __name__ == "__main__": + if sys.argv[1] == '--to-interp': + data2interp(*sys.argv[2:]) + elif sys.argv[1] == '--from-interp': + interp2data(*sys.argv[2:]) + else: + print("specify --to-interp or --from-interp", file=sys.stderr) diff --git a/fud2/rsrc/json-dat.py b/fud2/rsrc/json-dat.py new file mode 100644 index 0000000000..b6e6d7d37f --- /dev/null +++ b/fud2/rsrc/json-dat.py @@ -0,0 +1,54 @@ +"""Convert between fud-style JSON and hex data files. + +Use the machinery from "old fud" to convert a JSON data file into a +directory of flat hex-encoded files, suitable for loading into a +hardware simulator, and back again. +""" +from fud.stages.verilator.json_to_dat import convert2dat, convert2json +import simplejson +import sys +import os +import re + + +def json2dat(in_file, out_dir): + os.makedirs(out_dir, exist_ok=True) + round_float_to_fixed = True + with open(in_file) as json: + convert2dat( + out_dir, + simplejson.load(json, use_decimal=True), + "dat", + round_float_to_fixed, + ) + + +def dat2json(out_file, in_dir, sim_log=None): + mem = convert2json(in_dir, "out") + + if sim_log: + cycles = 0 + with open(sim_log) as f: + for line in f: + match = re.search(r"Simulated\s+((-)?\d+) cycles", line) + if match: + cycles = int(match.group(1)) + break + out = { + "cycles": cycles, + "memories": mem, + } + else: + out = mem + + with open(out_file, 'w') as f: + simplejson.dump(out, f, indent=2, sort_keys=True, use_decimal=True) + + +if __name__ == '__main__': + if sys.argv[1] == '--from-json': + json2dat(*sys.argv[2:]) + elif sys.argv[1] == '--to-json': + dat2json(*sys.argv[2:]) + else: + print("specify --from-json or --to-json", file=sys.stderr) diff --git a/fud2/rsrc/primitives-for-firrtl.sv b/fud2/rsrc/primitives-for-firrtl.sv new file mode 100644 index 0000000000..6335c649a1 --- /dev/null +++ b/fud2/rsrc/primitives-for-firrtl.sv @@ -0,0 +1,330 @@ +module std_mem_d1 #( + parameter WIDTH = 32, + parameter SIZE = 16, + parameter IDX_SIZE = 4 +) ( + input wire logic [IDX_SIZE-1:0] addr0, + input wire logic [ WIDTH-1:0] write_data, + input wire logic write_en, + input wire logic clk, + input wire logic reset, + output logic [ WIDTH-1:0] read_data, + output logic done +); + + logic [WIDTH-1:0] mem[SIZE-1:0]; + + initial begin + $readmemh({"sim_data/mem.dat"}, mem); + end + final begin + $writememh({"sim_data/mem.out"}, mem); + end + + /* verilator lint_off WIDTH */ + assign read_data = mem[addr0]; + + always_ff @(posedge clk) begin + if (reset) + done <= '0; + else if (write_en) + done <= '1; + else + done <= '0; + end + + always_ff @(posedge clk) begin + if (!reset && write_en) + mem[addr0] <= write_data; + end + + // Check for out of bounds access + `ifdef VERILATOR + always_comb begin + if (addr0 >= SIZE) + $error( + "std_mem_d1: Out of bounds access\n", + "addr0: %0d\n", addr0, + "SIZE: %0d", SIZE + ); + end + `endif +endmodule + +/** + * Core primitives for Calyx. + * Implements core primitives used by the compiler. + * + * Conventions: + * - All parameter names must be SNAKE_CASE and all caps. + * - Port names must be snake_case, no caps. 
+ */ +`default_nettype none + +module std_slice #( + parameter IN_WIDTH = 32, + parameter OUT_WIDTH = 32 +) ( + input wire logic [ IN_WIDTH-1:0] in, + output logic [OUT_WIDTH-1:0] out +); + assign out = in[OUT_WIDTH-1:0]; + + `ifdef VERILATOR + always_comb begin + if (IN_WIDTH < OUT_WIDTH) + $error( + "std_slice: Input width less than output width\n", + "IN_WIDTH: %0d", IN_WIDTH, + "OUT_WIDTH: %0d", OUT_WIDTH + ); + end + `endif +endmodule + +module std_pad #( + parameter IN_WIDTH = 32, + parameter OUT_WIDTH = 32 +) ( + input wire logic [IN_WIDTH-1:0] in, + output logic [OUT_WIDTH-1:0] out +); + localparam EXTEND = OUT_WIDTH - IN_WIDTH; + assign out = { {EXTEND {1'b0}}, in}; + + `ifdef VERILATOR + always_comb begin + if (IN_WIDTH > OUT_WIDTH) + $error( + "std_pad: Output width less than input width\n", + "IN_WIDTH: %0d", IN_WIDTH, + "OUT_WIDTH: %0d", OUT_WIDTH + ); + end + `endif +endmodule + +module std_cat #( + parameter LEFT_WIDTH = 32, + parameter RIGHT_WIDTH = 32, + parameter OUT_WIDTH = 64 +) ( + input wire logic [LEFT_WIDTH-1:0] left, + input wire logic [RIGHT_WIDTH-1:0] right, + output logic [OUT_WIDTH-1:0] out +); + assign out = {left, right}; + + `ifdef VERILATOR + always_comb begin + if (LEFT_WIDTH + RIGHT_WIDTH != OUT_WIDTH) + $error( + "std_cat: Output width must equal sum of input widths\n", + "LEFT_WIDTH: %0d", LEFT_WIDTH, + "RIGHT_WIDTH: %0d", RIGHT_WIDTH, + "OUT_WIDTH: %0d", OUT_WIDTH + ); + end + `endif +endmodule + +module std_not #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] in, + output logic [WIDTH-1:0] out +); + assign out = ~in; +endmodule + +module std_and #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic [WIDTH-1:0] out +); + assign out = left & right; +endmodule + +module std_or #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic [WIDTH-1:0] out +); + assign out = left | right; +endmodule + +module std_xor #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic [WIDTH-1:0] out +); + assign out = left ^ right; +endmodule + +module std_sub #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic [WIDTH-1:0] out +); + assign out = left - right; +endmodule + +module std_gt #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic out +); + assign out = left > right; +endmodule + +module std_lt #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic out +); + assign out = left < right; +endmodule + +module std_eq #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic out +); + assign out = left == right; +endmodule + +module std_neq #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic out +); + assign out = left != right; +endmodule + +module std_ge #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic out +); + assign out = left >= right; +endmodule + +module std_le #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic out +); + assign out = left <= right; +endmodule + +module std_lsh #( + parameter 
WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic [WIDTH-1:0] out +); + assign out = left << right; +endmodule + +module std_rsh #( + parameter WIDTH = 32 +) ( + input wire logic [WIDTH-1:0] left, + input wire logic [WIDTH-1:0] right, + output logic [WIDTH-1:0] out +); + assign out = left >> right; +endmodule + +/// this primitive is intended to be used +/// for lowering purposes (not in source programs) +module std_mux #( + parameter WIDTH = 32 +) ( + input wire logic cond, + input wire logic [WIDTH-1:0] tru, + input wire logic [WIDTH-1:0] fal, + output logic [WIDTH-1:0] out +); + assign out = cond ? tru : fal; +endmodule + +`default_nettype wire + +module undef #( + parameter WIDTH = 32 +) ( + output logic [WIDTH-1:0] out +); +assign out = 'x; +endmodule + +module std_const #( + parameter WIDTH = 32, + parameter VALUE = 32 +) ( + output logic [WIDTH-1:0] out +); +assign out = VALUE; +endmodule + +module std_wire #( + parameter WIDTH = 32 +) ( + input logic [WIDTH-1:0] in, + output logic [WIDTH-1:0] out +); +assign out = in; +endmodule + +module std_add #( + parameter WIDTH = 32 +) ( + input logic [WIDTH-1:0] left, + input logic [WIDTH-1:0] right, + output logic [WIDTH-1:0] out +); +assign out = left + right; +endmodule + +module std_reg #( + parameter WIDTH = 32 +) ( + input logic [WIDTH-1:0] in, + input logic write_en, + input logic clk, + input logic reset, + output logic [WIDTH-1:0] out, + output logic done +); +always_ff @(posedge clk) begin + if (reset) begin + out <= 0; + done <= 0; + end else if (write_en) begin + out <= in; + done <= 1'd1; + end else done <= 1'd0; + end +endmodule diff --git a/fud2/rsrc/tb.sv b/fud2/rsrc/tb.sv new file mode 100644 index 0000000000..c1631bef6a --- /dev/null +++ b/fud2/rsrc/tb.sv @@ -0,0 +1,77 @@ +module TOP; + +// Signals for the main module. +logic go, done, clk, reset; +main #() main ( + .go(go), + .clk(clk), + .reset(reset), + .done(done) +); + +localparam RESET_CYCLES = 3; + +// Cycle counter. Make this signed to catch errors with cycle simulation +// counts. +logic signed [63:0] cycle_count; + +always_ff @(posedge clk) begin + cycle_count <= cycle_count + 1; +end + +always_ff @(posedge clk) begin + // Reset the design for a few cycles + if (cycle_count < RESET_CYCLES) begin + reset <= 1; + go <= 0; + end else begin + reset <= 0; + go <= 1; + end +end + +// Output location of the VCD file +string OUT; +// Disable VCD tracing +int NOTRACE; +// Maximum number of cycles to simulate +longint CYCLE_LIMIT; +// Dummy variable to track value returned by $value$plusargs +int CODE; + +initial begin + CODE = $value$plusargs("OUT=%s", OUT); + CODE = $value$plusargs("CYCLE_LIMIT=%d", CYCLE_LIMIT); + if (CYCLE_LIMIT != 0) begin + $display("cycle limit set to %d", CYCLE_LIMIT); + end + CODE = $value$plusargs("NOTRACE=%d", NOTRACE); + if (NOTRACE == 0) begin + $display("VCD tracing enabled"); + $dumpfile(OUT); + $dumpvars(0,main); + end else begin + $display("VCD tracing disabled"); + end + + // Initial values + go = 0; + clk = 0; + reset = 1; + cycle_count = 0; + + forever begin + #10 clk = ~clk; + if (cycle_count > RESET_CYCLES && done == 1) begin + // Subtract 1 because the cycle counter is incremented at the end of the + // cycle. 
+ $display("Simulated %d cycles", cycle_count - RESET_CYCLES - 1); + $finish; + end else if (cycle_count != 0 && cycle_count == CYCLE_LIMIT + RESET_CYCLES) begin + $display("reached limit of %d cycles", CYCLE_LIMIT); + $finish; + end + end +end + +endmodule diff --git a/fud2/rsrc/xrt.ini b/fud2/rsrc/xrt.ini new file mode 100644 index 0000000000..2f2b8e589a --- /dev/null +++ b/fud2/rsrc/xrt.ini @@ -0,0 +1,4 @@ +[Runtime] +runtime_log=xrt.log +[Emulation] +print_infos_in_console=true diff --git a/fud2/rsrc/xrt_trace.ini b/fud2/rsrc/xrt_trace.ini new file mode 100644 index 0000000000..21c2738cd1 --- /dev/null +++ b/fud2/rsrc/xrt_trace.ini @@ -0,0 +1,7 @@ +[Runtime] +runtime_log=xrt.log +[Emulation] +print_infos_in_console=true +debug_mode=batch +user_pre_sim_script=pre_sim.tcl +user_post_sim_script=post_sim.tcl diff --git a/fud2/src/main.rs b/fud2/src/main.rs new file mode 100644 index 0000000000..69da9c974e --- /dev/null +++ b/fud2/src/main.rs @@ -0,0 +1,417 @@ +use fake::{ + cli, + run::{EmitResult, Emitter}, + Driver, DriverBuilder, +}; + +fn build_driver() -> Driver { + let mut bld = DriverBuilder::new("fud2"); + + // Calyx. + let calyx = bld.state("calyx", &["futil"]); + let verilog = bld.state("verilog", &["sv", "v"]); + let calyx_setup = bld.setup("Calyx compiler", |e| { + e.config_var("calyx-base", "calyx.base")?; + e.config_var_or( + "calyx-exe", + "calyx.exe", + "$calyx-base/target/debug/calyx", + )?; + e.rule( + "calyx", + "$calyx-exe -l $calyx-base -b $backend $args $in > $out", + )?; + Ok(()) + }); + bld.op( + "calyx-to-verilog", + &[calyx_setup], + calyx, + verilog, + |e, input, output| { + e.build_cmd(&[output], "calyx", &[input], &[])?; + e.arg("backend", "verilog")?; + Ok(()) + }, + ); + + // Dahlia. + let dahlia = bld.state("dahlia", &["fuse"]); + let dahlia_setup = bld.setup("Dahlia compiler", |e| { + e.config_var("dahlia-exe", "dahlia")?; + e.rule( + "dahlia-to-calyx", + "$dahlia-exe -b calyx --lower -l error $in -o $out", + )?; + Ok(()) + }); + bld.rule(&[dahlia_setup], dahlia, calyx, "dahlia-to-calyx"); + + // MrXL. + let mrxl = bld.state("mrxl", &["mrxl"]); + let mrxl_setup = bld.setup("MrXL compiler", |e| { + e.var("mrxl-exe", "mrxl")?; + e.rule("mrxl-to-calyx", "$mrxl-exe $in > $out")?; + Ok(()) + }); + bld.rule(&[mrxl_setup], mrxl, calyx, "mrxl-to-calyx"); + + // Shared machinery for RTL simulators. + let dat = bld.state("dat", &["json"]); + let vcd = bld.state("vcd", &["vcd"]); + let simulator = bld.state("sim", &["exe"]); + let sim_setup = bld.setup("RTL simulation", |e| { + // Data conversion to and from JSON. + e.config_var_or("python", "python", "python3")?; + e.var( + "json_dat", + &format!("$python {}/json-dat.py", e.config_val("rsrc")?), + )?; + e.rule("hex-data", "$json_dat --from-json $in $out")?; + e.rule("json-data", "$json_dat --to-json $out $in")?; + + // The Verilog testbench. + e.var("testbench", &format!("{}/tb.sv", e.config_val("rsrc")?))?; + + // The input data file. `sim.data` is required. + let data_name = e.config_val("sim.data")?; + let data_path = e.external_path(data_name.as_ref()); + e.var("sim_data", data_path.as_str())?; + + // Produce the data directory. + e.var("datadir", "sim_data")?; + e.build("hex-data", "$sim_data", "$datadir")?; + + // Rule for simulation execution. + e.rule( + "sim-run", + "./$bin +DATA=$datadir +CYCLE_LIMIT=$cycle-limit $args > $out", + )?; + + // More shared configuration. 
+ e.config_var_or("cycle-limit", "sim.cycle_limit", "500000000")?; + + Ok(()) + }); + bld.op( + "simulate", + &[sim_setup], + simulator, + dat, + |e, input, output| { + e.build_cmd(&["sim.log"], "sim-run", &[input, "$datadir"], &[])?; + e.arg("bin", input)?; + e.arg("args", "+NOTRACE=1")?; + e.build_cmd(&[output], "json-data", &["$datadir", "sim.log"], &[])?; + Ok(()) + }, + ); + bld.op("trace", &[sim_setup], simulator, vcd, |e, input, output| { + e.build_cmd( + &["sim.log", output], + "sim-run", + &[input, "$datadir"], + &[], + )?; + e.arg("bin", input)?; + e.arg("args", &format!("+NOTRACE=0 +OUT={}", output))?; + Ok(()) + }); + + // Icarus Verilog. + let verilog_noverify = bld.state("verilog-noverify", &["sv"]); + let icarus_setup = bld.setup("Icarus Verilog", |e| { + e.var("iverilog", "iverilog")?; + e.rule("icarus-compile", "$iverilog -g2012 -o $out $testbench $in")?; + Ok(()) + }); + bld.op( + "calyx-noverify", + &[calyx_setup], + calyx, + verilog_noverify, + |e, input, output| { + // Icarus requires a special --disable-verify version of Calyx code. + e.build_cmd(&[output], "calyx", &[input], &[])?; + e.arg("backend", "verilog")?; + e.arg("args", "--disable-verify")?; + Ok(()) + }, + ); + bld.op( + "icarus", + &[sim_setup, icarus_setup], + verilog_noverify, + simulator, + |e, input, output| { + e.build("icarus-compile", input, output)?; + Ok(()) + }, + ); + + // Calyx to FIRRTL. + let firrtl = bld.state("firrtl", &["fir"]); + bld.op( + "calyx-to-firrtl", + &[calyx_setup], + calyx, + firrtl, + |e, input, output| { + e.build_cmd(&[output], "calyx", &[input], &[])?; + e.arg("backend", "firrtl")?; + Ok(()) + }, + ); + + // The FIRRTL compiler. + let firrtl_setup = bld.setup("Firrtl to Verilog compiler", |e| { + e.config_var("firrtl-exe", "firrtl.exe")?; + e.rule("firrtl", "$firrtl-exe -i $in -o $out -X sverilog")?; + + e.var( + "primitives-for-firrtl", + &format!("{}/primitives-for-firrtl.sv", e.config_val("rsrc")?), + )?; + e.rule("add-firrtl-prims", "cat $primitives-for-firrtl $in > $out")?; + + Ok(()) + }); + fn firrtl_compile( + e: &mut Emitter, + input: &str, + output: &str, + ) -> EmitResult { + let tmp_verilog = "partial.sv"; + e.build_cmd(&[tmp_verilog], "firrtl", &[input], &[])?; + e.build_cmd(&[output], "add-firrtl-prims", &[tmp_verilog], &[])?; + Ok(()) + } + bld.op("firrtl", &[firrtl_setup], firrtl, verilog, firrtl_compile); + // This is a bit of a hack, but the Icarus-friendly "noverify" state is identical for this path + // (since FIRRTL compilation doesn't come with verification). + bld.op( + "firrtl-noverify", + &[firrtl_setup], + firrtl, + verilog_noverify, + firrtl_compile, + ); + + // primitive-uses backend + let primitive_uses_json = bld.state("primitive-uses-json", &["json"]); + bld.op( + "primitive-uses", + &[calyx_setup], + calyx, + primitive_uses_json, + |e, input, output| { + e.build_cmd(&[output], "calyx", &[input], &[])?; + e.arg("backend", "primitive-uses")?; + Ok(()) + }, + ); + + // Verilator. 
+ let verilator_setup = bld.setup("Verilator", |e| { + e.config_var_or("verilator", "verilator.exe", "verilator")?; + e.config_var_or("cycle-limit", "sim.cycle_limit", "500000000")?; + e.rule( + "verilator-compile", + "$verilator $in $testbench --trace --binary --top-module TOP -fno-inline -Mdir $out-dir", + )?; + e.rule("cp", "cp $in $out")?; + Ok(()) + }); + bld.op( + "verilator", + &[sim_setup, verilator_setup], + verilog, + simulator, + |e, input, output| { + let out_dir = "verilator-out"; + let sim_bin = format!("{}/VTOP", out_dir); + e.build("verilator-compile", input, &sim_bin)?; + e.arg("out-dir", out_dir)?; + e.build("cp", &sim_bin, output)?; + Ok(()) + }, + ); + + // Interpreter. + let debug = bld.state("debug", &[]); // A pseudo-state. + let cider_setup = bld.setup("Cider interpreter", |e| { + e.config_var_or( + "cider-exe", + "cider.exe", + "$calyx-base/target/debug/cider", + )?; + e.rule( + "cider", + "$cider-exe -l $calyx-base --raw --data data.json $in > $out", + )?; + e.rule( + "cider-debug", + "$cider-exe -l $calyx-base --data data.json $in debug || true", + )?; + e.arg("pool", "console")?; + + // TODO Can we reduce the duplication around `rsrc_dir` and `$python`? + let rsrc_dir = e.config_val("rsrc")?; + e.var("interp-dat", &format!("{}/interp-dat.py", rsrc_dir))?; + e.config_var_or("python", "python", "python3")?; + e.rule("dat-to-interp", "$python $interp-dat --to-interp $in")?; + e.rule( + "interp-to-dat", + "$python $interp-dat --from-interp $in $sim_data > $out", + )?; + e.build_cmd(&["data.json"], "dat-to-interp", &["$sim_data"], &[])?; + Ok(()) + }); + bld.op( + "interp", + &[sim_setup, calyx_setup, cider_setup], + calyx, + dat, + |e, input, output| { + let out_file = "interp_out.json"; + e.build_cmd(&[out_file], "cider", &[input], &["data.json"])?; + e.build_cmd( + &[output], + "interp-to-dat", + &[out_file], + &["$sim_data"], + )?; + Ok(()) + }, + ); + bld.op( + "debug", + &[sim_setup, calyx_setup, cider_setup], + calyx, + debug, + |e, input, output| { + e.build_cmd(&[output], "cider-debug", &[input], &["data.json"])?; + Ok(()) + }, + ); + + // Xilinx compilation. + let xo = bld.state("xo", &["xo"]); + let xclbin = bld.state("xclbin", &["xclbin"]); + let xilinx_setup = bld.setup("Xilinx tools", |e| { + // Locations for Vivado and Vitis installations. + e.config_var("vivado-dir", "xilinx.vivado")?; + e.config_var("vitis-dir", "xilinx.vitis")?; + + // Package a Verilog program as an `.xo` file. + let rsrc_dir = e.config_val("rsrc")?; + e.var("gen-xo-tcl", &format!("{}/gen_xo.tcl", rsrc_dir))?; + e.var("get-ports", &format!("{}/get-ports.py", rsrc_dir))?; + e.config_var_or("python", "python", "python3")?; + e.rule("gen-xo", "$vivado-dir/bin/vivado -mode batch -source $gen-xo-tcl -tclargs $out `$python $get-ports kernel.xml`")?; + e.arg("pool", "console")?; // Lets Ninja stream the tool output "live." + + // Compile an `.xo` file to an `.xclbin` file, which is where the actual EDA work occurs. + e.config_var_or("xilinx-mode", "xilinx.mode", "hw_emu")?; + e.config_var_or("platform", "xilinx.device", "xilinx_u50_gen3x16_xdma_201920_3")?; + e.rule("compile-xclbin", "$vitis-dir/bin/v++ -g -t $xilinx-mode --platform $platform --save-temps --profile.data all:all:all --profile.exec all:all:all -lo $out $in")?; + e.arg("pool", "console")?; + + Ok(()) + }); + bld.op( + "xo", + &[calyx_setup, xilinx_setup], + calyx, + xo, + |e, input, output| { + // Emit the Verilog itself in "synthesis mode." 
+ e.build_cmd(&["main.sv"], "calyx", &[input], &[])?; + e.arg("backend", "verilog")?; + e.arg("args", "--synthesis -p external")?; + + // Extra ingredients for the `.xo` package. + e.build_cmd(&["toplevel.v"], "calyx", &[input], &[])?; + e.arg("backend", "xilinx")?; + e.build_cmd(&["kernel.xml"], "calyx", &[input], &[])?; + e.arg("backend", "xilinx-xml")?; + + // Package the `.xo`. + e.build_cmd( + &[output], + "gen-xo", + &[], + &["main.sv", "toplevel.v", "kernel.xml"], + )?; + Ok(()) + }, + ); + bld.op("xclbin", &[xilinx_setup], xo, xclbin, |e, input, output| { + e.build_cmd(&[output], "compile-xclbin", &[input], &[])?; + Ok(()) + }); + + // Xilinx execution. + // TODO Only does `hw_emu` for now... + let xrt_setup = bld.setup("Xilinx execution via XRT", |e| { + // Generate `emconfig.json`. + e.rule("emconfig", "$vitis-dir/bin/emconfigutil --platform $platform")?; + e.build_cmd(&["emconfig.json"], "emconfig", &[], &[])?; + + // Execute via the `xclrun` tool. + e.config_var("xrt-dir", "xilinx.xrt")?; + e.rule("xclrun", "bash -c 'source $vitis-dir/settings64.sh ; source $xrt-dir/setup.sh ; XRT_INI_PATH=$xrt_ini EMCONFIG_PATH=. XCL_EMULATION_MODE=$xilinx-mode $python -m fud.xclrun --out $out $in'")?; + e.arg("pool", "console")?; + + // "Pre-sim" and "post-sim" scripts for simulation. + e.rule("echo", "echo $contents > $out")?; + e.build_cmd(&["pre_sim.tcl"], "echo", &[""], &[""])?; + e.arg("contents", "open_vcd\\\\nlog_vcd *\\\\n")?; + e.build_cmd(&["post_sim.tcl"], "echo", &[""], &[""])?; + e.arg("contents", "close_vcd\\\\n")?; + + Ok(()) + }); + bld.op( + "xrt", + &[xilinx_setup, sim_setup, xrt_setup], + xclbin, + dat, + |e, input, output| { + e.build_cmd( + &[output], + "xclrun", + &[input, "$sim_data"], + &["emconfig.json"], + )?; + let rsrc_dir = e.config_val("rsrc")?; + e.arg("xrt_ini", &format!("{}/xrt.ini", rsrc_dir))?; + Ok(()) + }, + ); + bld.op( + "xrt-trace", + &[xilinx_setup, sim_setup, xrt_setup], + xclbin, + vcd, + |e, input, output| { + e.build_cmd( + &[output], // TODO not the VCD, yet... 
+ "xclrun", + &[input, "$sim_data"], + &["emconfig.json", "pre_sim.tcl", "post_sim.tcl"], + )?; + let rsrc_dir = e.config_val("rsrc")?; + e.arg("xrt_ini", &format!("{}/xrt_trace.ini", rsrc_dir))?; + Ok(()) + }, + ); + + bld.build() +} + +fn main() -> anyhow::Result<()> { + let driver = build_driver(); + + cli::cli(&driver) +} diff --git a/src/cmdline.rs b/src/cmdline.rs index 93d9f5b012..f8346aa4a6 100644 --- a/src/cmdline.rs +++ b/src/cmdline.rs @@ -68,6 +68,11 @@ pub struct Opts { #[argh(switch, long = "nested")] pub nested_assign: bool, + /// emit extmodules to use with SystemVerilog implementations + /// of primitives (only relevant to the FIRRTL backend) + #[argh(switch, long = "emit-primitive-extmodules")] + pub emit_primitive_extmodules: bool, + /// select a backend #[argh(option, short = 'b', default = "BackendOpt::default()")] pub backend: BackendOpt, diff --git a/src/main.rs b/src/main.rs index 399bc99097..f0e4eb41d4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -68,6 +68,7 @@ fn main() -> CalyxResult<()> { synthesis_mode: opts.enable_synthesis, enable_verification: !opts.disable_verify, flat_assign: !opts.nested_assign, + emit_primitive_extmodules: opts.emit_primitive_extmodules, }; // Extra options for the passes ctx.extra_opts = opts.extra_opts.drain(..).collect(); diff --git a/tests/backend/firrtl/basic-cell.futil b/tests/backend/firrtl/basic-cell.futil index 1359c6772f..09effbe5c1 100644 --- a/tests/backend/firrtl/basic-cell.futil +++ b/tests/backend/firrtl/basic-cell.futil @@ -1,4 +1,4 @@ -// -b firrtl +// -b firrtl --emit-primitive-extmodules import "primitives/core.futil"; component identity(in : 32) -> (out : 32) { cells {} diff --git a/tests/backend/firrtl/primitive-cells.futil b/tests/backend/firrtl/primitive-cells.futil index 8bf993496b..b8ce901869 100644 --- a/tests/backend/firrtl/primitive-cells.futil +++ b/tests/backend/firrtl/primitive-cells.futil @@ -1,4 +1,4 @@ -// -b firrtl +// -b firrtl --emit-primitive-extmodules import "primitives/core.futil"; component plus_one(in : 32) -> (out : 32) { cells { diff --git a/tests/correctness/static-control/fixup-necessary.expect b/tests/correctness/static-control/fixup-necessary.expect new file mode 100644 index 0000000000..b2a457b3f2 --- /dev/null +++ b/tests/correctness/static-control/fixup-necessary.expect @@ -0,0 +1,8 @@ +{ + "cycles": 6, + "memories": { + "m": [ + 1 + ] + } +} diff --git a/tests/correctness/static-control/fixup-necessary.futil b/tests/correctness/static-control/fixup-necessary.futil new file mode 100644 index 0000000000..6cc087e7cd --- /dev/null +++ b/tests/correctness/static-control/fixup-necessary.futil @@ -0,0 +1,63 @@ +import "primitives/core.futil"; + +component foo(in: 1) -> (out: 32) { + cells { + r1 = std_reg(32); + adder = std_add(32); + } + wires { + out = r1.out; + group upd1 { + adder.left = 32'd1; + adder.right = r1.out; + r1.write_en = 1'd1; + r1.in = adder.out; + upd1[done] = r1.done; + } + } + control { + seq { + if in { + seq { + upd1; + upd1; + upd1; + upd1; + upd1; + upd1; + upd1; + } + } + else{ + seq { + upd1; + } + } + } + } +} + + +component main() -> () { + cells { + foo_inst = foo(); + @external m = std_mem_d1(32, 1, 1); + } + + wires { + group M { + m.write_data = foo_inst.out; + m.addr0 = 1'd0; + m.write_en = 1'd1; + M[done] = m.done; + } + } + + control { + seq { + invoke foo_inst(in = 1'd0)(); + M; + } + } +} + diff --git a/tests/correctness/static-control/fixup-necessary.futil.data b/tests/correctness/static-control/fixup-necessary.futil.data new file mode 100644 index 
0000000000..e2e2501837 --- /dev/null +++ b/tests/correctness/static-control/fixup-necessary.futil.data @@ -0,0 +1,12 @@ +{ + "m": { + "data": [ + 0 + ], + "format": { + "numeric_type": "bitnum", + "is_signed": false, + "width": 32 + } + } +} \ No newline at end of file diff --git a/tests/errors/insufficient-params.expect b/tests/errors/insufficient-params.expect index 09ebb3bff7..96605cfd2b 100644 --- a/tests/errors/insufficient-params.expect +++ b/tests/errors/insufficient-params.expect @@ -1,5 +1,6 @@ ---CODE--- -101 +1 ---STDERR--- -thread 'main' panicked at 'Failed to add primitive.: Malformed Structure: Invalid parameter binding for primitive `std_fp_div_pipe`. Requires 3 parameters but provided with 1.', calyx-ir/src/builder.rs:224:14 -note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace +Error: tests/errors/insufficient-params.futil +5 | d = std_fp_div_pipe(32); + | ^^^^^^^^^^^^^^^^^^^^^^^ Malformed Structure: primitive `std_fp_div_pipe` requires 3 parameters but instantiation provides 1 parameters diff --git a/tests/passes/cell-share/empty-invoke.expect b/tests/passes/cell-share/empty-invoke.expect index 98918ce5a4..44dde2c37d 100644 --- a/tests/passes/cell-share/empty-invoke.expect +++ b/tests/passes/cell-share/empty-invoke.expect @@ -12,7 +12,7 @@ component write_one<"state_share"=1>(@go go: 1, @clk clk: 1, @reset reset: 1) -> out = x.out; } control { - @promote_static invoke0; + invoke0; } } component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { @@ -50,9 +50,9 @@ component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { seq { invoke0; invoke1; - @promote_static invoke2; + invoke2; invoke3; - @promote_static invoke4; + invoke4; } } } diff --git a/tests/passes/cell-share/inline.expect b/tests/passes/cell-share/inline.expect index d9269453e3..b991d54e22 100644 --- a/tests/passes/cell-share/inline.expect +++ b/tests/passes/cell-share/inline.expect @@ -12,7 +12,7 @@ component my_reg<"state_share"=1>(@data in: 32, @go go: 1, @clk clk: 1, @reset r out = r.out; } control { - @promote_static invoke0; + invoke0; } } component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { diff --git a/tests/passes/static-promotion/component.expect b/tests/passes/static-inference-promotion/component.expect similarity index 100% rename from tests/passes/static-promotion/component.expect rename to tests/passes/static-inference-promotion/component.expect diff --git a/tests/passes/static-inference-promotion/component.futil b/tests/passes/static-inference-promotion/component.futil new file mode 100644 index 0000000000..4ff3969739 --- /dev/null +++ b/tests/passes/static-inference-promotion/component.futil @@ -0,0 +1,35 @@ +// -p well-formed -p static-inference -p static-promotion -p dead-group-removal + +import "primitives/core.futil"; + +component main() -> () { + cells { + a = std_reg(2); + b = std_reg(2); + c = std_reg(2); + } + + wires { + group A { + a.in = 2'd0; + a.write_en = 1'b1; + A[done] = a.done; + } + + group B { + b.in = 2'd1; + b.write_en = 1'b1; + B[done] = b.done; + } + + group C { + c.in = 2'd2; + c.write_en = 1'b1; + C[done] = c.done; + } + } + + control { + seq { A; B; C; } + } +} diff --git a/tests/passes/static-promotion/groups.expect b/tests/passes/static-inference-promotion/groups.expect similarity index 93% rename from tests/passes/static-promotion/groups.expect rename to tests/passes/static-inference-promotion/groups.expect index c46b3770e1..9c9fda81fa 100644 --- 
a/tests/passes/static-promotion/groups.expect +++ b/tests/passes/static-inference-promotion/groups.expect @@ -40,9 +40,9 @@ component main(go: 1, clk: 1, @go go0: 1, @clk clk0: 1, @reset reset: 1) -> (don mem_wrt_to_done0; } mult_wrts_to_done; - @promote_static one_cycle; + one_cycle; mult_wrts_to_done; - @promote_static(2) two_cycles; + two_cycles; } } } diff --git a/tests/passes/static-inference-promotion/groups.futil b/tests/passes/static-inference-promotion/groups.futil new file mode 100644 index 0000000000..8d3b35ae5f --- /dev/null +++ b/tests/passes/static-inference-promotion/groups.futil @@ -0,0 +1,43 @@ +// -p well-formed -p static-inference -p static-promotion -p dead-group-removal +import "primitives/core.futil"; + +component main(go: 1, clk: 1) -> (done: 1) { + cells { + r0 = std_reg(1); + r1 = std_reg(1); + m0 = std_mem_d1(32, 1, 1); + } + wires { + group one_cycle { + r0.write_en = 1'd1; + one_cycle[done] = r0.done; + } + group two_cycles { + r0.write_en = 1'd1; + r1.write_en = r0.done; + two_cycles[done] = r1.done; + } + group mem_wrt_to_done { + m0.addr0 = 1'd0; + m0.write_data = 32'd5; + m0.write_en = 1'd1; + mem_wrt_to_done[done] = m0.done; + } + group mult_wrts_to_done { + r0.write_en = 1'd1; + mult_wrts_to_done[done] = r0.done ? 1'd1; + } + } + + control { + seq { + one_cycle; + two_cycles; + mem_wrt_to_done; + mult_wrts_to_done; + one_cycle; + mult_wrts_to_done; + two_cycles; + } + } +} diff --git a/tests/passes/static-promotion/if-diff.expect b/tests/passes/static-inference-promotion/if-diff.expect similarity index 92% rename from tests/passes/static-promotion/if-diff.expect rename to tests/passes/static-inference-promotion/if-diff.expect index c87729aa28..0b3fc0a182 100644 --- a/tests/passes/static-promotion/if-diff.expect +++ b/tests/passes/static-inference-promotion/if-diff.expect @@ -16,7 +16,7 @@ component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { } } control { - @promote_static(5) if cond.out { + if cond.out { @compactable static<5> seq { A0; A0; diff --git a/tests/passes/static-promotion/if-diff.futil b/tests/passes/static-inference-promotion/if-diff.futil similarity index 75% rename from tests/passes/static-promotion/if-diff.futil rename to tests/passes/static-inference-promotion/if-diff.futil index c90174948e..1144bfab63 100644 --- a/tests/passes/static-promotion/if-diff.futil +++ b/tests/passes/static-inference-promotion/if-diff.futil @@ -1,4 +1,4 @@ -// -p well-formed -p static-promotion -x static-promotion:if-diff-limit=3 +// -p well-formed -p static-inference -p static-promotion -x static-promotion:if-diff-limit=3 import "primitives/core.futil"; diff --git a/tests/passes/static-promotion/if-no-else.expect b/tests/passes/static-inference-promotion/if-no-else.expect similarity index 100% rename from tests/passes/static-promotion/if-no-else.expect rename to tests/passes/static-inference-promotion/if-no-else.expect diff --git a/tests/passes/static-inference-promotion/if-no-else.futil b/tests/passes/static-inference-promotion/if-no-else.futil new file mode 100644 index 0000000000..618a079474 --- /dev/null +++ b/tests/passes/static-inference-promotion/if-no-else.futil @@ -0,0 +1,24 @@ +// -p well-formed -p static-inference -p static-promotion -p dead-group-removal + +import "primitives/core.futil"; + +component main() -> () { + cells { + a = std_reg(2); + cond = std_reg(1); + } + + wires { + group A { + a.in = 2'd0; + a.write_en = 1'b1; + A[done] = a.done; + } + } + + control { + if cond.out { + A; + } + } +} diff --git 
a/tests/passes/static-promotion/invoke.expect b/tests/passes/static-inference-promotion/invoke.expect similarity index 90% rename from tests/passes/static-promotion/invoke.expect rename to tests/passes/static-inference-promotion/invoke.expect index e437c4e43c..e9c2e3ec80 100644 --- a/tests/passes/static-promotion/invoke.expect +++ b/tests/passes/static-inference-promotion/invoke.expect @@ -20,7 +20,7 @@ component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { } } } -static<2> component exponent<"promoted"=1>(base: 32, exp: 32, @go go: 1, @clk clk: 1, @reset reset: 1) -> (out: 32, @done done: 1) { +static<2> component exponent<"promoted"=1>(base: 32, exp: 32, @go @static(2) go: 1, @clk clk: 1, @reset reset: 1) -> (out: 32, @done done: 1) { cells { r1 = std_reg(32); r2 = std_reg(32); diff --git a/tests/passes/static-inference-promotion/invoke.futil b/tests/passes/static-inference-promotion/invoke.futil new file mode 100644 index 0000000000..0d03e4399c --- /dev/null +++ b/tests/passes/static-inference-promotion/invoke.futil @@ -0,0 +1,52 @@ +// -p well-formed -p static-inference -p static-promotion -p dead-group-removal +import "primitives/core.futil"; + +/** +* Tests the infer-static-timing pass. `exponent` is intentionally placed +* after main to test post-order iteration of components. +*/ +component main() -> () { + cells { + r = std_reg(32); + exp0 = exponent(); + } + wires { + group upd0 { + r.in = 32'd1; + r.write_en = 1'd1; + upd0[done] = r.done; + } + } + control { + seq { + upd0; + invoke exp0(base = r.out, exp = r.out)(); + } + } +} + +component exponent(base: 32, exp: 32) -> (out: 32) { + cells { + r1 = std_reg(32); + r2 = std_reg(32); + } + wires { + group upd2 { + r2.in = 32'd1; + r2.write_en = 1'd1; + upd2[done] = r2.done; + } + group upd1 { + r1.in = 32'd1; + r1.write_en = 1'd1; + upd1[done] = r1.done; + } + } + control { + seq { + upd1; + upd2; + } + } +} + diff --git a/tests/passes/static-promotion/multi-static.expect b/tests/passes/static-inference-promotion/multi-static.expect similarity index 90% rename from tests/passes/static-promotion/multi-static.expect rename to tests/passes/static-inference-promotion/multi-static.expect index af49b1d0cf..78d60b674e 100644 --- a/tests/passes/static-promotion/multi-static.expect +++ b/tests/passes/static-inference-promotion/multi-static.expect @@ -20,7 +20,7 @@ component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { } } } -static<2> component exponent<"promoted"=1>(base: 32, exp: 4, @go go: 1, @clk clk: 1, @reset reset: 1) -> (out: 32, @done done: 1) { +static<2> component exponent<"promoted"=1>(base: 32, exp: 4, @go @static(2) go: 1, @clk clk: 1, @reset reset: 1) -> (out: 32, @done done: 1) { cells { r1 = std_reg(32); r2 = std_reg(32); diff --git a/tests/passes/static-promotion/multi-static.futil b/tests/passes/static-inference-promotion/multi-static.futil similarity index 91% rename from tests/passes/static-promotion/multi-static.futil rename to tests/passes/static-inference-promotion/multi-static.futil index 681ed1e239..53eed79bb9 100644 --- a/tests/passes/static-promotion/multi-static.futil +++ b/tests/passes/static-inference-promotion/multi-static.futil @@ -1,4 +1,4 @@ -// -p well-formed -p static-promotion -p dead-group-removal +// -p well-formed -p static-inference -p static-promotion -p dead-group-removal import "primitives/core.futil"; /** diff --git a/tests/passes/static-inference-promotion/no_promote_loop.expect b/tests/passes/static-inference-promotion/no_promote_loop.expect new 
file mode 100644 index 0000000000..26a22869ae --- /dev/null +++ b/tests/passes/static-inference-promotion/no_promote_loop.expect @@ -0,0 +1,39 @@ +import "primitives/core.futil"; +component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { + cells { + a = std_reg(2); + b = std_reg(2); + c = std_reg(2); + } + wires { + static<1> group A0 { + a.in = 2'd0; + a.write_en = 1'd1; + } + static<1> group B0 { + b.in = 2'd1; + b.write_en = 1'd1; + } + static<1> group C0 { + c.in = 2'd2; + c.write_en = 1'd1; + } + } + control { + seq { + repeat 10 { + @compactable static<4> seq { + A0; + B0; + C0; + C0; + } + } + @compactable static<3> seq { + A0; + B0; + C0; + } + } + } +} diff --git a/tests/passes/static-promotion/no_promote_loop.futil b/tests/passes/static-inference-promotion/no_promote_loop.futil similarity index 82% rename from tests/passes/static-promotion/no_promote_loop.futil rename to tests/passes/static-inference-promotion/no_promote_loop.futil index 7279175049..72032baae5 100644 --- a/tests/passes/static-promotion/no_promote_loop.futil +++ b/tests/passes/static-inference-promotion/no_promote_loop.futil @@ -1,4 +1,4 @@ -// -p well-formed -p static-promotion -p dead-group-removal -x static-promotion:cycle-limit=25 +// -p well-formed -p static-inference -p static-promotion -p dead-group-removal -x static-promotion:cycle-limit=25 import "primitives/core.futil"; diff --git a/tests/passes/static-promotion/par.expect b/tests/passes/static-inference-promotion/par.expect similarity index 100% rename from tests/passes/static-promotion/par.expect rename to tests/passes/static-inference-promotion/par.expect diff --git a/tests/passes/static-inference-promotion/par.futil b/tests/passes/static-inference-promotion/par.futil new file mode 100644 index 0000000000..8281537600 --- /dev/null +++ b/tests/passes/static-inference-promotion/par.futil @@ -0,0 +1,40 @@ +// -p well-formed -p static-inference -p static-promotion -p dead-group-removal +import "primitives/core.futil"; + +component main(go: 1, clk: 1) -> (done: 1) { + cells { + r0 = std_reg(1); + r1 = std_reg(1); + m0 = std_mem_d1(32, 1, 1); + } + wires { + group one_cycle { + r0.write_en = 1'd1; + one_cycle[done] = r0.done; + } + group two_cycles { + r0.write_en = 1'd1; + r1.write_en = r0.done; + two_cycles[done] = r1.done; + } + group mem_wrt_to_done { + m0.addr0 = 1'd0; + m0.write_data = 32'd5; + m0.write_en = 1'd1; + mem_wrt_to_done[done] = m0.done; + } + group mult_wrts_to_done { + r0.write_en = 1'd1; + mult_wrts_to_done[done] = r0.done ? 
1'd1; + } + } + + control { + par { + one_cycle; + two_cycles; + mult_wrts_to_done; + mem_wrt_to_done; + } + } +} \ No newline at end of file diff --git a/tests/passes/static-promotion/promote-nested.expect b/tests/passes/static-inference-promotion/promote-nested.expect similarity index 100% rename from tests/passes/static-promotion/promote-nested.expect rename to tests/passes/static-inference-promotion/promote-nested.expect diff --git a/tests/passes/static-inference-promotion/promote-nested.futil b/tests/passes/static-inference-promotion/promote-nested.futil new file mode 100644 index 0000000000..70179c4848 --- /dev/null +++ b/tests/passes/static-inference-promotion/promote-nested.futil @@ -0,0 +1,59 @@ +// -p well-formed -p static-inference -p static-promotion -p dead-group-removal -x static-promotion:threshold=5 + +import "primitives/core.futil"; + +component main() -> () { + cells { + a = std_reg(2); + b = std_reg(2); + c = std_reg(2); + cond_reg = std_reg(1); + r0 = std_reg(2); + } + + wires { + group A { + a.in = 2'd0; + a.write_en = 1'b1; + A[done] = a.done; + } + + group B { + b.in = 2'd1; + b.write_en = 1'b1; + B[done] = b.done; + } + + group C { + c.in = 2'd2; + c.write_en = 1'b1; + C[done] = c.done; + } + + group no_upgrade { + r0.write_en = 1'd1; + no_upgrade[done] = r0.done ? 1'd1; + } + } + + control { + seq { + seq { + par {A; B;} + seq {C; C;} + par {A; B;} + } + no_upgrade; + @bound(2) while cond_reg.out { + seq { + A; + B; + C; + } + } + } + + + + } +} diff --git a/tests/passes/static-promotion/threshold.expect b/tests/passes/static-inference-promotion/threshold.expect similarity index 80% rename from tests/passes/static-promotion/threshold.expect rename to tests/passes/static-inference-promotion/threshold.expect index 976c597a77..66810105d2 100644 --- a/tests/passes/static-promotion/threshold.expect +++ b/tests/passes/static-inference-promotion/threshold.expect @@ -23,11 +23,11 @@ component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { } } control { - @promote_static(4) seq { - @promote_static A; - @promote_static B; - @promote_static C; - @promote_static C; + seq { + A; + B; + C; + C; } } } diff --git a/tests/passes/static-promotion/threshold.futil b/tests/passes/static-inference-promotion/threshold.futil similarity index 82% rename from tests/passes/static-promotion/threshold.futil rename to tests/passes/static-inference-promotion/threshold.futil index 10375bdecd..f254159ce8 100644 --- a/tests/passes/static-promotion/threshold.futil +++ b/tests/passes/static-inference-promotion/threshold.futil @@ -1,4 +1,4 @@ -// -p well-formed -p static-promotion -p dead-group-removal -x static-promotion:threshold=4 +// -p well-formed -p static-inference -p static-promotion -p dead-group-removal -x static-promotion:threshold=4 import "primitives/core.futil"; diff --git a/tests/passes/static-promotion/upgrade-bound.expect b/tests/passes/static-inference-promotion/upgrade-bound.expect similarity index 100% rename from tests/passes/static-promotion/upgrade-bound.expect rename to tests/passes/static-inference-promotion/upgrade-bound.expect diff --git a/tests/passes/static-promotion/upgrade-bound.futil b/tests/passes/static-inference-promotion/upgrade-bound.futil similarity index 86% rename from tests/passes/static-promotion/upgrade-bound.futil rename to tests/passes/static-inference-promotion/upgrade-bound.futil index b20e8fd383..40fc88bea2 100644 --- a/tests/passes/static-promotion/upgrade-bound.futil +++ b/tests/passes/static-inference-promotion/upgrade-bound.futil 
@@ -1,4 +1,4 @@ -// -p well-formed -p static-promotion -p dead-group-removal +// -p well-formed -p static-inference -p static-promotion -p dead-group-removal import "primitives/core.futil"; diff --git a/tests/passes/static-promotion/no_promote_loop.expect b/tests/passes/static-inference/component.expect similarity index 60% rename from tests/passes/static-promotion/no_promote_loop.expect rename to tests/passes/static-inference/component.expect index 2aa38ac2b5..bf2c58debe 100644 --- a/tests/passes/static-promotion/no_promote_loop.expect +++ b/tests/passes/static-inference/component.expect @@ -21,29 +21,9 @@ component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { c.write_en = 1'd1; C[done] = c.done; } - static<1> group A0 { - a.in = 2'd0; - a.write_en = 1'd1; - } - static<1> group B0 { - b.in = 2'd1; - b.write_en = 1'd1; - } - static<1> group C0 { - c.in = 2'd2; - c.write_en = 1'd1; - } } control { - @promote_static(43) seq { - @promote_static(40) repeat 10 { - @compactable static<4> seq { - A0; - B0; - C0; - C0; - } - } + @promote_static(3) seq { @promote_static A; @promote_static B; @promote_static C; diff --git a/tests/passes/static-promotion/component.futil b/tests/passes/static-inference/component.futil similarity index 88% rename from tests/passes/static-promotion/component.futil rename to tests/passes/static-inference/component.futil index b9171d2c5c..43d857c8a9 100644 --- a/tests/passes/static-promotion/component.futil +++ b/tests/passes/static-inference/component.futil @@ -1,4 +1,4 @@ -// -p well-formed -p static-promotion -p dead-group-removal +// -p well-formed -p static-inference import "primitives/core.futil"; diff --git a/tests/passes/static-inference/groups.expect b/tests/passes/static-inference/groups.expect new file mode 100644 index 0000000000..fba56d479d --- /dev/null +++ b/tests/passes/static-inference/groups.expect @@ -0,0 +1,40 @@ +import "primitives/core.futil"; +component main(go: 1, clk: 1, @go go0: 1, @clk clk0: 1, @reset reset: 1) -> (done: 1, @done done0: 1) { + cells { + r0 = std_reg(1); + r1 = std_reg(1); + m0 = std_mem_d1(32, 1, 1); + } + wires { + group one_cycle<"promote_static"=1> { + r0.write_en = 1'd1; + one_cycle[done] = r0.done; + } + group two_cycles<"promote_static"=2> { + r0.write_en = 1'd1; + r1.write_en = r0.done; + two_cycles[done] = r1.done; + } + group mem_wrt_to_done<"promote_static"=1> { + m0.addr0 = 1'd0; + m0.write_data = 32'd5; + m0.write_en = 1'd1; + mem_wrt_to_done[done] = m0.done; + } + group mult_wrts_to_done { + r0.write_en = 1'd1; + mult_wrts_to_done[done] = r0.done ? 
1'd1; + } + } + control { + seq { + @promote_static one_cycle; + @promote_static(2) two_cycles; + @promote_static mem_wrt_to_done; + mult_wrts_to_done; + @promote_static one_cycle; + mult_wrts_to_done; + @promote_static(2) two_cycles; + } + } +} diff --git a/tests/passes/static-promotion/groups.futil b/tests/passes/static-inference/groups.futil similarity index 93% rename from tests/passes/static-promotion/groups.futil rename to tests/passes/static-inference/groups.futil index ab90109b17..f6571c0e17 100644 --- a/tests/passes/static-promotion/groups.futil +++ b/tests/passes/static-inference/groups.futil @@ -1,4 +1,4 @@ -// -p well-formed -p static-promotion -p dead-group-removal +// -p well-formed -p static-inference import "primitives/core.futil"; component main(go: 1, clk: 1) -> (done: 1) { diff --git a/tests/passes/static-inference/if-no-else.expect b/tests/passes/static-inference/if-no-else.expect new file mode 100644 index 0000000000..a0c3588a27 --- /dev/null +++ b/tests/passes/static-inference/if-no-else.expect @@ -0,0 +1,19 @@ +import "primitives/core.futil"; +component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { + cells { + a = std_reg(2); + cond = std_reg(1); + } + wires { + group A<"promote_static"=1> { + a.in = 2'd0; + a.write_en = 1'd1; + A[done] = a.done; + } + } + control { + @promote_static if cond.out { + @promote_static A; + } + } +} diff --git a/tests/passes/static-promotion/if-no-else.futil b/tests/passes/static-inference/if-no-else.futil similarity index 81% rename from tests/passes/static-promotion/if-no-else.futil rename to tests/passes/static-inference/if-no-else.futil index 96250f8eac..42ae5f6112 100644 --- a/tests/passes/static-promotion/if-no-else.futil +++ b/tests/passes/static-inference/if-no-else.futil @@ -1,4 +1,4 @@ -// -p well-formed -p static-promotion -p dead-group-removal +// -p well-formed -p static-inference import "primitives/core.futil"; diff --git a/tests/passes/static-inference/invoke.expect b/tests/passes/static-inference/invoke.expect new file mode 100644 index 0000000000..ae63e3984f --- /dev/null +++ b/tests/passes/static-inference/invoke.expect @@ -0,0 +1,47 @@ +import "primitives/core.futil"; +component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { + cells { + r = std_reg(32); + exp0 = exponent(); + } + wires { + group upd0<"promote_static"=1> { + r.in = 32'd1; + r.write_en = 1'd1; + upd0[done] = r.done; + } + } + control { + @promote_static(3) seq { + @promote_static upd0; + @promote_static(2) invoke exp0( + base = r.out, + exp = r.out + )(); + } + } +} +component exponent(base: 32, exp: 32, @go @static(2) go: 1, @clk clk: 1, @reset reset: 1) -> (out: 32, @done done: 1) { + cells { + r1 = std_reg(32); + r2 = std_reg(32); + } + wires { + group upd2<"promote_static"=1> { + r2.in = 32'd1; + r2.write_en = 1'd1; + upd2[done] = r2.done; + } + group upd1<"promote_static"=1> { + r1.in = 32'd1; + r1.write_en = 1'd1; + upd1[done] = r1.done; + } + } + control { + @promote_static(2) seq { + @promote_static upd1; + @promote_static upd2; + } + } +} diff --git a/tests/passes/static-promotion/invoke.futil b/tests/passes/static-inference/invoke.futil similarity index 93% rename from tests/passes/static-promotion/invoke.futil rename to tests/passes/static-inference/invoke.futil index fa786f9e83..a3ea2f77de 100644 --- a/tests/passes/static-promotion/invoke.futil +++ b/tests/passes/static-inference/invoke.futil @@ -1,4 +1,4 @@ -// -p well-formed -p static-promotion -p dead-group-removal +// -p well-formed -p 
static-inference import "primitives/core.futil"; /** diff --git a/tests/passes/static-inference/par.expect b/tests/passes/static-inference/par.expect new file mode 100644 index 0000000000..356396105e --- /dev/null +++ b/tests/passes/static-inference/par.expect @@ -0,0 +1,37 @@ +import "primitives/core.futil"; +component main(go: 1, clk: 1, @go go0: 1, @clk clk0: 1, @reset reset: 1) -> (done: 1, @done done0: 1) { + cells { + r0 = std_reg(1); + r1 = std_reg(1); + m0 = std_mem_d1(32, 1, 1); + } + wires { + group one_cycle<"promote_static"=1> { + r0.write_en = 1'd1; + one_cycle[done] = r0.done; + } + group two_cycles<"promote_static"=2> { + r0.write_en = 1'd1; + r1.write_en = r0.done; + two_cycles[done] = r1.done; + } + group mem_wrt_to_done<"promote_static"=1> { + m0.addr0 = 1'd0; + m0.write_data = 32'd5; + m0.write_en = 1'd1; + mem_wrt_to_done[done] = m0.done; + } + group mult_wrts_to_done { + r0.write_en = 1'd1; + mult_wrts_to_done[done] = r0.done ? 1'd1; + } + } + control { + par { + @promote_static one_cycle; + @promote_static(2) two_cycles; + mult_wrts_to_done; + @promote_static mem_wrt_to_done; + } + } +} diff --git a/tests/passes/static-promotion/par.futil b/tests/passes/static-inference/par.futil similarity index 92% rename from tests/passes/static-promotion/par.futil rename to tests/passes/static-inference/par.futil index 0020de7ba1..4dede68571 100644 --- a/tests/passes/static-promotion/par.futil +++ b/tests/passes/static-inference/par.futil @@ -1,4 +1,5 @@ -// -p well-formed -p static-promotion -p dead-group-removal +// -p well-formed -p static-inference + import "primitives/core.futil"; component main(go: 1, clk: 1) -> (done: 1) { diff --git a/tests/passes/static-inference/promote-nested.expect b/tests/passes/static-inference/promote-nested.expect new file mode 100644 index 0000000000..bbb3e51b45 --- /dev/null +++ b/tests/passes/static-inference/promote-nested.expect @@ -0,0 +1,57 @@ +import "primitives/core.futil"; +component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { + cells { + a = std_reg(2); + b = std_reg(2); + c = std_reg(2); + cond_reg = std_reg(1); + r0 = std_reg(2); + } + wires { + group A<"promote_static"=1> { + a.in = 2'd0; + a.write_en = 1'd1; + A[done] = a.done; + } + group B<"promote_static"=1> { + b.in = 2'd1; + b.write_en = 1'd1; + B[done] = b.done; + } + group C<"promote_static"=1> { + c.in = 2'd2; + c.write_en = 1'd1; + C[done] = c.done; + } + group no_upgrade { + r0.write_en = 1'd1; + no_upgrade[done] = r0.done ? 
1'd1; + } + } + control { + seq { + @promote_static(4) seq { + @promote_static par { + @promote_static A; + @promote_static B; + } + @promote_static(2) seq { + @promote_static C; + @promote_static C; + } + @promote_static par { + @promote_static A; + @promote_static B; + } + } + no_upgrade; + @bound(2) @promote_static(6) while cond_reg.out { + @promote_static(3) seq { + @promote_static A; + @promote_static B; + @promote_static C; + } + } + } + } +} diff --git a/tests/passes/static-promotion/promote-nested.futil b/tests/passes/static-inference/promote-nested.futil similarity index 89% rename from tests/passes/static-promotion/promote-nested.futil rename to tests/passes/static-inference/promote-nested.futil index e3a49fbd50..acc8e181e1 100644 --- a/tests/passes/static-promotion/promote-nested.futil +++ b/tests/passes/static-inference/promote-nested.futil @@ -1,4 +1,4 @@ -// -p well-formed -p static-promotion -p dead-group-removal -x static-promotion:threshold=5 +// -p well-formed -p static-inference import "primitives/core.futil"; diff --git a/tests/passes/static-inference/promote-repeat.expect b/tests/passes/static-inference/promote-repeat.expect new file mode 100644 index 0000000000..c1d43dec1e --- /dev/null +++ b/tests/passes/static-inference/promote-repeat.expect @@ -0,0 +1,48 @@ +import "primitives/core.futil"; +component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { + cells { + a = std_reg(2); + b = std_reg(2); + c = std_reg(2); + } + wires { + group A<"promote_static"=1> { + a.in = 2'd0; + a.write_en = 1'd1; + A[done] = a.done; + } + group B<"promote_static"=1> { + b.in = 2'd1; + b.write_en = 1'd1; + B[done] = b.done; + } + group C<"promote_static"=1> { + c.in = 2'd2; + c.write_en = 1'd1; + C[done] = c.done; + } + } + control { + @promote_static(43) seq { + @promote_static(40) repeat 10 { + @promote_static(4) seq { + @promote_static A; + @promote_static B; + @promote_static C; + @promote_static C; + } + } + @promote_static(3) par { + @promote_static(2) seq { + @promote_static A; + @promote_static B; + } + @promote_static(3) seq { + @promote_static C; + @promote_static C; + @promote_static C; + } + } + } + } +} diff --git a/tests/passes/static-inference/promote-repeat.futil b/tests/passes/static-inference/promote-repeat.futil new file mode 100644 index 0000000000..043bb0839c --- /dev/null +++ b/tests/passes/static-inference/promote-repeat.futil @@ -0,0 +1,43 @@ +// -p well-formed -p static-inference + +import "primitives/core.futil"; + +component main() -> () { + cells { + a = std_reg(2); + b = std_reg(2); + c = std_reg(2); + } + + wires { + group A { + a.in = 2'd0; + a.write_en = 1'b1; + A[done] = a.done; + } + + group B { + b.in = 2'd1; + b.write_en = 1'b1; + B[done] = b.done; + } + + group C { + c.in = 2'd2; + c.write_en = 1'b1; + C[done] = c.done; + } + } + + control { + seq { + repeat 10 { + seq { A; B; C; C;} + } + par { + seq {A; B;} + seq {C; C; C;} + } + } + } +} diff --git a/tests/passes/static-promotion/fixup-necessary.expect b/tests/passes/static-promotion/fixup-necessary.expect new file mode 100644 index 0000000000..34bac1aa0b --- /dev/null +++ b/tests/passes/static-promotion/fixup-necessary.expect @@ -0,0 +1,61 @@ +import "primitives/core.futil"; +component foo(base: 32, exp: 4, @go go: 1, @clk clk: 1, @reset reset: 1) -> (out: 32, @done done: 1) { + cells { + r1 = std_reg(32); + r2 = std_reg(32); + } + wires { + group upd2<"promote_static"=1> { + r2.in = 32'd1; + r2.write_en = 1'd1; + upd2[done] = r2.done; + } + group upd1<"promote_static"=1> { + 
r1.in = 32'd1; + r1.write_en = 1'd1; + upd1[done] = r1.done; + } + } + control { + seq { + upd1; + upd2; + } + } +} +component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { + cells { + a = std_reg(2); + b = std_reg(2); + foo_inst = foo(); + } + wires { + group F { + foo_inst.go = 1'd1; + F[done] = foo_inst.done; + } + static<1> group A0 { + a.in = 2'd0; + a.write_en = 1'd1; + } + static<1> group B0 { + b.in = 2'd1; + b.write_en = 1'd1; + } + } + control { + seq { + @compactable static<8> seq { + A0; + A0; + A0; + A0; + B0; + B0; + B0; + B0; + } + F; + } + } +} diff --git a/tests/passes/static-promotion/fixup-necessary.futil b/tests/passes/static-promotion/fixup-necessary.futil new file mode 100644 index 0000000000..49ddd85e61 --- /dev/null +++ b/tests/passes/static-promotion/fixup-necessary.futil @@ -0,0 +1,64 @@ +// -p well-formed -p static-promotion -x static-promotion:threshold=5 -p dead-group-removal + +import "primitives/core.futil"; + +component foo(base: 32, exp: 4, @go @static(2) go: 1, @clk clk: 1, @reset reset: 1) -> (out: 32, @done done: 1) { + cells { + r1 = std_reg(32); + r2 = std_reg(32); + } + wires { + group upd2<"promote_static"=1> { + r2.in = 32'd1; + r2.write_en = 1'd1; + upd2[done] = r2.done; + } + group upd1<"promote_static"=1> { + r1.in = 32'd1; + r1.write_en = 1'd1; + upd1[done] = r1.done; + } + } + control { + @promote_static(2) seq { + @promote_static upd1; + @promote_static upd2; + } + } +} +component main(@go go: 1, @clk clk: 1, @reset reset: 1) -> (@done done: 1) { + cells { + a = std_reg(2); + b = std_reg(2); + foo_inst = foo(); + } + wires { + group A<"promote_static"=1> { + a.in = 2'd0; + a.write_en = 1'd1; + A[done] = a.done; + } + group B<"promote_static"=1> { + b.in = 2'd1; + b.write_en = 1'd1; + B[done] = b.done; + } + group F<"promote_static"=2> { + foo_inst.go = 1'd1; + F[done] = foo_inst.done; + } + } + control { + @promote_static(10) seq { + @promote_static A; + @promote_static A; + @promote_static A; + @promote_static A; + @promote_static B; + @promote_static B; + @promote_static B; + @promote_static B; + @promote_static(2) F; + } + } +} \ No newline at end of file diff --git a/tools/firrtl/generate-firrtl-with-primitives.py b/tools/firrtl/generate-firrtl-with-primitives.py new file mode 100644 index 0000000000..b6d2163936 --- /dev/null +++ b/tools/firrtl/generate-firrtl-with-primitives.py @@ -0,0 +1,65 @@ +import json +import os +import sys + +# Generates a map where `key` should be replaced with `value` +def generate_replacement_map(inst): + replacement_map = {} + for param in inst["params"]: + replacement_map[param["param_name"]] = param["param_value"] + + # Special primitives that have a value dependent on their parameters. + if inst["name"] == "std_pad": + replacement_map["DIFF"] = replacement_map["OUT_WIDTH"] - replacement_map["IN_WIDTH"] + elif inst["name"] == "std_slice": + replacement_map["DIFF"] = replacement_map["IN_WIDTH"] - replacement_map["OUT_WIDTH"] + + return replacement_map + +# Retrieves the appropriate template file for the given primitive +def retrieve_firrtl_template(primitive_name): + firrtl_file_path = os.path.join(sys.path[0], "templates", primitive_name + ".fir") + if not(os.path.isfile(firrtl_file_path)): + print(f"{sys.argv[0]}: FIRRTL template file for primitive {primitive_name} does not exist! 
Exiting...") + sys.exit(1) + return firrtl_file_path + +# Generates a primitive definition from the provided JSON data of a unique primitive use +def generate_primitive_definition(inst): + template_filename = retrieve_firrtl_template(inst["name"]) + replacement_map = generate_replacement_map(inst) + + with open(template_filename, "r") as template_file: + for line in template_file: + for key in replacement_map: + line = line.replace(key, str(replacement_map[key])) + print(line.rstrip()) + print() # whitespace to buffer between modules + +# Generates a complete FIRRTL program with primitives. +def generate(firrtl_filename, primitive_uses_filename): + firrtl_file = open(firrtl_filename) + primitive_uses_file = open(primitive_uses_filename) + # The first line contains the circuit name, which needs to come before the primitives. + print(firrtl_file.readline().rstrip()) + # Display the primitive definitions. + primitive_insts = json.load(primitive_uses_file) + if primitive_insts: + for inst in primitive_insts: + generate_primitive_definition(inst) + # Display the rest of the FIRRTL program. + for line in firrtl_file.readlines(): + print(line.rstrip()) + +def main(): + if len(sys.argv) != 3: + args_desc = [ + "FIRRTL_FILE", + "PRIMITIVE_USES_JSON" + ] + print(f"Usage: {sys.argv[0]} {' '.join(args_desc)}") + return 1 + generate(sys.argv[1], sys.argv[2]) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/tools/firrtl/templates/std_add.fir b/tools/firrtl/templates/std_add.fir new file mode 100644 index 0000000000..505f569bbb --- /dev/null +++ b/tools/firrtl/templates/std_add.fir @@ -0,0 +1,6 @@ + module std_add_WIDTH: + input left : UInt + input right : UInt + output out : UInt + + out <= add(left, right) diff --git a/tools/firrtl/templates/std_and.fir b/tools/firrtl/templates/std_and.fir new file mode 100644 index 0000000000..93c46f98b7 --- /dev/null +++ b/tools/firrtl/templates/std_and.fir @@ -0,0 +1,6 @@ + module std_and_WIDTH : + input left : UInt + input right : UInt + output out : UInt + + out <= and(left, right) diff --git a/tools/firrtl/templates/std_const.fir b/tools/firrtl/templates/std_const.fir new file mode 100644 index 0000000000..27a66a93df --- /dev/null +++ b/tools/firrtl/templates/std_const.fir @@ -0,0 +1,3 @@ + module std_const_WIDTH_VAL : + output out : UInt + out <= UInt(VALUE) diff --git a/tools/firrtl/templates/std_eq.fir b/tools/firrtl/templates/std_eq.fir new file mode 100644 index 0000000000..8601d9b9d4 --- /dev/null +++ b/tools/firrtl/templates/std_eq.fir @@ -0,0 +1,6 @@ + module std_eq_WIDTH : + input left : UInt + input right : UInt + output out : UInt<1> + + out <= eq(left, right) diff --git a/tools/firrtl/templates/std_ge.fir b/tools/firrtl/templates/std_ge.fir new file mode 100644 index 0000000000..0aace3aedc --- /dev/null +++ b/tools/firrtl/templates/std_ge.fir @@ -0,0 +1,6 @@ + module std_ge_WIDTH : + input left : UInt + input right : UInt + output out : UInt<1> + + out <= ge(left, right) diff --git a/tools/firrtl/templates/std_gt.fir b/tools/firrtl/templates/std_gt.fir new file mode 100644 index 0000000000..49b9f4ef92 --- /dev/null +++ b/tools/firrtl/templates/std_gt.fir @@ -0,0 +1,6 @@ + module std_gt_WIDTH : + input left : UInt + input right : UInt + output out : UInt<1> + + out <= gt(left, right) diff --git a/tools/firrtl/templates/std_le.fir b/tools/firrtl/templates/std_le.fir new file mode 100644 index 0000000000..6550884ad4 --- /dev/null +++ b/tools/firrtl/templates/std_le.fir @@ -0,0 +1,6 @@ + module std_le_WIDTH : + input left : 
UInt + input right : UInt + output out : UInt<1> + + out <= le(left, right) diff --git a/tools/firrtl/templates/std_lsh.fir b/tools/firrtl/templates/std_lsh.fir new file mode 100644 index 0000000000..934883c083 --- /dev/null +++ b/tools/firrtl/templates/std_lsh.fir @@ -0,0 +1,6 @@ + module std_lsh_WIDTH : + input left : UInt + input right : UInt + output out : UInt + + out <= dshl(left, right) diff --git a/tools/firrtl/templates/std_lt.fir b/tools/firrtl/templates/std_lt.fir new file mode 100644 index 0000000000..8c580049e9 --- /dev/null +++ b/tools/firrtl/templates/std_lt.fir @@ -0,0 +1,6 @@ + module std_lt_WIDTH : + input left : UInt + input right : UInt + output out : UInt<1> + + out <= lt(left, right) diff --git a/tools/firrtl/templates/std_mem_d1.fir b/tools/firrtl/templates/std_mem_d1.fir new file mode 100644 index 0000000000..83bbd340b0 --- /dev/null +++ b/tools/firrtl/templates/std_mem_d1.fir @@ -0,0 +1,34 @@ + module std_mem_d1_WIDTH_SIZE_IDX_SIZE : + input addr0 : UInt + input write_data : UInt + input write_en : UInt<1> + input clk : Clock + input reset : UInt<1> + output read_data : UInt + output done : UInt<1> + + mem internal_mem : + data-type => UInt + depth => SIZE + read-latency => 1 + write-latency => 1 + reader => internal_read + writer => internal_write + read-under-write => undefined + + ; read from memory + internal_mem.internal_read.addr <= addr0 + internal_mem.internal_read.en <= UInt(1) + internal_mem.internal_read.clk <= clk + read_data <= internal_mem.internal_read.data + + ; write to memory + internal_mem.internal_write.addr <= addr0 + internal_mem.internal_write.en <= write_en + internal_mem.internal_write.clk <= clk + internal_mem.internal_write.data <= write_data + internal_mem.internal_write.mask <= UInt(1) ; unclear + when eq(write_en, UInt(1)): + done <= UInt(1) + else: + done <= UInt(0) diff --git a/tools/firrtl/templates/std_neq.fir b/tools/firrtl/templates/std_neq.fir new file mode 100644 index 0000000000..7b6d03adac --- /dev/null +++ b/tools/firrtl/templates/std_neq.fir @@ -0,0 +1,6 @@ + module std_neq_WIDTH : + input left : UInt + input right : UInt + output out : UInt<1> + + out <= neq(left, right) diff --git a/tools/firrtl/templates/std_not.fir b/tools/firrtl/templates/std_not.fir new file mode 100644 index 0000000000..b0ead92746 --- /dev/null +++ b/tools/firrtl/templates/std_not.fir @@ -0,0 +1,5 @@ + module std_not_WIDTH : + input in : UInt + output out : UInt + + out <= not(in) diff --git a/tools/firrtl/templates/std_or.fir b/tools/firrtl/templates/std_or.fir new file mode 100644 index 0000000000..ab29cf9e65 --- /dev/null +++ b/tools/firrtl/templates/std_or.fir @@ -0,0 +1,6 @@ + module std_or_WIDTH : + input left : UInt + input right : UInt + output out : UInt + + out <= or(left, right) diff --git a/tools/firrtl/templates/std_pad.fir b/tools/firrtl/templates/std_pad.fir new file mode 100644 index 0000000000..7d8696d067 --- /dev/null +++ b/tools/firrtl/templates/std_pad.fir @@ -0,0 +1,5 @@ + module std_pad_IN_WIDTH_OUT_WIDTH: + input in : UInt + output out : UInt + + out <= pad(in, OUT_WIDTH) ; pad takes the target width (OUT_WIDTH), not the width difference.
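Note on the templates above and below: generate_primitive_definition fills them in by plain string substitution, replacing placeholder tokens (WIDTH, SIZE, IDX_SIZE, VALUE, ...) with the parameters of each unique primitive use. A minimal sketch of that flow, assuming a replacement map of the shape {"WIDTH": 32} — the actual keys come from generate_replacement_map and may differ:

    # Sketch only: mirrors the substitution loop in the Python generator above.
    template = [
        "  module std_add_WIDTH :",
        "    input left : UInt",
        "    input right : UInt",
        "    output out : UInt",
        "",
        "    out <= add(left, right)",
    ]
    replacement_map = {"WIDTH": 32}  # hypothetical 32-bit std_add use

    for line in template:
        for key, value in replacement_map.items():
            line = line.replace(key, str(value))
        print(line.rstrip())
    # Emits a module named std_add_32; for std_add only the module name changes,
    # since the template leaves its port widths unspecified.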
diff --git a/tools/firrtl/templates/std_reg.fir b/tools/firrtl/templates/std_reg.fir new file mode 100644 index 0000000000..fe9a4192f4 --- /dev/null +++ b/tools/firrtl/templates/std_reg.fir @@ -0,0 +1,19 @@ + module std_reg_WIDTH : + input in : UInt + input write_en : UInt<1> + input clk : Clock + input reset : UInt<1> + output out : UInt + output done : UInt<1> + + reg internal_reg : UInt, clk + out <= UInt(0) + when eq(write_en, UInt(1)): + out <= in + done <= UInt(1) + else: + when eq(reset, UInt(1)): + done <= UInt(0) + out <= UInt(0) + else: + done <= UInt(0) diff --git a/tools/firrtl/templates/std_rsh.fir b/tools/firrtl/templates/std_rsh.fir new file mode 100644 index 0000000000..15999fb5b4 --- /dev/null +++ b/tools/firrtl/templates/std_rsh.fir @@ -0,0 +1,6 @@ + module std_rsh_WIDTH : + input left : UInt + input right : UInt + output out : UInt + + out <= dshr(left, right) diff --git a/tools/firrtl/templates/std_slice.fir b/tools/firrtl/templates/std_slice.fir new file mode 100644 index 0000000000..a5bd50943c --- /dev/null +++ b/tools/firrtl/templates/std_slice.fir @@ -0,0 +1,5 @@ + module std_slice_IN_WIDTH_OUT_WIDTH: + input in : UInt + output out : UInt + + out <= tail(in, DIFF) ; DIFF should be IN_WIDTH - OUT_WIDTH; tail drops the DIFF most significant bits, keeping the lower OUT_WIDTH bits. diff --git a/tools/firrtl/templates/std_sub.fir b/tools/firrtl/templates/std_sub.fir new file mode 100644 index 0000000000..49523801e0 --- /dev/null +++ b/tools/firrtl/templates/std_sub.fir @@ -0,0 +1,6 @@ + module std_sub_WIDTH : + input left : UInt + input right : UInt + output out : UInt + + out <= sub(left, right) diff --git a/tools/firrtl/templates/std_wire.fir b/tools/firrtl/templates/std_wire.fir new file mode 100644 index 0000000000..e43f570259 --- /dev/null +++ b/tools/firrtl/templates/std_wire.fir @@ -0,0 +1,5 @@ + module std_wire_WIDTH : + input in : UInt + output out : UInt + + out <= in diff --git a/tools/firrtl/templates/std_xor.fir b/tools/firrtl/templates/std_xor.fir new file mode 100644 index 0000000000..f7805b5370 --- /dev/null +++ b/tools/firrtl/templates/std_xor.fir @@ -0,0 +1,6 @@ + module std_xor_WIDTH : + input left : UInt + input right : UInt + output out : UInt + + out <= xor(left, right) diff --git a/tools/firrtl/templates/undef.fir b/tools/firrtl/templates/undef.fir new file mode 100644 index 0000000000..f2d81bec49 --- /dev/null +++ b/tools/firrtl/templates/undef.fir @@ -0,0 +1,3 @@ + module undef_WIDTH : + output out : UInt + out is invalid diff --git a/yxi/axi-calyx/axi-combined-calyx.futil b/yxi/axi-calyx/axi-combined-calyx.futil index 0782cf03d8..f28f30d2db 100644 --- a/yxi/axi-calyx/axi-combined-calyx.futil +++ b/yxi/axi-calyx/axi-combined-calyx.futil @@ -47,10 +47,10 @@ component m_arread_channel( is_arvalid = std_reg(1); // gets set high with ARVALID and remains high - arvalid_was_high = std_reg(1); + ar_handshake_occurred = std_reg(1); // TODO(nathanielnrn): should arguably eventually live in `s_axi_control` // but for now will live here. - ref base_addr = std_reg(64); + ref curr_addr_axi = std_reg(64); // number of trasfers in a transaction. This is sent to subordinate txn_len = std_reg(8); @@ -78,24 +78,21 @@ // this contains blocking logic previously in its own group group do_ar_transfer { //assert ARVALID as long as this is the first time we are asserting it - is_arvalid.in = !arvalid_was_high.out ? 1'b1; + is_arvalid.in = !ar_handshake_occurred.out ?
1'b1; - // TODO(nathanielnrn): in theory should be able to get rid of arvalid_was_high - // but for now we will be explicit and reduce this in generation maybe. Not sure - // it even matters. // This makes ARVALID go low after a single cycle. // Without it it stays high for 2 cycles // See issue #1828: https://github.com/calyxir/calyx/issues/1828 - is_arvalid.in = is_arvalid.out & ARREADY & arvalid_was_high.out ? 1'b0; + is_arvalid.in = is_arvalid.out & ARREADY & ar_handshake_occurred.out ? 1'b0; is_arvalid.write_en = 1'b1; - arvalid_was_high.in = !(is_arvalid.out & ARREADY) & !arvalid_was_high.out ? 1'b1; - arvalid_was_high.write_en = !(is_arvalid.out & ARREADY) & !arvalid_was_high.out ? 1'b1; + ar_handshake_occurred.in = !(is_arvalid.out & ARREADY) & !ar_handshake_occurred.out ? 1'b1; + ar_handshake_occurred.write_en = !(is_arvalid.out & ARREADY) & !ar_handshake_occurred.out ? 1'b1; // drive output signals for transfer - ARADDR = base_addr.out; + ARADDR = curr_addr_axi.out; // see link above, needs to match data width to host. // In this case 2^2 = 4 bytes = 32 bits = width of our data_bus. ARSIZE = 3'b010; @@ -143,7 +140,7 @@ component m_arread_channel( seq{ par { invoke bt_reg(in=1'b0)(); - invoke arvalid_was_high(in=1'b0)(); + invoke ar_handshake_occurred(in=1'b0)(); } do_ar_transfer; invoke is_arvalid(in=1'b0)(); @@ -174,9 +171,9 @@ component m_read_channel( // on the data we read from cocotb ref data_received = seq_mem_d1(32, 8, 64); is_rdy = std_reg(1); - ref curr_addr = std_reg(64); + ref curr_addr_internal_mem = std_reg(64); //need to increment this - ref base_addr = std_reg(64); + ref curr_addr_axi = std_reg(64); // registered because RLAST is high with last transfer, not after // before this was registered we were terminating immediately with @@ -187,8 +184,8 @@ component m_read_channel( read_data_reg = std_reg(32); //address of seq_d1_mem we are writing to - curr_addr_adder = std_add(64); - base_addr_adder = std_add(64); + curr_addr_internal_mem_adder = std_add(64); + curr_addr_axi_adder = std_add(64); // block_transfer reg to avoid combinational loops // Used to block any servicing until handshake occurs. @@ -246,27 +243,27 @@ component m_read_channel( is_rdy.write_en = 1'b1; //write the data we received during transfer to seq_d1_mem - data_received.addr0 = curr_addr.out; + data_received.addr0 = curr_addr_internal_mem.out; data_received.write_en = 1'b1; data_received.write_data = read_data_reg.out; receive_r_transfer[done] = data_received.write_done; } - group incr_curr_addr{ - curr_addr_adder.left = 64'd1 ; - curr_addr_adder.right = curr_addr.out; - curr_addr.in = curr_addr_adder.out; - curr_addr.write_en = 1'b1; - incr_curr_addr[done] = curr_addr.done; + group incr_curr_addr_internal_mem{ + curr_addr_internal_mem_adder.left = 64'd1 ; + curr_addr_internal_mem_adder.right = curr_addr_internal_mem.out; + curr_addr_internal_mem.in = curr_addr_internal_mem_adder.out; + curr_addr_internal_mem.write_en = 1'b1; + incr_curr_addr_internal_mem[done] = curr_addr_internal_mem.done; } - group incr_base_addr{ - base_addr_adder.left = 64'd4; //32-bit/8. TODO:parameterize via mem width - base_addr_adder.right = base_addr.out; - base_addr.in = base_addr_adder.out; - base_addr.write_en= 1'b1; - incr_base_addr[done] = base_addr.done; + group incr_curr_addr_axi{ + curr_addr_axi_adder.left = 64'd4; //32-bit/8. 
TODO:parameterize via mem width + curr_addr_axi_adder.right = curr_addr_axi.out; + curr_addr_axi.in = curr_addr_axi_adder.out; + curr_addr_axi.write_en= 1'b1; + incr_curr_addr_axi[done] = curr_addr_axi.done; } } control{ @@ -277,8 +274,8 @@ component m_read_channel( block_transfer; receive_r_transfer; par{ - incr_curr_addr; - incr_base_addr; + incr_curr_addr_internal_mem; + incr_curr_addr_axi; } } } @@ -412,10 +409,10 @@ component m_awwrite_channel( is_awvalid = std_reg(1); // gets set high with AWVALID and remains high - awvalid_was_high = std_reg(1); + aw_handshake_occurred = std_reg(1); // TODO(nathanielnrn): should arguably eventually live in `s_axi_control` // but for now will live here. - ref base_addr = std_reg(64); + ref curr_addr_axi = std_reg(64); //we write to this here and read from it in m_write_channel ref max_trnsfrs = std_reg(8); @@ -446,21 +443,21 @@ component m_awwrite_channel( // this contains blocking logic previously in its own group group do_aw_transfer { //assert AWVALID - is_awvalid.in = !awvalid_was_high.out ? 1'b1; + is_awvalid.in = !(is_awvalid.out & AWREADY) & !aw_handshake_occurred.out ? 1'b1; - // TODO(nathanielnrn): in theory should be able to get rid of awvalid_was_high + // TODO(nathanielnrn): in theory should be able to get rid of aw_handshake_occurred // but for now we will be explicit and reduce this in generation maybe. Not sure // it even matters. // This makes AWVALID go low after a single cycle. Without it it stays high for 2. - is_awvalid.in = is_awvalid.out & AWREADY & awvalid_was_high.out ? 1'b0; + is_awvalid.in = (is_awvalid.out & AWREADY) | aw_handshake_occurred.out ? 1'b0; is_awvalid.write_en = 1'b1; - awvalid_was_high.in = !(is_awvalid.out & AWREADY) & !awvalid_was_high.out ? 1'b1; - awvalid_was_high.write_en = !(is_awvalid.out & AWREADY) & !awvalid_was_high.out ? 1'b1; + aw_handshake_occurred.in = is_awvalid.out & AWREADY ? 1'b1; + aw_handshake_occurred.write_en = !aw_handshake_occurred.out ? 1'b1; // drive output signals for transfer - AWADDR = base_addr.out; + AWADDR = curr_addr_axi.out; // see link above, needs to match data width to host. // In this case 2^2 = 4 bytes = 32 bits = width of our data_bus. AWSIZE = 3'b010; @@ -514,7 +511,7 @@ component m_awwrite_channel( seq{ par { invoke bt_reg(in=1'b0)(); - invoke awvalid_was_high(in=1'b0)(); + invoke aw_handshake_occurred(in=1'b0)(); } do_aw_transfer; invoke is_awvalid(in=1'b0)(); @@ -540,10 +537,10 @@ component m_write_channel( // on the data we read from cocotb ref internal_mem = seq_mem_d1(32, 8, 64); wvalid = std_reg(1); - wvalid_was_high = std_reg(1); + w_handshake_occurred = std_reg(1); // used internally to access our seq_mem_d1 - ref curr_addr = std_reg(64); - ref base_addr = std_reg(64); + ref curr_addr_internal_mem = std_reg(64); + ref curr_addr_axi = std_reg(64); //this increments curr_trnsfr_count = std_reg(8); //between 0 and 255, add +1 for transfer count @@ -556,8 +553,8 @@ component m_write_channel( n_finished_last_trnsfr = std_reg(1); //used for address of seq_d1_mem we are reading from - curr_addr_adder = std_add(64); - base_addr_adder = std_add(64); + curr_addr_internal_mem_adder = std_add(64); + curr_addr_axi_adder = std_add(64); curr_trnsfr_count_adder = std_add(8); @@ -580,18 +577,18 @@ component m_write_channel( //NOTE: wvalid.in = 1'b1; does not work, it leaves WVALID high for 2 cycles // this both asserts and deasserts one cycle later - wvalid.in = !(wvalid.out & WREADY & wvalid_was_high.out) ? 
1'b1; - // TODO(nathanielnrn): Can prob get rid of wvalid_was_high - wvalid.in = (wvalid.out & WREADY) & wvalid_was_high.out ? 1'b0; + wvalid.in = !(wvalid.out & WREADY) & !w_handshake_occurred.out ? 1'b1; + // TODO(nathanielnrn): Can prob get rid of w_handshake_occurred + wvalid.in = (wvalid.out & WREADY) | w_handshake_occurred.out ? 1'b0; wvalid.write_en = 1'b1; //set to 1 after valid has been high even once - wvalid_was_high.in = !(wvalid.out & WREADY & wvalid_was_high.out) ? 1'b1; - wvalid_was_high.write_en = !(wvalid.out & WREADY & wvalid_was_high.out) ? 1'b1; + w_handshake_occurred.in = wvalid.out & WREADY ? 1'b1; + w_handshake_occurred.write_en = !w_handshake_occurred.out ? 1'b1; - // set data output based on curr_addr register - internal_mem.addr0 = curr_addr.out; + // set data output based on curr_addr_internal_mem register + internal_mem.addr0 = curr_addr_internal_mem.out; internal_mem.read_en = 1'b1; WDATA = internal_mem.read_data; @@ -612,20 +609,20 @@ component m_write_channel( do_write_transfer[done] = bt_reg.out; } - group incr_curr_addr{ - curr_addr_adder.left = 64'd1 ; - curr_addr_adder.right = curr_addr.out; - curr_addr.in = curr_addr_adder.out; - curr_addr.write_en = 1'b1; - incr_curr_addr[done] = curr_addr.done; + group incr_curr_addr_internal_mem{ + curr_addr_internal_mem_adder.left = 64'd1 ; + curr_addr_internal_mem_adder.right = curr_addr_internal_mem.out; + curr_addr_internal_mem.in = curr_addr_internal_mem_adder.out; + curr_addr_internal_mem.write_en = 1'b1; + incr_curr_addr_internal_mem[done] = curr_addr_internal_mem.done; } - group incr_base_addr{ - base_addr_adder.left = 64'd4; //32-bit/8. TODO:parameterize via mem width - base_addr_adder.right = base_addr.out; - base_addr.in = base_addr_adder.out; - base_addr.write_en= 1'b1; - incr_base_addr[done] = base_addr.done; + group incr_curr_addr_axi{ + curr_addr_axi_adder.left = 64'd4; //32-bit/8. 
TODO:parameterize via mem width + curr_addr_axi_adder.right = curr_addr_axi.out; + curr_addr_axi.in = curr_addr_axi_adder.out; + curr_addr_axi.write_en= 1'b1; + incr_curr_addr_axi[done] = curr_addr_axi.done; } group incr_curr_trnsfr_count { @@ -640,16 +637,17 @@ component m_write_channel( control{ seq{ - invoke curr_addr(in=64'b0)(); //reset curr_addr + invoke curr_addr_internal_mem(in=64'b0)(); //reset curr_addr_internal_mem invoke n_finished_last_trnsfr(in=1'b1)(); //init reg while n_finished_last_trnsfr.out{ seq{ invoke bt_reg(in=1'b0)(); do_write_transfer; par{ - incr_curr_addr; + incr_curr_addr_internal_mem; incr_curr_trnsfr_count; - incr_base_addr; + incr_curr_addr_axi; + invoke w_handshake_occurred(in=1'b0)(); } } } @@ -859,12 +857,12 @@ component main( //original read stuff - curr_addr_A0 = std_reg(64); - base_addr_A0 = std_reg(64); - curr_addr_B0 = std_reg(64); - base_addr_B0 = std_reg(64); - curr_addr_Sum0 = std_reg(64); - base_addr_Sum0 = std_reg(64); + curr_addr_internal_mem_A0 = std_reg(64); + curr_addr_axi_A0 = std_reg(64); + curr_addr_internal_mem_B0 = std_reg(64); + curr_addr_axi_B0 = std_reg(64); + curr_addr_internal_mem_Sum0 = std_reg(64); + curr_addr_axi_Sum0 = std_reg(64); A0_read_channel = m_read_channel(); A0_arread_channel = m_arread_channel(); @@ -923,19 +921,19 @@ component main( seq{ //read stuff par{ - //init base_addresses + //init curr_addr_axiesses //TODO: get this from kernel.xml - invoke base_addr_A0(in = 64'x1000)(); - invoke base_addr_B0(in = 64'x1000)(); - invoke base_addr_Sum0(in = 64'x1000)(); - invoke curr_addr_A0(in = 64'x0000)(); - invoke curr_addr_B0(in = 64'x0000)(); - invoke curr_addr_Sum0(in = 64'x0000)(); + invoke curr_addr_axi_A0(in = 64'x1000)(); + invoke curr_addr_axi_B0(in = 64'x1000)(); + invoke curr_addr_axi_Sum0(in = 64'x1000)(); + invoke curr_addr_internal_mem_A0(in = 64'x0000)(); + invoke curr_addr_internal_mem_B0(in = 64'x0000)(); + invoke curr_addr_internal_mem_Sum0(in = 64'x0000)(); } par{ seq{ //A0 reads - invoke A0_arread_channel[base_addr = base_addr_A0] + invoke A0_arread_channel[curr_addr_axi = curr_addr_axi_A0] ( ARESET = m0_ARESET, ARREADY = m0_ARREADY @@ -948,9 +946,9 @@ component main( ARBURST = m0_ARBURST ); - //invoke curr_addr_A0(in = base_addr_A0.out)(); //set curr_addr to base_address + //invoke curr_addr_internal_mem_A0(in = curr_addr_axi_A0.out)(); //set curr_addr_internal_mem to curr_addr_axiess - invoke A0_read_channel[data_received = A0, curr_addr = curr_addr_A0, base_addr = base_addr_A0] + invoke A0_read_channel[data_received = A0, curr_addr_internal_mem = curr_addr_internal_mem_A0, curr_addr_axi = curr_addr_axi_A0] ( ARESET = m0_ARESET, RVALID = m0_RVALID, @@ -965,7 +963,7 @@ component main( seq{ //B0 reads - invoke B0_arread_channel[base_addr = base_addr_B0] + invoke B0_arread_channel[curr_addr_axi = curr_addr_axi_B0] ( ARESET = m1_ARESET, ARREADY = m1_ARREADY @@ -978,9 +976,9 @@ component main( ARBURST = m1_ARBURST ); - //invoke curr_addr_B0(in = base_addr_B0.out)(); //set curr_addr to base_address + //invoke curr_addr_internal_mem_B0(in = curr_addr_axi_B0.out)(); //set curr_addr_internal_mem to curr_addr_axiess - invoke B0_read_channel[data_received = B0, curr_addr = curr_addr_B0, base_addr = base_addr_B0] + invoke B0_read_channel[data_received = B0, curr_addr_internal_mem = curr_addr_internal_mem_B0, curr_addr_axi = curr_addr_axi_B0] ( ARESET = m1_ARESET, RVALID = m1_RVALID, @@ -993,7 +991,7 @@ component main( ); } seq{ //Sum0 reads - invoke Sum0_arread_channel[base_addr = base_addr_Sum0] + invoke 
Sum0_arread_channel[curr_addr_axi = curr_addr_axi_Sum0] ( ARESET = m2_ARESET, ARREADY = m2_ARREADY @@ -1006,9 +1004,9 @@ component main( ARBURST = m2_ARBURST ); - //invoke curr_addr_Sum0(in = base_addr_Sum0.out)(); //set curr_addr to base_address + //invoke curr_addr_internal_mem_Sum0(in = curr_addr_axi_Sum0.out)(); //set curr_addr_internal_mem to curr_addr_axiess - invoke Sum0_read_channel[data_received = Sum0, curr_addr = curr_addr_Sum0, base_addr = base_addr_Sum0] + invoke Sum0_read_channel[data_received = Sum0, curr_addr_internal_mem = curr_addr_internal_mem_Sum0, curr_addr_axi = curr_addr_axi_Sum0] ( ARESET = m2_ARESET, RVALID = m2_RVALID, @@ -1028,16 +1026,16 @@ component main( invoke vec_add_cell[A0 = A0, B0 = B0, Sum0 = Sum0]()(); //end compute stuff - //reset base_addr registers before writing + //reset curr_addr_axi registers before writing par{ - invoke base_addr_A0(in = 64'x1000)(); - invoke base_addr_B0(in = 64'x1000)(); - invoke base_addr_Sum0(in = 64'x1000)(); + invoke curr_addr_axi_A0(in = 64'x1000)(); + invoke curr_addr_axi_B0(in = 64'x1000)(); + invoke curr_addr_axi_Sum0(in = 64'x1000)(); } //write stuff par { seq { //A0 writes - invoke A0_awwrite_channel[base_addr = base_addr_A0, max_trnsfrs = max_trnsfrs] + invoke A0_awwrite_channel[curr_addr_axi = curr_addr_axi_A0, max_trnsfrs = max_trnsfrs] ( ARESET = m0_ARESET, AWREADY = m0_AWREADY @@ -1051,9 +1049,9 @@ component main( AWPROT = m0_AWPROT ); - //invoke curr_addr_A0(in = base_addr_A0.out)(); //set curr_addr to base_address + //invoke curr_addr_internal_mem_A0(in = curr_addr_axi_A0.out)(); //set curr_addr_internal_mem to curr_addr_axiess - invoke A0_write_channel[internal_mem = A0, curr_addr = curr_addr_A0, max_trnsfrs = max_trnsfrs, base_addr = base_addr_A0] + invoke A0_write_channel[internal_mem = A0, curr_addr_internal_mem = curr_addr_internal_mem_A0, max_trnsfrs = max_trnsfrs, curr_addr_axi = curr_addr_axi_A0] ( ARESET = m0_ARESET, WREADY = m0_WREADY @@ -1067,7 +1065,7 @@ component main( invoke A0_bresp_channel(BVALID = m0_BVALID)(BREADY = m0_BREADY); } seq { //B0 writes - invoke B0_awwrite_channel[base_addr = base_addr_B0, max_trnsfrs = max_trnsfrs] + invoke B0_awwrite_channel[curr_addr_axi = curr_addr_axi_B0, max_trnsfrs = max_trnsfrs] ( ARESET = m1_ARESET, AWREADY = m1_AWREADY @@ -1081,9 +1079,9 @@ component main( AWPROT = m1_AWPROT ); - //invoke curr_addr_B0(in = base_addr_B0.out)(); //set curr_addr to base_address + //invoke curr_addr_internal_mem_B0(in = curr_addr_axi_B0.out)(); //set curr_addr_internal_mem to curr_addr_axiess - invoke B0_write_channel[internal_mem = B0, curr_addr = curr_addr_B0, max_trnsfrs = max_trnsfrs, base_addr = base_addr_B0] + invoke B0_write_channel[internal_mem = B0, curr_addr_internal_mem = curr_addr_internal_mem_B0, max_trnsfrs = max_trnsfrs, curr_addr_axi = curr_addr_axi_B0] ( ARESET = m1_ARESET, WREADY = m1_WREADY @@ -1098,7 +1096,7 @@ component main( } seq { //Sum0 writes - invoke Sum0_awwrite_channel[base_addr = base_addr_Sum0, max_trnsfrs = max_trnsfrs] + invoke Sum0_awwrite_channel[curr_addr_axi = curr_addr_axi_Sum0, max_trnsfrs = max_trnsfrs] ( ARESET = m2_ARESET, AWREADY = m2_AWREADY @@ -1112,9 +1110,9 @@ component main( AWPROT = m2_AWPROT ); - //invoke curr_addr_Sum0(in = base_addr_Sum0.out)(); //set curr_addr to base_address + //invoke curr_addr_internal_mem_Sum0(in = curr_addr_axi_Sum0.out)(); //set curr_addr_internal_mem to curr_addr_axiess - invoke Sum0_write_channel[internal_mem = Sum0, curr_addr = curr_addr_Sum0, max_trnsfrs = max_trnsfrs, base_addr = base_addr_Sum0] 
+ invoke Sum0_write_channel[internal_mem = Sum0, curr_addr_internal_mem = curr_addr_internal_mem_Sum0, max_trnsfrs = max_trnsfrs, curr_addr_axi = curr_addr_axi_Sum0] ( ARESET = m2_ARESET, WREADY = m2_WREADY diff --git a/yxi/axi-calyx/axi-generator.py b/yxi/axi-calyx/axi-generator.py index 89e87df330..21e61eade1 100644 --- a/yxi/axi-calyx/axi-generator.py +++ b/yxi/axi-calyx/axi-generator.py @@ -88,8 +88,8 @@ def _add_m_to_s_address_channel(prog, mem, prefix: Literal["AW", "AR"]): # Cells xvalid = m_to_s_address_channel.reg(f"{lc_x}valid", 1) - xvalid_was_high = m_to_s_address_channel.reg(f"{lc_x}valid_was_high", 1) - base_addr = m_to_s_address_channel.reg("base_addr", 64, is_ref=True) + xhandshake_occurred = m_to_s_address_channel.reg(f"{lc_x}_handshake_occurred", 1) + curr_addr_axi = m_to_s_address_channel.reg("curr_addr_axi", 64, is_ref=True) xlen = m_to_s_address_channel.reg(f"{lc_x}len", 8) # Number of txns we want to occur before m_arread_channel is done @@ -111,19 +111,16 @@ def _add_m_to_s_address_channel(prog, mem, prefix: Literal["AW", "AR"]): # See #1828 https://github.com/calyxir/calyx/issues/1828 with m_to_s_address_channel.group(f"do_{lc_x}_transfer") as do_x_transfer: xREADY = m_to_s_address_channel.this()[f"{x}READY"] - # TODO: Can we simplify this? - # See comments #1846 https://github.com/calyxir/calyx/pull/1846 - # Assert arvalid if it was not previously high - xvalid.in_ = ~xvalid_was_high.out @ 1 + xvalid.in_ = (~(xvalid.out & xREADY) & ~xhandshake_occurred.out) @ 1 # Deassert in the next cycle once it is high - xvalid.in_ = (xvalid.out & xREADY & xvalid_was_high.out) @ 0 + xvalid.in_ = ((xvalid.out & xREADY) | xhandshake_occurred.out) @ 0 xvalid.write_en = 1 - xvalid_was_high.in_ = (~(xvalid.out & xREADY) & ~xvalid_was_high.out) @ 1 - xvalid_was_high.write_en = (~(xvalid.out & xREADY) & ~xvalid_was_high.out) @ 1 + xhandshake_occurred.in_ = (xvalid.out & xREADY) @ 1 + xhandshake_occurred.write_en = (~xhandshake_occurred.out) @ 1 # Drive output signals for transfer - m_to_s_address_channel.this()[f"{x}ADDR"] = base_addr.out + m_to_s_address_channel.this()[f"{x}ADDR"] = curr_addr_axi.out # This is taken from mem size, we assume the databus width is the size # of our memory cell and that width is a power of 2 # TODO(nathanielnrn): convert to binary instead of decimal @@ -163,7 +160,7 @@ def _add_m_to_s_address_channel(prog, mem, prefix: Literal["AW", "AR"]): while_body = [ par( invoke(bt_reg, in_in=0), - invoke(xvalid_was_high, in_in=0), + invoke(xhandshake_occurred, in_in=0), ), do_x_transfer, invoke(xvalid, in_in=0), @@ -206,8 +203,8 @@ def add_read_channel(prog, mem): # according to zipcpu, rready should be registered rready = read_channel.reg("rready", 1) - curr_addr = read_channel.reg("curr_addr", clog2(mem["size"]), is_ref=True) - base_addr = read_channel.reg("base_addr", 64, is_ref=True) + curr_addr_internal_mem = read_channel.reg("curr_addr_internal_mem", clog2(mem["size"]), is_ref=True) + curr_addr_axi = read_channel.reg("curr_addr_axi", 64, is_ref=True) # Registed because RLAST is high with laster transfer, not after # before this we were terminating immediately with # last transfer and not servicing it @@ -266,18 +263,18 @@ def add_read_channel(prog, mem): rready.write_en = 1 # write data we received to mem_ref - mem_ref.addr0 = curr_addr.out + mem_ref.addr0 = curr_addr_internal_mem.out mem_ref.write_data = read_data_reg.out mem_ref.write_en = 1 service_read_transfer.done = mem_ref.done - # creates group that increments curr_addr by 1. 
Creates adder and wires up correctly - curr_addr_incr = read_channel.incr(curr_addr, 1) - # TODO(nathanielnrn): Currently we assume that width is a power of 2. + # creates group that increments curr_addr_internal_mem by 1. Creates adder and wires up correctly + curr_addr_internal_mem_incr = read_channel.incr(curr_addr_internal_mem, 1) + # TODO(nathanielnrn): Currently we assume that width is a power of 2 due to xSIZE. # In the future we should allow for non-power of 2 widths, will need some # splicing for this. # See https://cucapra.slack.com/archives/C05TRBNKY93/p1705587169286609?thread_ts=1705524171.974079&cid=C05TRBNKY93 # noqa: E501 - base_addr_incr = read_channel.incr(base_addr, ceil(mem["width"] / 8)) + curr_addr_axi_incr = read_channel.incr(curr_addr_axi, width_in_bytes(mem["width"])) # Control invoke_n_RLAST = invoke(n_RLAST, in_in=1) @@ -286,7 +283,7 @@ def add_read_channel(prog, mem): invoke_bt_reg, block_transfer, service_read_transfer, - par(curr_addr_incr, base_addr_incr), + par(curr_addr_internal_mem_incr, curr_addr_axi_incr), ] while_n_RLAST = while_(n_RLAST.out, while_body) @@ -296,8 +293,6 @@ def add_read_channel(prog, mem): def add_write_channel(prog, mem): # Inputs/Outputs write_channel = prog.component("m_write_channel") - # We assume idx_size is exactly clog2(len). See comment in #1751 - # https://github.com/calyxir/calyx/issues/1751#issuecomment-1778360566 channel_inputs = [("ARESETn", 1), ("WREADY", 1)] # TODO(nathanielnrn): We currently assume WDATA is the same width as the # memory. This limits throughput many AXI data busses are much wider @@ -323,11 +318,11 @@ def add_write_channel(prog, mem): # according to zipcpu, rready should be registered wvalid = write_channel.reg("wvalid", 1) - wvalid_was_high = write_channel.reg("wvalid_was_high", 1) + w_handshake_occurred = write_channel.reg("w_handshake_occurred", 1) # internal calyx memory indexing - curr_addr = write_channel.reg("curr_addr", clog2(mem["size"]), is_ref=True) + curr_addr_internal_mem = write_channel.reg("curr_addr_internal_mem", clog2(mem["size"]), is_ref=True) # host indexing, must be 64 bits - base_addr = write_channel.reg("base_addr", 64, is_ref=True) + curr_addr_axi = write_channel.reg("curr_addr_axi", 64, is_ref=True) curr_trsnfr_count = write_channel.reg("curr_trsnfr_count", 8) # Number of transfers we want to do in current txn @@ -346,19 +341,19 @@ def add_write_channel(prog, mem): with write_channel.group("service_write_transfer") as service_write_transfer: WREADY = write_channel.this()["WREADY"] - # Assert then deassert. Can maybe getgit right of wvalid_was_high in guard - wvalid.in_ = ~(wvalid.out & WREADY & wvalid_was_high.out) @ 1 - wvalid.in_ = (wvalid.out & WREADY & wvalid_was_high.out) @ 0 + # Assert then deassert. Can maybe getgit right of w_handshake_occurred in guard + wvalid.in_ = (~(wvalid.out & WREADY) & ~w_handshake_occurred.out) @ 1 + wvalid.in_ = ((wvalid.out & WREADY) | w_handshake_occurred.out) @ 0 wvalid.write_en = 1 # Set high when wvalid is high even once # This is just wavlid.in_ guard from above # TODO: confirm this is correct? 
- wvalid_was_high.in_ = ~(wvalid.out & WREADY & wvalid_was_high.out) @ 1 - wvalid_was_high.write_en = ~(wvalid.out & WREADY & wvalid_was_high.out) @ 1 + w_handshake_occurred.in_ = (wvalid.out & WREADY) @ 1 + w_handshake_occurred.write_en = (~w_handshake_occurred.out) @ 1 # Set data output based on intermal memory output - mem_ref.addr0 = curr_addr.out + mem_ref.addr0 = curr_addr_internal_mem.out mem_ref.read_en = 1 write_channel.this()["WDATA"] = mem_ref.read_data @@ -383,29 +378,28 @@ bt_reg.write_en = 1 service_write_transfer.done = bt_reg.out - # creates group that increments curr_addr by 1. # Creates adder and wires up correctly - curr_addr_incr = write_channel.incr(curr_addr, 1) + curr_addr_internal_mem_incr = write_channel.incr(curr_addr_internal_mem, 1) # TODO(nathanielnrn): Currently we assume that width is a power of 2. # In the future we should allow for non-power of 2 widths, will need some # splicing for this. # See https://cucapra.slack.com/archives/C05TRBNKY93/p1705587169286609?thread_ts=1705524171.974079&cid=C05TRBNKY93 # noqa: E501 - base_addr_incr = write_channel.incr(base_addr, ceil(mem["width"] / 8)) + curr_addr_axi_incr = write_channel.incr(curr_addr_axi, ceil(mem["width"] / 8)) curr_trsnfr_count_incr = write_channel.incr(curr_trsnfr_count, 1) # Control - init_curr_addr = invoke(curr_addr, in_in=0) + init_curr_addr_internal_mem = invoke(curr_addr_internal_mem, in_in=0) init_n_finished_last_trnsfr = invoke(n_finished_last_trnsfr, in_in=1) while_n_finished_last_trnsfr_body = [ invoke(bt_reg, in_in=0), service_write_transfer, - par(curr_addr_incr, curr_trsnfr_count_incr, base_addr_incr), + par(curr_addr_internal_mem_incr, curr_trsnfr_count_incr, curr_addr_axi_incr, invoke(w_handshake_occurred, in_in=0)), ] while_n_finished_last_trnsfr = while_( n_finished_last_trnsfr.out, while_n_finished_last_trnsfr_body ) write_channel.control += [ - init_curr_addr, + init_curr_addr_internal_mem, init_n_finished_last_trnsfr, while_n_finished_last_trnsfr, ] @@ -469,13 +463,20 @@ def clog2(x): def build(): prog = Builder() - # add_arread_channel(prog, mems[0]) - # add_awwrite_channel(prog, mems[0]) - # add_read_channel(prog, mems[0]) - # add_write_channel(prog, mems[0]) + check_mems_wellformed(mems) + add_arread_channel(prog, mems[0]) + add_awwrite_channel(prog, mems[0]) + add_read_channel(prog, mems[0]) + add_write_channel(prog, mems[0]) add_bresp_channel(prog, mems[0]) return prog.program +def check_mems_wellformed(mems): + """Checks that the memories from the yxi description are well formed, asserting otherwise.""" + for mem in mems: + assert mem["width"] % 8 == 0, "Width must be a multiple of 8 to allow byte addressing to host" + assert log2(mem["width"]).is_integer(), "Width must be a power of 2 to be correctly described by xSIZE" + assert mem["size"] > 0, "Memory size must be greater than 0" if __name__ == "__main__": build().emit()
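A note on the new well-formedness checks and the address arithmetic used throughout: AXI's AxSIZE field encodes the bytes per transfer as a power of two, and the curr_addr_axi increments above advance by one memory word (width/8 bytes) per beat, hence the 64'd4 constant for the 32-bit memories. A minimal sketch of that arithmetic; width_in_bytes is assumed to behave like the helper referenced in add_read_channel, and axsize is a hypothetical helper introduced here purely for illustration:

    from math import log2

    def width_in_bytes(width_bits: int) -> int:
        # One AXI beat carries one full memory word, so AXI addresses advance by this many bytes.
        assert width_bits % 8 == 0, "byte addressing requires width to be a multiple of 8"
        return width_bits // 8

    def axsize(width_bits: int) -> int:
        # AxSIZE = log2(bytes per beat); this is why the width must also be a power of two.
        assert log2(width_bits).is_integer(), "xSIZE can only describe power-of-two widths"
        return int(log2(width_in_bytes(width_bits)))

    # For the 32-bit memories in this example: 4-byte address steps and AxSIZE = 0b010,
    # matching the 64'd4 increments and ARSIZE/AWSIZE = 3'b010 in axi-combined-calyx.futil.
    assert width_in_bytes(32) == 4 and axsize(32) == 2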