From 0ed827f24517948b184d7f67200e0f5b0fb984c2 Mon Sep 17 00:00:00 2001 From: Ayaka Yorihiro <36107281+ayakayorihiro@users.noreply.github.com> Date: Tue, 27 Aug 2024 17:13:10 -0400 Subject: [PATCH] [Profiling] Multi-component programs (#2269) This PR deals with profiling multi-component programs. It contains: - Modifications to TDCC to produce a single json after _processing the whole program_ (rather than outputting JSON after each component) - A `component-cells` tool that produces information about the user-defined component cells (so, no primitives and no constant cells) in each component. - Modifications to existing profiling scripts to take multi-component programs into account. I also cleaned the scripts up a bit and left more documentation in the comments. The component-cells backend is necessary because the instantiation of a component is used in the VCD, but we need to know the name of the original component to fetch the groups that could execute in that cell. i.e., in [`multi-component.futil`](https://github.com/calyxir/calyx/blob/main/examples/futil/multi-component.futil), we have `id`, which is an instantiation of the `identity()` component. The `go` signal for the `save` group would subsequently be `TOP.TOP.main.id.save_go...`. So, we need some way of relating that there is an `id` cell that instantiates `identity` within the `main` component. Any and all feedback would be appreciated :) Additionally, if anyone has better names for `component-cells` (or the labels in the JSON, in the Usage section below), it would be very helpful! (Tagging @ekiwi here since he wasn't in the list of suggested reviewers) We are now in a nice place where we can start profiling (non-optimized) Calyx programs! My next step is to try the profiler on some "real-life" programs (maybe the benchmarks on the performance dashboard). 
## Usage for `component-cells` tool The tool lives in `tools/component_cells/`, so to run: ex) ``` cargo run --manifest-path tools/component_cells/Cargo.toml examples/futil/multi-component.futil ``` will produce ``` [ { "component": "main", "is_main_component": true, "cell_info": [ { "cell_name": "id", "component_name": "identity" } ] }, { "component": "identity", "is_main_component": false, "cell_info": [] } ] ``` indicating that (1) `main` is the main/entry point component, (2) `main` contains a cell named `id` that instantiates the `identity` component, (3) the `identity` component doesn't have any (non-primitive, non-constant) cells. --- Cargo.lock | 26 +++ Cargo.toml | 4 +- .../src/passes/top_down_compile_control.rs | 36 +-- tools/component_cells/Cargo.toml | 28 +++ tools/component_cells/src/main.rs | 111 ++++++++++ tools/component_groups/Cargo.toml | 28 +++ tools/component_groups/src/main.rs | 100 +++++++++ tools/profiler/get-profile-counts-info.sh | 21 +- tools/profiler/parse-vcd.py | 207 ++++++++++++------ 9 files changed, 478 insertions(+), 83 deletions(-) create mode 100644 tools/component_cells/Cargo.toml create mode 100644 tools/component_cells/src/main.rs create mode 100644 tools/component_groups/Cargo.toml create mode 100644 tools/component_groups/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 4ecd484b95..6f58dd2a51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -716,6 +716,32 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "component_cells" +version = "0.7.1" +dependencies = [ + "argh", + "calyx-frontend", + "calyx-ir", + "calyx-opt", + "calyx-utils", + "serde", + "serde_json", +] + +[[package]] +name = "component_groups" +version = "0.7.1" +dependencies = [ + "argh", + "calyx-frontend", + "calyx-ir", + "calyx-opt", + "calyx-utils", + "serde", + "serde_json", +] + [[package]] name = "console" version = "0.15.8" 
diff --git a/Cargo.toml b/Cargo.toml index 94ca7bf447..aa8b637a4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,8 +16,10 @@ members = [ "fud2/fud-core", "tools/data-conversion", "tools/btor2/btor2i", - "tools/cider-data-converter", "tools/calyx-pass-explorer", + "tools/cider-data-converter", + "tools/component_cells", + "tools/component_groups", "tools/yxi", "tools/calyx-writer", ] diff --git a/calyx-opt/src/passes/top_down_compile_control.rs b/calyx-opt/src/passes/top_down_compile_control.rs index 927bb188dd..de1634da6e 100644 --- a/calyx-opt/src/passes/top_down_compile_control.rs +++ b/calyx-opt/src/passes/top_down_compile_control.rs @@ -1215,23 +1215,17 @@ impl Named for TopDownCompileControl { } /// Helper function to emit profiling information when the control consists of a single group. -fn emit_single_enable( +fn extract_single_enable( con: &mut ir::Control, component: Id, - json_out_file: &OutputFile, -) { +) -> Option { if let ir::Control::Enable(enable) = con { - let mut profiling_info_set: HashSet = HashSet::new(); - profiling_info_set.insert(ProfilingInfo::SingleEnable( - SingleEnableInfo { - component, - group: enable.group.borrow().name(), - }, - )); - let _ = serde_json::to_writer_pretty( - json_out_file.get_write(), - &profiling_info_set, - ); + return Some(SingleEnableInfo { + component, + group: enable.group.borrow().name(), + }); + } else { + None } } @@ -1244,8 +1238,11 @@ impl Visitor for TopDownCompileControl { ) -> VisResult { let mut con = comp.control.borrow_mut(); if matches!(*con, ir::Control::Empty(..) 
| ir::Control::Enable(..)) { - if let Some(json_out_file) = &self.dump_fsm_json { - emit_single_enable(&mut con, comp.name, json_out_file); + if let Some(enable_info) = + extract_single_enable(&mut con, comp.name) + { + self.fsm_groups + .insert(ProfilingInfo::SingleEnable(enable_info)); } return Ok(Action::Stop); } @@ -1459,12 +1456,17 @@ impl Visitor for TopDownCompileControl { let comp_group = sch.realize_schedule(self.dump_fsm, &mut self.fsm_groups, fsm_impl); + Ok(Action::change(ir::Control::enable(comp_group))) + } + + /// If requested, emit FSM json after all components are processed + fn finish_context(&mut self, _ctx: &mut calyx_ir::Context) -> VisResult { if let Some(json_out_file) = &self.dump_fsm_json { let _ = serde_json::to_writer_pretty( json_out_file.get_write(), &self.fsm_groups, ); } - Ok(Action::change(ir::Control::enable(comp_group))) + Ok(Action::Continue) } } diff --git a/tools/component_cells/Cargo.toml b/tools/component_cells/Cargo.toml new file mode 100644 index 0000000000..1a91750f19 --- /dev/null +++ b/tools/component_cells/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "component_cells" +authors.workspace = true +license-file.workspace = true +keywords.workspace = true +repository.workspace = true +readme.workspace = true +description.workspace = true +categories.workspace = true +homepage.workspace = true +edition.workspace = true +version.workspace = true +rust-version.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +serde.workspace = true +argh.workspace = true +serde_json = "1.0.79" + +calyx-utils = { path = "../../calyx-utils" } +calyx-frontend = { path = "../../calyx-frontend" } +calyx-opt = { path = "../../calyx-opt" } + +[dependencies.calyx-ir] +path = "../../calyx-ir" +features = ["serialize"] diff --git a/tools/component_cells/src/main.rs b/tools/component_cells/src/main.rs new file mode 100644 index 0000000000..85c8f0eaec --- /dev/null +++ 
b/tools/component_cells/src/main.rs @@ -0,0 +1,111 @@ +use argh::FromArgs; +use calyx_frontend as frontend; +use calyx_ir::{self as ir, Id}; +use calyx_utils::{CalyxResult, OutputFile}; +use serde::Serialize; +use std::path::{Path, PathBuf}; +use std::{collections::HashSet, io}; + +#[derive(FromArgs)] +/// Path for library and path for file to read from +struct Args { + /// file path to read data from + #[argh(positional, from_str_fn(read_path))] + file_path: Option, + + /// library path + #[argh(option, short = 'l', default = "Path::new(\".\").into()")] + pub lib_path: PathBuf, + + /// output file + #[argh(option, short = 'o', default = "OutputFile::Stdout")] + pub output: OutputFile, +} + +fn read_path(path: &str) -> Result { + Ok(Path::new(path).into()) +} + +#[derive(Default)] +pub struct ComponentCellsBackend; + +fn main() -> CalyxResult<()> { + let p: Args = argh::from_env(); + + let ws = frontend::Workspace::construct(&p.file_path, &p.lib_path)?; + + let ctx: ir::Context = ir::from_ast::ast_to_ir(ws)?; + + let main_comp = ctx.entrypoint(); + + let mut component_info: HashSet = HashSet::new(); + gen_component_info(&ctx, main_comp, true, &mut component_info); + write_json(component_info.clone(), p.output)?; + Ok(()) +} + +fn id_serialize_passthrough(id: &Id, ser: S) -> Result +where + S: serde::Serializer, +{ + id.to_string().serialize(ser) +} + +#[derive(PartialEq, Eq, Hash, Clone, Serialize)] +struct ComponentInfo { + #[serde(serialize_with = "id_serialize_passthrough")] + pub component: Id, + pub is_main_component: bool, + pub cell_info: Vec, +} + +#[derive(PartialEq, Eq, Hash, Clone, Serialize)] +struct ComponentCellInfo { + #[serde(serialize_with = "id_serialize_passthrough")] + pub cell_name: Id, + #[serde(serialize_with = "id_serialize_passthrough")] + pub component_name: Id, +} + +/// Accumulates a set of components to the cells that they contain +/// in the program with entrypoint `main_comp`. 
The contained cells +/// are denoted with the name of the cell and the name of the component +/// the cell is associated with. +fn gen_component_info( + ctx: &ir::Context, + comp: &ir::Component, + is_main_comp: bool, + component_info: &mut HashSet, +) { + let mut curr_comp_info = ComponentInfo { + component: comp.name, + is_main_component: is_main_comp, + cell_info: Vec::new(), + }; + for cell in comp.cells.iter() { + let cell_ref = cell.borrow(); + if let ir::CellType::Component { name } = cell_ref.prototype { + curr_comp_info.cell_info.push(ComponentCellInfo { + cell_name: cell_ref.name(), + component_name: name, + }); + let component = ctx + .components + .iter() + .find(|comp| comp.name == name) + .unwrap(); + gen_component_info(ctx, component, false, component_info); + } + } + component_info.insert(curr_comp_info); +} + +/// Write the collected set of component information to a JSON file. +fn write_json( + component_info: HashSet, + file: OutputFile, +) -> Result<(), io::Error> { + let created_vec: Vec = component_info.into_iter().collect(); + serde_json::to_writer_pretty(file.get_write(), &created_vec)?; + Ok(()) +} diff --git a/tools/component_groups/Cargo.toml b/tools/component_groups/Cargo.toml new file mode 100644 index 0000000000..cbc2c2abb3 --- /dev/null +++ b/tools/component_groups/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "component_groups" +authors.workspace = true +license-file.workspace = true +keywords.workspace = true +repository.workspace = true +readme.workspace = true +description.workspace = true +categories.workspace = true +homepage.workspace = true +edition.workspace = true +version.workspace = true +rust-version.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +serde.workspace = true +argh.workspace = true +serde_json = "1.0.79" + +calyx-utils = { path = "../../calyx-utils" } +calyx-frontend = { path = "../../calyx-frontend" } +calyx-opt = { path = 
"../../calyx-opt" } + +[dependencies.calyx-ir] +path = "../../calyx-ir" +features = ["serialize"] diff --git a/tools/component_groups/src/main.rs b/tools/component_groups/src/main.rs new file mode 100644 index 0000000000..79f9c33105 --- /dev/null +++ b/tools/component_groups/src/main.rs @@ -0,0 +1,100 @@ +use argh::FromArgs; +use calyx_frontend as frontend; +use calyx_ir::{self as ir, Id}; +use calyx_utils::{CalyxResult, OutputFile}; +use serde::Serialize; +use std::path::{Path, PathBuf}; +use std::{collections::HashSet, io}; + +#[derive(FromArgs)] +/// Path for library and path for file to read from +struct Args { + /// file path to read data from + #[argh(positional, from_str_fn(read_path))] + file_path: Option, + + /// library path + #[argh(option, short = 'l', default = "Path::new(\".\").into()")] + pub lib_path: PathBuf, + + /// output file + #[argh(option, short = 'o', default = "OutputFile::Stdout")] + pub output: OutputFile, +} + +fn read_path(path: &str) -> Result { + Ok(Path::new(path).into()) +} + +#[derive(Default)] +pub struct ComponentCellsBackend; + +fn main() -> CalyxResult<()> { + let p: Args = argh::from_env(); + + let ws = frontend::Workspace::construct(&p.file_path, &p.lib_path)?; + + let ctx: ir::Context = ir::from_ast::ast_to_ir(ws)?; + + let main_comp = ctx.entrypoint(); + + let mut component_info: HashSet = HashSet::new(); + gen_component_info(&ctx, main_comp, &mut component_info); + write_json(component_info.clone(), p.output)?; + Ok(()) +} + +fn id_serialize_passthrough(id: &Id, ser: S) -> Result +where + S: serde::Serializer, +{ + id.to_string().serialize(ser) +} + +#[derive(PartialEq, Eq, Hash, Clone, Serialize)] +struct ComponentGroupInfo { + #[serde(serialize_with = "id_serialize_passthrough")] + pub component: Id, + pub groups: Vec, +} + +/// Accumulates a set of components to the cells that they contain +/// in the program with entrypoint `main_comp`. 
The contained cells +/// are denoted with the name of the cell and the name of the component +/// the cell is associated with. +fn gen_component_info( + ctx: &ir::Context, + comp: &ir::Component, + component_info: &mut HashSet, +) { + let mut curr_comp_info = ComponentGroupInfo { + component: comp.name, + groups: Vec::new(), + }; + for group_wrapped in comp.get_groups() { + curr_comp_info.groups.push(group_wrapped.borrow().name()); + } + for cell in comp.cells.iter() { + let cell_ref = cell.borrow(); + if let ir::CellType::Component { name } = cell_ref.prototype { + let component = ctx + .components + .iter() + .find(|comp| comp.name == name) + .unwrap(); + gen_component_info(ctx, component, component_info); + } + } + component_info.insert(curr_comp_info); +} + +/// Write the collected set of component information to a JSON file. +fn write_json( + component_info: HashSet, + file: OutputFile, +) -> Result<(), io::Error> { + let created_vec: Vec = + component_info.into_iter().collect(); + serde_json::to_writer_pretty(file.get_write(), &created_vec)?; + Ok(()) +} diff --git a/tools/profiler/get-profile-counts-info.sh b/tools/profiler/get-profile-counts-info.sh index 5ab197207c..466ba08df7 100644 --- a/tools/profiler/get-profile-counts-info.sh +++ b/tools/profiler/get-profile-counts-info.sh @@ -1,7 +1,7 @@ # Wrapper script for running TDCC, running simulation, and obtaining cycle counts information -if [ $# -ne 2 ]; then - echo "USAGE: bash $0 INPUT_FILE SIM_DATA_JSON" +if [ $# -lt 2 ]; then + echo "USAGE: bash $0 INPUT_FILE SIM_DATA_JSON [OUT_CSV]" exit fi @@ -11,14 +11,19 @@ CALYX_DIR=$( dirname $( dirname ${SCRIPT_DIR} ) ) TMP_DIR=${SCRIPT_DIR}/tmp TMP_VERILOG=${TMP_DIR}/no-opt-verilog.sv FSM_JSON=${TMP_DIR}/fsm.json +CELLS_JSON=${TMP_DIR}/cells.json +OUT_CSV=${TMP_DIR}/summary.csv VCD_FILE=${TMP_DIR}/trace-info.vcd LOGS_DIR=${SCRIPT_DIR}/logs mkdir -p ${TMP_DIR} ${LOGS_DIR} -rm -f ${TMP_VERILOG} ${FSM_JSON} +rm -f ${TMP_DIR}/* ${LOGS_DIR}/* # remove data from last 
run INPUT_FILE=$1 SIM_DATA_JSON=$2 +if [ $# -eq 3 ]; then + OUT_CSV=$3 +fi # Run TDCC to get the FSM info echo "[${SCRIPT_NAME}] Obtaining FSM info from TDCC" @@ -29,6 +34,14 @@ echo "[${SCRIPT_NAME}] Obtaining FSM info from TDCC" set +o xtrace ) &> ${LOGS_DIR}/gol-tdcc +# Run component-cells backend to get cell information +echo "[${SCRIPT_NAME}] Obtaining cell information from component-cells backend" +( + cd ${CALYX_DIR} + set -o xtrace + cargo run --manifest-path tools/component_cells/Cargo.toml ${INPUT_FILE} -o ${CELLS_JSON} +) &> ${LOGS_DIR}/gol-cells + # Run simuation to get VCD echo "[${SCRIPT_NAME}] Obtaining VCD file via simulation" ( @@ -40,5 +53,5 @@ echo "[${SCRIPT_NAME}] Obtaining VCD file via simulation" # Run script to get cycle level counts echo "[${SCRIPT_NAME}] Using FSM info and VCD file to obtain cycle level counts" ( - python3 ${SCRIPT_DIR}/parse-vcd.py ${VCD_FILE} ${FSM_JSON} + python3 ${SCRIPT_DIR}/parse-vcd.py ${VCD_FILE} ${FSM_JSON} ${CELLS_JSON} ${OUT_CSV} ) # &> ${LOGS_DIR}/gol-process diff --git a/tools/profiler/parse-vcd.py b/tools/profiler/parse-vcd.py index 449495e2ff..6c4361b11d 100644 --- a/tools/profiler/parse-vcd.py +++ b/tools/profiler/parse-vcd.py @@ -1,3 +1,4 @@ +import csv import sys import json import vcdvcd @@ -17,31 +18,45 @@ def __init__(self, name, fsm_name=None, fsm_values=None, tdcc_group_name=None): self.tdcc_group = tdcc_group_name def __repr__ (self): + segments_str = "" + for segment in self.closed_segments: + if (segments_str != ""): + segments_str += ", " + segments_str += f"[{segment['start']}, {segment['end']})" return (f"Group {self.name}:\n" + f"\tFSM name: {self.fsm_name}\n" + f"\tFSM state ids: {self.fsm_values}\n" + - f"\tTotal cycles: {self.total_cycles}\n" + - f"\tSegments: {self.closed_segments}\n" + f"\tTotal cycles: {self.total_cycles}\n" + + f"\t# of times active: {len(self.closed_segments)}\n" + + f"\tSegments: {segments_str}\n" ) - + def is_active(self): return self.current_segment is not None - + 
def start_clock_cycle(self): if self.current_segment is None: return -1 else: return self.current_segment["start"] - - def summary(self): + + def compute_average_cycles(self): if len(self.closed_segments) == 0: - average_cycles = 0 + return 0 else: - average_cycles = self.total_cycles / len(self.closed_segments) + return round(self.total_cycles / len(self.closed_segments), 2) + + def emit_csv_data(self): + return {"name": self.name, + "total-cycles" : self.total_cycles, + "times-active" : len(self.closed_segments), + "avg" : self.compute_average_cycles()} + + def summary(self): return (f"Group {self.name} Summary:\n" + f"\tTotal cycles: {self.total_cycles}\n" + f"\t# of times active: {len(self.closed_segments)}\n" + - f"\tAvg runtime: {average_cycles}\n" + f"\tAvg runtime: {self.compute_average_cycles()}\n" ) def start_new_segment(self, curr_clock_cycle): @@ -61,12 +76,18 @@ def end_current_segment(self, curr_clock_cycle): class VCDConverter(vcdvcd.StreamParserCallbacks): - def __init__(self, fsms, single_enable_names, tdcc_group_names, groups_to_fsms): + def __init__(self, fsms, single_enable_names, tdcc_group_names, fsm_group_maps, cells_to_components, main_component): super().__init__() + self.main_component = main_component self.fsms = fsms self.single_enable_names = single_enable_names - self.tdcc_group_to_values = {tdcc_group_name : [] for tdcc_group_name in tdcc_group_names} + self.cells_to_components = cells_to_components + # Recording the first cycle when the TDCC group became active + # FIXME: remove after fixing enddefinitions + self.tdcc_group_active_cycle = {tdcc_group_name : -1 for tdcc_group_name in tdcc_group_names} self.tdcc_group_to_go_id = {tdcc_group_name : None for tdcc_group_name in tdcc_group_names} + # self.tdcc_group_active_cycle = {} # filled in enddefinitions + # self.tdcc_group_to_go_id = {} # filled in enddefinitions self.profiling_info = {} self.signal_to_signal_id = {fsm : None for fsm in fsms} self.signal_to_curr_value = {fsm : 0 for 
fsm in fsms} @@ -75,23 +96,21 @@ def __init__(self, fsms, single_enable_names, tdcc_group_names, groups_to_fsms): self.main_go_on_time = None self.clock_id = None self.clock_cycle_acc = -1 # The 0th clock cycle will be 0. - for group in groups_to_fsms: - self.profiling_info[group] = ProfilingInfo(group, groups_to_fsms[group]["fsm"], groups_to_fsms[group]["ids"], groups_to_fsms[group]["tdcc-group-name"]) + for group in fsm_group_maps: + self.profiling_info[group] = ProfilingInfo(group, fsm_group_maps[group]["fsm"], fsm_group_maps[group]["ids"], fsm_group_maps[group]["tdcc-group-name"]) for single_enable_group in single_enable_names: self.profiling_info[single_enable_group] = ProfilingInfo(single_enable_group) self.signal_to_curr_value[f"{single_enable_group}_go"] = -1 self.signal_to_curr_value[f"{single_enable_group}_done"] = -1 - + def enddefinitions(self, vcd, signals, cur_sig_vals): # convert references to list and sort by name refs = [(k, v) for k, v in vcd.references_to_ids.items()] refs = sorted(refs, key=lambda e: e[0]) names = [remove_size_from_name(e[0]) for e in refs] + self.main_go_id = vcd.references_to_ids[f"{self.main_component}.go"] - # FIXME: When we get to profiling multi-component programs, we want to search for each component's go signal - self.main_go_id = vcd.references_to_ids["TOP.TOP.main.go"] - - clock_name = "TOP.TOP.main.clk" + clock_name = f"{self.main_component}.clk" if clock_name in names: self.clock_id = vcd.references_to_ids[clock_name] else: @@ -101,15 +120,15 @@ def enddefinitions(self, vcd, signals, cur_sig_vals): for name, id in refs: # We may want to optimize these nested for loops for tdcc_group in self.tdcc_group_to_go_id: - if f"{tdcc_group}_go.out[" in name: + if name.startswith(f"{tdcc_group}_go.out["): self.tdcc_group_to_go_id[tdcc_group] = id for fsm in self.fsms: - if f"{fsm}.out[" in name: + if name.startswith(f"{fsm}.out["): self.signal_to_signal_id[fsm] = id for single_enable_group in self.single_enable_names: - if 
f"{single_enable_group}_go.out[" in name: + if name.startswith(f"{single_enable_group}_go.out["): self.signal_to_signal_id[f"{single_enable_group}_go"] = id - if f"{single_enable_group}_done.out[" in name: + if name.startswith(f"{single_enable_group}_done.out["): self.signal_to_signal_id[f"{single_enable_group}_done"] = id def value( @@ -131,85 +150,151 @@ def value( self.clock_cycle_acc += 1 # Update TDCC group signals first for (tdcc_group_name, tdcc_signal_id) in self.tdcc_group_to_go_id.items(): - self.tdcc_group_to_values[tdcc_group_name].append(int(cur_sig_vals[tdcc_signal_id], 2)) + tdcc_group_is_active = int(cur_sig_vals[tdcc_signal_id], 2) == 1 + if self.tdcc_group_active_cycle[tdcc_group_name] == -1 and tdcc_group_is_active: # the tdcc group just became active + self.tdcc_group_active_cycle[tdcc_group_name] = self.clock_cycle_acc + elif self.tdcc_group_active_cycle[tdcc_group_name] > -1 and not tdcc_group_is_active: + self.tdcc_group_active_cycle[tdcc_group_name] = -1 # for each signal that we want to check, we need to sample the values for (signal_name, signal_id) in self.signal_to_signal_id.items(): + signal_curr_value = self.signal_to_curr_value[signal_name] signal_new_value = int(cur_sig_vals[signal_id], 2) # signal value at this point in time - fsm_curr_value = self.signal_to_curr_value[signal_name] if "_go" in signal_name and signal_new_value == 1: # start of single enable group group = "_".join(signal_name.split("_")[0:-1]) curr_group_info = self.profiling_info[group] # We want to start a segment regardless of whether it changed - if self.main_go_on_time == time or signal_new_value != fsm_curr_value: + if self.main_go_on_time == time or signal_new_value != signal_curr_value: curr_group_info.start_new_segment(self.clock_cycle_acc) elif "_done" in signal_name and signal_new_value == 1: # end of single enable group group = "_".join(signal_name.split("_")[0:-1]) self.profiling_info[group].end_current_segment(self.clock_cycle_acc) elif "fsm" in 
signal_name: - next_group = self.fsms[signal_name][signal_new_value] - tdcc_group_values = self.tdcc_group_to_values[self.profiling_info[next_group].tdcc_group] - # if the FSM value changed, then we must end the previous group (regardless of whether we can start the next group) - if signal_new_value != fsm_curr_value and fsm_curr_value != -1: - prev_group = self.fsms[signal_name][fsm_curr_value] - self.profiling_info[prev_group].end_current_segment(self.clock_cycle_acc) - # if the FSM value didn't change but the TDCC group just got enabled, then we must start the next group - if signal_new_value == fsm_curr_value and (tdcc_group_values[-1] == 1 and (len(tdcc_group_values) == 1 or tdcc_group_values[-2] == 0)): - self.profiling_info[next_group].start_new_segment(self.clock_cycle_acc) - if tdcc_group_values[-1] == 1 and signal_new_value != fsm_curr_value: - self.profiling_info[next_group].start_new_segment(self.clock_cycle_acc) + # Workarounds because the value 0 may not correspond to a group + if signal_curr_value in self.fsms[signal_name]: + # group that is recorded to be active last cycle. 
If the signal changed then it would be the previous group + curr_group = self.fsms[signal_name][signal_curr_value] + # if the FSM value changed, then we must end the current group (regardless of whether we can start the next group) + if signal_new_value != signal_curr_value and signal_curr_value != -1: + self.profiling_info[curr_group].end_current_segment(self.clock_cycle_acc) + if signal_new_value in self.fsms[signal_name]: + next_group = self.fsms[signal_name][signal_new_value] + tdcc_group_active_cycle = self.tdcc_group_active_cycle[self.profiling_info[next_group].tdcc_group] + if tdcc_group_active_cycle == -1: # If the TDCC group is not active, then no segments should start + continue + # if the FSM value didn't change but the TDCC group just got enabled, then we must start the next group + if signal_new_value == signal_curr_value and tdcc_group_active_cycle == self.clock_cycle_acc: + self.profiling_info[next_group].start_new_segment(self.clock_cycle_acc) + elif signal_new_value != signal_curr_value: # otherwise we start a new segment when the signal changed + self.profiling_info[next_group].start_new_segment(self.clock_cycle_acc) # Update internal signal value self.signal_to_curr_value[signal_name] = signal_new_value -def remap_tdcc_json(json_file): +# Generates a list of all of the components to potential cell names +# prefix is the cell's "path" (ex. 
for a cell "my_cell" defined in "main", the prefix would be "TOP.toplevel.main") +# The initial value of curr_component should be the top level/main component +def build_components_to_cells(prefix, curr_component, cells_to_components, components_to_cells): + # prefix += f".{curr_component}" + for (cell, cell_component) in cells_to_components[curr_component].items(): + if cell_component not in components_to_cells: + components_to_cells[cell_component] = [f"{prefix}.{cell}"] + else: + components_to_cells[cell_component].append(f"{prefix}.{cell}") + build_components_to_cells(prefix + f".{cell}", cell_component, cells_to_components, components_to_cells) + +# Reads json generated by component-cells backend to produce a mapping from all components +# to cell names they could have. +def read_component_cell_names_json(json_file): + cell_json = json.load(open(json_file)) + # For each component, contains a map from each cell name to its corresponding component + # component name --> { cell name --> component name} + cells_to_components = {} + main_component = "" + for curr_component_entry in cell_json: + cell_map = {} # mapping cell names to component names for all cells in the current component + if curr_component_entry["is_main_component"]: + main_component = curr_component_entry["component"] + for cell_info in curr_component_entry["cell_info"]: + cell_map[cell_info["cell_name"]] = cell_info["component_name"] + cells_to_components[curr_component_entry["component"]] = cell_map + full_main_component = f"TOP.toplevel.{main_component}" + components_to_cells = {main_component : [full_main_component]} # come up with a better name for this + build_components_to_cells(full_main_component, main_component, cells_to_components, components_to_cells) + return full_main_component, components_to_cells + +# Reads json generated by TDCC (via dump-fsm-json option) to produce +def remap_tdcc_json(json_file, components_to_cells): profiling_infos = json.load(open(json_file)) - 
single_enable_names = set() - tdcc_group_names = set() - groups_to_fsms = {} + single_enable_names = set() # groups that aren't managed by FSMs + tdcc_group_names = set() # TDCC-generated groups that manage control flow using FSMs + fsm_group_maps = {} # fsm-managed groups info (fsm register, TDCC group that manages fsm, id of group within fsm) fsms = {} # Remapping of JSON data for easy access for profiling_info in profiling_infos: if "Fsm" in profiling_info: fsm = profiling_info["Fsm"] - fsm_name = fsm["fsm"] - fsms[fsm_name] = {} - for state in fsm["states"]: - fsms[fsm_name][state["id"]] = state["group"] - group_name = state["group"] - if group_name not in groups_to_fsms: - groups_to_fsms[group_name] = {"fsm": fsm_name, "tdcc-group-name": fsm["group"], "ids": [state["id"]]} - tdcc_group_names.add(fsm["group"]) # Hack: Keep track of the TDCC group for use later - else: - groups_to_fsms[group_name]["ids"].append(state["id"]) + # create entries for all possible cells of component + for cell in components_to_cells[fsm["component"]]: + fsm_name = cell + "." + fsm["fsm"] + fsms[fsm_name] = {} + for state in fsm["states"]: + group_name = cell + "." + state["group"] + fsms[fsm_name][state["id"]] = group_name + tdcc_group = cell + "." + fsm["group"] + if group_name not in fsm_group_maps: + fsm_group_maps[group_name] = {"fsm": fsm_name, "tdcc-group-name": tdcc_group, "ids": [state["id"]]} + tdcc_group_names.add(tdcc_group) # Keep track of the TDCC group to figure out when first group starts + else: + fsm_group_maps[group_name]["ids"].append(state["id"]) else: - single_enable_names.add(profiling_info["SingleEnable"]["group"]) - - return fsms, single_enable_names, tdcc_group_names, groups_to_fsms + for cell in components_to_cells[profiling_info["SingleEnable"]["component"]]: # get all possibilities of cells + single_enable_names.add(cell + "." 
+ profiling_info["SingleEnable"]["group"]) + return fsms, single_enable_names, tdcc_group_names, fsm_group_maps -def main(vcd_filename, json_file): - fsms, single_enable_names, tdcc_group_names, groups_to_fsms = remap_tdcc_json(json_file) - converter = VCDConverter(fsms, single_enable_names, tdcc_group_names, groups_to_fsms) +def main(vcd_filename, groups_json_file, cells_json_file, out_csv): + main_component, components_to_cells = read_component_cell_names_json(cells_json_file) + fsms, single_enable_names, tdcc_group_names, fsm_group_maps = remap_tdcc_json(groups_json_file, components_to_cells) + converter = VCDConverter(fsms, single_enable_names, tdcc_group_names, fsm_group_maps, components_to_cells, main_component) vcdvcd.VCDVCD(vcd_filename, callbacks=converter, store_tvs=False) print(f"Total clock cycles: {converter.clock_cycle_acc}") print("=====SUMMARY=====") print() - for group_info in filter(lambda group : not group.name.startswith("tdcc") and not group.name.endswith("END"), converter.profiling_info.values()): + groups_to_emit = list(filter(lambda group : not group.name.startswith("tdcc") and not group.name.endswith("END"), converter.profiling_info.values())) + groups_to_emit.sort(key=lambda x : x.name) # to preserve stability + groups_to_emit.sort(key=lambda x : x.total_cycles, reverse=True) + csv_acc = [] + for group_info in groups_to_emit: + csv_acc.append(group_info.emit_csv_data()) print(group_info.summary()) print("=====DUMP=====") print() - for group_info in filter(lambda group : not group.name.startswith("tdcc") and not group.name.endswith("END"), converter.profiling_info.values()): + for group_info in groups_to_emit: print(group_info) + # emitting a CSV file for easier eyeballing + print(f"Writing summary to {out_csv}") + with open(out_csv, 'w') as csvfile: + csv_keys = ["name", "total-cycles", "times-active", "avg"] + csv_acc.append({ "name": "TOTAL", "total-cycles": converter.clock_cycle_acc, "times-active": "-", "avg": "-"}) + writer = 
csv.DictWriter(csvfile, csv_keys, lineterminator="\n") + writer.writeheader() + writer.writerows(csv_acc) if __name__ == "__main__": - if len(sys.argv) > 2: + if len(sys.argv) > 4: vcd_filename = sys.argv[1] fsm_json = sys.argv[2] - main(vcd_filename, fsm_json) + cells_json = sys.argv[3] + out_csv = sys.argv[4] + main(vcd_filename, fsm_json, cells_json, out_csv) else: args_desc = [ "VCD_FILE", - "TDCC_JSON" + "TDCC_JSON", + "CELLS_JSON", + "SUMMARY_OUT_CSV" ] print(f"Usage: {sys.argv[0]} {' '.join(args_desc)}") + print("TDCC_JSON: Run Calyx with `tdcc:dump-fsm-json` option") + print("CELLS_JSON: Run Calyx with `component-cells` backend") sys.exit(-1)