From acb972954091d14bf91797383ebf0017810a4168 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Fri, 9 Jun 2023 10:00:56 -0500
Subject: [PATCH 01/81] Fix VGPR issue (#139)

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/utils/perfagg.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index e965b784c..651bcb86d 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -127,11 +127,17 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
         "wgr": [col for col in df.columns if "wgr" in col],
         "lds": [col for col in df.columns if "lds" in col],
         "scr": [col for col in df.columns if "scr" in col],
-        "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col],
-        "accum_vgpr": [col for col in df.columns if "accum_vgpr" in col],
         "spgr": [col for col in df.columns if "sgpr" in col],
     }
+    # Check for vgpr counter in ROCm < 5.3
+    if "vgpr" in df.columns:
+        duplicate_cols["vgpr"] = [col for col in df.columns if "vgpr" in col]
+    # Check for vgpr counter in ROCm >= 5.3
+    else:
+        duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col]
+        duplicate_cols["accum_vgpr"] =  [col for col in df.columns if "accum_vgpr" in col]
     for key, cols in duplicate_cols.items():
+        print("Key is ", key)
         _df = df[cols]
         if not test_df_column_equality(_df):
             msg = (

From 5f6c776170f01bd62c4eac16a0ec4257583c32c3 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Fri, 9 Jun 2023 10:01:37 -0500
Subject: [PATCH 02/81] Omniperf rocomni changes

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/omniperf_analyze.py | 38 +++++++++++++++-------
 src/omniperf_analyze/utils/parser.py     | 41 ++++++++++++++++++++++--
 src/omniperf_analyze/utils/schema.py     |  2 ++
 3 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index 58991e8b3..c15181c6f 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -47,36 +47,50 @@
 from omniperf_analyze.utils.gui_components.roofline import get_roofline
 
 
-def initialize_run(args, normalization_filter=None):
-    import pandas as pd
-    from collections import OrderedDict
+################################################
+# Helper Functions
+################################################
+def generate_configs(config_dir, list_kernels, filter_metrics):
     from omniperf_analyze.utils import schema
-    from tabulate import tabulate
 
-    # Fixme: cur_root.parent.joinpath('soc_params')
-    soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
-    soc_spec_df = file_io.load_soc_params(soc_params_dir)
-
-    single_panel_config = file_io.is_single_panel_config(Path(args.config_dir))
+    single_panel_config = file_io.is_single_panel_config(Path(config_dir))
     global archConfigs
     archConfigs = {}
     for arch in file_io.supported_arch.keys():
         ac = schema.ArchConfig()
-        if args.list_kernels:
+        if list_kernels:
             ac.panel_configs = file_io.top_stats_build_in_config
         else:
             arch_panel_config = (
-                args.config_dir if single_panel_config else args.config_dir.joinpath(arch)
+                config_dir if single_panel_config else config_dir.joinpath(arch)
             )
             ac.panel_configs = file_io.load_panel_configs(arch_panel_config)
 
         # TODO: filter_metrics should/might be one per arch
         # print(ac)
 
-        parser.build_dfs(ac, args.filter_metrics)
+        parser.build_dfs(ac, filter_metrics)
 
         archConfigs[arch] = ac
 
+    return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin
+
+
+################################################
+# Core Functions
+################################################
+def initialize_run(args, normalization_filter=None):
+    import pandas as pd
+    from collections import OrderedDict
+    from tabulate import tabulate
+    from omniperf_analyze.utils import schema
+
+    # Fixme: cur_root.parent.joinpath('soc_params')
+    soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
+    soc_spec_df = file_io.load_soc_params(soc_params_dir)
+
+    generate_configs(args.config_dir, args.list_kernels, args.filter_metrics)
+
     if args.list_metrics in file_io.supported_arch.keys():
         print(
             tabulate(
diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py
index d11cbbbfc..5fb03c39a 100644
--- a/src/omniperf_analyze/utils/parser.py
+++ b/src/omniperf_analyze/utils/parser.py
@@ -320,6 +320,26 @@ def update_normUnit_string(equation, unit):
         str(equation),
     ).capitalize()
 
+def gen_counter_list(formula):
+    function_filter = {"MIN": None, "MAX": None, "AVG": None, "ROUND": None, "TO_INT": None, "GB": None, "STD": None, "GFLOP": None, "GOP": None, "OP": None, "CU": None, "NC": None, "UC": None, "CC": None, "RW": None, "GIOP": None}
+
+    counters = []
+    if not isinstance(formula,str):
+        return counters
+    try:
+        tree = ast.parse(
+            formula
+            .replace("$normUnit", "SQ_WAVES")
+            .replace("$denom", "SQ_WAVES")
+            .replace("$","")
+        )
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Name) and node.id.rstrip("_sum").isupper() and node.id not in function_filter:
+                counters.append(node.id.rstrip("_sum"))
+    except:
+        pass
+    return counters
+            
 
 def build_dfs(archConfigs, filter_metrics):
     """
@@ -338,6 +358,7 @@ def build_dfs(archConfigs, filter_metrics):
     d = {}
     metric_list = {}
     dfs_type = {}
+    metric_counters = {}
     for panel_id, panel in archConfigs.panel_configs.items():
         for data_source in panel["data source"]:
             for type, data_cofig in data_source.items():
@@ -362,6 +383,7 @@ def build_dfs(archConfigs, filter_metrics):
                         )
                         metric_idx = data_source_idx + "." + str(i)
                         values = []
+                        eqn_content = []
 
                         if (
                             (not filter_metrics)
@@ -378,6 +400,7 @@ def build_dfs(archConfigs, filter_metrics):
                             for k, v in entries.items():
                                 if k != "tips" and k != "coll_level" and k != "alias":
                                     values.append(v)
+                                    eqn_content.append(v)
 
                             if "alias" in entries.keys():
                                 values.append(entries["alias"])
@@ -396,6 +419,15 @@ def build_dfs(archConfigs, filter_metrics):
 
                         # collect metric_list
                         metric_list[metric_idx] = key.replace(" ", "_")
+                        # generate mapping of counters and metrics
+                        filter = {}
+                        for formula in eqn_content:
+                            if formula is not None and formula != "None":
+                                for k in gen_counter_list(formula):
+                                    filter[k] = None
+                        if len(filter) > 0:
+                            metric_counters[key] = list(filter)
+
                         i += 1
 
                     df.set_index("Index", inplace=True)
@@ -431,6 +463,7 @@ def build_dfs(archConfigs, filter_metrics):
     setattr(archConfigs, "dfs", d)
     setattr(archConfigs, "metric_list", metric_list)
     setattr(archConfigs, "dfs_type", dfs_type)
+    setattr(archConfigs, "metric_counters", metric_counters)
 
 
 def build_metric_value_string(dfs, dfs_type, normal_unit):
@@ -469,7 +502,8 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug):
 
     # confirm no illogical counter values (only consider non-roofline runs)
     roof_only_run = sys_info.ip_blocks == "roofline"
-    if not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any():
+    rocscope_run = sys_info.ip_blocks == "rocscope"
+    if not rocscope_run and not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any():
         print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.")
         sys.exit(1)
 
@@ -711,12 +745,13 @@ def load_kernel_top(workload, dir):
     workload.dfs.update(tmp)
 
 
-def load_table_data(workload, dir, is_gui, debug, verbose):
+def load_table_data(workload, dir, is_gui, debug, verbose, skipKernelTop=False):
     """
     Load data for all "raw_csv_table".
     Calculate mertric value for all "metric_table".
     """
-    load_kernel_top(workload, dir)
+    if not skipKernelTop:
+        load_kernel_top(workload, dir)
 
     eval_metric(
         workload.dfs,
diff --git a/src/omniperf_analyze/utils/schema.py b/src/omniperf_analyze/utils/schema.py
index bcfc0bff5..6e147fcae 100644
--- a/src/omniperf_analyze/utils/schema.py
+++ b/src/omniperf_analyze/utils/schema.py
@@ -52,6 +52,8 @@ class ArchConfig:
     # [Index: Metric name] pairs
     metric_list: Dict[str, str] = field(default_factory=dict)
 
+    # [Metric name: Counters] pairs
+    metric_counters: Dict[str, list] = field(default_factory=dict)
 
 @dataclass
 class Workload:

From 79eecb445e4cc4fdc02bdf20fe638bb9c10f755d Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Fri, 9 Jun 2023 10:04:32 -0500
Subject: [PATCH 03/81] Comply to Python formatting

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/omniperf_analyze.py |  2 +-
 src/omniperf_analyze/utils/parser.py     | 41 +++++++++++++++++++-----
 src/omniperf_analyze/utils/schema.py     |  1 +
 src/utils/perfagg.py                     |  2 +-
 4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index c15181c6f..6415ed285 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -73,7 +73,7 @@ def generate_configs(config_dir, list_kernels, filter_metrics):
 
         archConfigs[arch] = ac
 
-    return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin
+    return archConfigs  # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin
 
 
 ################################################
diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py
index 5fb03c39a..b6573566b 100644
--- a/src/omniperf_analyze/utils/parser.py
+++ b/src/omniperf_analyze/utils/parser.py
@@ -320,26 +320,47 @@ def update_normUnit_string(equation, unit):
         str(equation),
     ).capitalize()
 
+
 def gen_counter_list(formula):
-    function_filter = {"MIN": None, "MAX": None, "AVG": None, "ROUND": None, "TO_INT": None, "GB": None, "STD": None, "GFLOP": None, "GOP": None, "OP": None, "CU": None, "NC": None, "UC": None, "CC": None, "RW": None, "GIOP": None}
+    function_filter = {
+        "MIN": None,
+        "MAX": None,
+        "AVG": None,
+        "ROUND": None,
+        "TO_INT": None,
+        "GB": None,
+        "STD": None,
+        "GFLOP": None,
+        "GOP": None,
+        "OP": None,
+        "CU": None,
+        "NC": None,
+        "UC": None,
+        "CC": None,
+        "RW": None,
+        "GIOP": None,
+    }
 
     counters = []
-    if not isinstance(formula,str):
+    if not isinstance(formula, str):
         return counters
     try:
         tree = ast.parse(
-            formula
-            .replace("$normUnit", "SQ_WAVES")
+            formula.replace("$normUnit", "SQ_WAVES")
             .replace("$denom", "SQ_WAVES")
-            .replace("$","")
+            .replace("$", "")
         )
         for node in ast.walk(tree):
-            if isinstance(node, ast.Name) and node.id.rstrip("_sum").isupper() and node.id not in function_filter:
+            if (
+                isinstance(node, ast.Name)
+                and node.id.rstrip("_sum").isupper()
+                and node.id not in function_filter
+            ):
                 counters.append(node.id.rstrip("_sum"))
     except:
         pass
     return counters
-            
+
 
 def build_dfs(archConfigs, filter_metrics):
     """
@@ -503,7 +524,11 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug):
     # confirm no illogical counter values (only consider non-roofline runs)
     roof_only_run = sys_info.ip_blocks == "roofline"
     rocscope_run = sys_info.ip_blocks == "rocscope"
-    if not rocscope_run and not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any():
+    if (
+        not rocscope_run
+        and not roof_only_run
+        and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any()
+    ):
         print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.")
         sys.exit(1)
 
diff --git a/src/omniperf_analyze/utils/schema.py b/src/omniperf_analyze/utils/schema.py
index 6e147fcae..f9b59868f 100644
--- a/src/omniperf_analyze/utils/schema.py
+++ b/src/omniperf_analyze/utils/schema.py
@@ -55,6 +55,7 @@ class ArchConfig:
     # [Metric name: Counters] pairs
     metric_counters: Dict[str, list] = field(default_factory=dict)
 
+
 @dataclass
 class Workload:
     sys_info: pd.DataFrame = None
diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 651bcb86d..109fdecda 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -135,7 +135,7 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
     # Check for vgpr counter in ROCm >= 5.3
     else:
         duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col]
-        duplicate_cols["accum_vgpr"] =  [col for col in df.columns if "accum_vgpr" in col]
+        duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col]
     for key, cols in duplicate_cols.items():
         print("Key is ", key)
         _df = df[cols]

From 049ba12f6994cbf617e69980d1a2f5b897e306a7 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Wed, 21 Jun 2023 11:06:03 -0500
Subject: [PATCH 04/81] Add subsection title to System Speed-of-Light

Signed-off-by: colramos-amd <colramos@amd.com>
---
 .../configs/gfx906/0200_system-speed-of-light.yaml               | 1 +
 .../configs/gfx908/0200_system-speed-of-light.yaml               | 1 +
 .../configs/gfx90a/0200_system-speed-of-light.yaml               | 1 +
 3 files changed, 3 insertions(+)

diff --git a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml
index 74de040b2..986b2f0ae 100644
--- a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml
+++ b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml
@@ -11,6 +11,7 @@ Panel Config:
   data source:
     - metric_table:
         id: 201
+        title: Speed-of-Light
         header:
           metric: Metric
           value: Value
diff --git a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml
index 74de040b2..986b2f0ae 100644
--- a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml
+++ b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml
@@ -11,6 +11,7 @@ Panel Config:
   data source:
     - metric_table:
         id: 201
+        title: Speed-of-Light
         header:
           metric: Metric
           value: Value
diff --git a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
index f10d7630f..20721ee1f 100644
--- a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
@@ -11,6 +11,7 @@ Panel Config:
   data source:
     - metric_table:
         id: 201
+        title: Speed-of-Light
         header:
           metric: Metric
           value: Value

From a89cb96b69ca9969bdb182d6d21f214494ee1a98 Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Mon, 26 Jun 2023 15:30:38 -0500
Subject: [PATCH 05/81] Extend filtering into timestamps.csv (#80)

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/omniperf         | 34 ++--------------------------------
 src/utils/perfagg.py |  9 +++++++++
 2 files changed, 11 insertions(+), 32 deletions(-)

diff --git a/src/omniperf b/src/omniperf
index 3b38e419b..e611547d4 100755
--- a/src/omniperf
+++ b/src/omniperf
@@ -439,23 +439,7 @@ def characterize_app(args, VER):
         else:
             run_prof(fname, workload_dir, perfmon_dir, app_cmd, args.target, log, args.verbose)
     
-    
-
-    # run again with timestamps
-    success, output = capture_subprocess_output(
-        [
-            rocprof_cmd,
-            # "-i", fname,
-            # "-m", perfmon_dir + "/" + "metrics.xml",
-            "--timestamp",
-            "on",
-            "-o",
-            workload_dir + "/" + "timestamps.csv",
-            '"' + app_cmd + '"',
-        ]
-    )
-    log.write(output)
-    # Update pmc_perf.csv timestamps
+    # Update timestamps
     replace_timestamps(workload_dir, log)
 
     # Manually join each pmc_perf*.csv output
@@ -676,21 +660,7 @@ def omniperf_profile(args, VER):
             else:
                 run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose)
 
-        # run again with timestamps
-        success, output = capture_subprocess_output(
-            [
-                rocprof_cmd,
-                # "-i", fname,
-                # "-m", perfmon_dir + "/" + "metrics.xml",
-                "--timestamp",
-                "on",
-                "-o",
-                workload_dir + "/" + "timestamps.csv",
-                '"' + args.remaining + '"',
-            ]
-        )
-        log.write(output)
-        # Update pmc_perf.csv timestamps
+        # Update timestamps
         replace_timestamps(workload_dir, log)
         
         # Manually join each pmc_perf*.csv output
diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 109fdecda..1c80a22a9 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -345,6 +345,15 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
                             # initial counter in this channel
                             pmc_list["TCC2"][str(ch)] = [counter]
 
+
+    # add a timestamp file
+    fd = open(workload_perfmon_dir + "/timestamps.txt", "w")
+    fd.write("pmc:\n\n")
+    fd.write("gpu:\n")
+    fd.write("range:\n")
+    fd.write("kernel:\n")
+    fd.close()
+    
     # sort the per channel counter, so that same counter in all channels can be aligned
     for ch in range(perfmon_config[soc]["TCC_channels"]):
         pmc_list["TCC2"][str(ch)].sort()

From f91de7d2f7478ac143b77914ad6560c5a5816f23 Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Mon, 26 Jun 2023 15:38:51 -0500
Subject: [PATCH 06/81] Comply to Python formatting

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/utils/perfagg.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 1c80a22a9..59460bc80 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -345,7 +345,6 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
                             # initial counter in this channel
                             pmc_list["TCC2"][str(ch)] = [counter]
 
-
     # add a timestamp file
     fd = open(workload_perfmon_dir + "/timestamps.txt", "w")
     fd.write("pmc:\n\n")
@@ -353,7 +352,7 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
     fd.write("range:\n")
     fd.write("kernel:\n")
     fd.close()
-    
+
     # sort the per channel counter, so that same counter in all channels can be aligned
     for ch in range(perfmon_config[soc]["TCC_channels"]):
         pmc_list["TCC2"][str(ch)].sort()

From 54bc0580850095a91a60c9115934f5e747774426 Mon Sep 17 00:00:00 2001
From: Nicholas Curtis <nicurtis@amd.com>
Date: Tue, 6 Jun 2023 11:01:37 -0400
Subject: [PATCH 07/81] Incorporate review comments

Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
---
 src/docs/analysis.md          | 16 ++++++++--------
 src/docs/conf.py              |  4 ++++
 src/docs/getting_started.md   | 20 ++++++++++++--------
 src/docs/high_level_design.md |  4 ++--
 src/docs/installation.md      | 14 +++++++++-----
 src/docs/introduction.md      |  9 ++++-----
 src/docs/profiling.md         | 15 +++++++++------
 src/parser.py                 |  2 +-
 8 files changed, 49 insertions(+), 35 deletions(-)

diff --git a/src/docs/analysis.md b/src/docs/analysis.md
index 2321ddabd..9feff1f64 100644
--- a/src/docs/analysis.md
+++ b/src/docs/analysis.md
@@ -26,7 +26,7 @@ Run `omniperf analyze -h` for more details.
 ### Recommended workflow
 
 1) Do a comprehensive analysis with Omniperf CLI at the beginning.
-```shell
+```shell-session
 $ omniperf analyze -p workloads/vcopy/mi200/
 
 --------
@@ -108,7 +108,7 @@ Analyze
 ....
 ```
  2. Use `--list-metrics` to generate a list of availible metrics for inspection
- ```shell
+ ```shell-session
 $ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a
 ╒═════════╤═════════════════════════════╕
 │         │ Metric                      │
@@ -172,7 +172,7 @@ $ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a
 ...
  ```
  2. Choose your own customized subset of metrics with `-b` (a.k.a. `--metric`), or build your own config following [config_template](https://github.com/AMDResearch/omniperf/blob/main/src/omniperf_analyze/configs/panel_config_template.yaml). Below we'll inspect block 2 (a.k.a. System Speed-of-Light).
-```shell
+```shell-session
 $ omniperf analyze -p workloads/vcopy/mi200/ -b 2
 --------
 Analyze
@@ -286,7 +286,7 @@ Analyze
 - Filter kernels
 
   First, list the top kernels in your application using `--list-kernels`.
-  ```shell
+  ```shell-session
   $ omniperf analyze -p workloads/vcopy/mi200/ --list-kernels
   
   --------
@@ -306,7 +306,7 @@ Analyze
 
   Second, select the index of the kernel you'd like to filter (i.e. __vecCopy(double*, double*, double*, int, int) [clone .kd]__ at index __0__). Then, use this index to apply the filter via `-k/--kernels`.
 
-  ```shell
+  ```shell-session
   $ omniperf -p workloads/vcopy/mi200/ -k 0
   
   --------
@@ -372,7 +372,7 @@ See [FAQ](https://amdresearch.github.io/omniperf/faq.html) for more details on S
 
 To launch the standalone GUI, include the `--gui` flag with your desired analysis command. For example:
 
-```bash
+```shell-session
 $ omniperf analyze -p workloads/vcopy/mi200/ --gui
 
 --------
@@ -499,7 +499,7 @@ e.g., omniperf_asw_vcopy_mi200.
 
 Below is the sample command to import the *vcopy* profiling data.
 
-```shell
+```shell-session
 $ omniperf database --help
 ROC Profiler:  /usr/bin/rocprof
 
@@ -544,7 +544,7 @@ Connection Options:
 ```
 
 **omniperf import for vcopy:**
-```shell
+```shell-session
 $ omniperf database --import -H pavii1 -u temp -t asw -w workloads/vcopy/mi200/
 ROC Profiler:  /usr/bin/rocprof
  
diff --git a/src/docs/conf.py b/src/docs/conf.py
index 48d4c5596..b659553f9 100644
--- a/src/docs/conf.py
+++ b/src/docs/conf.py
@@ -53,6 +53,10 @@ def install(package):
 ]
 
 myst_heading_anchors = 2
+# enable replacement of (tm) & friends
+myst_enable_extensions = [
+    "replacements"
+]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
diff --git a/src/docs/getting_started.md b/src/docs/getting_started.md
index c75bda9ab..80ae888f0 100644
--- a/src/docs/getting_started.md
+++ b/src/docs/getting_started.md
@@ -10,24 +10,28 @@
 
 1. **Launch & Profile the target application with the command line profiler**
    
-    The command line profiler launches the target application, calls the rocProfiler API, and collects profile results for the specified kernels, dispatches, and/or ipblock’s.
+    The command line profiler launches the target application, calls the rocProfiler API, and collects profile results for the specified kernels, dispatches, and/or IP blocks.  If not specified, Omniperf will default to collecting all available counters for all kernels/dispatches launched by the user's executable.
 
-    To collect the default set of data for all kernels in the target application, launch:
+    To collect the default set of data for all kernels in the target application, launch, e.g.:
     ```shell
-    $ omniperf profile -n vcopy -- ./vcopy 1048576 256
+    $ omniperf profile -n vcopy_data -- ./vcopy 1048576 256
     ```
-    The app runs, each kernel is launched, and profiling results are generated. By default, results are written to ./workloads/\<name>. To collect all requested profile information, it may be required to replay kernels multiple times.
+    The app runs, each kernel is launched, and profiling results are generated. By default, results are written to (e.g.,) ./workloads/vcopy_data (configurable via the `-n` argument). To collect all requested profile information, it may be required to replay kernels multiple times.
 
 2. **Customize data collection**
     
-    Options are available to specify for which kernels data should be collected.
-    `-k`/`--kernel` enables filtering kernels by name. `-d`/`--dispatch` enables filtering based on dispatch ID. `-b`/`--ipblocks` enables profiling on one or more IP Block(s).
+    Options are available to specify for which kernels/metrics data should be collected.
+    Note that filtering can be applied either in the profiling or analysis stage; however, filtering during profiling collection will often speed up your overall profiling run time.
 
-    To view available metrics by IP Block you can always use `--list-metrics` to view a list of all available metrics organized by IP Block. 
+    Some common filters include:
+
+    - `-k`/`--kernel` enables filtering kernels by name. `-d`/`--dispatch` enables filtering based on dispatch ID
+    - `-b`/`--ipblocks` collects metrics for only the specified (one or more) IP Blocks.
+
+    To view available metrics by IP Block you can use the `--list-metrics` argument to view a list of all available metrics organized by IP Block.
     ```shell
     $ omniperf analyze --list-metrics <sys_arch>
     ```
-    Note that filtering can also be applied after the fact, at the analysis stage, however filtering at the profiling level will often speed up your overall profiling run time.
 
 3. **Analyze at the command line**
    
diff --git a/src/docs/high_level_design.md b/src/docs/high_level_design.md
index 28c09ff12..6168b7ace 100644
--- a/src/docs/high_level_design.md
+++ b/src/docs/high_level_design.md
@@ -8,10 +8,10 @@
 
 The [Omniperf](https://github.com/AMDResearch/omniperf) Tool is architecturally composed of three major components, as shown in the following figure.
 
-- **Omniperf Profiling**: Acquire raw performance counters via application replay based on the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler). A set of MI200 specific micro benchmarks are also run to acquire the hierarchical roofline data. 
+- **Omniperf Profiling**: Acquire raw performance counters via application replay based on the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler).  The counters are stored in comma-separated value (CSV) format for further analysis. A set of MI200 specific micro benchmarks are also run to acquire the hierarchical roofline data. The roofline model is not available on earlier accelerators.
 
 - **Omniperf Grafana Analyzer**: 
-  - *Grafana database import*: All raw performance counters are imported into the backend MongoDB database for Grafana GUI analysis and visualization.
+  - *Grafana database import*: All raw performance counters are imported into the backend MongoDB database for Grafana GUI analysis and visualization. Compatibility of previously generated data between Omniperf versions is not necessarily guaranteed.
   - *Grafana GUI Analyzer*: A Grafana dashboard is designed to retrieve the raw counters info from the backend database. It also creates the relevant performance metrics and visualization.
 - **Omniperf Standalone GUI Analyzer**: A standalone GUI is provided to enable performance analysis without importing data into the backend database.
 
diff --git a/src/docs/installation.md b/src/docs/installation.md
index ee2489710..e550669b7 100644
--- a/src/docs/installation.md
+++ b/src/docs/installation.md
@@ -109,7 +109,7 @@ ROC Profiler:   /opt/rocm-5.1.0/bin/rocprof
 omniperf (v{__VERSION__})
 ```
 
-```{tip} Sites relying on an Lmod Python module locally may wish to
+```{tip} Users relying on an Lmod Python module locally may wish to
 customize the resulting Omniperf modulefile post-installation to
 include additional module dependencies.
 ```
@@ -129,8 +129,9 @@ export PYTHONPATH=$INSTALL_DIR/python-libs
 
 Omniperf relies on a rocprof binary during the profiling
 process. Normally the path to this binary will be detected
-automatically, but it can also be overridden via the use of an
-optional `ROCPROF` environment variable.
+automatically, but it can also be overridden by setting the
+optional `ROCPROF` environment variable to the path of the binary the user
+wishes to use instead.
 
 
 
@@ -162,9 +163,12 @@ Omniperf uses [mongoimport](https://www.mongodb.com/docs/database-tools/mongoimp
 $ wget https://fastdl.mongodb.org/tools/db/mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb
 $ sudo apt install ./mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb
 ```
-> Find install for alternative distros [here](https://www.mongodb.com/download-center/database-tools/releases/archive)
+> Installation instructions for alternative distributions can be found [here](https://www.mongodb.com/download-center/database-tools/releases/archive)
+
+### Persistent Storage
+
+The user may also choose to bind MongoDB to a directory on the host OS to create a local backup in case of a crash or reset:
 
-### Persist Storage
 ```bash
 $ sudo mkdir -p /usr/local/persist && cd /usr/local/persist/
 $ sudo mkdir -p grafana-storage mongodb
diff --git a/src/docs/introduction.md b/src/docs/introduction.md
index 436146db8..6b39d4088 100644
--- a/src/docs/introduction.md
+++ b/src/docs/introduction.md
@@ -10,17 +10,17 @@
 
 ## Scope
 
-MI Performance Profiler ([Omniperf](https://github.com/AMDResearch/omniperf)) is a system performance profiling tool for Machine Learning/HPC workloads running on AMD MI GPUs. It is currently built on top of the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler) to monitor hardware performance counters. The Omniperf tool primarily targets MI100 and MI200 silicon. Development is in progress to support MI300 and NAVI GPUs. 
+MI Performance Profiler ([Omniperf](https://github.com/AMDResearch/omniperf)) is a system performance profiling tool for Machine Learning/HPC workloads running on AMD Instinct (tm) Accelerators. It is currently built on top of the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler) to monitor hardware performance counters. The Omniperf tool primarily targets accelerators in the MI100 and MI200 families. Development is in progress to support MI300 and Radeon (tm) RDNA (tm) GPUs.
 
 ## Features
 
-The Omniperf tool performs system profiling based on all approved hardware counters for MI200. It provides high level performance analysis features including System Speed-of-Light, IP block Speed-of-Light, Memory Chart Analysis, Roofline Analysis, Baseline Comparisons, and more... 
+The Omniperf tool performs system profiling based on all available hardware counters for the target accelerator. It provides high level performance analysis features including System Speed-of-Light, IP block Speed-of-Light, Memory Chart Analysis, Roofline Analysis, Baseline Comparisons, and more...
   
 Both command line analysis and GUI analysis are supported. 
 
 Detailed Feature List:
-- MI200 support
 - MI100 support
+- MI200 support
 - Standalone GUI Analyzer
 - Grafana/MongoDB GUI Analyzer
 - Dispatch Filtering
@@ -50,8 +50,7 @@ Detailed Feature List:
 
 | Platform | Status         |
 | :------- | :------------- |
-| Vega 20  | No             |
-| MI50     | No             |
+| Vega 20 (MI-50/60)  | No  |
 | MI100    | Supported      |
 | MI200    | Supported      |
 | MI300    | In development |
diff --git a/src/docs/profiling.md b/src/docs/profiling.md
index 6776097c9..1a9547775 100644
--- a/src/docs/profiling.md
+++ b/src/docs/profiling.md
@@ -19,7 +19,7 @@ the MI200 platform.
 
 ## Workload Compilation
 **vcopy compilation:**
-```shell
+```shell-session
 $ hipcc vcopy.cpp -o vcopy
 $ ls
 vcopy   vcopy.cpp
@@ -40,7 +40,7 @@ Releasing CPU memory
 The *omniperf* script, availible through the [Omniperf](https://github.com/AMDResearch/omniperf) repository, is used to aquire all necessary perfmon data through analysis of compute workloads.
 
 **omniperf help:**
-```shell
+```shell-session
 $ omniperf profile --help
 ROC Profiler:  /usr/bin/rocprof
 
@@ -56,7 +56,7 @@ Examples:
 
         omniperf profile -n vcopy_all -- ./vcopy 1048576 256
 
-        omniperf profile -n vcopy_SPI_TD -b SQ TCC -- ./vcopy 1048576 256
+        omniperf profile -n vcopy_SPI_TCC -b SQ TCC -- ./vcopy 1048576 256
 
         omniperf profile -n vcopy_kernel -k vecCopy -- ./vcopy 1048576 256
 
@@ -111,7 +111,7 @@ Standalone Roofline Options:
 The following sample command profiles the *vcopy* workload.
 
 **vcopy profiling:**
-```shell
+```shell-session
 $ omniperf profile --name vcopy -- ./vcopy 1048576 256
 Resolving rocprof
 ROC Profiler:  /usr/bin/rocprof
@@ -206,7 +206,10 @@ Peak MFMA IOPs (I8), GPU ID: 1, workgroupSize:256, workgroups:16384, experiments
 ```
 You'll notice two stages in *default* Omniperf profiling. The first stage collects all the counters needed for Omniperf analysis (omitting any filters you've provided). The second stage collects data for the roofline analysis (this stage can be disabled using `--no-roof`)
 
-At the end of the profiling, all resulting csv files should be located in the SOC specific target directory, e.g., mi200.
+At the end of the profiling, all resulting csv files should be located in a SOC specific target directory, e.g.:
+  - "mi200" for the AMD Instinct (tm) MI-200 family of accelerators
+  - "mi100" for the AMD Instinct (tm) MI-100 family of accelerators
+etc. The SOC names are generated as a part of Omniperf, and do not necessarily distinguish between different accelerators in the same family (e.g., an AMD Instinct (tm) MI-210 vs an MI-250).
 
 > Note: Additionally, you'll notice a few extra files. An SoC parameters file, *sysinfo.csv*, is created to reflect the target device settings. All profiling output is stored in *log.txt*. Roofline specific benchmark results are stored in *roofline.csv*.
 
@@ -316,7 +319,7 @@ ROCProfiler: input from "/tmp/rpl_data_230411_170300_29696/input0.xml"
 
 #### Dispatch Filtering
 The following example demonstrates profiling on selected dispatches:
-```shell
+```shell-session
 $ omniperf profile --name vcopy -d 0 -- ./vcopy 1048576 256
 Resolving rocprof
 ROC Profiler:  /usr/bin/rocprof
diff --git a/src/parser.py b/src/parser.py
index da018ba94..9d6dd8f6f 100644
--- a/src/parser.py
+++ b/src/parser.py
@@ -66,7 +66,7 @@ def parse(my_parser):
                                         \n\n-------------------------------------------------------------------------------
                                         \nExamples:
                                         \n\tomniperf profile -n vcopy_all -- ./vcopy 1048576 256
-                                        \n\tomniperf profile -n vcopy_SPI_TD -b SQ TCC -- ./vcopy 1048576 256
+                                        \n\tomniperf profile -n vcopy_SPI_TCC -b SQ TCC -- ./vcopy 1048576 256
                                         \n\tomniperf profile -n vcopy_kernel -k vecCopy -- ./vcopy 1048576 256
                                         \n\tomniperf profile -n vcopy_disp -d 0 -- ./vcopy 1048576 256
                                         \n\tomniperf profile -n vcopy_roof --roof-only -- ./vcopy 1048576 256

From aaed37d00417014e35bb2dc97cc95a2aa89b6bf9 Mon Sep 17 00:00:00 2001
From: Nicholas Curtis <nicurtis@amd.com>
Date: Tue, 6 Jun 2023 12:03:55 -0400
Subject: [PATCH 08/81] fix formatting

Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
---
 src/docs/conf.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/docs/conf.py b/src/docs/conf.py
index b659553f9..014ae7752 100644
--- a/src/docs/conf.py
+++ b/src/docs/conf.py
@@ -54,9 +54,7 @@ def install(package):
 
 myst_heading_anchors = 2
 # enable replacement of (tm) & friends
-myst_enable_extensions = [
-    "replacements"
-]
+myst_enable_extensions = ["replacements"]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]

From 8857393571a97f2550ce012098893c3be4751de5 Mon Sep 17 00:00:00 2001
From: Nicholas Curtis <nicurtis@amd.com>
Date: Tue, 6 Jun 2023 12:25:40 -0400
Subject: [PATCH 09/81] fix missing

Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
---
 src/docs/profiling.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/docs/profiling.md b/src/docs/profiling.md
index 1a9547775..b0b56dc91 100644
--- a/src/docs/profiling.md
+++ b/src/docs/profiling.md
@@ -370,7 +370,7 @@ Standalone Roofline Options:
 
 #### Roofline Only
 The following example demonstrates profiling roofline data only:
-```shell
+```shell-session
 $ omniperf profile --name vcopy --roof-only -- ./vcopy 1048576 256
 Resolving rocprof
 ROC Profiler:  /usr/bin/rocprof
@@ -394,7 +394,8 @@ Checking for pmc_perf.csv in  /home/colramos/GitHub/omniperf-pub/workloads/mix/m
 Empirical Roofline PDFs saved!
 ```
 An inspection of our workload output folder shows .pdf plots were generated successfully
-```shell
+```shell-session
+$ ls workloads/vcopy/mi200/
 total 176
 drwxrwxr-x 3 colramos colramos  4096 Apr 11 17:18 .
 drwxrwxr-x 3 colramos colramos  4096 Apr 11 17:15 ..
@@ -409,4 +410,4 @@ drwxrwxr-x 2 colramos colramos  4096 Apr 11 17:16 perfmon
 ```
 A sample *empirRoof_gpu-ALL_fp32.pdf* looks something like this:
 
-![Sample Standalone Roof Plot](images/sample-roof-plot.png)
+![Sample Standalone Roof Plot](images/sample-roof-plot.png)
\ No newline at end of file

From 60d4a425366ddf96fb14554564e6d8412d5e8e3c Mon Sep 17 00:00:00 2001
From: Nicholas Curtis <nicurtis@amd.com>
Date: Wed, 7 Jun 2023 10:23:49 -0400
Subject: [PATCH 10/81] Add options to enable latexpdf builds

Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
---
 src/docs/conf.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/docs/conf.py b/src/docs/conf.py
index 014ae7752..d97f79bb7 100644
--- a/src/docs/conf.py
+++ b/src/docs/conf.py
@@ -89,6 +89,10 @@ def install(package):
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = None
 
+# options for latex output
+latex_engine = 'lualatex'
+latex_show_urls = 'footnote'
+
 
 # -- Options for HTML output -------------------------------------------------
 

From be1eeee370cbbccb4c5667d2f359f9fd125431b6 Mon Sep 17 00:00:00 2001
From: Nicholas Curtis <nicurtis@amd.com>
Date: Wed, 7 Jun 2023 15:19:56 -0400
Subject: [PATCH 11/81] apply formatting

Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
---
 src/docs/conf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/docs/conf.py b/src/docs/conf.py
index d97f79bb7..af0003fb7 100644
--- a/src/docs/conf.py
+++ b/src/docs/conf.py
@@ -90,8 +90,8 @@ def install(package):
 pygments_style = None
 
 # options for latex output
-latex_engine = 'lualatex'
-latex_show_urls = 'footnote'
+latex_engine = "lualatex"
+latex_show_urls = "footnote"
 
 
 # -- Options for HTML output -------------------------------------------------

From 8edba713fbbf1294b412d2eb603f1af082839ba6 Mon Sep 17 00:00:00 2001
From: "Karl W. Schulz" <karl.schulz@amd.com>
Date: Fri, 30 Jun 2023 15:01:57 -0500
Subject: [PATCH 12/81] updating path for rocm repo to supported rhel8 release
 (8.8)

Signed-off-by: Karl W. Schulz <karl.schulz@amd.com>
---
 docker/rhel8/rocm.repo | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/rhel8/rocm.repo b/docker/rhel8/rocm.repo
index 17171d755..8b2048978 100644
--- a/docker/rhel8/rocm.repo
+++ b/docker/rhel8/rocm.repo
@@ -7,7 +7,7 @@ gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
 
 [amdgpu]
 name=amdgpu
-baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.5/main/x86_64
+baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.8/main/x86_64
 enabled=1
 gpgcheck=1
 gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
\ No newline at end of file

From 5d84d0bb63c78c386761a91547611281f0e29138 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 10 Jul 2023 16:26:56 -0500
Subject: [PATCH 13/81] Fixed Units inconsistencies -  Table 10: Units were
 output as "$normUnit" now they are instr + normUnit

-  Table 16: Changed to Req per $normUnit

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 .../gfx906/1000_compute-unit-instruction-mix.yaml  |  4 ++--
 .../configs/gfx906/1600_L1_cache.yaml              |  8 ++++----
 .../configs/gfx906/1800_L2_cache_per_channel.yaml  | 14 +++++++-------
 .../configs/gfx908/1600_L1_cache.yaml              |  8 ++++----
 .../configs/gfx908/1800_L2_cache_per_channel.yaml  | 14 +++++++-------
 .../configs/gfx90a/1600_L1_cache.yaml              |  8 ++++----
 .../configs/gfx90a/1800_L2_cache_per_channel.yaml  | 14 +++++++-------
 7 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
index b72344f3b..fd4653c23 100644
--- a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
@@ -27,7 +27,7 @@ Panel Config:
             tips: 
           LDS:
             count: AVG((SQ_INSTS_LDS / $denom))
-            unit: $normUnit
+            unit: (instr + $normUnit)
             tips: 
           VALU - MFMA:
             count: None # No HW module
@@ -61,7 +61,7 @@ Panel Config:
         metric:
           INT-32:
             count: None # No perf counter
-            unit: $normUnit
+            unit: (instr + $normUnit)
             tips:
           INT-64:
             count: None # No perf counter
diff --git a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
index 1713068d2..1e05b3e4c 100644
--- a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
@@ -361,7 +361,7 @@ Panel Config:
             mean: AVG((TCP_UTCL1_REQUEST_sum / $denom))
             min: MIN((TCP_UTCL1_REQUEST_sum / $denom))
             max: MAX((TCP_UTCL1_REQUEST_sum / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           Hit Ratio:
             mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
@@ -376,17 +376,17 @@ Panel Config:
             mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
-            units: ( + $normUnit)
+            units: (Hits + $normUnit)
             tips: 
           Misses (Translation):
             mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
           Misses (Permission):
             mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
diff --git a/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml
index 95bba22e8..08a9a9f76 100644
--- a/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml
@@ -204,7 +204,7 @@ Panel Config:
               + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25]))
               + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29]))
               + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1]))
@@ -247,7 +247,7 @@ Panel Config:
               + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) +
               TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30]))
               + TO_INT(TCC_READ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1]))
@@ -294,7 +294,7 @@ Panel Config:
               + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26]))
               + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29]))
               + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1]))
@@ -345,7 +345,7 @@ Panel Config:
               + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28]))
               + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1]))
@@ -396,7 +396,7 @@ Panel Config:
               + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28]))
               + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1]))
@@ -447,7 +447,7 @@ Panel Config:
               + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28]))
               + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1]))
@@ -498,7 +498,7 @@ Panel Config:
               + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28]))
               + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Lat:
             mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1])
diff --git a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
index 4ff3fd4d4..f65309a31 100644
--- a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
@@ -361,7 +361,7 @@ Panel Config:
             mean: AVG((TCP_UTCL1_REQUEST_sum / $denom))
             min: MIN((TCP_UTCL1_REQUEST_sum / $denom))
             max: MAX((TCP_UTCL1_REQUEST_sum / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           Hit Ratio:
             mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
@@ -376,17 +376,17 @@ Panel Config:
             mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
-            units: ( + $normUnit)
+            units: (Hits + $normUnit)
             tips: 
           Misses (Translation):
             mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
           Misses (Permission):
             mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
diff --git a/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml
index e68511e9e..3acee5740 100644
--- a/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml
@@ -204,7 +204,7 @@ Panel Config:
               + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25]))
               + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29]))
               + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1]))
@@ -247,7 +247,7 @@ Panel Config:
               + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) +
               TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30]))
               + TO_INT(TCC_READ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1]))
@@ -294,7 +294,7 @@ Panel Config:
               + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26]))
               + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29]))
               + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1]))
@@ -345,7 +345,7 @@ Panel Config:
               + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28]))
               + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1]))
@@ -396,7 +396,7 @@ Panel Config:
               + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28]))
               + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1]))
@@ -447,7 +447,7 @@ Panel Config:
               + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28]))
               + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1]))
@@ -498,7 +498,7 @@ Panel Config:
               + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28]))
               + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Lat:
             mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1])
diff --git a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
index 985be3803..917cb3aa0 100644
--- a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
@@ -361,7 +361,7 @@ Panel Config:
             mean: AVG((TCP_UTCL1_REQUEST_sum / $denom))
             min: MIN((TCP_UTCL1_REQUEST_sum / $denom))
             max: MAX((TCP_UTCL1_REQUEST_sum / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           Hit Ratio:
             mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
@@ -376,17 +376,17 @@ Panel Config:
             mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
-            units: ( + $normUnit)
+            units: (Hits + $normUnit)
             tips: 
           Misses (Translation):
             mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
           Misses (Permission):
             mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
diff --git a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
index a5bf6fa25..094df5b19 100644
--- a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
@@ -204,7 +204,7 @@ Panel Config:
               + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25]))
               + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29]))
               + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L1 - L2 Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1]))
@@ -247,7 +247,7 @@ Panel Config:
               + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) +
               TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30]))
               + TO_INT(TCC_READ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L1 - L2 Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1]))
@@ -294,7 +294,7 @@ Panel Config:
               + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26]))
               + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29]))
               + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L1 - L2 Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1]))
@@ -345,7 +345,7 @@ Panel Config:
               + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28]))
               + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L2 - EA Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1]))
@@ -396,7 +396,7 @@ Panel Config:
               + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28]))
               + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L2 - EA Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1]))
@@ -447,7 +447,7 @@ Panel Config:
               + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28]))
               + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L2 - EA Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1]))
@@ -498,7 +498,7 @@ Panel Config:
               + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28]))
               + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L2 - EA Read Lat:
             mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1])

From 2469716d13b227b2f9435f2e86160a2b8851c9c2 Mon Sep 17 00:00:00 2001
From: Cole Ramos <colramos@amd.com>
Date: Tue, 11 Jul 2023 13:27:46 -0500
Subject: [PATCH 14/81] Update 1800_L2_cache_per_channel.yaml

Capitalizing for consistency

Signed-off-by: Cole Ramos <colramos@amd.com>
---
 .../configs/gfx90a/1800_L2_cache_per_channel.yaml      | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
index 094df5b19..93fc2b412 100644
--- a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
@@ -204,7 +204,7 @@ Panel Config:
               + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25]))
               + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29]))
               + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1]))
@@ -294,7 +294,7 @@ Panel Config:
               + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26]))
               + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29]))
               + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1]))
@@ -396,7 +396,7 @@ Panel Config:
               + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28]))
               + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31]))
               / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1]))
@@ -447,7 +447,7 @@ Panel Config:
               + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28]))
               + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31]))
               / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1]))
@@ -498,7 +498,7 @@ Panel Config:
               + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28]))
               + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31]))
               / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Lat:
             mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1])

From 6042cfb16a908ae89a7091a927cd124126b04643 Mon Sep 17 00:00:00 2001
From: Cole Ramos <colramos@amd.com>
Date: Tue, 11 Jul 2023 13:29:07 -0500
Subject: [PATCH 15/81] Update 1800_L2_cache_per_channel.yaml

Capitalizing for consistency

Signed-off-by: Cole Ramos <colramos@amd.com>
---
 .../configs/gfx90a/1800_L2_cache_per_channel.yaml             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
index 93fc2b412..f13647847 100644
--- a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
@@ -247,7 +247,7 @@ Panel Config:
               + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) +
               TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30]))
               + TO_INT(TCC_READ[31])) / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1]))
@@ -345,7 +345,7 @@ Panel Config:
               + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28]))
               + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31]))
               / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1]))

From 267750c085b222748971ea96f369f1a886aa4fef Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Tue, 11 Jul 2023 14:11:38 -0500
Subject: [PATCH 16/81] Rearranging build_dfs func to optimize ArchConfig for
 rocomni plugin

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/utils/parser.py | 57 +++++++++++++++++++---------
 1 file changed, 39 insertions(+), 18 deletions(-)

diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py
index b6573566b..025745b17 100644
--- a/src/omniperf_analyze/utils/parser.py
+++ b/src/omniperf_analyze/utils/parser.py
@@ -339,11 +339,25 @@ def gen_counter_list(formula):
         "CC": None,
         "RW": None,
         "GIOP": None,
+        "GFLOPs": None,
     }
 
+    built_in_counter=[
+        "lds",
+        "grd",
+        "wgr",
+        "arch_vgpr",
+        "accum_vgpr",
+        "sgpr",
+        "scr",
+        "BeginNs",
+        "EndNs"
+    ]
+
+    visited = False
     counters = []
     if not isinstance(formula, str):
-        return counters
+        return visited, counters
     try:
         tree = ast.parse(
             formula.replace("$normUnit", "SQ_WAVES")
@@ -351,15 +365,17 @@ def gen_counter_list(formula):
             .replace("$", "")
         )
         for node in ast.walk(tree):
-            if (
-                isinstance(node, ast.Name)
-                and node.id.rstrip("_sum").isupper()
-                and node.id not in function_filter
-            ):
-                counters.append(node.id.rstrip("_sum"))
+            if isinstance(node, ast.Name):
+                val = str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id)
+                if (val.isupper() and val not in function_filter):
+                    counters.append(val)
+                    visited = True
+                if val in built_in_counter:
+                    visited = True
     except:
         pass
-    return counters
+
+    return visited, counters
 
 
 def build_dfs(archConfigs, filter_metrics):
@@ -381,9 +397,14 @@ def build_dfs(archConfigs, filter_metrics):
     dfs_type = {}
     metric_counters = {}
     for panel_id, panel in archConfigs.panel_configs.items():
+        panel_idx = str(panel_id // 100)
         for data_source in panel["data source"]:
             for type, data_cofig in data_source.items():
                 if type == "metric_table":
+                    metric_list[panel_idx] = panel["title"]
+                    table_idx = panel_idx + "." + str(data_cofig["id"] % 100)
+                    metric_list[table_idx] = data_cofig["title"]
+                    
                     headers = ["Index"]
                     for key, tile in data_cofig["header"].items():
                         if key != "tips":
@@ -397,12 +418,7 @@ def build_dfs(archConfigs, filter_metrics):
 
                     i = 0
                     for key, entries in data_cofig["metric"].items():
-                        data_source_idx = (
-                            str(data_cofig["id"] // 100)
-                            + "."
-                            + str(data_cofig["id"] % 100)
-                        )
-                        metric_idx = data_source_idx + "." + str(i)
+                        metric_idx = table_idx + "." + str(i)
                         values = []
                         eqn_content = []
 
@@ -411,7 +427,7 @@ def build_dfs(archConfigs, filter_metrics):
                             or (metric_idx in filter_metrics)  # no filter
                             or  # metric in filter
                             # the whole table in filter
-                            (data_source_idx in filter_metrics)
+                            (table_idx in filter_metrics)
                             or
                             # the whole IP block in filter
                             (str(panel_id // 100) in filter_metrics)
@@ -439,14 +455,19 @@ def build_dfs(archConfigs, filter_metrics):
                             df = pd.concat([df, df_new_row])
 
                         # collect metric_list
-                        metric_list[metric_idx] = key.replace(" ", "_")
+                        metric_list[metric_idx] = key
                         # generate mapping of counters and metrics
                         filter = {}
+                        _visited = False
                         for formula in eqn_content:
                             if formula is not None and formula != "None":
-                                for k in gen_counter_list(formula):
+                                visited, counters = gen_counter_list(formula)
+                                if visited:
+                                    _visited = True
+                                for k in counters:
                                     filter[k] = None
-                        if len(filter) > 0:
+
+                        if len(filter) > 0 or _visited:
                             metric_counters[key] = list(filter)
 
                         i += 1

From 80c04feb77961d17b3e062d8d8f1fa78897d318d Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Tue, 11 Jul 2023 14:13:09 -0500
Subject: [PATCH 17/81] Abstract perfmon coalescing for usage in rocomni plugin

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/utils/perfagg.py | 173 ++++++++++++++++++++++++++++---------------
 1 file changed, 115 insertions(+), 58 deletions(-)

diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 59460bc80..8e95482c5 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -256,6 +256,96 @@ def pmc_perf_split(workload_dir):
     os.remove(workload_perfmon_dir + "/pmc_perf.txt")
 
 
+def update_pmc_bucket(
+        counters, 
+        save_file,
+        soc,
+        pmc_list=None,
+        stext=None, 
+        workload_perfmon_dir=None
+    ):
+    # Verify inputs.
+    # If save_file is True, we're being called internally, from perfmon_coalesce
+    # Else we're being called externally, from rocomni
+    detected_extermal_call = False
+    if save_file and (stext is None or workload_perfmon_dir is None):
+        raise ValueError("stext and workload_perfmon_dir must be specified if save_file is True")
+    if pmc_list is None:
+        detected_extermal_call = True
+        pmc_list = dict(
+            [
+                ("SQ", []),
+                ("GRBM", []),
+                ("TCP", []),
+                ("TA", []),
+                ("TD", []),
+                ("TCC", []),
+                ("SPI", []),
+                ("CPC", []),
+                ("CPF", []),
+                ("GDS", []),
+                ("TCC2", {}),  # per-channel TCC perfmon
+            ]
+        )
+        for ch in range(perfmon_config[soc]["TCC_channels"]):
+            pmc_list["TCC2"][str(ch)] = []
+    
+    if "SQ_ACCUM_PREV_HIRES" in counters:
+        # save  all level counters separately
+        nindex = counters.index("SQ_ACCUM_PREV_HIRES")
+        level_counter = counters[nindex - 1]
+
+        if save_file:
+            # Save to level counter file, file name = level counter name
+            fd = open(workload_perfmon_dir + "/" + level_counter + ".txt", "w")
+            fd.write(stext + "\n\n")
+            fd.write("gpu:\n")
+            fd.write("range:\n")
+            fd.write("kernel:\n")
+            fd.close()
+
+        return pmc_list
+    
+    # save normal pmc counters in matching buckets
+    for counter in counters:
+        IP_block = counter.split(sep="_")[0].upper()
+        # SQC and SQ belong to the IP block, coalesce them
+        if IP_block == "SQC":
+            IP_block = "SQ"
+
+        if IP_block != "TCC":
+            # Insert unique pmc counters into its bucket
+            if counter not in pmc_list[IP_block]:
+                pmc_list[IP_block].append(counter)
+
+        else:
+            # TCC counters processing
+            m = re.match(r"[\s\S]+\[(\d+)\]", counter)
+            if m is None:
+                # Aggregated TCC counters
+                if counter not in pmc_list[IP_block]:
+                    pmc_list[IP_block].append(counter)
+
+            else:
+                # TCC channel ID
+                ch = m.group(1)
+
+                # fake IP block for per channel TCC
+                if str(ch) in pmc_list["TCC2"]:
+                    # append unique counter into the channel
+                    if counter not in pmc_list["TCC2"][str(ch)]:
+                        pmc_list["TCC2"][str(ch)].append(counter)
+                else:
+                    # initial counter in this channel
+                    pmc_list["TCC2"][str(ch)] = [counter]
+
+    if detected_extermal_call:
+        # sort the per channel counter, so that same counter in all channels can be aligned
+        for ch in range(perfmon_config[soc]["TCC_channels"]):
+            pmc_list["TCC2"][str(ch)].sort()
+    return pmc_list
+
+
 def perfmon_coalesce(pmc_files_list, workload_dir, soc):
     workload_perfmon_dir = workload_dir + "/perfmon"
 
@@ -296,55 +386,11 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
 
             # we have found all the counters, store them in buckets
             counters = m.group(1).split()
-            if "SQ_ACCUM_PREV_HIRES" in counters:
-                # save  all level counters separately
-
-                nindex = counters.index("SQ_ACCUM_PREV_HIRES")
-                level_counter = counters[nindex - 1]
-
-                # Save to level counter file, file name = level counter name
-                fd = open(workload_perfmon_dir + "/" + level_counter + ".txt", "w")
-                fd.write(stext + "\n\n")
-                fd.write("gpu:\n")
-                fd.write("range:\n")
-                fd.write("kernel:\n")
-                fd.close()
-
-                continue
-
-            # save normal pmc counters in matching buckets
-            for counter in counters:
-                IP_block = counter.split(sep="_")[0].upper()
-                # SQC and SQ belong to the IP block, coalesce them
-                if IP_block == "SQC":
-                    IP_block = "SQ"
-
-                if IP_block != "TCC":
-                    # Insert unique pmc counters into its bucket
-                    if counter not in pmc_list[IP_block]:
-                        pmc_list[IP_block].append(counter)
-
-                else:
-                    # TCC counters processing
-                    m = re.match(r"[\s\S]+\[(\d+)\]", counter)
-                    if m is None:
-                        # Aggregated TCC counters
-                        if counter not in pmc_list[IP_block]:
-                            pmc_list[IP_block].append(counter)
-
-                    else:
-                        # TCC channel ID
-                        ch = m.group(1)
-
-                        # fake IP block for per channel TCC
-                        if str(ch) in pmc_list["TCC2"]:
-                            # append unique counter into the channel
-                            if counter not in pmc_list["TCC2"][str(ch)]:
-                                pmc_list["TCC2"][str(ch)].append(counter)
-                        else:
-                            # initial counter in this channel
-                            pmc_list["TCC2"][str(ch)] = [counter]
-
+            
+            # Utilitze helper function once a list of counters has be extracted
+            save_file = True
+            pmc_list = update_pmc_bucket(counters, save_file, soc, pmc_list, stext, workload_perfmon_dir)
+    
     # add a timestamp file
     fd = open(workload_perfmon_dir + "/timestamps.txt", "w")
     fd.write("pmc:\n\n")
@@ -360,9 +406,7 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
     return pmc_list
 
 
-def perfmon_emit(pmc_list, workload_dir, soc):
-    workload_perfmon_dir = workload_dir + "/perfmon"
-
+def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None):
     # Calculate the minimum number of iteration to save the pmc counters
     # non-TCC counters
     pmc_cnt = [
@@ -384,7 +428,12 @@ def perfmon_emit(pmc_list, workload_dir, soc):
     niter = max(math.ceil(max(pmc_cnt)), math.ceil(tcc_cnt) + math.ceil(max(tcc2_cnt)))
 
     # Emit PMC counters into pmc config file
-    fd = open(workload_perfmon_dir + "/pmc_perf.txt", "w")
+    if save_file:
+        workload_perfmon_dir = workload_dir + "/perfmon"
+        fd = open(workload_perfmon_dir + "/pmc_perf.txt", "w")
+    else:
+        batches = []
+
 
     tcc2_index = 0
     for iter in range(niter):
@@ -414,12 +463,20 @@ def perfmon_emit(pmc_list, workload_dir, soc):
 
         # TCC aggregated counters
         line = line + " " + " ".join(tcc_counters)
-        fd.write(line + "\n")
+        if save_file:
+            fd.write(line + "\n")
+        else:
+            b = line.split()
+            b.remove("pmc:")
+            batches.append(b)
 
-    fd.write("\ngpu:\n")
-    fd.write("range:\n")
-    fd.write("kernel:\n")
-    fd.close()
+    if save_file:
+        fd.write("\ngpu:\n")
+        fd.write("range:\n")
+        fd.write("kernel:\n")
+        fd.close()
+    else:
+        return batches
 
 
 def perfmon_filter(workload_dir, perfmon_dir, args):

From 4d8383b4390cf8cacf225954e971f31891dea39d Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Tue, 11 Jul 2023 14:14:10 -0500
Subject: [PATCH 18/81] Comply to Python formatting

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/utils/parser.py |  8 ++++----
 src/utils/perfagg.py                 | 26 ++++++++++++--------------
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py
index 025745b17..0328d7aa8 100644
--- a/src/omniperf_analyze/utils/parser.py
+++ b/src/omniperf_analyze/utils/parser.py
@@ -342,7 +342,7 @@ def gen_counter_list(formula):
         "GFLOPs": None,
     }
 
-    built_in_counter=[
+    built_in_counter = [
         "lds",
         "grd",
         "wgr",
@@ -351,7 +351,7 @@ def gen_counter_list(formula):
         "sgpr",
         "scr",
         "BeginNs",
-        "EndNs"
+        "EndNs",
     ]
 
     visited = False
@@ -367,7 +367,7 @@ def gen_counter_list(formula):
         for node in ast.walk(tree):
             if isinstance(node, ast.Name):
                 val = str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id)
-                if (val.isupper() and val not in function_filter):
+                if val.isupper() and val not in function_filter:
                     counters.append(val)
                     visited = True
                 if val in built_in_counter:
@@ -404,7 +404,7 @@ def build_dfs(archConfigs, filter_metrics):
                     metric_list[panel_idx] = panel["title"]
                     table_idx = panel_idx + "." + str(data_cofig["id"] % 100)
                     metric_list[table_idx] = data_cofig["title"]
-                    
+
                     headers = ["Index"]
                     for key, tile in data_cofig["header"].items():
                         if key != "tips":
diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 8e95482c5..04658795c 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -257,19 +257,16 @@ def pmc_perf_split(workload_dir):
 
 
 def update_pmc_bucket(
-        counters, 
-        save_file,
-        soc,
-        pmc_list=None,
-        stext=None, 
-        workload_perfmon_dir=None
-    ):
+    counters, save_file, soc, pmc_list=None, stext=None, workload_perfmon_dir=None
+):
     # Verify inputs.
     # If save_file is True, we're being called internally, from perfmon_coalesce
     # Else we're being called externally, from rocomni
     detected_extermal_call = False
     if save_file and (stext is None or workload_perfmon_dir is None):
-        raise ValueError("stext and workload_perfmon_dir must be specified if save_file is True")
+        raise ValueError(
+            "stext and workload_perfmon_dir must be specified if save_file is True"
+        )
     if pmc_list is None:
         detected_extermal_call = True
         pmc_list = dict(
@@ -289,7 +286,7 @@ def update_pmc_bucket(
         )
         for ch in range(perfmon_config[soc]["TCC_channels"]):
             pmc_list["TCC2"][str(ch)] = []
-    
+
     if "SQ_ACCUM_PREV_HIRES" in counters:
         # save  all level counters separately
         nindex = counters.index("SQ_ACCUM_PREV_HIRES")
@@ -305,7 +302,7 @@ def update_pmc_bucket(
             fd.close()
 
         return pmc_list
-    
+
     # save normal pmc counters in matching buckets
     for counter in counters:
         IP_block = counter.split(sep="_")[0].upper()
@@ -386,11 +383,13 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
 
             # we have found all the counters, store them in buckets
             counters = m.group(1).split()
-            
+
             # Utilitze helper function once a list of counters has be extracted
             save_file = True
-            pmc_list = update_pmc_bucket(counters, save_file, soc, pmc_list, stext, workload_perfmon_dir)
-    
+            pmc_list = update_pmc_bucket(
+                counters, save_file, soc, pmc_list, stext, workload_perfmon_dir
+            )
+
     # add a timestamp file
     fd = open(workload_perfmon_dir + "/timestamps.txt", "w")
     fd.write("pmc:\n\n")
@@ -434,7 +433,6 @@ def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None):
     else:
         batches = []
 
-
     tcc2_index = 0
     for iter in range(niter):
         # Prefix

From 3137076a72fa5cf35e723d6211f7b6cec17bc3f9 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 17 Jul 2023 12:46:05 -0500
Subject: [PATCH 19/81] Migrate to @grafana/create-plugin

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 grafana_plugins/svg_plugin/.prettierrc.js |  5 +-
 grafana_plugins/svg_plugin/package.json   | 65 +++++++++++++++++++----
 grafana_plugins/svg_plugin/tsconfig.json  | 12 +----
 3 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/grafana_plugins/svg_plugin/.prettierrc.js b/grafana_plugins/svg_plugin/.prettierrc.js
index f60eb1d25..aaa5045c6 100644
--- a/grafana_plugins/svg_plugin/.prettierrc.js
+++ b/grafana_plugins/svg_plugin/.prettierrc.js
@@ -1,3 +1,4 @@
 module.exports = {
-    ...require("./node_modules/@grafana/toolkit/src/config/prettier.plugin.config.json"),
-  };
\ No newline at end of file
+  // Prettier configuration provided by Grafana scaffolding
+  ...require("./.config/.prettierrc.js")
+};
\ No newline at end of file
diff --git a/grafana_plugins/svg_plugin/package.json b/grafana_plugins/svg_plugin/package.json
index fb88025c1..aa445bd25 100644
--- a/grafana_plugins/svg_plugin/package.json
+++ b/grafana_plugins/svg_plugin/package.json
@@ -3,29 +3,72 @@
   "version": "1.0.0",
   "description": "",
   "scripts": {
-    "build": "grafana-toolkit plugin:build",
-    "test": "grafana-toolkit plugin:test",
-    "dev": "grafana-toolkit plugin:dev",
-    "watch": "grafana-toolkit plugin:dev --watch",
-    "sign": "grafana-toolkit plugin:sign",
-    "start": "yarn watch"
+    "build": "webpack -c ./.config/webpack/webpack.config.ts --env production",
+    "dev": "webpack -w -c ./.config/webpack/webpack.config.ts --env development",
+    "e2e": "yarn exec cypress install && yarn exec grafana-e2e run",
+    "e2e:update": "yarn exec cypress install && yarn exec grafana-e2e run --update-screenshots",
+    "lint": "eslint --cache --ignore-path ./.gitignore --ext .js,.jsx,.ts,.tsx .",
+    "lint:fix": "yarn run lint --fix",
+    "server": "docker-compose up --build",
+    "sign": "npx --yes @grafana/sign-plugin@latest",
+    "start": "yarn watch",
+    "test": "jest --watch --onlyChanged",
+    "test:ci": "jest --passWithNoTests --maxWorkers 4",
+    "typecheck": "tsc --noEmit"
   },
   "author": "Audacious Software Group",
   "license": "MIT",
   "devDependencies": {
-    "@grafana/toolkit": "latest",
+    "@babel/core": "^7.21.4",
+    "@grafana/e2e": "9.5.3",
+    "@grafana/e2e-selectors": "9.5.3",
+    "@grafana/eslint-config": "^6.0.0",
+    "@grafana/tsconfig": "^1.2.0-rc1",
+    "@swc/core": "^1.3.62",
+    "@swc/helpers": "^0.5.0",
+    "@swc/jest": "^0.2.26",
+    "@testing-library/jest-dom": "^5.16.5",
+    "@testing-library/react": "^12.1.4",
+    "@types/jest": "^29.5.0",
+    "@types/lodash": "^4.14.194",
+    "@types/node": "^18.15.11",
+    "copy-webpack-plugin": "^11.0.0",
+    "css-loader": "^6.7.3",
     "emotion": "10.0.27",
+    "eslint-webpack-plugin": "^4.0.1",
+    "fork-ts-checker-webpack-plugin": "^8.0.0",
+    "glob": "^10.2.7",
+    "identity-obj-proxy": "3.0.0",
+    "jest": "^29.5.0",
+    "jest-environment-jsdom": "^29.5.0",
+    "prettier": "^2.8.7",
     "react-monaco-editor": "^0.44.0",
-    "tslib": "^2.3.1"
+    "replace-in-file-webpack-plugin": "^1.0.6",
+    "sass": "1.63.2",
+    "sass-loader": "13.3.1",
+    "style-loader": "3.3.3",
+    "swc-loader": "^0.2.3",
+    "ts-node": "^10.9.1",
+    "tsconfig-paths": "^4.2.0",
+    "tslib": "^2.3.1",
+    "typescript": "4.8.4",
+    "webpack": "^5.86.0",
+    "webpack-cli": "^5.1.4",
+    "webpack-livereload-plugin": "^3.0.2"
   },
   "engines": {
     "node": ">=14"
   },
   "dependencies": {
-    "@grafana/runtime": "9.1.2",
+    "@emotion/css": "^11.1.3",
     "@grafana/data": "9.1.2",
+    "@grafana/runtime": "9.1.2",
     "@grafana/ui": "9.1.2",
-    "@svgdotjs/svg.js": "^3.1.1"
+    "@svgdotjs/svg.js": "^3.1.1",
+    "react": "17.0.2",
+    "react-dom": "17.0.2",
+    "tslib": "2.5.3"
   },
-  "_comments": "Dependencies are not included as part of Omniperf. It's the user's responsibility to accept any licensing implications before building the project."
+  "_comments": "Dependencies are not included as part of Omniperf. It's the user's responsibility to accept any licensing implications before building the project.",
+  "packageManager": "yarn@1.22.19"
 }
diff --git a/grafana_plugins/svg_plugin/tsconfig.json b/grafana_plugins/svg_plugin/tsconfig.json
index 7e6657d2f..d294745aa 100644
--- a/grafana_plugins/svg_plugin/tsconfig.json
+++ b/grafana_plugins/svg_plugin/tsconfig.json
@@ -1,11 +1,3 @@
 {
-  "extends": "./node_modules/@grafana/toolkit/src/config/tsconfig.plugin.json",
-  "include": ["src", "types"],
-  "compilerOptions": {
-    "types": ["@emotion/core"],
-    "rootDir": "./src",
-    "baseUrl": "./src",
-    "typeRoots": ["./node_modules/@types"],
-    "jsx": "react"
-  }
-}
+  "extends": "./.config/tsconfig.json"
+}
\ No newline at end of file

From 43d492dce2bb0bdbc2f26de9b569fa3b4010dbd0 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 17 Jul 2023 13:02:00 -0500
Subject: [PATCH 20/81] Adding config files

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 .../svg_plugin/.config/tsconfig.json          |  26 +++
 .../.config/webpack/webpack.config.ts         | 201 ++++++++++++++++++
 2 files changed, 227 insertions(+)
 create mode 100644 grafana_plugins/svg_plugin/.config/tsconfig.json
 create mode 100644 grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts

diff --git a/grafana_plugins/svg_plugin/.config/tsconfig.json b/grafana_plugins/svg_plugin/.config/tsconfig.json
new file mode 100644
index 000000000..64b376907
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/tsconfig.json
@@ -0,0 +1,26 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in
+ * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-typescript-config
+ */
+ {
+  "compilerOptions": {
+    "alwaysStrict": true,
+    "declaration": false,
+    "rootDir": "../src",
+    "baseUrl": "../src",
+    "typeRoots": ["../node_modules/@types"],
+    "resolveJsonModule": true
+  },
+  "ts-node": {
+    "compilerOptions": {
+      "module": "commonjs",
+      "target": "es5",
+      "esModuleInterop": true
+    },
+    "transpileOnly": true
+  },
+  "include": ["../src", "./types"],
+  "extends": "@grafana/tsconfig"
+}
diff --git a/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts b/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts
new file mode 100644
index 000000000..22cb86ca4
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts
@@ -0,0 +1,201 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in
+ * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-webpack-config
+ */
+
+import CopyWebpackPlugin from 'copy-webpack-plugin';
+import ESLintPlugin from 'eslint-webpack-plugin';
+import ForkTsCheckerWebpackPlugin from 'fork-ts-checker-webpack-plugin';
+import LiveReloadPlugin from 'webpack-livereload-plugin';
+import path from 'path';
+import ReplaceInFileWebpackPlugin from 'replace-in-file-webpack-plugin';
+import { Configuration } from 'webpack';
+
+import { getPackageJson, getPluginJson, hasReadme, getEntries } from './utils';
+import { SOURCE_DIR, DIST_DIR } from './constants';
+
+const pluginJson = getPluginJson();
+
+const config = async (env): Promise<Configuration> => ({
+  cache: {
+    type: 'filesystem',
+    buildDependencies: {
+      config: [__filename],
+    },
+  },
+
+  context: path.join(process.cwd(), SOURCE_DIR),
+
+  devtool: env.production ? 'source-map' : 'eval-source-map',
+
+  entry: await getEntries(),
+
+  externals: [
+    'lodash',
+    'jquery',
+    'moment',
+    'slate',
+    'emotion',
+    '@emotion/react',
+    '@emotion/css',
+    'prismjs',
+    'slate-plain-serializer',
+    '@grafana/slate-react',
+    'react',
+    'react-dom',
+    'react-redux',
+    'redux',
+    'rxjs',
+    'react-router',
+    'react-router-dom',
+    'd3',
+    'angular',
+    '@grafana/ui',
+    '@grafana/runtime',
+    '@grafana/data',
+
+    // Mark legacy SDK imports as external if their name starts with the "grafana/" prefix
+    ({ request }, callback) => {
+      const prefix = 'grafana/';
+      const hasPrefix = (request) => request.indexOf(prefix) === 0;
+      const stripPrefix = (request) => request.substr(prefix.length);
+
+      if (hasPrefix(request)) {
+        return callback(undefined, stripPrefix(request));
+      }
+
+      callback();
+    },
+  ],
+
+  mode: env.production ? 'production' : 'development',
+
+  module: {
+    rules: [
+      {
+        exclude: /(node_modules)/,
+        test: /\.[tj]sx?$/,
+        use: {
+          loader: 'swc-loader',
+          options: {
+            jsc: {
+              baseUrl: './src',
+              target: 'es2015',
+              loose: false,
+              parser: {
+                syntax: 'typescript',
+                tsx: true,
+                decorators: false,
+                dynamicImport: true,
+              },
+            },
+          },
+        },
+      },
+      {
+        test: /\.css$/,
+        use: ["style-loader", "css-loader"]
+      },
+      {
+        test: /\.s[ac]ss$/,
+        use: ['style-loader', 'css-loader', 'sass-loader'],
+      },
+      {
+        test: /\.(png|jpe?g|gif|svg)$/,
+        type: 'asset/resource',
+        generator: {
+          // Keep publicPath relative for host.com/grafana/ deployments
+          publicPath: `public/plugins/${pluginJson.id}/img/`,
+          outputPath: 'img/',
+          filename: Boolean(env.production) ? '[hash][ext]' : '[name][ext]',
+        },
+      },
+      {
+        test: /\.(woff|woff2|eot|ttf|otf)(\?v=\d+\.\d+\.\d+)?$/,
+        type: 'asset/resource',
+        generator: {
+          // Keep publicPath relative for host.com/grafana/ deployments
+          publicPath: `public/plugins/${pluginJson.id}/fonts/`,
+          outputPath: 'fonts/',
+          filename: Boolean(env.production) ? '[hash][ext]' : '[name][ext]',
+        },
+      },
+    ],
+  },
+
+  output: {
+    clean: {
+      keep: new RegExp(`.*?_(amd64|arm(64)?)(.exe)?`),
+    },
+    filename: '[name].js',
+    library: {
+      type: 'amd',
+    },
+    path: path.resolve(process.cwd(), DIST_DIR),
+    publicPath: '/',
+  },
+
+  plugins: [
+    new CopyWebpackPlugin({
+      patterns: [
+        // If src/README.md exists use it; otherwise the root README
+        // To `compiler.options.output`
+        { from: hasReadme() ? 'README.md' : '../README.md', to: '.', force: true },
+        { from: 'plugin.json', to: '.' },
+        { from: '../LICENSE', to: '.' },
+        { from: '../CHANGELOG.md', to: '.', force: true },
+        { from: '**/*.json', to: '.' }, // TODO<Add an error for checking the basic structure of the repo>
+        { from: '**/*.svg', to: '.', noErrorOnMissing: true }, // Optional
+        { from: '**/*.png', to: '.', noErrorOnMissing: true }, // Optional
+        { from: '**/*.html', to: '.', noErrorOnMissing: true }, // Optional
+        { from: 'img/**/*', to: '.', noErrorOnMissing: true }, // Optional
+        { from: 'libs/**/*', to: '.', noErrorOnMissing: true }, // Optional
+        { from: 'static/**/*', to: '.', noErrorOnMissing: true }, // Optional
+      ],
+    }),
+    // Replace certain template-variables in the README and plugin.json
+    new ReplaceInFileWebpackPlugin([
+      {
+        dir: DIST_DIR,
+        files: ['plugin.json', 'README.md'],
+        rules: [
+          {
+            search: /\%VERSION\%/g,
+            replace: getPackageJson().version,
+          },
+          {
+            search: /\%TODAY\%/g,
+            replace: new Date().toISOString().substring(0, 10),
+          },
+          {
+            search: /\%PLUGIN_ID\%/g,
+            replace: pluginJson.id,
+          },
+        ],
+      },
+    ]),
+    new ForkTsCheckerWebpackPlugin({
+      async: Boolean(env.development),
+      issue: {
+        include: [{ file: '**/*.{ts,tsx}' }],
+      },
+      typescript: { configFile: path.join(process.cwd(), 'tsconfig.json') },
+    }),
+    new ESLintPlugin({
+      extensions: ['.ts', '.tsx'],
+      lintDirtyModulesOnly: Boolean(env.development), // don't lint on start, only lint changed files
+    }),
+    ...(env.development ? [new LiveReloadPlugin()] : []),
+  ],
+
+  resolve: {
+    extensions: ['.js', '.jsx', '.ts', '.tsx'],
+    // handle resolving "rootDir" paths
+    modules: [path.resolve(process.cwd(), 'src'), 'node_modules'],
+    unsafeCache: true,
+  },
+});
+
+export default config;

From fd55a698057929b2cc9bceb47ec5dac9ea941e18 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Mon, 17 Jul 2023 13:12:22 -0500
Subject: [PATCH 21/81] Filter additional ops in gen_counter_list func

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/utils/parser.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py
index 0328d7aa8..bff3314b2 100644
--- a/src/omniperf_analyze/utils/parser.py
+++ b/src/omniperf_analyze/utils/parser.py
@@ -340,6 +340,8 @@ def gen_counter_list(formula):
         "RW": None,
         "GIOP": None,
         "GFLOPs": None,
+        "CONCAT": None,
+        "MOD": None,
     }
 
     built_in_counter = [
@@ -362,6 +364,12 @@ def gen_counter_list(formula):
         tree = ast.parse(
             formula.replace("$normUnit", "SQ_WAVES")
             .replace("$denom", "SQ_WAVES")
+            .replace(
+                "$numActiveCUs",
+                "TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / GRBM_GUI_ACTIVE)), \
+              0) / $maxWavesPerCU) * 8) + MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) \
+              / GRBM_GUI_ACTIVE)), 0), $maxWavesPerCU), 8)), $numCU))",
+            )
             .replace("$", "")
         )
         for node in ast.walk(tree):

From 2b0ac9b5d8c9edb94ce62a9e0480790121e3a15d Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Mon, 17 Jul 2023 13:12:56 -0500
Subject: [PATCH 22/81] Enable join_prof() merge util to be called from outside
 Omniperf

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/utils/perfagg.py | 57 +++++++++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 19 deletions(-)

diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 04658795c..1c21b1736 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -95,13 +95,19 @@ def test_df_column_equality(df):
 # joins disparate runs less dumbly than rocprof
 def join_prof(workload_dir, join_type, log_file, verbose, out=None):
     # Set default output directory if not specified
-    if out == None:
-        out = workload_dir + "/pmc_perf.csv"
-    files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv")
-    df = None
+    if type(workload_dir) == str:
+        if out is None:
+            out = workload_dir + "/pmc_perf.csv"
+        files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv")
+    elif type(workload_dir) == list:
+        files = workload_dir
+    else:
+        print("ERROR: Invalid workload_dir")
+        sys.exit(1)
 
+    df = None
     for i, file in enumerate(files):
-        _df = pd.read_csv(file)
+        _df = pd.read_csv(file) if type(workload_dir) == str else file
         if join_type == "kernel":
             key = _df.groupby("KernelName").cumcount()
             _df["key"] = _df.KernelName + " - " + key.astype(str)
@@ -137,7 +143,6 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
         duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col]
         duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col]
     for key, cols in duplicate_cols.items():
-        print("Key is ", key)
         _df = df[cols]
         if not test_df_column_equality(_df):
             msg = (
@@ -146,10 +151,12 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
                 )
             )
             warnings.warn(msg)
-            log_file.write(msg + "\n")
+            if log_file:
+                log_file.write(msg + "\n")
         else:
             msg = "Successfully joined {} in pmc_perf.csv".format(key)
-            log_file.write(msg + "\n")
+            if log_file:
+                log_file.write(msg + "\n")
         if test_df_column_equality(_df) and verbose:
             print(msg)
 
@@ -179,6 +186,8 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
                     "fbar",
                     "sig",
                     "obj",
+                    # rocscope specific merged counters, keep original
+                    "dispatch_",
                 ]
             )
         ]
@@ -189,7 +198,15 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
         [
             k
             for k in df.keys()
-            if not any(check in k for check in ["DispatchNs", "CompleteNs"])
+            if not any(
+                check in k
+                for check in [
+                    "DispatchNs",
+                    "CompleteNs",
+                    # rocscope specific timestamp
+                    "HostDuration",
+                ]
+            )
         ]
     ]
     #   C) sanity check the name and key
@@ -216,12 +233,14 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
     df["EndNs"] = endNs
     # finally, join the drop key
     df = df.drop(columns=["key"])
-    # and save to file
-    df.to_csv(out, index=False)
-    # and delete old file(s)
-    if not verbose:
-        for file in files:
-            os.remove(file)
+    # save to file and delete old file(s), skip if we're being called outside of Omniperf
+    if type(workload_dir) == str:
+        df.to_csv(out, index=False)
+        if not verbose:
+            for file in files:
+                os.remove(file)
+    else:
+        return df
 
 
 def pmc_perf_split(workload_dir):
@@ -262,13 +281,13 @@ def update_pmc_bucket(
     # Verify inputs.
     # If save_file is True, we're being called internally, from perfmon_coalesce
     # Else we're being called externally, from rocomni
-    detected_extermal_call = False
+    detected_external_call = False
     if save_file and (stext is None or workload_perfmon_dir is None):
         raise ValueError(
             "stext and workload_perfmon_dir must be specified if save_file is True"
         )
     if pmc_list is None:
-        detected_extermal_call = True
+        detected_external_call = True
         pmc_list = dict(
             [
                 ("SQ", []),
@@ -287,7 +306,7 @@ def update_pmc_bucket(
         for ch in range(perfmon_config[soc]["TCC_channels"]):
             pmc_list["TCC2"][str(ch)] = []
 
-    if "SQ_ACCUM_PREV_HIRES" in counters:
+    if "SQ_ACCUM_PREV_HIRES" in counters and not detected_external_call:
         # save  all level counters separately
         nindex = counters.index("SQ_ACCUM_PREV_HIRES")
         level_counter = counters[nindex - 1]
@@ -336,7 +355,7 @@ def update_pmc_bucket(
                     # initial counter in this channel
                     pmc_list["TCC2"][str(ch)] = [counter]
 
-    if detected_extermal_call:
+    if detected_external_call:
         # sort the per channel counter, so that same counter in all channels can be aligned
         for ch in range(perfmon_config[soc]["TCC_channels"]):
             pmc_list["TCC2"][str(ch)].sort()

From d7ba2acec93a2fec5438593f6fe5c67e462a617f Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 17 Jul 2023 13:19:03 -0500
Subject: [PATCH 23/81] Adding config files

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 grafana_plugins/svg_plugin/.config/.eslintrc  |  13 ++
 .../svg_plugin/.config/.prettierrc.js         |  16 ++
 grafana_plugins/svg_plugin/.config/Dockerfile |  16 ++
 grafana_plugins/svg_plugin/.config/README.md  | 164 ++++++++++++++++++
 .../svg_plugin/.config/jest-setup.js          |  25 +++
 .../svg_plugin/.config/jest.config.js         |  43 +++++
 .../.config/jest/mocks/react-inlinesvg.tsx    |  25 +++
 .../svg_plugin/.config/jest/utils.js          |  31 ++++
 .../svg_plugin/.config/types/custom.d.ts      |  37 ++++
 .../svg_plugin/.config/webpack/constants.ts   |   2 +
 .../svg_plugin/.config/webpack/utils.ts       |  40 +++++
 grafana_plugins/svg_plugin/.eslintrc          |   3 +
 grafana_plugins/svg_plugin/.nvmrc             |   1 +
 .../svg_plugin/docker-compose.yaml            |  15 ++
 grafana_plugins/svg_plugin/jest-setup.js      |   2 +
 grafana_plugins/svg_plugin/jest.config.js     |   8 +
 16 files changed, 441 insertions(+)
 create mode 100644 grafana_plugins/svg_plugin/.config/.eslintrc
 create mode 100644 grafana_plugins/svg_plugin/.config/.prettierrc.js
 create mode 100644 grafana_plugins/svg_plugin/.config/Dockerfile
 create mode 100644 grafana_plugins/svg_plugin/.config/README.md
 create mode 100644 grafana_plugins/svg_plugin/.config/jest-setup.js
 create mode 100644 grafana_plugins/svg_plugin/.config/jest.config.js
 create mode 100644 grafana_plugins/svg_plugin/.config/jest/mocks/react-inlinesvg.tsx
 create mode 100644 grafana_plugins/svg_plugin/.config/jest/utils.js
 create mode 100644 grafana_plugins/svg_plugin/.config/types/custom.d.ts
 create mode 100644 grafana_plugins/svg_plugin/.config/webpack/constants.ts
 create mode 100644 grafana_plugins/svg_plugin/.config/webpack/utils.ts
 create mode 100644 grafana_plugins/svg_plugin/.eslintrc
 create mode 100644 grafana_plugins/svg_plugin/.nvmrc
 create mode 100644 grafana_plugins/svg_plugin/docker-compose.yaml
 create mode 100644 grafana_plugins/svg_plugin/jest-setup.js
 create mode 100644 grafana_plugins/svg_plugin/jest.config.js

diff --git a/grafana_plugins/svg_plugin/.config/.eslintrc b/grafana_plugins/svg_plugin/.config/.eslintrc
new file mode 100644
index 000000000..3f8c381a4
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/.eslintrc
@@ -0,0 +1,13 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in
+ * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-eslint-config
+ */
+ {
+  "extends": ["@grafana/eslint-config"],
+  "root": true,
+  "rules": {
+    "react/prop-types": "off"
+  }
+}
diff --git a/grafana_plugins/svg_plugin/.config/.prettierrc.js b/grafana_plugins/svg_plugin/.config/.prettierrc.js
new file mode 100644
index 000000000..66a76ec5b
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/.prettierrc.js
@@ -0,0 +1,16 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in .config/README.md
+ */
+
+module.exports = {
+  "endOfLine": "auto",
+  "printWidth": 120,
+  "trailingComma": "es5",
+  "semi": true,
+  "jsxSingleQuote": false,
+  "singleQuote": true,
+  "useTabs": false,
+  "tabWidth": 2
+};
\ No newline at end of file
diff --git a/grafana_plugins/svg_plugin/.config/Dockerfile b/grafana_plugins/svg_plugin/.config/Dockerfile
new file mode 100644
index 000000000..35d89bd1c
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/Dockerfile
@@ -0,0 +1,16 @@
+ARG grafana_version=latest
+ARG grafana_image=grafana-enterprise
+
+FROM grafana/${grafana_image}:${grafana_version}
+
+# Make it as simple as possible to access the grafana instance for development purposes
+# Do NOT enable these settings in a public facing / production grafana instance
+ENV GF_AUTH_ANONYMOUS_ORG_ROLE "Admin"
+ENV GF_AUTH_ANONYMOUS_ENABLED "true"
+ENV GF_AUTH_BASIC_ENABLED "false"
+# Set development mode so plugins can be loaded without the need to sign
+ENV GF_DEFAULT_APP_MODE "development"
+
+# Inject livereload script into grafana index.html
+USER root
+RUN sed -i 's/<\/body><\/html>/<script src=\"http:\/\/localhost:35729\/livereload.js\"><\/script><\/body><\/html>/g' /usr/share/grafana/public/views/index.html
\ No newline at end of file
diff --git a/grafana_plugins/svg_plugin/.config/README.md b/grafana_plugins/svg_plugin/.config/README.md
new file mode 100644
index 000000000..f1ded3464
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/README.md
@@ -0,0 +1,164 @@
+# Default build configuration by Grafana
+
+**This is an auto-generated directory and is not intended to be changed! ⚠️**
+
+The `.config/` directory holds basic configuration for the different tools
+that are used to develop, test and build the project. In order to make updates easier we ask you to
+not edit files in this folder to extend configuration.
+
+## How to extend the basic configs?
+
+Bear in mind that you are doing it at your own risk, and that extending any of the basic configuration can lead
+to issues around working with the project.
+
+### Extending the ESLint config
+
+Edit the `.eslintrc` file in the project root in order to extend the ESLint configuration.
+
+**Example:**
+
+```json
+{
+  "extends": "./.config/.eslintrc",
+  "rules": {
+    "react/prop-types": "off"
+  }
+}
+```
+
+---
+
+### Extending the Prettier config
+
+Edit the `.prettierrc.js` file in the project root in order to extend the Prettier configuration.
+
+**Example:**
+
+```javascript
+module.exports = {
+  // Prettier configuration provided by Grafana scaffolding
+  ...require('./.config/.prettierrc.js'),
+
+  semi: false,
+};
+```
+
+---
+
+### Extending the Jest config
+
+There are two configuration files in the project root that belong to Jest: `jest-setup.js` and `jest.config.js`.
+
+**`jest-setup.js`:** A file that is run before each test file in the suite is executed. We are using it to
+set up the Jest DOM for the testing library and to apply some polyfills. ([link to Jest docs](https://jestjs.io/docs/configuration#setupfilesafterenv-array))
+
+**`jest.config.js`:** The main Jest configuration file that extends the Grafana recommended setup. ([link to Jest docs](https://jestjs.io/docs/configuration))
+
+#### ESM errors with Jest
+
+A common issue found with the current jest config involves importing an npm package which only offers an ESM build. These packages cause jest to error with `SyntaxError: Cannot use import statement outside a module`. To work around this we provide a list of known packages to pass to the [`transformIgnorePatterns`](https://jestjs.io/docs/configuration#transformignorepatterns-arraystring) jest configuration property. If need be, this can be extended in the following way:
+
+```javascript
+process.env.TZ = 'UTC';
+const { grafanaESModules, nodeModulesToTransform } = require('./.config/jest/utils');
+
+module.exports = {
+  // Jest configuration provided by Grafana
+  ...require('./.config/jest.config'),
+  // Inform jest to only transform specific node_module packages.
+  transformIgnorePatterns: [nodeModulesToTransform([...grafanaESModules, 'packageName'])],
+};
+```
+
+---
+
+### Extending the TypeScript config
+
+Edit the `tsconfig.json` file in the project root in order to extend the TypeScript configuration.
+
+**Example:**
+
+```json
+{
+  "extends": "./.config/tsconfig.json",
+  "compilerOptions": {
+    "preserveConstEnums": true
+  }
+}
+```
+
+---
+
+### Extending the Webpack config
+
+Follow these steps to extend the basic Webpack configuration that lives under `.config/`:
+
+#### 1. Create a new Webpack configuration file
+
+Create a new config file that is going to extend the basic one provided by Grafana.
+It can live in the project root, e.g. `webpack.config.ts`.
+
+#### 2. Merge the basic config provided by Grafana and your custom setup
+
+We are going to use [`webpack-merge`](https://github.com/survivejs/webpack-merge) for this.
+
+```typescript
+// webpack.config.ts
+import type { Configuration } from 'webpack';
+import { merge } from 'webpack-merge';
+import grafanaConfig from './.config/webpack/webpack.config';
+
+const config = async (env): Promise<Configuration> => {
+  const baseConfig = await grafanaConfig(env);
+
+  return merge(baseConfig, {
+    // Add custom config here...
+    output: {
+      asyncChunks: true,
+    },
+  });
+};
+
+export default config;
+```
+
+#### 3. Update the `package.json` to use the new Webpack config
+
+We need to update the `scripts` in the `package.json` to use the extended Webpack configuration.
+
+**Update for `build`:**
+
+```diff
+-"build": "webpack -c ./.config/webpack/webpack.config.ts --env production",
++"build": "webpack -c ./webpack.config.ts --env production",
+```
+
+**Update for `dev`:**
+
+```diff
+-"dev": "webpack -w -c ./.config/webpack/webpack.config.ts --env development",
++"dev": "webpack -w -c ./webpack.config.ts --env development",
+```
+
+### Configure grafana image to use when running docker
+
+By default `grafana-enterprise` will be used as the docker image for all docker related commands. If you want to override this behaviour simply alter the `docker-compose.yaml` by adding the following build arg `grafana_image`. 
+
+**Example:**
+
+```yaml
+version: '3.7'
+
+services:
+  grafana:
+    container_name: 'myorg-basic-app'
+    build:
+      context: ./.config
+      args:
+        grafana_version: ${GRAFANA_VERSION:-9.1.2}
+        grafana_image: ${GRAFANA_IMAGE:-grafana}
+```
+
+In this example we are assigning the environment variable `GRAFANA_IMAGE` to the build arg `grafana_image` with a default value of `grafana`. This will give you the possibility to set the value while running the docker-compose commands which might be convenient in some scenarios.
+
+---
diff --git a/grafana_plugins/svg_plugin/.config/jest-setup.js b/grafana_plugins/svg_plugin/.config/jest-setup.js
new file mode 100644
index 000000000..575b354fe
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/jest-setup.js
@@ -0,0 +1,25 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in
+ * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-jest-config
+ */
+
+import '@testing-library/jest-dom';
+
+// https://jestjs.io/docs/manual-mocks#mocking-methods-which-are-not-implemented-in-jsdom
+Object.defineProperty(global, 'matchMedia', {
+  writable: true,
+  value: jest.fn().mockImplementation((query) => ({
+    matches: false,
+    media: query,
+    onchange: null,
+    addListener: jest.fn(), // deprecated
+    removeListener: jest.fn(), // deprecated
+    addEventListener: jest.fn(),
+    removeEventListener: jest.fn(),
+    dispatchEvent: jest.fn(),
+  })),
+});
+
+HTMLCanvasElement.prototype.getContext = () => {};
diff --git a/grafana_plugins/svg_plugin/.config/jest.config.js b/grafana_plugins/svg_plugin/.config/jest.config.js
new file mode 100644
index 000000000..3cb011e08
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/jest.config.js
@@ -0,0 +1,43 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in
+ * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-jest-config
+ */
+
+const path = require('path');
+const { grafanaESModules, nodeModulesToTransform } = require('./jest/utils');
+
+module.exports = {
+  moduleNameMapper: {
+    '\\.(css|scss|sass)$': 'identity-obj-proxy',
+    'react-inlinesvg': path.resolve(__dirname, 'jest', 'mocks', 'react-inlinesvg.tsx'),
+  },
+  modulePaths: ['<rootDir>/src'],
+  setupFilesAfterEnv: ['<rootDir>/jest-setup.js'],
+  testEnvironment: 'jest-environment-jsdom',
+  testMatch: [
+    '<rootDir>/src/**/__tests__/**/*.{js,jsx,ts,tsx}',
+    '<rootDir>/src/**/*.{spec,test,jest}.{js,jsx,ts,tsx}',
+    '<rootDir>/src/**/*.{spec,test,jest}.{js,jsx,ts,tsx}',
+  ],
+  transform: {
+    '^.+\\.(t|j)sx?$': [
+      '@swc/jest',
+      {
+        sourceMaps: 'inline',
+        jsc: {
+          parser: {
+            syntax: 'typescript',
+            tsx: true,
+            decorators: false,
+            dynamicImport: true,
+          },
+        },
+      },
+    ],
+  },
+  // Jest will throw `Cannot use import statement outside module` if it tries to load an
+  // ES module without it being transformed first. ./.config/README.md#esm-errors-with-jest
+  transformIgnorePatterns: [nodeModulesToTransform(grafanaESModules)],
+};
diff --git a/grafana_plugins/svg_plugin/.config/jest/mocks/react-inlinesvg.tsx b/grafana_plugins/svg_plugin/.config/jest/mocks/react-inlinesvg.tsx
new file mode 100644
index 000000000..d540f3aab
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/jest/mocks/react-inlinesvg.tsx
@@ -0,0 +1,25 @@
+// Due to the grafana/ui Icon component making fetch requests to
+// `/public/img/icon/<icon_name>.svg` we need to mock react-inlinesvg to prevent
+// the failed fetch requests from displaying errors in console.
+
+import React from 'react';
+
+type Callback = (...args: any[]) => void;
+
+export interface StorageItem {
+  content: string;
+  queue: Callback[];
+  status: string;
+}
+
+export const cacheStore: { [key: string]: StorageItem } = Object.create(null);
+
+const SVG_FILE_NAME_REGEX = /(.+)\/(.+)\.svg$/;
+
+const InlineSVG = ({ src }: { src: string }) => {
+  // testId will be the file name without extension (e.g. `public/img/icons/angle-double-down.svg` -> `angle-double-down`)
+  const testId = src.replace(SVG_FILE_NAME_REGEX, '$2');
+  return <svg xmlns="http://www.w3.org/2000/svg" data-testid={testId} viewBox="0 0 24 24" />;
+};
+
+export default InlineSVG;
diff --git a/grafana_plugins/svg_plugin/.config/jest/utils.js b/grafana_plugins/svg_plugin/.config/jest/utils.js
new file mode 100644
index 000000000..1c1088497
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/jest/utils.js
@@ -0,0 +1,31 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in .config/README.md
+ */
+
+/*
+ * This utility function is useful in combination with jest `transformIgnorePatterns` config
+ * to transform specific packages (e.g. ES modules) in a project's node_modules folder.
+ */
+const nodeModulesToTransform = (moduleNames) => `node_modules\/(?!(${moduleNames.join('|')})\/)`;
+
+// Array of known nested grafana package dependencies that only bundle an ESM version
+const grafanaESModules = [
+  '.pnpm', // Support using pnpm symlinked packages
+  '@grafana/schema',
+  'd3',
+  'd3-color',
+  'd3-force',
+  'd3-interpolate',
+  'd3-scale-chromatic',
+  'ol',
+  'react-colorful',
+  'rxjs',
+  'uuid',
+];
+
+module.exports = {
+  nodeModulesToTransform,
+  grafanaESModules,
+};
diff --git a/grafana_plugins/svg_plugin/.config/types/custom.d.ts b/grafana_plugins/svg_plugin/.config/types/custom.d.ts
new file mode 100644
index 000000000..64e6eaa6f
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/types/custom.d.ts
@@ -0,0 +1,37 @@
+// Image declarations
+declare module '*.gif' {
+  const src: string;
+  export default src;
+}
+
+declare module '*.jpg' {
+  const src: string;
+  export default src;
+}
+
+declare module '*.jpeg' {
+  const src: string;
+  export default src;
+}
+
+declare module '*.png' {
+  const src: string;
+  export default src;
+}
+
+declare module '*.webp' {
+  const src: string;
+  export default src;
+}
+
+declare module '*.svg' {
+  const content: string;
+  export default content;
+}
+
+// Font declarations
+declare module '*.woff';
+declare module '*.woff2';
+declare module '*.eot';
+declare module '*.ttf';
+declare module '*.otf';
diff --git a/grafana_plugins/svg_plugin/.config/webpack/constants.ts b/grafana_plugins/svg_plugin/.config/webpack/constants.ts
new file mode 100644
index 000000000..071e4fd34
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/webpack/constants.ts
@@ -0,0 +1,2 @@
+export const SOURCE_DIR = 'src';
+export const DIST_DIR = 'dist';
diff --git a/grafana_plugins/svg_plugin/.config/webpack/utils.ts b/grafana_plugins/svg_plugin/.config/webpack/utils.ts
new file mode 100644
index 000000000..c2691e693
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/webpack/utils.ts
@@ -0,0 +1,40 @@
+import fs from 'fs';
+import path from 'path';
+import util from 'util';
+import { glob } from 'glob';
+import { SOURCE_DIR } from './constants';
+
+export function getPackageJson() {
+  return require(path.resolve(process.cwd(), 'package.json'));
+}
+
+export function getPluginJson() {
+  return require(path.resolve(process.cwd(), `${SOURCE_DIR}/plugin.json`));
+}
+
+export function hasReadme() {
+  return fs.existsSync(path.resolve(process.cwd(), SOURCE_DIR, 'README.md'));
+}
+
+// Support bundling nested plugins by finding all plugin.json files in src directory
+// then checking for a sibling module.[jt]sx? file.
+export async function getEntries(): Promise<Record<string, string>> {
+  const pluginsJson = await glob('**/src/**/plugin.json', { absolute: true });
+
+  const plugins = await Promise.all(pluginsJson.map((pluginJson) => {
+      const folder = path.dirname(pluginJson);
+      return glob(`${folder}/module.{ts,tsx,js,jsx}`, { absolute: true });
+    })
+  );
+
+  return plugins.reduce((result, modules) => {
+    return modules.reduce((result, module) => {
+      const pluginPath = path.dirname(module);
+      const pluginName = path.relative(process.cwd(), pluginPath).replace(/src\/?/i, '');
+      const entryName = pluginName === '' ? 'module' : `${pluginName}/module`;
+
+      result[entryName] = module;
+      return result;
+    }, result);
+  }, {});
+}
diff --git a/grafana_plugins/svg_plugin/.eslintrc b/grafana_plugins/svg_plugin/.eslintrc
new file mode 100644
index 000000000..01e61dfcb
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.eslintrc
@@ -0,0 +1,3 @@
+{
+  "extends": "./.config/.eslintrc"
+}
\ No newline at end of file
diff --git a/grafana_plugins/svg_plugin/.nvmrc b/grafana_plugins/svg_plugin/.nvmrc
new file mode 100644
index 000000000..19c7bdba7
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.nvmrc
@@ -0,0 +1 @@
+16
\ No newline at end of file
diff --git a/grafana_plugins/svg_plugin/docker-compose.yaml b/grafana_plugins/svg_plugin/docker-compose.yaml
new file mode 100644
index 000000000..96498f050
--- /dev/null
+++ b/grafana_plugins/svg_plugin/docker-compose.yaml
@@ -0,0 +1,15 @@
+version: '3.0'
+
+services:
+  grafana:
+    container_name: 'amd-custom-svg'
+    build:
+      context: ./.config
+      args:
+        grafana_image: ${GRAFANA_IMAGE:-grafana-enterprise}
+        grafana_version: ${GRAFANA_VERSION:-9.5.3}
+    ports:
+      - 3000:3000/tcp
+    volumes:
+      - ./dist:/var/lib/grafana/plugins/amd-custom-svg
+      - ./provisioning:/etc/grafana/provisioning
diff --git a/grafana_plugins/svg_plugin/jest-setup.js b/grafana_plugins/svg_plugin/jest-setup.js
new file mode 100644
index 000000000..35a700b73
--- /dev/null
+++ b/grafana_plugins/svg_plugin/jest-setup.js
@@ -0,0 +1,2 @@
+// Jest setup provided by Grafana scaffolding
+import './.config/jest-setup';
diff --git a/grafana_plugins/svg_plugin/jest.config.js b/grafana_plugins/svg_plugin/jest.config.js
new file mode 100644
index 000000000..79fd52a1b
--- /dev/null
+++ b/grafana_plugins/svg_plugin/jest.config.js
@@ -0,0 +1,8 @@
+// force timezone to UTC to allow tests to work regardless of local timezone
+// generally used by snapshots, but can affect specific tests
+process.env.TZ = 'UTC';
+
+module.exports = {
+  // Jest configuration provided by Grafana scaffolding
+  ...require('./.config/jest.config'),
+};

From 479eb66d955838d92e4ab6fac3e56f829737c0cc Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Tue, 18 Jul 2023 16:03:21 -0500
Subject: [PATCH 24/81] Add figure styling to YML configs

Signed-off-by: colramos-amd <colramos@amd.com>
---
 .../configs/gfx906/1000_compute-unit-instruction-mix.yaml  | 6 ++++++
 .../configs/gfx906/1100_compute-unit-compute-pipeline.yaml | 5 +++++
 src/omniperf_analyze/configs/gfx906/1200_lds.yaml          | 5 +++++
 .../configs/gfx906/1300_instruction-cache.yaml             | 5 +++++
 .../configs/gfx906/1400_constant-cache.yaml                | 5 +++++
 src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml     | 7 +++++++
 src/omniperf_analyze/configs/gfx906/1700_L2_cache.yaml     | 4 ++++
 .../configs/gfx908/1000_compute-unit-instruction-mix.yaml  | 6 ++++++
 .../configs/gfx908/1100_compute-unit-compute-pipeline.yaml | 5 +++++
 src/omniperf_analyze/configs/gfx908/1200_lds.yaml          | 5 +++++
 .../configs/gfx908/1300_instruction-cache.yaml             | 5 +++++
 .../configs/gfx908/1400_constant-cache.yaml                | 5 +++++
 src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml     | 7 +++++++
 src/omniperf_analyze/configs/gfx908/1700_L2_cache.yaml     | 4 ++++
 .../configs/gfx90a/1000_compute-unit-instruction-mix.yaml  | 6 ++++++
 .../configs/gfx90a/1100_compute-unit-compute-pipeline.yaml | 5 +++++
 src/omniperf_analyze/configs/gfx90a/1200_lds.yaml          | 5 +++++
 .../configs/gfx90a/1300_instruction-cache.yaml             | 5 +++++
 .../configs/gfx90a/1400_constant-cache.yaml                | 5 +++++
 src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml     | 7 +++++++
 src/omniperf_analyze/configs/gfx90a/1700_L2_cache.yaml     | 4 ++++
 21 files changed, 111 insertions(+)

diff --git a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
index fd4653c23..88386a3de 100644
--- a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
@@ -16,6 +16,9 @@ Panel Config:
           count: Count
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           VALU - Vector:
             count: None # No HW module
@@ -58,6 +61,9 @@ Panel Config:
           count: Count
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           INT-32:
             count: None # No perf counter
diff --git a/src/omniperf_analyze/configs/gfx906/1100_compute-unit-compute-pipeline.yaml b/src/omniperf_analyze/configs/gfx906/1100_compute-unit-compute-pipeline.yaml
index e7f537290..8cffb24c7 100644
--- a/src/omniperf_analyze/configs/gfx906/1100_compute-unit-compute-pipeline.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1100_compute-unit-compute-pipeline.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           valu_flops_pop:
             value: None # No perf counter
diff --git a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml
index 4f12a2ac3..218ad2cda 100644
--- a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Utilization:
             value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU)))
diff --git a/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml b/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml
index f40d3546e..fb9f384e1 100644
--- a/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml b/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml
index 4583c1012..91a2d6c9f 100644
--- a/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           mertic: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
index 1e05b3e4c..c734e21c8 100644
--- a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Buffer Coalescing:
             value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
@@ -248,6 +253,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_multi_bar
         metric:
           NC - Read:
             xfer: Read
diff --git a/src/omniperf_analyze/configs/gfx906/1700_L2_cache.yaml b/src/omniperf_analyze/configs/gfx906/1700_L2_cache.yaml
index 53235ca14..0b5f5e827 100644
--- a/src/omniperf_analyze/configs/gfx906/1700_L2_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1700_L2_cache.yaml
@@ -16,6 +16,8 @@ Panel Config:
           value: Value
           unit: Unit
           tips: Tips
+        style:
+          type: simple_bar
         metric:
           L2 Util:
             value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($L2Banks) * GRBM_GUI_ACTIVE)))
@@ -301,6 +303,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style:
+          type: simple_multi_bar
         metric:
           Read - Remote Socket Stall:
             type: Remote Socket Stall
diff --git a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
index b1977ce49..a6e831446 100644
--- a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
@@ -16,6 +16,9 @@ Panel Config:
           count: Count
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           VALU - Vector:
             count: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
@@ -58,6 +61,9 @@ Panel Config:
           count: Count
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           INT-32:
             count: None # No perf counter
diff --git a/src/omniperf_analyze/configs/gfx908/1100_compute-unit-compute-pipeline.yaml b/src/omniperf_analyze/configs/gfx908/1100_compute-unit-compute-pipeline.yaml
index 9316d1d25..061311d62 100644
--- a/src/omniperf_analyze/configs/gfx908/1100_compute-unit-compute-pipeline.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1100_compute-unit-compute-pipeline.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           valu_flops_pop:
             value: None # No perf counter
diff --git a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml
index 4f12a2ac3..218ad2cda 100644
--- a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Utilization:
             value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU)))
diff --git a/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml b/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml
index f40d3546e..fb9f384e1 100644
--- a/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml b/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml
index 4583c1012..91a2d6c9f 100644
--- a/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           mertic: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
index f65309a31..7eeed0477 100644
--- a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Buffer Coalescing:
             value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
@@ -248,6 +253,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_multi_bar
         metric:
           NC - Read:
             xfer: Read
diff --git a/src/omniperf_analyze/configs/gfx908/1700_L2_cache.yaml b/src/omniperf_analyze/configs/gfx908/1700_L2_cache.yaml
index 9e76a39b6..0c7b03811 100644
--- a/src/omniperf_analyze/configs/gfx908/1700_L2_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1700_L2_cache.yaml
@@ -16,6 +16,8 @@ Panel Config:
           value: Value
           unit: Unit
           tips: Tips
+        style:
+          type: simple_bar
         metric:
           L2 Util:
             value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($L2Banks) * GRBM_GUI_ACTIVE)))
@@ -301,6 +303,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style:
+          type: simple_multi_bar
         metric:
           Read - Remote Socket Stall:
             type: Remote Socket Stall
diff --git a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
index cd3f6161c..35401d2ed 100644
--- a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
@@ -16,6 +16,9 @@ Panel Config:
           count: Count
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           VALU - Vector:
             count: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
@@ -58,6 +61,9 @@ Panel Config:
           count: Count
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           INT32:
             count: AVG((SQ_INSTS_VALU_INT32 / $denom))
diff --git a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
index 4ed332e3f..3e29bc4a2 100644
--- a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           valu_flops_pop:
             value: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
diff --git a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml
index 4f12a2ac3..218ad2cda 100644
--- a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Utilization:
             value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU)))
diff --git a/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml b/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml
index cd0af3bfa..1a7000e93 100644
--- a/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml b/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml
index 4583c1012..91a2d6c9f 100644
--- a/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           mertic: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
index 917cb3aa0..7ea26db05 100644
--- a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Buffer Coalescing:
             value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
@@ -248,6 +253,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_multi_bar
         metric:
           NC - Read:
             xfer: Read
diff --git a/src/omniperf_analyze/configs/gfx90a/1700_L2_cache.yaml b/src/omniperf_analyze/configs/gfx90a/1700_L2_cache.yaml
index 7be18091a..ddbaf9155 100644
--- a/src/omniperf_analyze/configs/gfx90a/1700_L2_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1700_L2_cache.yaml
@@ -16,6 +16,8 @@ Panel Config:
           value: Value
           unit: Unit
           tips: Tips
+        style:
+          type: simple_bar
         metric:
           L2 Util:
             value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($L2Banks) * GRBM_GUI_ACTIVE)))
@@ -301,6 +303,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style:
+          type: simple_multi_bar
         metric:
           Read - Remote Socket Stall:
             type: Remote Socket Stall

From 6487ba3853deaf97a91e03c0dc4d8a8109099546 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Tue, 18 Jul 2023 16:04:31 -0500
Subject: [PATCH 25/81] New simple_charts utility

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/utils/simple_charts.py | 119 ++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 src/omniperf_analyze/utils/simple_charts.py

diff --git a/src/omniperf_analyze/utils/simple_charts.py b/src/omniperf_analyze/utils/simple_charts.py
new file mode 100644
index 000000000..8a853f987
--- /dev/null
+++ b/src/omniperf_analyze/utils/simple_charts.py
@@ -0,0 +1,119 @@
+##############################################################################bl
+# MIT License
+#
+# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+##############################################################################el
+
+import plotly.express as px
+import pandas as pd
+
+
+# Notes:
+#   This file implements a few simple but common charts for the CLI.
+#   We try to auto-size the layout to cover most cases by default. If that
+#   doesn't work, apply a style config in the yaml of each dashboard.
+
+
+def simple_bar(df, title: str = None, id=None, style: dict = None, orientation="h"):
+    """
+    Plot data with simple bar chart; df needs "Metric" and "Count" or "Value"
+    columns. style may carry "range_color", "label_txt", "xrange" (None = unset).
+    """
+    # TODO: handle None properly (the detected column is cast to int in place)
+    if "Metric" in df.columns and ("Count" in df.columns or "Value" in df.columns):
+        detected_label = "Count" if "Count" in df.columns else "Value"
+        df[detected_label] = [
+            x.astype(int) if x != "" else int(0) for x in df[detected_label]
+        ]
+    else:
+        raise NameError("simple_bar: No Metric or Count in df columns!")
+
+    style = style or {}  # figure characteristics; the default None means no style
+    range_color = style.get("range_color", None)
+    label_txt = style.get("label_txt", None)
+    xrange = style.get("xrange", None)
+    if label_txt is not None:
+        label_txt = label_txt.strip("()")
+        try:
+            label_txt = label_txt.replace("+ $normUnit", df["Unit"][0])
+        except KeyError:
+            print("No units found in df. Auto labeling.")
+
+    # Overrides for figure characteristics
+    if id == 1701.1:
+        label_txt = "%"
+        range_color = [0, 100]
+        xrange = [0, 110]
+    if id == 1701.2:
+        label_txt = "Gb/s"
+        range_color = [0, 1638]
+        xrange = [0, 1638]
+
+    fig = px.bar(
+        df,
+        title=title,
+        x=detected_label,
+        y="Metric",
+        color=detected_label,
+        range_color=range_color,
+        labels={detected_label: label_txt},
+        orientation=orientation,
+    ).update_xaxes(range=xrange)
+
+    return fig
+
+
+def simple_multi_bar(df, title=None, id=None):
+    """
+    Plot data with simple multiple bar chart; returns one figure per group.
+    Only id 1604 and 1704 are grouped; any other id yields an empty list.
+    """
+    # TODO: handle Nan and None properly
+    if "Metric" in df.columns and "Avg" in df.columns:
+        df["Avg"] = [x.astype(int) if x != "" else int(0) for x in df["Avg"]]
+    else:
+        raise NameError("simple_multi_bar: No Metric or Avg in df columns!")
+
+    dfigs = []
+    nested_bar = {}
+    df_unit = df["Unit"][0]  # the unit at index label 0 titles every x axis
+    if id == 1604:
+        nested_bar = {"NC": {}, "UC": {}, "RW": {}, "CC": {}}
+        for _, row in df.iterrows():
+            nested_bar[row["Coherency"]][row["Xfer"]] = row["Avg"]
+    if id == 1704:
+        nested_bar = {"Read": {}, "Write": {}}
+        for _, row in df.iterrows():
+            nested_bar[row["Transaction"]][row["Type"]] = row["Avg"]
+
+    for group, metric in nested_bar.items():
+        dfigs.append(
+            px.bar(
+                title=group,
+                x=metric.values(),
+                y=metric.keys(),
+                labels={"x": df_unit, "y": ""},
+                text=metric.values(),
+            )
+            .update_xaxes(showgrid=False, rangemode="nonnegative")
+            .update_yaxes(showgrid=False)
+        )
+    return dfigs

From 42248d53918b4a5b00845ddda16830ebc04de067 Mon Sep 17 00:00:00 2001
From: Nicholas Curtis <nicurtis@amd.com>
Date: Fri, 28 Jul 2023 12:26:17 -0400
Subject: [PATCH 26/81] fix max BF16 flop rate on CDNA2

Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
---
 dashboards/Omniperf_v1.0.8_pub.json                           | 2 +-
 .../configs/gfx90a/0200_system-speed-of-light.yaml            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dashboards/Omniperf_v1.0.8_pub.json b/dashboards/Omniperf_v1.0.8_pub.json
index fbebb0d44..0412ba914 100644
--- a/dashboards/Omniperf_v1.0.8_pub.json
+++ b/dashboards/Omniperf_v1.0.8_pub.json
@@ -339,7 +339,7 @@
               "hide": false,
               "rawQuery": true,
               "refId": "A",
-              "target": "${Workload1}.pmc_perf.aggregate([\n    {\"$match\": {\n        \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n        \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n        \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n      }},\n      {\"$group\": {\n          \"_id\": null,\n          \"valu_flops_val\": {\n            \"$avg\": { \"$divide\": [ { \"$add\": [\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n            ]}\n            ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n          }},\n    \n          \"valu_intOps_val\": {\n            \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n          },\n    \n          \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", 
\"&BeginNs\"] }] }  },\n          \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n    \n    \n          \"salu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"valu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"mfma_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU, 4] }] }\n          },\n\n    \n          \"lds_bconf\": {\n            \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                {\"$divide\": [\n                                    \"&SQ_LDS_BANK_CONFLICT\",\n                                    {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                ]},\n                                null\n                              ]\n                    }     \n          },\n    \n   \n          \"lds_bw\":  {\n            \"$avg\": {\"$divide\":[ \n                        { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                            4,  \n                                            {\"$toInt\": \"$L2Banks\"}\n                                        ]}, \n                        {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                    ]}\n         },\n    \n          \"lds_bw_pop\":  {\n            \"$avg\":     {\"$divide\": [\n                            {\"$divide\":[ \n                                { \"$multiply\": [ { \"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                                    4,  \n                                                    {\"$toInt\": \"$L2Banks\"}\n                                                ]}, \n                                {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                            ]},            \n                             {\"$multiply\": [$sclk, $numCU, 0.00128]}\n                         ]}\n         },\n    \n          \"unpredthreads_val\": {\n              \"$avg\": {\n                  \"$cond\": [\n                      {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n                      { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n                      null\n                  ]\n              }\n          },\n    \n          \"ipcIssue_val\": {\n              \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n          },\n          \"eaWriteLat_val\": {\n            \"$avg\": {\n                \"$cond\": [\n                  {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n                  { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n                  null                        \n                ]\n            }\n            },\n            \"eaReadLat_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                      {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n                      { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n                      null                        \n                    ]\n                }\n            },\n            \"eaWriteBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ 
\"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            }, \n            \"eaReadBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            },\n            \"l2_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n                        null                          \n                    ]\n                }\n            },\n            \"vecl1_cacheHits_val\": {\n                \"$avg\":  {\n                    \"$cond\": [\n                        {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n                        { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]  }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n                        null\n                    ]\n                }\n            },\n            \"vecl1_BW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n            },\n            \"l1k_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", 
\"&SQC_DCACHE_MISSES\"]}] },\n                        null\n                    ]\n                }\n            },\n            \"l1i_hitRate_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n            },\n            \"l1i_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            },\n            \"l1k_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            }\n            \n        }},\n      {\"$set\": {\n        \"array\": [\n            {\n              \"Metric\": \"VALU FLOPs\",\n              \"Value\": \"&valu_flops_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"VALU IOPs\",\n              \"Value\": \"&valu_intOps_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (BF16)\",\n              \"Value\": \"&mfma_flops_bf16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ 
\"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F16)\",\n              \"Value\": \"&mfma_flops_f16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F32)\",\n              \"Value\": \"&mfma_flops_f32_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F64)\",\n              \"Value\": \"&mfma_flops_f64_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA IOPs (Int8)\",\n              \"Value\": \"&mfma_flops_i8_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              
}\n            },\n            {\n              \"Metric\": \"Active CUs\",\n              \"Value\": $numActiveCUs,\n              \"Unit\": \"CUs\",\n              \"peak\": $numCU,\n              \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs] }, $numCU]}\n            },\n    \n            {\n              \"Metric\": \"SALU Util\",\n              \"Value\": \"&salu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&salu_val\"\n            },\n            {\n              \"Metric\": \"VALU Util\",\n              \"Value\": \"&valu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&valu_val\"\n            },\n            {\n              \"Metric\": \"MFMA Util\",\n              \"Value\": \"&mfma_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&mfma_val\"\n            },\n            {\n              \"Metric\": \"VALU Active Threads/Wave\",\n              \"Value\": \"&unpredthreads_val\",\n              \"Unit\": \"Threads\",\n              \"peak\": 64,\n              \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n            },\n            {\n              \"Metric\": \"IPC - Issue\",\n              \"Value\": \"&ipcIssue_val\",\n              \"Unit\": \"Instr/cycle\",\n              \"peak\": 5,\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n            },\n            {\n              \"Metric\": \"LDS BW\",\n              \"Value\": \"&lds_bw\",\n              \"Unit\": \"GB/sec\",\n              \"peak\": {\"$multiply\": [$sclk, $numCU, 0.128]},\n              \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n            },\n            {\n              \"Metric\": \"LDS Bank Conflict\",\n              \"Value\": \"&lds_bconf\",\n           
   \"Unit\": \"Conflicts/access\",\n              \"peak\": \"32\",\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n            },\n            {\n                \"Metric\": \"Instr Cache Hit Rate\",\n                \"Value\": \"&l1i_hitRate_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n            }, \n            {\n                \"Metric\": \"Instr Cache BW\",\n                \"Value\": \"&l1i_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache Hit Rate\",\n                \"Value\": \"&l1k_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache BW\",\n                \"Value\": \"&l1k_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\":  { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n\n            {\n                \"Metric\": \"Vector L1D Cache Hit Rate\",\n                \"Value\": \"&vecl1_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n            },\n            {\n         
       \"Metric\": \"Vector L1D Cache BW\",\n                \"Value\": \"&vecl1_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] }\n            },\n            {\n                \"Metric\": \"L2 Cache Hit Rate\",\n                \"Value\": \"&l2_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Read BW\",\n                \"Value\": \"&eaReadBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Write BW\",\n                \"Value\": \"&eaWriteBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Read Latency\",\n                \"Value\": \"&eaReadLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Write Latency\",\n                \"Value\": \"&eaWriteLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            }\n\n          ]\n      }},\n      {\"$unwind\": {\n        \"path\": \"&array\"\n      }},\n      {\"$replaceRoot\": 
{\n        \"newRoot\": \"&array\"\n      }},\n      {\"$unionWith\": {\n            \"coll\": \"SQ_LEVEL_WAVES\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n                }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"waveOcc_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n                    },\n                    \"waveOcc_pop\": {\n                        \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU, $numCU] }] }\n                    }\n\n                }},\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Wave Occupancy\",\n                            \"Value\": \"&waveOcc_val\",\n                            \"Unit\": \"Wavefronts\",\n                            \"peak\": { \"$multiply\": [$maxWavesPerCU, $numCU] },\n                            \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n                        }\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }},\n        {\"$unionWith\": {\n            \"coll\": \"SQ_IFETCH_LEVEL\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n        
        }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"instrFetchBW_val\": {\n                        \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n                    },\n                    \"instrFetchLat_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n                    }\n                }},\n\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Instr Fetch BW\",\n                            \"Value\": \"&instrFetchBW_val\",\n                            \"Unit\": \"GB/s\",\n                            \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }, $numSQC]},\n                            \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC, { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }] }] }\n                        },\n                        {\n                            \"Metric\": \"Instr Fetch Latency\",\n                            \"Value\": \"&instrFetchLat_val\",\n                            \"Unit\": \"Cycles\",\n                            \"peak\": \"\",\n                            \"Percent of Peak - PoP\": \"\"\n\n                        }\n\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }}\n\n    ]);",
+              "target": "${Workload1}.pmc_perf.aggregate([\n    {\"$match\": {\n        \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n        \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n        \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n      }},\n      {\"$group\": {\n          \"_id\": null,\n          \"valu_flops_val\": {\n            \"$avg\": { \"$divide\": [ { \"$add\": [\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n            ]}\n            ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n          }},\n    \n          \"valu_intOps_val\": {\n            \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n          },\n    \n          \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", 
\"&BeginNs\"] }] }  },\n          \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n    \n    \n          \"salu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"valu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"mfma_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU, 4] }] }\n          },\n\n    \n          \"lds_bconf\": {\n            \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                {\"$divide\": [\n                                    \"&SQ_LDS_BANK_CONFLICT\",\n                                    {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                ]},\n                                null\n                              ]\n                    }     \n          },\n    \n   \n          \"lds_bw\":  {\n            \"$avg\": {\"$divide\":[ \n                        { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                            4,  \n                                            {\"$toInt\": \"$L2Banks\"}\n                                        ]}, \n                        {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                    ]}\n         },\n    \n          \"lds_bw_pop\":  {\n            \"$avg\":     {\"$divide\": [\n                            {\"$divide\":[ \n                                { \"$multiply\": [ { \"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                                    4,  \n                                                    {\"$toInt\": \"$L2Banks\"}\n                                                ]}, \n                                {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                            ]},            \n                             {\"$multiply\": [$sclk, $numCU, 0.00128]}\n                         ]}\n         },\n    \n          \"unpredthreads_val\": {\n              \"$avg\": {\n                  \"$cond\": [\n                      {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n                      { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n                      null\n                  ]\n              }\n          },\n    \n          \"ipcIssue_val\": {\n              \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n          },\n          \"eaWriteLat_val\": {\n            \"$avg\": {\n                \"$cond\": [\n                  {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n                  { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n                  null                        \n                ]\n            }\n            },\n            \"eaReadLat_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                      {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n                      { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n                      null                        \n                    ]\n                }\n            },\n            \"eaWriteBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ 
\"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            }, \n            \"eaReadBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            },\n            \"l2_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n                        null                          \n                    ]\n                }\n            },\n            \"vecl1_cacheHits_val\": {\n                \"$avg\":  {\n                    \"$cond\": [\n                        {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n                        { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]  }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n                        null\n                    ]\n                }\n            },\n            \"vecl1_BW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n            },\n            \"l1k_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", 
\"&SQC_DCACHE_MISSES\"]}] },\n                        null\n                    ]\n                }\n            },\n            \"l1i_hitRate_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n            },\n            \"l1i_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            },\n            \"l1k_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            }\n            \n        }},\n      {\"$set\": {\n        \"array\": [\n            {\n              \"Metric\": \"VALU FLOPs\",\n              \"Value\": \"&valu_flops_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"VALU IOPs\",\n              \"Value\": \"&valu_intOps_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (BF16)\",\n              \"Value\": \"&mfma_flops_bf16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ 
\"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F16)\",\n              \"Value\": \"&mfma_flops_f16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F32)\",\n              \"Value\": \"&mfma_flops_f32_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F64)\",\n              \"Value\": \"&mfma_flops_f64_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA IOPs (Int8)\",\n              \"Value\": \"&mfma_flops_i8_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n             
 }\n            },\n            {\n              \"Metric\": \"Active CUs\",\n              \"Value\": $numActiveCUs,\n              \"Unit\": \"CUs\",\n              \"peak\": $numCU,\n              \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs] }, $numCU]}\n            },\n    \n            {\n              \"Metric\": \"SALU Util\",\n              \"Value\": \"&salu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&salu_val\"\n            },\n            {\n              \"Metric\": \"VALU Util\",\n              \"Value\": \"&valu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&valu_val\"\n            },\n            {\n              \"Metric\": \"MFMA Util\",\n              \"Value\": \"&mfma_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&mfma_val\"\n            },\n            {\n              \"Metric\": \"VALU Active Threads/Wave\",\n              \"Value\": \"&unpredthreads_val\",\n              \"Unit\": \"Threads\",\n              \"peak\": 64,\n              \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n            },\n            {\n              \"Metric\": \"IPC - Issue\",\n              \"Value\": \"&ipcIssue_val\",\n              \"Unit\": \"Instr/cycle\",\n              \"peak\": 5,\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n            },\n            {\n              \"Metric\": \"LDS BW\",\n              \"Value\": \"&lds_bw\",\n              \"Unit\": \"GB/sec\",\n              \"peak\": {\"$multiply\": [$sclk, $numCU, 0.128]},\n              \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n            },\n            {\n              \"Metric\": \"LDS Bank Conflict\",\n              \"Value\": \"&lds_bconf\",\n          
    \"Unit\": \"Conflicts/access\",\n              \"peak\": \"32\",\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n            },\n            {\n                \"Metric\": \"Instr Cache Hit Rate\",\n                \"Value\": \"&l1i_hitRate_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n            }, \n            {\n                \"Metric\": \"Instr Cache BW\",\n                \"Value\": \"&l1i_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache Hit Rate\",\n                \"Value\": \"&l1k_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache BW\",\n                \"Value\": \"&l1k_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\":  { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n\n            {\n                \"Metric\": \"Vector L1D Cache Hit Rate\",\n                \"Value\": \"&vecl1_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n            },\n            {\n        
        \"Metric\": \"Vector L1D Cache BW\",\n                \"Value\": \"&vecl1_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] }\n            },\n            {\n                \"Metric\": \"L2 Cache Hit Rate\",\n                \"Value\": \"&l2_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Read BW\",\n                \"Value\": \"&eaReadBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Write BW\",\n                \"Value\": \"&eaWriteBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Read Latency\",\n                \"Value\": \"&eaReadLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Write Latency\",\n                \"Value\": \"&eaWriteLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            }\n\n          ]\n      }},\n      {\"$unwind\": {\n        \"path\": \"&array\"\n      }},\n      {\"$replaceRoot\": 
{\n        \"newRoot\": \"&array\"\n      }},\n      {\"$unionWith\": {\n            \"coll\": \"SQ_LEVEL_WAVES\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n                }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"waveOcc_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n                    },\n                    \"waveOcc_pop\": {\n                        \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU, $numCU] }] }\n                    }\n\n                }},\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Wave Occupancy\",\n                            \"Value\": \"&waveOcc_val\",\n                            \"Unit\": \"Wavefronts\",\n                            \"peak\": { \"$multiply\": [$maxWavesPerCU, $numCU] },\n                            \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n                        }\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }},\n        {\"$unionWith\": {\n            \"coll\": \"SQ_IFETCH_LEVEL\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n        
        }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"instrFetchBW_val\": {\n                        \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n                    },\n                    \"instrFetchLat_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n                    }\n                }},\n\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Instr Fetch BW\",\n                            \"Value\": \"&instrFetchBW_val\",\n                            \"Unit\": \"GB/s\",\n                            \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }, $numSQC]},\n                            \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC, { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }] }] }\n                        },\n                        {\n                            \"Metric\": \"Instr Fetch Latency\",\n                            \"Value\": \"&instrFetchLat_val\",\n                            \"Unit\": \"Cycles\",\n                            \"peak\": \"\",\n                            \"Percent of Peak - PoP\": \"\"\n\n                        }\n\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }}\n\n    ]);",
               "type": "table"
             },
             {
diff --git a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
index 20721ee1f..c197c0fc5 100644
--- a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
@@ -45,9 +45,9 @@ Panel Config:
           MFMA FLOPs (BF16):
             value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
             unit: GFLOP
-            peak: ((($sclk * $numCU) * 512) / 1000)
+            peak: ((($sclk * $numCU) * 1024) / 1000)
             pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 512) / 1000))
+              / ((($sclk * $numCU) * 1024) / 1000))
             tips: 
           MFMA FLOPs (F16):
             value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))

From 9360ed8b0ce8aea316543701b53d5fbf2fdab136 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 31 Jul 2023 14:19:26 -0500
Subject: [PATCH 27/81] Use llvm-cxxfilt to demangle names in
 kernel_name_shortener

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 src/utils/csv_converter.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index b178439ac..b3d4db711 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -32,6 +32,7 @@
 from pymongo import MongoClient
 from tqdm import tqdm
 import shutil
+import subprocess
 
 cache = dict()
 supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"}
@@ -54,6 +55,13 @@ def kernel_name_shortener(df, cache, level):
             original_name = df.loc[index, columnName]
             if original_name in cache:
                 continue
+            
+            cmd = ["llvm-cxxfilt", original_name]
+            
+            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+            demangled_name, e = proc.communicate()
+            demangled_name = str(demangled_name, 'UTF-8').strip()
 
             # cache miss, add the shortened name to the dictionary
             new_name = ""
@@ -62,14 +70,14 @@ def kernel_name_shortener(df, cache, level):
             names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
 
             # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
-            if names_and_args.search(original_name):
-                matches = names_and_args.findall(original_name)
+            if names_and_args.search(demangled_name):
+                matches = names_and_args.findall(demangled_name)
             else:
                 # Works for first case  '__amd_rocclr_fillBuffer.kd'
                 # remove .kd and then parse through original regex
                 first_case = re.compile(r"([^\s]+)(.kd)")
                 Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
-                interim_name = first_case.search(original_name).group(1)
+                interim_name = first_case.search(demangled_name).group(1)
                 matches = Mod_name_and_args.findall(interim_name)
 
             current_level = 0
@@ -103,7 +111,7 @@ def kernel_name_shortener(df, cache, level):
 
             cache[original_name] = new_name
             if new_name == None or new_name == "":
-                cache[original_name] = original_name
+                cache[original_name] = demangled_name
 
         df[columnName] = df[columnName].map(cache)
 

From b63332a31702b6da293ab0e7b9943598c4a99469 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Wed, 2 Aug 2023 12:38:56 -0500
Subject: [PATCH 28/81] Use llvm-cxxfilt to demangle names; show typed text in
 dash-dropdown input box

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 src/omniperf_analyze/assets/layout.css | 9 +++++++++
 src/utils/csv_converter.py             | 9 ++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/omniperf_analyze/assets/layout.css b/src/omniperf_analyze/assets/layout.css
index 4fcaad24e..7fdc373f6 100644
--- a/src/omniperf_analyze/assets/layout.css
+++ b/src/omniperf_analyze/assets/layout.css
@@ -213,6 +213,15 @@ ul#nav li {
    font-size: 14px;
    text-align: left;
 }
+
+.dash-dropdown input{
+   color: black;
+}
+
+.dash-dropdown .Select-placeholder{
+   color: black;
+}
+
 .VirtualizedSelectOption {
    overflow: hidden;
 }
diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index b3d4db711..a3c08c276 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -74,11 +74,10 @@ def kernel_name_shortener(df, cache, level):
                 matches = names_and_args.findall(demangled_name)
             else:
                 # Works for first case  '__amd_rocclr_fillBuffer.kd'
-                # remove .kd and then parse through original regex
-                first_case = re.compile(r"([^\s]+)(.kd)")
-                Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
-                interim_name = first_case.search(demangled_name).group(1)
-                matches = Mod_name_and_args.findall(interim_name)
+                cache[original_name] = new_name
+                if new_name == None or new_name == "":
+                    cache[original_name] = demangled_name
+                continue
 
             current_level = 0
             for name in matches:

From 0f96a8d1e539344877a89daea4d08fa28dc83f11 Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Wed, 2 Aug 2023 12:50:34 -0500
Subject: [PATCH 29/81] Fix incorrect ordering of args in perfagg func

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/utils/perfagg.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 1c21b1736..6ec4542a9 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -362,7 +362,7 @@ def update_pmc_bucket(
     return pmc_list
 
 
-def perfmon_coalesce(pmc_files_list, workload_dir, soc):
+def perfmon_coalesce(pmc_files_list, soc, workload_dir):
     workload_perfmon_dir = workload_dir + "/perfmon"
 
     # match pattern for pmc counters
@@ -424,7 +424,7 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
     return pmc_list
 
 
-def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None):
+def perfmon_emit(pmc_list, soc, workload_dir=None):
     # Calculate the minimum number of iteration to save the pmc counters
     # non-TCC counters
     pmc_cnt = [
@@ -446,7 +446,7 @@ def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None):
     niter = max(math.ceil(max(pmc_cnt)), math.ceil(tcc_cnt) + math.ceil(max(tcc2_cnt)))
 
     # Emit PMC counters into pmc config file
-    if save_file:
+    if workload_dir:
         workload_perfmon_dir = workload_dir + "/perfmon"
         fd = open(workload_perfmon_dir + "/pmc_perf.txt", "w")
     else:
@@ -480,14 +480,14 @@ def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None):
 
         # TCC aggregated counters
         line = line + " " + " ".join(tcc_counters)
-        if save_file:
+        if workload_dir:
             fd.write(line + "\n")
         else:
             b = line.split()
             b.remove("pmc:")
             batches.append(b)
 
-    if save_file:
+    if workload_dir:
         fd.write("\ngpu:\n")
         fd.write("range:\n")
         fd.write("kernel:\n")
@@ -533,8 +533,8 @@ def perfmon_filter(workload_dir, perfmon_dir, args):
         pmc_files_list = ref_pmc_files_list
 
     # Coalesce and writeback workload specific perfmon
-    pmc_list = perfmon_coalesce(pmc_files_list, workload_dir, soc)
-    perfmon_emit(pmc_list, workload_dir, soc)
+    pmc_list = perfmon_coalesce(pmc_files_list, soc, workload_dir)
+    perfmon_emit(pmc_list, soc, workload_dir)
 
 
 def pmc_filter(workload_dir, perfmon_dir, soc):
@@ -551,5 +551,5 @@ def pmc_filter(workload_dir, perfmon_dir, soc):
     pmc_files_list = ref_pmc_files_list
 
     # Coalesce and writeback workload specific perfmon
-    pmc_list = perfmon_coalesce(pmc_files_list, workload_dir, soc)
-    perfmon_emit(pmc_list, workload_dir, soc)
+    pmc_list = perfmon_coalesce(pmc_files_list, soc, workload_dir)
+    perfmon_emit(pmc_list, soc, workload_dir)

From 4e58f0ba2ee9805ea5681f35c0db9bed62bbc1cd Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Wed, 2 Aug 2023 12:51:51 -0500
Subject: [PATCH 30/81] Only load required archs into ArchConfig datastruct
 (#144)

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/omniperf_analyze/omniperf_analyze.py | 95 ++++++++++++++++--------
 1 file changed, 63 insertions(+), 32 deletions(-)

diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index 6415ed285..87fac064d 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -46,52 +46,45 @@
 from omniperf_analyze.utils import parser, file_io
 from omniperf_analyze.utils.gui_components.roofline import get_roofline
 
+archConfigs = {}
+
 
 ################################################
 # Helper Functions
 ################################################
-def generate_configs(config_dir, list_kernels, filter_metrics):
+def generate_config(arch, config_dir, list_kernels, filter_metrics):
     from omniperf_analyze.utils import schema
 
     single_panel_config = file_io.is_single_panel_config(Path(config_dir))
     global archConfigs
-    archConfigs = {}
-    for arch in file_io.supported_arch.keys():
-        ac = schema.ArchConfig()
-        if list_kernels:
-            ac.panel_configs = file_io.top_stats_build_in_config
-        else:
-            arch_panel_config = (
-                config_dir if single_panel_config else config_dir.joinpath(arch)
-            )
-            ac.panel_configs = file_io.load_panel_configs(arch_panel_config)
 
-        # TODO: filter_metrics should/might be one per arch
-        # print(ac)
+    ac = schema.ArchConfig()
+    if list_kernels:
+        ac.panel_configs = file_io.top_stats_build_in_config
+    else:
+        arch_panel_config = (
+            config_dir if single_panel_config else config_dir.joinpath(arch)
+        )
+        ac.panel_configs = file_io.load_panel_configs(arch_panel_config)
+
+    # TODO: filter_metrics should/might be one per arch
+    # print(ac)
 
-        parser.build_dfs(ac, filter_metrics)
+    parser.build_dfs(ac, filter_metrics)
 
-        archConfigs[arch] = ac
+    archConfigs[arch] = ac
 
     return archConfigs  # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin
 
 
-################################################
-# Core Functions
-################################################
-def initialize_run(args, normalization_filter=None):
+def list_metrics(args):
     import pandas as pd
-    from collections import OrderedDict
     from tabulate import tabulate
-    from omniperf_analyze.utils import schema
-
-    # Fixme: cur_root.parent.joinpath('soc_params')
-    soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
-    soc_spec_df = file_io.load_soc_params(soc_params_dir)
-
-    generate_configs(args.config_dir, args.list_kernels, args.filter_metrics)
 
     if args.list_metrics in file_io.supported_arch.keys():
+        arch = args.list_metrics
+        if arch not in archConfigs.keys():
+            generate_config(arch, args.config_dir, args.list_kernels, args.filter_metrics)
         print(
             tabulate(
                 pd.DataFrame.from_dict(
@@ -105,7 +98,12 @@ def initialize_run(args, normalization_filter=None):
             file=output,
         )
         sys.exit(0)
+    else:
+        print("Error: Unsupported arch")
+        sys.exit(-1)
 
+
+def load_options(args, normalization_filter):
     # Use original normalization or user input from GUI
     if not normalization_filter:
         for k, v in archConfigs.items():
@@ -114,10 +112,7 @@ def initialize_run(args, normalization_filter=None):
         for k, v in archConfigs.items():
             parser.build_metric_value_string(v.dfs, v.dfs_type, normalization_filter)
 
-    runs = OrderedDict()
-
     # err checking for multiple runs and multiple gpu_kernel filter
-    # TODO: move it to util
     if args.gpu_kernel and (len(args.path) != len(args.gpu_kernel)):
         if len(args.gpu_kernel) == 1:
             for i in range(len(args.path) - 1):
@@ -129,6 +124,31 @@ def initialize_run(args, normalization_filter=None):
             )
             sys.exit(-1)
 
+
+################################################
+# Core Functions
+################################################
+def initialize_run(args, normalization_filter=None):
+    from collections import OrderedDict
+    from omniperf_analyze.utils import schema
+
+    # Fixme: cur_root.parent.joinpath('soc_params')
+    soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
+    soc_spec_df = file_io.load_soc_params(soc_params_dir)
+
+    if args.list_metrics:
+        list_metrics(args)
+
+    # Load required configs
+    for d in args.path:
+        sys_info = file_io.load_sys_info(Path(d[0], "sysinfo.csv"))
+        arch = sys_info.iloc[0]["gpu_soc"]
+        generate_config(arch, args.config_dir, args.list_kernels, args.filter_metrics)
+
+    load_options(args, normalization_filter)
+
+    runs = OrderedDict()
+
     # Todo: warning single -d with multiple dirs
     for d in args.path:
         w = schema.Workload()
@@ -215,10 +235,21 @@ def run_cli(args, runs):
         parser.load_table_data(
             runs[d[0]], d[0], is_gui, args.g, args.verbose
         )  # create the loaded table
+    # TODO: In show_* functions always assume newest architecture. This way newest configs/figures are loaded
     if args.list_kernels:
-        tty.show_kernels(args, runs, archConfigs["gfx90a"], output)
+        tty.show_kernels(
+            args,
+            runs,
+            archConfigs[runs[args.path[0][0]].sys_info.iloc[0]["gpu_soc"]],
+            output,
+        )
     else:
-        tty.show_all(args, runs, archConfigs["gfx90a"], output)
+        tty.show_all(
+            args,
+            runs,
+            archConfigs[runs[args.path[0][0]].sys_info.iloc[0]["gpu_soc"]],
+            output,
+        )
 
 
 def roofline_only(path_to_dir, dev_id, sort_type, mem_level, kernel_names, verbose):

From add68ded6739b02db8c6bec6a73d2e071b8a2683 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Wed, 2 Aug 2023 13:55:08 -0500
Subject: [PATCH 31/81] reformatting

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 src/utils/csv_converter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index a3c08c276..4f28d5388 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -55,13 +55,13 @@ def kernel_name_shortener(df, cache, level):
             original_name = df.loc[index, columnName]
             if original_name in cache:
                 continue
-            
+
             cmd = ["llvm-cxxfilt", original_name]
-            
+
             proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
             demangled_name, e = proc.communicate()
-            demangled_name = str(demangled_name, 'UTF-8').strip()
+            demangled_name = str(demangled_name, "UTF-8").strip()
 
             # cache miss, add the shortened name to the dictionary
             new_name = ""

From cc1950627002dedf0c8527aa83b4d08a8eabf409 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 7 Aug 2023 11:23:25 -0500
Subject: [PATCH 32/81] Adding min/max/avg breakdown across dispatches for
 instruction mix panels

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 .../1000_compute-unit-instruction-mix.yaml    | 37 ++++++++++----
 .../1000_compute-unit-instruction-mix.yaml    | 36 ++++++++++----
 .../1000_compute-unit-instruction-mix.yaml    | 36 ++++++++++----
 .../1100_compute-unit-compute-pipeline.yaml   | 48 ++++++++++++++++---
 4 files changed, 122 insertions(+), 35 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
index b72344f3b..6d28cb2a4 100644
--- a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
@@ -12,41 +12,58 @@ Panel Config:
         id: 1001
         title: Instruction Mix
         header:
-          metric: Metric
-          count: Count
+          avg: Avg
+          min: Min
+          max: Max
           unit: Unit
           tips: Tips
         metric:
           VALU - Vector:
-            count: None # No HW module
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips: 
           VMEM:
-            count: None # No HW module
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips: 
           LDS:
-            count: AVG((SQ_INSTS_LDS / $denom))
+            avg: AVG((SQ_INSTS_LDS / $denom))
+            min: MIN((SQ_INSTS_LDS / $denom))
+            max: MAX((SQ_INSTS_LDS / $denom))
             unit: $normUnit
             tips: 
           VALU - MFMA:
-            count: None # No HW module
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips: 
           SALU:
-            count: AVG((SQ_INSTS_SALU / $denom))
+            avg: AVG((SQ_INSTS_SALU / $denom))
+            min: MIN((SQ_INSTS_SALU / $denom))
+            max: MAX((SQ_INSTS_SALU / $denom))
             unit: (instr + $normUnit)
             tips: 
           SMEM:
-            count: AVG((SQ_INSTS_SMEM / $denom))
+            avg: AVG((SQ_INSTS_SMEM / $denom))
+            min: MIN((SQ_INSTS_SMEM / $denom))
+            max: MAX((SQ_INSTS_SMEM / $denom))
             unit: (instr + $normUnit)
             tips: 
           Branch:
-            count: AVG((SQ_INSTS_BRANCH / $denom))
+            avg: AVG((SQ_INSTS_BRANCH / $denom))
+            min: MIN((SQ_INSTS_BRANCH / $denom))
+            max: MAX((SQ_INSTS_BRANCH / $denom))
             unit: (instr + $normUnit)
             tips: 
           GDS:
-            count: AVG((SQ_INSTS_GDS / $denom))
+            avg: AVG((SQ_INSTS_GDS / $denom))
+            min: MIN((SQ_INSTS_GDS / $denom))
+            max: MAX((SQ_INSTS_GDS / $denom))
             unit: (instr + $normUnit)
             tips: 
 
diff --git a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
index b1977ce49..5a9bdb456 100644
--- a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
@@ -13,40 +13,58 @@ Panel Config:
         title: Instruction Mix
         header:
           metric: Metric
-          count: Count
+          avg: Avg
+          min: Min
+          max: Max
           unit: Unit
           tips: Tips
         metric:
           VALU - Vector:
-            count: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
+            avg: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
+            min: MIN(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
+            max: MAX(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
             unit: (instr + $normUnit)
             tips: 
           VMEM:
-            count: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
+            avg: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
+            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)
+            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)
             unit: (instr + $normUnit)
             tips: 
           LDS:
-            count: AVG((SQ_INSTS_LDS / $denom))
+            avg: AVG((SQ_INSTS_LDS / $denom))
+            min: MIN((SQ_INSTS_LDS / $denom))
+            max: MAX((SQ_INSTS_LDS / $denom))
             unit: (instr + $normUnit)
             tips: 
           VALU - MFMA:
-            count: None # No HW module
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips: 
           SALU:
-            count: AVG((SQ_INSTS_SALU / $denom))
+            avg: AVG((SQ_INSTS_SALU / $denom))
+            min: MIN((SQ_INSTS_SALU / $denom))
+            max: MAX((SQ_INSTS_SALU / $denom))
             unit: (instr + $normUnit)
             tips: 
           SMEM:
-            count: AVG((SQ_INSTS_SMEM / $denom))
+            avg: AVG((SQ_INSTS_SMEM / $denom))
+            min: MIN((SQ_INSTS_SMEM / $denom))
+            max: MAX((SQ_INSTS_SMEM / $denom))
             unit: (instr + $normUnit)
             tips: 
           Branch:
-            count: AVG((SQ_INSTS_BRANCH / $denom))
+            avg: AVG((SQ_INSTS_BRANCH / $denom))
+            min: MIN((SQ_INSTS_BRANCH / $denom))
+            max: MAX((SQ_INSTS_BRANCH / $denom))
             unit: (instr + $normUnit)
             tips: 
           GDS:
-            count: AVG((SQ_INSTS_GDS / $denom))
+            avg: AVG((SQ_INSTS_GDS / $denom))
+            min: MIN((SQ_INSTS_GDS / $denom))
+            max: MAX((SQ_INSTS_GDS / $denom))
             unit: (instr + $normUnit)
             tips: 
 
diff --git a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
index cd3f6161c..999d25e19 100644
--- a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
@@ -13,40 +13,58 @@ Panel Config:
         title: Instruction Mix
         header:
           metric: Metric
-          count: Count
+          avg: Avg
+          min: Min
+          max: Max
           unit: Unit
           tips: Tips
         metric:
           VALU - Vector:
-            count: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
+            avg: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
+            min: MIN(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
+            max: MAX(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
             unit: (instr + $normUnit)
             tips: 
           VMEM:
-            count: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
+            avg: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
+            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
+            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
             unit: (instr + $normUnit)
             tips: 
           LDS:
-            count: AVG((SQ_INSTS_LDS / $denom))
+            avg: AVG((SQ_INSTS_LDS / $denom))
+            min: MIN((SQ_INSTS_LDS / $denom))
+            max: MAX((SQ_INSTS_LDS / $denom))
             unit: (instr + $normUnit)
             tips: 
           VALU - MFMA:
-            count: AVG((SQ_INSTS_MFMA / $denom))
+            avg: AVG((SQ_INSTS_MFMA / $denom))
+            min: MIN((SQ_INSTS_MFMA / $denom))
+            max: MAX((SQ_INSTS_MFMA / $denom))
             unit: (instr + $normUnit)
             tips: 
           SALU:
-            count: AVG((SQ_INSTS_SALU / $denom))
+            avg: AVG((SQ_INSTS_SALU / $denom))
+            min: MIN((SQ_INSTS_SALU / $denom))
+            max: MAX((SQ_INSTS_SALU / $denom))
             unit: (instr + $normUnit)
             tips: 
           SMEM:
-            count: AVG((SQ_INSTS_SMEM / $denom))
+            avg: AVG((SQ_INSTS_SMEM / $denom))
+            min: MIN((SQ_INSTS_SMEM / $denom))
+            max: MAX((SQ_INSTS_SMEM / $denom))
             unit: (instr + $normUnit)
             tips: 
           Branch:
-            count: AVG((SQ_INSTS_BRANCH / $denom))
+            avg: AVG((SQ_INSTS_BRANCH / $denom))
+            min: MIN((SQ_INSTS_BRANCH / $denom))
+            max: MAX((SQ_INSTS_BRANCH / $denom))
             unit: (instr + $normUnit)
             tips: 
           GDS:
-            count: AVG((SQ_INSTS_GDS / $denom))
+            avg: AVG((SQ_INSTS_GDS / $denom))
+            min: MIN((SQ_INSTS_GDS / $denom))
+            max: MAX((SQ_INSTS_GDS / $denom))
             unit: (instr + $normUnit)
             tips: 
 
diff --git a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
index 4ed332e3f..2866f9652 100644
--- a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
@@ -13,11 +13,25 @@ Panel Config:
         title: Speed-of-Light
         header:
           metric: Metric
-          value: Value
+          avg: Avg
+          min: Min
+          max: Max
           tips: Tips
         metric:
           valu_flops_pop:
-            value: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
+            avg: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
+              + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+              + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+              + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+              + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
+              * $numCU) * 64) * 2) / 1000))
+            min: ((100 * MIN(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
+              + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
+              + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
+              + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
+              + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
+              * $numCU) * 64) * 2) / 1000))
+            max: ((100 * MAX(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
               + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
               + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
               + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
@@ -25,23 +39,43 @@ Panel Config:
               * $numCU) * 64) * 2) / 1000))
             tips: 
           mfma_flops_bf16_pop:
-            value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
+            avg: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
+              / ((($sclk * $numCU) * 512) / 1000))
+            min: ((100 * MIN(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
+              / ((($sclk * $numCU) * 512) / 1000))
+            max: ((100 * MAX(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 512) / 1000))
             tips: 
           mfma_flops_f16_pop:
-            value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
+            avg: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
+              / ((($sclk * $numCU) * 1024) / 1000))
+            min: ((100 * MIN(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
+              / ((($sclk * $numCU) * 1024) / 1000))
+            max: ((100 * MAX(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 1024) / 1000))
             tips: 
           mfma_flops_f32_pop:
-            value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
+            avg: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
+              / ((($sclk * $numCU) * 256) / 1000))
+            min: ((100 * MIN(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
+              / ((($sclk * $numCU) * 256) / 1000))
+            max: ((100 * MAX(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 256) / 1000))
             tips: 
           mfma_flops_f64_pop:
-            value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
+            avg: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
+              / ((($sclk * $numCU) * 256) / 1000))
+            min: ((100 * MIN(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
+              / ((($sclk * $numCU) * 256) / 1000))
+            max: ((100 * MAX(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 256) / 1000))
             tips: 
           mfma_flops_i8_pop:
-            value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
+            avg: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
+              / ((($sclk * $numCU) * 1024) / 1000))
+            min: ((100 * MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
+              / ((($sclk * $numCU) * 1024) / 1000))
+            max: ((100 * MAX(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 1024) / 1000))
             tips: 
 

From c16656eb8713ed248ae867f2ff90d6f286c06f26 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Fri, 9 Jun 2023 10:00:56 -0500
Subject: [PATCH 33/81] Fix VGPR issue (#139)

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/utils/perfagg.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index e965b784c..651bcb86d 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -127,11 +127,17 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
         "wgr": [col for col in df.columns if "wgr" in col],
         "lds": [col for col in df.columns if "lds" in col],
         "scr": [col for col in df.columns if "scr" in col],
-        "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col],
-        "accum_vgpr": [col for col in df.columns if "accum_vgpr" in col],
         "spgr": [col for col in df.columns if "sgpr" in col],
     }
+    # Check for vgpr counter in ROCm < 5.3
+    if "vgpr" in df.columns:
+        duplicate_cols["vgpr"] = [col for col in df.columns if "vgpr" in col]
+    # Check for vgpr counter in ROCm >= 5.3
+    else:
+        duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col]
+        duplicate_cols["accum_vgpr"] =  [col for col in df.columns if "accum_vgpr" in col]
     for key, cols in duplicate_cols.items():
+        print("Key is ", key)
         _df = df[cols]
         if not test_df_column_equality(_df):
             msg = (

From 599bc01310dd875ab6c1d68bc3fc93d384cb5b83 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Fri, 9 Jun 2023 10:01:37 -0500
Subject: [PATCH 34/81] Omniperf rocomni changes

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/omniperf_analyze.py | 38 +++++++++++++++-------
 src/omniperf_analyze/utils/parser.py     | 41 ++++++++++++++++++++++--
 src/omniperf_analyze/utils/schema.py     |  2 ++
 3 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index 58991e8b3..c15181c6f 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -47,36 +47,50 @@
 from omniperf_analyze.utils.gui_components.roofline import get_roofline
 
 
-def initialize_run(args, normalization_filter=None):
-    import pandas as pd
-    from collections import OrderedDict
+################################################
+# Helper Functions
+################################################
+def generate_configs(config_dir, list_kernels, filter_metrics):
     from omniperf_analyze.utils import schema
-    from tabulate import tabulate
 
-    # Fixme: cur_root.parent.joinpath('soc_params')
-    soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
-    soc_spec_df = file_io.load_soc_params(soc_params_dir)
-
-    single_panel_config = file_io.is_single_panel_config(Path(args.config_dir))
+    single_panel_config = file_io.is_single_panel_config(Path(config_dir))
     global archConfigs
     archConfigs = {}
     for arch in file_io.supported_arch.keys():
         ac = schema.ArchConfig()
-        if args.list_kernels:
+        if list_kernels:
             ac.panel_configs = file_io.top_stats_build_in_config
         else:
             arch_panel_config = (
-                args.config_dir if single_panel_config else args.config_dir.joinpath(arch)
+                config_dir if single_panel_config else config_dir.joinpath(arch)
             )
             ac.panel_configs = file_io.load_panel_configs(arch_panel_config)
 
         # TODO: filter_metrics should/might be one per arch
         # print(ac)
 
-        parser.build_dfs(ac, args.filter_metrics)
+        parser.build_dfs(ac, filter_metrics)
 
         archConfigs[arch] = ac
 
+    return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin
+
+
+################################################
+# Core Functions
+################################################
+def initialize_run(args, normalization_filter=None):
+    import pandas as pd
+    from collections import OrderedDict
+    from tabulate import tabulate
+    from omniperf_analyze.utils import schema
+
+    # Fixme: cur_root.parent.joinpath('soc_params')
+    soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
+    soc_spec_df = file_io.load_soc_params(soc_params_dir)
+
+    generate_configs(args.config_dir, args.list_kernels, args.filter_metrics)
+
     if args.list_metrics in file_io.supported_arch.keys():
         print(
             tabulate(
diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py
index d11cbbbfc..5fb03c39a 100644
--- a/src/omniperf_analyze/utils/parser.py
+++ b/src/omniperf_analyze/utils/parser.py
@@ -320,6 +320,26 @@ def update_normUnit_string(equation, unit):
         str(equation),
     ).capitalize()
 
+def gen_counter_list(formula):
+    function_filter = {"MIN": None, "MAX": None, "AVG": None, "ROUND": None, "TO_INT": None, "GB": None, "STD": None, "GFLOP": None, "GOP": None, "OP": None, "CU": None, "NC": None, "UC": None, "CC": None, "RW": None, "GIOP": None}
+
+    counters = []
+    if not isinstance(formula,str):
+        return counters
+    try:
+        tree = ast.parse(
+            formula
+            .replace("$normUnit", "SQ_WAVES")
+            .replace("$denom", "SQ_WAVES")
+            .replace("$","")
+        )
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Name) and node.id.rstrip("_sum").isupper() and node.id not in function_filter:
+                counters.append(node.id.rstrip("_sum"))
+    except:
+        pass
+    return counters
+            
 
 def build_dfs(archConfigs, filter_metrics):
     """
@@ -338,6 +358,7 @@ def build_dfs(archConfigs, filter_metrics):
     d = {}
     metric_list = {}
     dfs_type = {}
+    metric_counters = {}
     for panel_id, panel in archConfigs.panel_configs.items():
         for data_source in panel["data source"]:
             for type, data_cofig in data_source.items():
@@ -362,6 +383,7 @@ def build_dfs(archConfigs, filter_metrics):
                         )
                         metric_idx = data_source_idx + "." + str(i)
                         values = []
+                        eqn_content = []
 
                         if (
                             (not filter_metrics)
@@ -378,6 +400,7 @@ def build_dfs(archConfigs, filter_metrics):
                             for k, v in entries.items():
                                 if k != "tips" and k != "coll_level" and k != "alias":
                                     values.append(v)
+                                    eqn_content.append(v)
 
                             if "alias" in entries.keys():
                                 values.append(entries["alias"])
@@ -396,6 +419,15 @@ def build_dfs(archConfigs, filter_metrics):
 
                         # collect metric_list
                         metric_list[metric_idx] = key.replace(" ", "_")
+                        # generate mapping of counters and metrics
+                        filter = {}
+                        for formula in eqn_content:
+                            if formula is not None and formula != "None":
+                                for k in gen_counter_list(formula):
+                                    filter[k] = None
+                        if len(filter) > 0:
+                            metric_counters[key] = list(filter)
+
                         i += 1
 
                     df.set_index("Index", inplace=True)
@@ -431,6 +463,7 @@ def build_dfs(archConfigs, filter_metrics):
     setattr(archConfigs, "dfs", d)
     setattr(archConfigs, "metric_list", metric_list)
     setattr(archConfigs, "dfs_type", dfs_type)
+    setattr(archConfigs, "metric_counters", metric_counters)
 
 
 def build_metric_value_string(dfs, dfs_type, normal_unit):
@@ -469,7 +502,8 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug):
 
     # confirm no illogical counter values (only consider non-roofline runs)
     roof_only_run = sys_info.ip_blocks == "roofline"
-    if not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any():
+    rocscope_run = sys_info.ip_blocks == "rocscope"
+    if not rocscope_run and not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any():
         print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.")
         sys.exit(1)
 
@@ -711,12 +745,13 @@ def load_kernel_top(workload, dir):
     workload.dfs.update(tmp)
 
 
-def load_table_data(workload, dir, is_gui, debug, verbose):
+def load_table_data(workload, dir, is_gui, debug, verbose, skipKernelTop=False):
     """
     Load data for all "raw_csv_table".
     Calculate mertric value for all "metric_table".
     """
-    load_kernel_top(workload, dir)
+    if not skipKernelTop:
+        load_kernel_top(workload, dir)
 
     eval_metric(
         workload.dfs,
diff --git a/src/omniperf_analyze/utils/schema.py b/src/omniperf_analyze/utils/schema.py
index bcfc0bff5..6e147fcae 100644
--- a/src/omniperf_analyze/utils/schema.py
+++ b/src/omniperf_analyze/utils/schema.py
@@ -52,6 +52,8 @@ class ArchConfig:
     # [Index: Metric name] pairs
     metric_list: Dict[str, str] = field(default_factory=dict)
 
+    # [Metric name: Counters] pairs
+    metric_counters: Dict[str, list] = field(default_factory=dict)
 
 @dataclass
 class Workload:

From 48f037e2423e579c8156470abcac65530eacfc27 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Fri, 9 Jun 2023 10:04:32 -0500
Subject: [PATCH 35/81] Comply to Python formatting

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/omniperf_analyze.py |  2 +-
 src/omniperf_analyze/utils/parser.py     | 41 +++++++++++++++++++-----
 src/omniperf_analyze/utils/schema.py     |  1 +
 src/utils/perfagg.py                     |  2 +-
 4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index c15181c6f..6415ed285 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -73,7 +73,7 @@ def generate_configs(config_dir, list_kernels, filter_metrics):
 
         archConfigs[arch] = ac
 
-    return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin
+    return archConfigs  # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin
 
 
 ################################################
diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py
index 5fb03c39a..b6573566b 100644
--- a/src/omniperf_analyze/utils/parser.py
+++ b/src/omniperf_analyze/utils/parser.py
@@ -320,26 +320,47 @@ def update_normUnit_string(equation, unit):
         str(equation),
     ).capitalize()
 
+
 def gen_counter_list(formula):
-    function_filter = {"MIN": None, "MAX": None, "AVG": None, "ROUND": None, "TO_INT": None, "GB": None, "STD": None, "GFLOP": None, "GOP": None, "OP": None, "CU": None, "NC": None, "UC": None, "CC": None, "RW": None, "GIOP": None}
+    function_filter = {
+        "MIN": None,
+        "MAX": None,
+        "AVG": None,
+        "ROUND": None,
+        "TO_INT": None,
+        "GB": None,
+        "STD": None,
+        "GFLOP": None,
+        "GOP": None,
+        "OP": None,
+        "CU": None,
+        "NC": None,
+        "UC": None,
+        "CC": None,
+        "RW": None,
+        "GIOP": None,
+    }
 
     counters = []
-    if not isinstance(formula,str):
+    if not isinstance(formula, str):
         return counters
     try:
         tree = ast.parse(
-            formula
-            .replace("$normUnit", "SQ_WAVES")
+            formula.replace("$normUnit", "SQ_WAVES")
             .replace("$denom", "SQ_WAVES")
-            .replace("$","")
+            .replace("$", "")
         )
         for node in ast.walk(tree):
-            if isinstance(node, ast.Name) and node.id.rstrip("_sum").isupper() and node.id not in function_filter:
+            if (
+                isinstance(node, ast.Name)
+                and node.id.rstrip("_sum").isupper()
+                and node.id not in function_filter
+            ):
                 counters.append(node.id.rstrip("_sum"))
     except:
         pass
     return counters
-            
+
 
 def build_dfs(archConfigs, filter_metrics):
     """
@@ -503,7 +524,11 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug):
     # confirm no illogical counter values (only consider non-roofline runs)
     roof_only_run = sys_info.ip_blocks == "roofline"
     rocscope_run = sys_info.ip_blocks == "rocscope"
-    if not rocscope_run and not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any():
+    if (
+        not rocscope_run
+        and not roof_only_run
+        and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any()
+    ):
         print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.")
         sys.exit(1)
 
diff --git a/src/omniperf_analyze/utils/schema.py b/src/omniperf_analyze/utils/schema.py
index 6e147fcae..f9b59868f 100644
--- a/src/omniperf_analyze/utils/schema.py
+++ b/src/omniperf_analyze/utils/schema.py
@@ -55,6 +55,7 @@ class ArchConfig:
     # [Metric name: Counters] pairs
     metric_counters: Dict[str, list] = field(default_factory=dict)
 
+
 @dataclass
 class Workload:
     sys_info: pd.DataFrame = None
diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 651bcb86d..109fdecda 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -135,7 +135,7 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
     # Check for vgpr counter in ROCm >= 5.3
     else:
         duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col]
-        duplicate_cols["accum_vgpr"] =  [col for col in df.columns if "accum_vgpr" in col]
+        duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col]
     for key, cols in duplicate_cols.items():
         print("Key is ", key)
         _df = df[cols]

From 0b135be559666c99b133167417e1888bc0ad5948 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Wed, 21 Jun 2023 11:06:03 -0500
Subject: [PATCH 36/81] Add subsection title to System Speed-of-Light

Signed-off-by: colramos-amd <colramos@amd.com>
---
 .../configs/gfx906/0200_system-speed-of-light.yaml               | 1 +
 .../configs/gfx908/0200_system-speed-of-light.yaml               | 1 +
 .../configs/gfx90a/0200_system-speed-of-light.yaml               | 1 +
 3 files changed, 3 insertions(+)

diff --git a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml
index 74de040b2..986b2f0ae 100644
--- a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml
+++ b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml
@@ -11,6 +11,7 @@ Panel Config:
   data source:
     - metric_table:
         id: 201
+        title: Speed-of-Light
         header:
           metric: Metric
           value: Value
diff --git a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml
index 74de040b2..986b2f0ae 100644
--- a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml
+++ b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml
@@ -11,6 +11,7 @@ Panel Config:
   data source:
     - metric_table:
         id: 201
+        title: Speed-of-Light
         header:
           metric: Metric
           value: Value
diff --git a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
index f10d7630f..20721ee1f 100644
--- a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
@@ -11,6 +11,7 @@ Panel Config:
   data source:
     - metric_table:
         id: 201
+        title: Speed-of-Light
         header:
           metric: Metric
           value: Value

From 5e52983bbf800eed0dfd32b4e0a27c3781713e57 Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Mon, 26 Jun 2023 15:30:38 -0500
Subject: [PATCH 37/81] Extend filtering into timestamps.csv (#80)

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/omniperf         | 34 ++--------------------------------
 src/utils/perfagg.py |  9 +++++++++
 2 files changed, 11 insertions(+), 32 deletions(-)

diff --git a/src/omniperf b/src/omniperf
index 3b38e419b..e611547d4 100755
--- a/src/omniperf
+++ b/src/omniperf
@@ -439,23 +439,7 @@ def characterize_app(args, VER):
         else:
             run_prof(fname, workload_dir, perfmon_dir, app_cmd, args.target, log, args.verbose)
     
-    
-
-    # run again with timestamps
-    success, output = capture_subprocess_output(
-        [
-            rocprof_cmd,
-            # "-i", fname,
-            # "-m", perfmon_dir + "/" + "metrics.xml",
-            "--timestamp",
-            "on",
-            "-o",
-            workload_dir + "/" + "timestamps.csv",
-            '"' + app_cmd + '"',
-        ]
-    )
-    log.write(output)
-    # Update pmc_perf.csv timestamps
+    # Update timestamps
     replace_timestamps(workload_dir, log)
 
     # Manually join each pmc_perf*.csv output
@@ -676,21 +660,7 @@ def omniperf_profile(args, VER):
             else:
                 run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose)
 
-        # run again with timestamps
-        success, output = capture_subprocess_output(
-            [
-                rocprof_cmd,
-                # "-i", fname,
-                # "-m", perfmon_dir + "/" + "metrics.xml",
-                "--timestamp",
-                "on",
-                "-o",
-                workload_dir + "/" + "timestamps.csv",
-                '"' + args.remaining + '"',
-            ]
-        )
-        log.write(output)
-        # Update pmc_perf.csv timestamps
+        # Update timestamps
         replace_timestamps(workload_dir, log)
         
         # Manually join each pmc_perf*.csv output
diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 109fdecda..1c80a22a9 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -345,6 +345,15 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
                             # initial counter in this channel
                             pmc_list["TCC2"][str(ch)] = [counter]
 
+
+    # add a timestamp file
+    fd = open(workload_perfmon_dir + "/timestamps.txt", "w")
+    fd.write("pmc:\n\n")
+    fd.write("gpu:\n")
+    fd.write("range:\n")
+    fd.write("kernel:\n")
+    fd.close()
+    
     # sort the per channel counter, so that same counter in all channels can be aligned
     for ch in range(perfmon_config[soc]["TCC_channels"]):
         pmc_list["TCC2"][str(ch)].sort()

From 8ae839533db7874880200c1fd98c0ee145e1dd4b Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Mon, 26 Jun 2023 15:38:51 -0500
Subject: [PATCH 38/81] Comply to Python formatting

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/utils/perfagg.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 1c80a22a9..59460bc80 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -345,7 +345,6 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
                             # initial counter in this channel
                             pmc_list["TCC2"][str(ch)] = [counter]
 
-
     # add a timestamp file
     fd = open(workload_perfmon_dir + "/timestamps.txt", "w")
     fd.write("pmc:\n\n")
@@ -353,7 +352,7 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
     fd.write("range:\n")
     fd.write("kernel:\n")
     fd.close()
-    
+
     # sort the per channel counter, so that same counter in all channels can be aligned
     for ch in range(perfmon_config[soc]["TCC_channels"]):
         pmc_list["TCC2"][str(ch)].sort()

From 607d9f79d3345abc60b1cc9d79b1ea4d6a98730c Mon Sep 17 00:00:00 2001
From: Nicholas Curtis <nicurtis@amd.com>
Date: Wed, 7 Jun 2023 10:23:49 -0400
Subject: [PATCH 39/81] Add options to enable latexpdf builds

Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
---
 src/docs/conf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/docs/conf.py b/src/docs/conf.py
index af0003fb7..d97f79bb7 100644
--- a/src/docs/conf.py
+++ b/src/docs/conf.py
@@ -90,8 +90,8 @@ def install(package):
 pygments_style = None
 
 # options for latex output
-latex_engine = "lualatex"
-latex_show_urls = "footnote"
+latex_engine = 'lualatex'
+latex_show_urls = 'footnote'
 
 
 # -- Options for HTML output -------------------------------------------------

From a6a0fc4d5ea9faa7dd05da11e470296bbf252a63 Mon Sep 17 00:00:00 2001
From: Nicholas Curtis <nicurtis@amd.com>
Date: Wed, 7 Jun 2023 15:19:56 -0400
Subject: [PATCH 40/81] apply formatting

Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
---
 src/docs/conf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/docs/conf.py b/src/docs/conf.py
index d97f79bb7..af0003fb7 100644
--- a/src/docs/conf.py
+++ b/src/docs/conf.py
@@ -90,8 +90,8 @@ def install(package):
 pygments_style = None
 
 # options for latex output
-latex_engine = 'lualatex'
-latex_show_urls = 'footnote'
+latex_engine = "lualatex"
+latex_show_urls = "footnote"
 
 
 # -- Options for HTML output -------------------------------------------------

From 931a333be16d53b1c3c586e87a02f0ee20f6447f Mon Sep 17 00:00:00 2001
From: "Karl W. Schulz" <karl.schulz@amd.com>
Date: Fri, 30 Jun 2023 15:01:57 -0500
Subject: [PATCH 41/81] updating path for rocm repo to supported rhel8 release
 (8.8)

Signed-off-by: Karl W. Schulz <karl.schulz@amd.com>
---
 docker/rhel8/rocm.repo | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/rhel8/rocm.repo b/docker/rhel8/rocm.repo
index 17171d755..8b2048978 100644
--- a/docker/rhel8/rocm.repo
+++ b/docker/rhel8/rocm.repo
@@ -7,7 +7,7 @@ gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
 
 [amdgpu]
 name=amdgpu
-baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.5/main/x86_64
+baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.8/main/x86_64
 enabled=1
 gpgcheck=1
 gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
\ No newline at end of file

From dcd2595412d085886c771d4cef6ddbfda20e73e8 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 10 Jul 2023 16:26:56 -0500
Subject: [PATCH 42/81] Fixed Units inconsistencies - Table 10: Units were
 output as "$normUnit"; now they are "(instr + $normUnit)"

-  Table 16: Changed to Req per $normUnit

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 .../gfx906/1000_compute-unit-instruction-mix.yaml  |  4 ++--
 .../configs/gfx906/1600_L1_cache.yaml              |  8 ++++----
 .../configs/gfx906/1800_L2_cache_per_channel.yaml  | 14 +++++++-------
 .../configs/gfx908/1600_L1_cache.yaml              |  8 ++++----
 .../configs/gfx908/1800_L2_cache_per_channel.yaml  | 14 +++++++-------
 .../configs/gfx90a/1600_L1_cache.yaml              |  8 ++++----
 .../configs/gfx90a/1800_L2_cache_per_channel.yaml  | 14 +++++++-------
 7 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
index 6d28cb2a4..c2e2f7f3a 100644
--- a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
@@ -34,7 +34,7 @@ Panel Config:
             avg: AVG((SQ_INSTS_LDS / $denom))
             min: MIN((SQ_INSTS_LDS / $denom))
             max: MAX((SQ_INSTS_LDS / $denom))
-            unit: $normUnit
+            unit: (instr + $normUnit)
             tips: 
           VALU - MFMA:
             avg: None # No HW module
@@ -78,7 +78,7 @@ Panel Config:
         metric:
           INT-32:
             count: None # No perf counter
-            unit: $normUnit
+            unit: (instr + $normUnit)
             tips:
           INT-64:
             count: None # No perf counter
diff --git a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
index 1713068d2..1e05b3e4c 100644
--- a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
@@ -361,7 +361,7 @@ Panel Config:
             mean: AVG((TCP_UTCL1_REQUEST_sum / $denom))
             min: MIN((TCP_UTCL1_REQUEST_sum / $denom))
             max: MAX((TCP_UTCL1_REQUEST_sum / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           Hit Ratio:
             mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
@@ -376,17 +376,17 @@ Panel Config:
             mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
-            units: ( + $normUnit)
+            units: (Hits + $normUnit)
             tips: 
           Misses (Translation):
             mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
           Misses (Permission):
             mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
diff --git a/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml
index 95bba22e8..08a9a9f76 100644
--- a/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml
@@ -204,7 +204,7 @@ Panel Config:
               + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25]))
               + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29]))
               + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1]))
@@ -247,7 +247,7 @@ Panel Config:
               + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) +
               TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30]))
               + TO_INT(TCC_READ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1]))
@@ -294,7 +294,7 @@ Panel Config:
               + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26]))
               + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29]))
               + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1]))
@@ -345,7 +345,7 @@ Panel Config:
               + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28]))
               + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1]))
@@ -396,7 +396,7 @@ Panel Config:
               + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28]))
               + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1]))
@@ -447,7 +447,7 @@ Panel Config:
               + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28]))
               + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1]))
@@ -498,7 +498,7 @@ Panel Config:
               + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28]))
               + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Lat:
             mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1])
diff --git a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
index 4ff3fd4d4..f65309a31 100644
--- a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
@@ -361,7 +361,7 @@ Panel Config:
             mean: AVG((TCP_UTCL1_REQUEST_sum / $denom))
             min: MIN((TCP_UTCL1_REQUEST_sum / $denom))
             max: MAX((TCP_UTCL1_REQUEST_sum / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           Hit Ratio:
             mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
@@ -376,17 +376,17 @@ Panel Config:
             mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
-            units: ( + $normUnit)
+            units: (Hits + $normUnit)
             tips: 
           Misses (Translation):
             mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
           Misses (Permission):
             mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
diff --git a/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml
index e68511e9e..3acee5740 100644
--- a/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml
@@ -204,7 +204,7 @@ Panel Config:
               + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25]))
               + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29]))
               + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1]))
@@ -247,7 +247,7 @@ Panel Config:
               + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) +
               TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30]))
               + TO_INT(TCC_READ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1]))
@@ -294,7 +294,7 @@ Panel Config:
               + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26]))
               + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29]))
               + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1]))
@@ -345,7 +345,7 @@ Panel Config:
               + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28]))
               + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1]))
@@ -396,7 +396,7 @@ Panel Config:
               + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28]))
               + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1]))
@@ -447,7 +447,7 @@ Panel Config:
               + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28]))
               + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1]))
@@ -498,7 +498,7 @@ Panel Config:
               + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28]))
               + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Lat:
             mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1])
diff --git a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
index 985be3803..917cb3aa0 100644
--- a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
@@ -361,7 +361,7 @@ Panel Config:
             mean: AVG((TCP_UTCL1_REQUEST_sum / $denom))
             min: MIN((TCP_UTCL1_REQUEST_sum / $denom))
             max: MAX((TCP_UTCL1_REQUEST_sum / $denom))
-            units: ( + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           Hit Ratio:
             mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if
@@ -376,17 +376,17 @@ Panel Config:
             mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom))
-            units: ( + $normUnit)
+            units: (Hits + $normUnit)
             tips: 
           Misses (Translation):
             mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
           Misses (Permission):
             mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
             max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom))
-            units: ( + $normUnit)
+            units: (Misses + $normUnit)
             tips: 
diff --git a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
index a5bf6fa25..094df5b19 100644
--- a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
@@ -204,7 +204,7 @@ Panel Config:
               + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25]))
               + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29]))
               + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L1 - L2 Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1]))
@@ -247,7 +247,7 @@ Panel Config:
               + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) +
               TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30]))
               + TO_INT(TCC_READ[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L1 - L2 Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1]))
@@ -294,7 +294,7 @@ Panel Config:
               + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26]))
               + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29]))
               + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L1 - L2 Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1]))
@@ -345,7 +345,7 @@ Panel Config:
               + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28]))
               + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L2 - EA Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1]))
@@ -396,7 +396,7 @@ Panel Config:
               + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28]))
               + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L2 - EA Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1]))
@@ -447,7 +447,7 @@ Panel Config:
               + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28]))
               + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L2 - EA Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1]))
@@ -498,7 +498,7 @@ Panel Config:
               + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28]))
               + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31]))
               / 32) / $denom))
-            units: ( + $normUnit)
+            units: (req + $normUnit)
             tips: 
           L2 - EA Read Lat:
             mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1])

From 02c647301d817092a3a2176550278efc26701a40 Mon Sep 17 00:00:00 2001
From: Cole Ramos <colramos@amd.com>
Date: Tue, 11 Jul 2023 13:27:46 -0500
Subject: [PATCH 43/81] Update 1800_L2_cache_per_channel.yaml

Capitalizing for consistency

Signed-off-by: Cole Ramos <colramos@amd.com>
---
 .../configs/gfx90a/1800_L2_cache_per_channel.yaml      | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
index 094df5b19..93fc2b412 100644
--- a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
@@ -204,7 +204,7 @@ Panel Config:
               + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25]))
               + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29]))
               + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1]))
@@ -294,7 +294,7 @@ Panel Config:
               + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26]))
               + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29]))
               + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1]))
@@ -396,7 +396,7 @@ Panel Config:
               + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28]))
               + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31]))
               / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1]))
@@ -447,7 +447,7 @@ Panel Config:
               + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28]))
               + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31]))
               / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Atomic Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1]))
@@ -498,7 +498,7 @@ Panel Config:
               + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28]))
               + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31]))
               / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Lat:
             mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1])

From 3daad292f92d7fec2eaf0150abb09b8f0aee3ef8 Mon Sep 17 00:00:00 2001
From: Cole Ramos <colramos@amd.com>
Date: Tue, 11 Jul 2023 13:29:07 -0500
Subject: [PATCH 44/81] Update 1800_L2_cache_per_channel.yaml

Capitalizing for consistency

Signed-off-by: Cole Ramos <colramos@amd.com>
---
 .../configs/gfx90a/1800_L2_cache_per_channel.yaml             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
index 93fc2b412..f13647847 100644
--- a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml
@@ -247,7 +247,7 @@ Panel Config:
               + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) +
               TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30]))
               + TO_INT(TCC_READ[31])) / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L1 - L2 Write Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1]))
@@ -345,7 +345,7 @@ Panel Config:
               + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28]))
               + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31]))
               / 32) / $denom))
-            units: (req + $normUnit)
+            units: (Req + $normUnit)
             tips: 
           L2 - EA Read Req:
             mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1]))

From 33ad9ffca327aec1e72bbfd8c87cd7ad056343e5 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Tue, 11 Jul 2023 14:11:38 -0500
Subject: [PATCH 45/81] Rearranging build_dfs func to optimize ArchConfig for
 rocomni plugin

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/utils/parser.py | 57 +++++++++++++++++++---------
 1 file changed, 39 insertions(+), 18 deletions(-)

diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py
index b6573566b..025745b17 100644
--- a/src/omniperf_analyze/utils/parser.py
+++ b/src/omniperf_analyze/utils/parser.py
@@ -339,11 +339,25 @@ def gen_counter_list(formula):
         "CC": None,
         "RW": None,
         "GIOP": None,
+        "GFLOPs": None,
     }
 
+    built_in_counter=[
+        "lds",
+        "grd",
+        "wgr",
+        "arch_vgpr",
+        "accum_vgpr",
+        "sgpr",
+        "scr",
+        "BeginNs",
+        "EndNs"
+    ]
+
+    visited = False
     counters = []
     if not isinstance(formula, str):
-        return counters
+        return visited, counters
     try:
         tree = ast.parse(
             formula.replace("$normUnit", "SQ_WAVES")
@@ -351,15 +365,17 @@ def gen_counter_list(formula):
             .replace("$", "")
         )
         for node in ast.walk(tree):
-            if (
-                isinstance(node, ast.Name)
-                and node.id.rstrip("_sum").isupper()
-                and node.id not in function_filter
-            ):
-                counters.append(node.id.rstrip("_sum"))
+            if isinstance(node, ast.Name):
+                val = str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id)
+                if (val.isupper() and val not in function_filter):
+                    counters.append(val)
+                    visited = True
+                if val in built_in_counter:
+                    visited = True
     except:
         pass
-    return counters
+
+    return visited, counters
 
 
 def build_dfs(archConfigs, filter_metrics):
@@ -381,9 +397,14 @@ def build_dfs(archConfigs, filter_metrics):
     dfs_type = {}
     metric_counters = {}
     for panel_id, panel in archConfigs.panel_configs.items():
+        panel_idx = str(panel_id // 100)
         for data_source in panel["data source"]:
             for type, data_cofig in data_source.items():
                 if type == "metric_table":
+                    metric_list[panel_idx] = panel["title"]
+                    table_idx = panel_idx + "." + str(data_cofig["id"] % 100)
+                    metric_list[table_idx] = data_cofig["title"]
+                    
                     headers = ["Index"]
                     for key, tile in data_cofig["header"].items():
                         if key != "tips":
@@ -397,12 +418,7 @@ def build_dfs(archConfigs, filter_metrics):
 
                     i = 0
                     for key, entries in data_cofig["metric"].items():
-                        data_source_idx = (
-                            str(data_cofig["id"] // 100)
-                            + "."
-                            + str(data_cofig["id"] % 100)
-                        )
-                        metric_idx = data_source_idx + "." + str(i)
+                        metric_idx = table_idx + "." + str(i)
                         values = []
                         eqn_content = []
 
@@ -411,7 +427,7 @@ def build_dfs(archConfigs, filter_metrics):
                             or (metric_idx in filter_metrics)  # no filter
                             or  # metric in filter
                             # the whole table in filter
-                            (data_source_idx in filter_metrics)
+                            (table_idx in filter_metrics)
                             or
                             # the whole IP block in filter
                             (str(panel_id // 100) in filter_metrics)
@@ -439,14 +455,19 @@ def build_dfs(archConfigs, filter_metrics):
                             df = pd.concat([df, df_new_row])
 
                         # collect metric_list
-                        metric_list[metric_idx] = key.replace(" ", "_")
+                        metric_list[metric_idx] = key
                         # generate mapping of counters and metrics
                         filter = {}
+                        _visited = False
                         for formula in eqn_content:
                             if formula is not None and formula != "None":
-                                for k in gen_counter_list(formula):
+                                visited, counters = gen_counter_list(formula)
+                                if visited:
+                                    _visited = True
+                                for k in counters:
                                     filter[k] = None
-                        if len(filter) > 0:
+
+                        if len(filter) > 0 or _visited:
                             metric_counters[key] = list(filter)
 
                         i += 1

From 2c2b384ca236dd8c9b56f7740c5eb3fff816121d Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Tue, 11 Jul 2023 14:13:09 -0500
Subject: [PATCH 46/81] Abstract perfmon coalescing for usage in rocomni plugin

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/utils/perfagg.py | 173 ++++++++++++++++++++++++++++---------------
 1 file changed, 115 insertions(+), 58 deletions(-)

diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 59460bc80..8e95482c5 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -256,6 +256,96 @@ def pmc_perf_split(workload_dir):
     os.remove(workload_perfmon_dir + "/pmc_perf.txt")
 
 
+def update_pmc_bucket(
+        counters, 
+        save_file,
+        soc,
+        pmc_list=None,
+        stext=None, 
+        workload_perfmon_dir=None
+    ):
+    # Verify inputs.
+    # If save_file is True, we're being called internally, from perfmon_coalesce
+    # Else we're being called externally, from rocomni
+    detected_extermal_call = False
+    if save_file and (stext is None or workload_perfmon_dir is None):
+        raise ValueError("stext and workload_perfmon_dir must be specified if save_file is True")
+    if pmc_list is None:
+        detected_extermal_call = True
+        pmc_list = dict(
+            [
+                ("SQ", []),
+                ("GRBM", []),
+                ("TCP", []),
+                ("TA", []),
+                ("TD", []),
+                ("TCC", []),
+                ("SPI", []),
+                ("CPC", []),
+                ("CPF", []),
+                ("GDS", []),
+                ("TCC2", {}),  # per-channel TCC perfmon
+            ]
+        )
+        for ch in range(perfmon_config[soc]["TCC_channels"]):
+            pmc_list["TCC2"][str(ch)] = []
+    
+    if "SQ_ACCUM_PREV_HIRES" in counters:
+        # save  all level counters separately
+        nindex = counters.index("SQ_ACCUM_PREV_HIRES")
+        level_counter = counters[nindex - 1]
+
+        if save_file:
+            # Save to level counter file, file name = level counter name
+            fd = open(workload_perfmon_dir + "/" + level_counter + ".txt", "w")
+            fd.write(stext + "\n\n")
+            fd.write("gpu:\n")
+            fd.write("range:\n")
+            fd.write("kernel:\n")
+            fd.close()
+
+        return pmc_list
+    
+    # save normal pmc counters in matching buckets
+    for counter in counters:
+        IP_block = counter.split(sep="_")[0].upper()
+        # SQC and SQ belong to the IP block, coalesce them
+        if IP_block == "SQC":
+            IP_block = "SQ"
+
+        if IP_block != "TCC":
+            # Insert unique pmc counters into its bucket
+            if counter not in pmc_list[IP_block]:
+                pmc_list[IP_block].append(counter)
+
+        else:
+            # TCC counters processing
+            m = re.match(r"[\s\S]+\[(\d+)\]", counter)
+            if m is None:
+                # Aggregated TCC counters
+                if counter not in pmc_list[IP_block]:
+                    pmc_list[IP_block].append(counter)
+
+            else:
+                # TCC channel ID
+                ch = m.group(1)
+
+                # fake IP block for per channel TCC
+                if str(ch) in pmc_list["TCC2"]:
+                    # append unique counter into the channel
+                    if counter not in pmc_list["TCC2"][str(ch)]:
+                        pmc_list["TCC2"][str(ch)].append(counter)
+                else:
+                    # initial counter in this channel
+                    pmc_list["TCC2"][str(ch)] = [counter]
+
+    if detected_extermal_call:
+        # sort the per channel counter, so that same counter in all channels can be aligned
+        for ch in range(perfmon_config[soc]["TCC_channels"]):
+            pmc_list["TCC2"][str(ch)].sort()
+    return pmc_list
+
+
 def perfmon_coalesce(pmc_files_list, workload_dir, soc):
     workload_perfmon_dir = workload_dir + "/perfmon"
 
@@ -296,55 +386,11 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
 
             # we have found all the counters, store them in buckets
             counters = m.group(1).split()
-            if "SQ_ACCUM_PREV_HIRES" in counters:
-                # save  all level counters separately
-
-                nindex = counters.index("SQ_ACCUM_PREV_HIRES")
-                level_counter = counters[nindex - 1]
-
-                # Save to level counter file, file name = level counter name
-                fd = open(workload_perfmon_dir + "/" + level_counter + ".txt", "w")
-                fd.write(stext + "\n\n")
-                fd.write("gpu:\n")
-                fd.write("range:\n")
-                fd.write("kernel:\n")
-                fd.close()
-
-                continue
-
-            # save normal pmc counters in matching buckets
-            for counter in counters:
-                IP_block = counter.split(sep="_")[0].upper()
-                # SQC and SQ belong to the IP block, coalesce them
-                if IP_block == "SQC":
-                    IP_block = "SQ"
-
-                if IP_block != "TCC":
-                    # Insert unique pmc counters into its bucket
-                    if counter not in pmc_list[IP_block]:
-                        pmc_list[IP_block].append(counter)
-
-                else:
-                    # TCC counters processing
-                    m = re.match(r"[\s\S]+\[(\d+)\]", counter)
-                    if m is None:
-                        # Aggregated TCC counters
-                        if counter not in pmc_list[IP_block]:
-                            pmc_list[IP_block].append(counter)
-
-                    else:
-                        # TCC channel ID
-                        ch = m.group(1)
-
-                        # fake IP block for per channel TCC
-                        if str(ch) in pmc_list["TCC2"]:
-                            # append unique counter into the channel
-                            if counter not in pmc_list["TCC2"][str(ch)]:
-                                pmc_list["TCC2"][str(ch)].append(counter)
-                        else:
-                            # initial counter in this channel
-                            pmc_list["TCC2"][str(ch)] = [counter]
-
+            
+            # Utilitze helper function once a list of counters has be extracted
+            save_file = True
+            pmc_list = update_pmc_bucket(counters, save_file, soc, pmc_list, stext, workload_perfmon_dir)
+    
     # add a timestamp file
     fd = open(workload_perfmon_dir + "/timestamps.txt", "w")
     fd.write("pmc:\n\n")
@@ -360,9 +406,7 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
     return pmc_list
 
 
-def perfmon_emit(pmc_list, workload_dir, soc):
-    workload_perfmon_dir = workload_dir + "/perfmon"
-
+def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None):
     # Calculate the minimum number of iteration to save the pmc counters
     # non-TCC counters
     pmc_cnt = [
@@ -384,7 +428,12 @@ def perfmon_emit(pmc_list, workload_dir, soc):
     niter = max(math.ceil(max(pmc_cnt)), math.ceil(tcc_cnt) + math.ceil(max(tcc2_cnt)))
 
     # Emit PMC counters into pmc config file
-    fd = open(workload_perfmon_dir + "/pmc_perf.txt", "w")
+    if save_file:
+        workload_perfmon_dir = workload_dir + "/perfmon"
+        fd = open(workload_perfmon_dir + "/pmc_perf.txt", "w")
+    else:
+        batches = []
+
 
     tcc2_index = 0
     for iter in range(niter):
@@ -414,12 +463,20 @@ def perfmon_emit(pmc_list, workload_dir, soc):
 
         # TCC aggregated counters
         line = line + " " + " ".join(tcc_counters)
-        fd.write(line + "\n")
+        if save_file:
+            fd.write(line + "\n")
+        else:
+            b = line.split()
+            b.remove("pmc:")
+            batches.append(b)
 
-    fd.write("\ngpu:\n")
-    fd.write("range:\n")
-    fd.write("kernel:\n")
-    fd.close()
+    if save_file:
+        fd.write("\ngpu:\n")
+        fd.write("range:\n")
+        fd.write("kernel:\n")
+        fd.close()
+    else:
+        return batches
 
 
 def perfmon_filter(workload_dir, perfmon_dir, args):

From 35daf8c120260121802d100e21f1e664b4a2202b Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Tue, 11 Jul 2023 14:14:10 -0500
Subject: [PATCH 47/81] Comply to Python formatting

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/utils/parser.py |  8 ++++----
 src/utils/perfagg.py                 | 26 ++++++++++++--------------
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py
index 025745b17..0328d7aa8 100644
--- a/src/omniperf_analyze/utils/parser.py
+++ b/src/omniperf_analyze/utils/parser.py
@@ -342,7 +342,7 @@ def gen_counter_list(formula):
         "GFLOPs": None,
     }
 
-    built_in_counter=[
+    built_in_counter = [
         "lds",
         "grd",
         "wgr",
@@ -351,7 +351,7 @@ def gen_counter_list(formula):
         "sgpr",
         "scr",
         "BeginNs",
-        "EndNs"
+        "EndNs",
     ]
 
     visited = False
@@ -367,7 +367,7 @@ def gen_counter_list(formula):
         for node in ast.walk(tree):
             if isinstance(node, ast.Name):
                 val = str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id)
-                if (val.isupper() and val not in function_filter):
+                if val.isupper() and val not in function_filter:
                     counters.append(val)
                     visited = True
                 if val in built_in_counter:
@@ -404,7 +404,7 @@ def build_dfs(archConfigs, filter_metrics):
                     metric_list[panel_idx] = panel["title"]
                     table_idx = panel_idx + "." + str(data_cofig["id"] % 100)
                     metric_list[table_idx] = data_cofig["title"]
-                    
+
                     headers = ["Index"]
                     for key, tile in data_cofig["header"].items():
                         if key != "tips":
diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 8e95482c5..04658795c 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -257,19 +257,16 @@ def pmc_perf_split(workload_dir):
 
 
 def update_pmc_bucket(
-        counters, 
-        save_file,
-        soc,
-        pmc_list=None,
-        stext=None, 
-        workload_perfmon_dir=None
-    ):
+    counters, save_file, soc, pmc_list=None, stext=None, workload_perfmon_dir=None
+):
     # Verify inputs.
     # If save_file is True, we're being called internally, from perfmon_coalesce
     # Else we're being called externally, from rocomni
     detected_extermal_call = False
     if save_file and (stext is None or workload_perfmon_dir is None):
-        raise ValueError("stext and workload_perfmon_dir must be specified if save_file is True")
+        raise ValueError(
+            "stext and workload_perfmon_dir must be specified if save_file is True"
+        )
     if pmc_list is None:
         detected_extermal_call = True
         pmc_list = dict(
@@ -289,7 +286,7 @@ def update_pmc_bucket(
         )
         for ch in range(perfmon_config[soc]["TCC_channels"]):
             pmc_list["TCC2"][str(ch)] = []
-    
+
     if "SQ_ACCUM_PREV_HIRES" in counters:
         # save  all level counters separately
         nindex = counters.index("SQ_ACCUM_PREV_HIRES")
@@ -305,7 +302,7 @@ def update_pmc_bucket(
             fd.close()
 
         return pmc_list
-    
+
     # save normal pmc counters in matching buckets
     for counter in counters:
         IP_block = counter.split(sep="_")[0].upper()
@@ -386,11 +383,13 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
 
             # we have found all the counters, store them in buckets
             counters = m.group(1).split()
-            
+
             # Utilitze helper function once a list of counters has be extracted
             save_file = True
-            pmc_list = update_pmc_bucket(counters, save_file, soc, pmc_list, stext, workload_perfmon_dir)
-    
+            pmc_list = update_pmc_bucket(
+                counters, save_file, soc, pmc_list, stext, workload_perfmon_dir
+            )
+
     # add a timestamp file
     fd = open(workload_perfmon_dir + "/timestamps.txt", "w")
     fd.write("pmc:\n\n")
@@ -434,7 +433,6 @@ def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None):
     else:
         batches = []
 
-
     tcc2_index = 0
     for iter in range(niter):
         # Prefix

From 626fc7f18f04af78d7d66de0860c4f1d4b528476 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Mon, 17 Jul 2023 13:12:22 -0500
Subject: [PATCH 48/81] Filter additional ops in gen_counter_list func

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/utils/parser.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py
index 0328d7aa8..bff3314b2 100644
--- a/src/omniperf_analyze/utils/parser.py
+++ b/src/omniperf_analyze/utils/parser.py
@@ -340,6 +340,8 @@ def gen_counter_list(formula):
         "RW": None,
         "GIOP": None,
         "GFLOPs": None,
+        "CONCAT": None,
+        "MOD": None,
     }
 
     built_in_counter = [
@@ -362,6 +364,12 @@ def gen_counter_list(formula):
         tree = ast.parse(
             formula.replace("$normUnit", "SQ_WAVES")
             .replace("$denom", "SQ_WAVES")
+            .replace(
+                "$numActiveCUs",
+                "TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / GRBM_GUI_ACTIVE)), \
+              0) / $maxWavesPerCU) * 8) + MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) \
+              / GRBM_GUI_ACTIVE)), 0), $maxWavesPerCU), 8)), $numCU))",
+            )
             .replace("$", "")
         )
         for node in ast.walk(tree):

From e6ca8e0361aabbdaedd58128426c0e0370894f85 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Mon, 17 Jul 2023 13:12:56 -0500
Subject: [PATCH 49/81] Enable join_prof() merge util to be called from outside
 Omniperf

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/utils/perfagg.py | 57 +++++++++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 19 deletions(-)

diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 04658795c..1c21b1736 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -95,13 +95,19 @@ def test_df_column_equality(df):
 # joins disparate runs less dumbly than rocprof
 def join_prof(workload_dir, join_type, log_file, verbose, out=None):
     # Set default output directory if not specified
-    if out == None:
-        out = workload_dir + "/pmc_perf.csv"
-    files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv")
-    df = None
+    if type(workload_dir) == str:
+        if out is None:
+            out = workload_dir + "/pmc_perf.csv"
+        files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv")
+    elif type(workload_dir) == list:
+        files = workload_dir
+    else:
+        print("ERROR: Invalid workload_dir")
+        sys.exit(1)
 
+    df = None
     for i, file in enumerate(files):
-        _df = pd.read_csv(file)
+        _df = pd.read_csv(file) if type(workload_dir) == str else file
         if join_type == "kernel":
             key = _df.groupby("KernelName").cumcount()
             _df["key"] = _df.KernelName + " - " + key.astype(str)
@@ -137,7 +143,6 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
         duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col]
         duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col]
     for key, cols in duplicate_cols.items():
-        print("Key is ", key)
         _df = df[cols]
         if not test_df_column_equality(_df):
             msg = (
@@ -146,10 +151,12 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
                 )
             )
             warnings.warn(msg)
-            log_file.write(msg + "\n")
+            if log_file:
+                log_file.write(msg + "\n")
         else:
             msg = "Successfully joined {} in pmc_perf.csv".format(key)
-            log_file.write(msg + "\n")
+            if log_file:
+                log_file.write(msg + "\n")
         if test_df_column_equality(_df) and verbose:
             print(msg)
 
@@ -179,6 +186,8 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
                     "fbar",
                     "sig",
                     "obj",
+                    # rocscope specific merged counters, keep original
+                    "dispatch_",
                 ]
             )
         ]
@@ -189,7 +198,15 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
         [
             k
             for k in df.keys()
-            if not any(check in k for check in ["DispatchNs", "CompleteNs"])
+            if not any(
+                check in k
+                for check in [
+                    "DispatchNs",
+                    "CompleteNs",
+                    # rocscope specific timestamp
+                    "HostDuration",
+                ]
+            )
         ]
     ]
     #   C) sanity check the name and key
@@ -216,12 +233,14 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
     df["EndNs"] = endNs
     # finally, join the drop key
     df = df.drop(columns=["key"])
-    # and save to file
-    df.to_csv(out, index=False)
-    # and delete old file(s)
-    if not verbose:
-        for file in files:
-            os.remove(file)
+    # save to file and delete old file(s), skip if we're being called outside of Omniperf
+    if type(workload_dir) == str:
+        df.to_csv(out, index=False)
+        if not verbose:
+            for file in files:
+                os.remove(file)
+    else:
+        return df
 
 
 def pmc_perf_split(workload_dir):
@@ -262,13 +281,13 @@ def update_pmc_bucket(
     # Verify inputs.
     # If save_file is True, we're being called internally, from perfmon_coalesce
     # Else we're being called externally, from rocomni
-    detected_extermal_call = False
+    detected_external_call = False
     if save_file and (stext is None or workload_perfmon_dir is None):
         raise ValueError(
             "stext and workload_perfmon_dir must be specified if save_file is True"
         )
     if pmc_list is None:
-        detected_extermal_call = True
+        detected_external_call = True
         pmc_list = dict(
             [
                 ("SQ", []),
@@ -287,7 +306,7 @@ def update_pmc_bucket(
         for ch in range(perfmon_config[soc]["TCC_channels"]):
             pmc_list["TCC2"][str(ch)] = []
 
-    if "SQ_ACCUM_PREV_HIRES" in counters:
+    if "SQ_ACCUM_PREV_HIRES" in counters and not detected_external_call:
         # save  all level counters separately
         nindex = counters.index("SQ_ACCUM_PREV_HIRES")
         level_counter = counters[nindex - 1]
@@ -336,7 +355,7 @@ def update_pmc_bucket(
                     # initial counter in this channel
                     pmc_list["TCC2"][str(ch)] = [counter]
 
-    if detected_extermal_call:
+    if detected_external_call:
         # sort the per channel counter, so that same counter in all channels can be aligned
         for ch in range(perfmon_config[soc]["TCC_channels"]):
             pmc_list["TCC2"][str(ch)].sort()

From 424dd6a4a8a65d8c4a1647ff29208b743ab066d3 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Tue, 18 Jul 2023 16:03:21 -0500
Subject: [PATCH 50/81] Add figure styling to YML configs

Signed-off-by: colramos-amd <colramos@amd.com>
---
 .../configs/gfx906/1000_compute-unit-instruction-mix.yaml  | 6 ++++++
 .../configs/gfx906/1100_compute-unit-compute-pipeline.yaml | 5 +++++
 src/omniperf_analyze/configs/gfx906/1200_lds.yaml          | 5 +++++
 .../configs/gfx906/1300_instruction-cache.yaml             | 5 +++++
 .../configs/gfx906/1400_constant-cache.yaml                | 5 +++++
 src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml     | 7 +++++++
 src/omniperf_analyze/configs/gfx906/1700_L2_cache.yaml     | 4 ++++
 .../configs/gfx908/1000_compute-unit-instruction-mix.yaml  | 6 ++++++
 .../configs/gfx908/1100_compute-unit-compute-pipeline.yaml | 5 +++++
 src/omniperf_analyze/configs/gfx908/1200_lds.yaml          | 5 +++++
 .../configs/gfx908/1300_instruction-cache.yaml             | 5 +++++
 .../configs/gfx908/1400_constant-cache.yaml                | 5 +++++
 src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml     | 7 +++++++
 src/omniperf_analyze/configs/gfx908/1700_L2_cache.yaml     | 4 ++++
 .../configs/gfx90a/1000_compute-unit-instruction-mix.yaml  | 6 ++++++
 .../configs/gfx90a/1100_compute-unit-compute-pipeline.yaml | 5 +++++
 src/omniperf_analyze/configs/gfx90a/1200_lds.yaml          | 5 +++++
 .../configs/gfx90a/1300_instruction-cache.yaml             | 5 +++++
 .../configs/gfx90a/1400_constant-cache.yaml                | 5 +++++
 src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml     | 7 +++++++
 src/omniperf_analyze/configs/gfx90a/1700_L2_cache.yaml     | 4 ++++
 21 files changed, 111 insertions(+)

diff --git a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
index c2e2f7f3a..eeec25221 100644
--- a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
@@ -17,6 +17,9 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           VALU - Vector:
             avg: None # No HW module
@@ -75,6 +78,9 @@ Panel Config:
           count: Count
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           INT-32:
             count: None # No perf counter
diff --git a/src/omniperf_analyze/configs/gfx906/1100_compute-unit-compute-pipeline.yaml b/src/omniperf_analyze/configs/gfx906/1100_compute-unit-compute-pipeline.yaml
index e7f537290..8cffb24c7 100644
--- a/src/omniperf_analyze/configs/gfx906/1100_compute-unit-compute-pipeline.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1100_compute-unit-compute-pipeline.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           valu_flops_pop:
             value: None # No perf counter
diff --git a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml
index 4f12a2ac3..218ad2cda 100644
--- a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Utilization:
             value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU)))
diff --git a/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml b/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml
index f40d3546e..fb9f384e1 100644
--- a/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml b/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml
index 4583c1012..91a2d6c9f 100644
--- a/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           mertic: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
index 1e05b3e4c..c734e21c8 100644
--- a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Buffer Coalescing:
             value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
@@ -248,6 +253,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_multi_bar
         metric:
           NC - Read:
             xfer: Read
diff --git a/src/omniperf_analyze/configs/gfx906/1700_L2_cache.yaml b/src/omniperf_analyze/configs/gfx906/1700_L2_cache.yaml
index 53235ca14..0b5f5e827 100644
--- a/src/omniperf_analyze/configs/gfx906/1700_L2_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1700_L2_cache.yaml
@@ -16,6 +16,8 @@ Panel Config:
           value: Value
           unit: Unit
           tips: Tips
+        style:
+          type: simple_bar
         metric:
           L2 Util:
             value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($L2Banks) * GRBM_GUI_ACTIVE)))
@@ -301,6 +303,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style:
+          type: simple_multi_bar
         metric:
           Read - Remote Socket Stall:
             type: Remote Socket Stall
diff --git a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
index 5a9bdb456..75eca82b8 100644
--- a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
@@ -18,6 +18,9 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           VALU - Vector:
             avg: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
@@ -76,6 +79,9 @@ Panel Config:
           count: Count
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           INT-32:
             count: None # No perf counter
diff --git a/src/omniperf_analyze/configs/gfx908/1100_compute-unit-compute-pipeline.yaml b/src/omniperf_analyze/configs/gfx908/1100_compute-unit-compute-pipeline.yaml
index 9316d1d25..061311d62 100644
--- a/src/omniperf_analyze/configs/gfx908/1100_compute-unit-compute-pipeline.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1100_compute-unit-compute-pipeline.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           valu_flops_pop:
             value: None # No perf counter
diff --git a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml
index 4f12a2ac3..218ad2cda 100644
--- a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Utilization:
             value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU)))
diff --git a/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml b/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml
index f40d3546e..fb9f384e1 100644
--- a/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml b/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml
index 4583c1012..91a2d6c9f 100644
--- a/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           mertic: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
index f65309a31..7eeed0477 100644
--- a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Buffer Coalescing:
             value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
@@ -248,6 +253,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_multi_bar
         metric:
           NC - Read:
             xfer: Read
diff --git a/src/omniperf_analyze/configs/gfx908/1700_L2_cache.yaml b/src/omniperf_analyze/configs/gfx908/1700_L2_cache.yaml
index 9e76a39b6..0c7b03811 100644
--- a/src/omniperf_analyze/configs/gfx908/1700_L2_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1700_L2_cache.yaml
@@ -16,6 +16,8 @@ Panel Config:
           value: Value
           unit: Unit
           tips: Tips
+        style:
+          type: simple_bar
         metric:
           L2 Util:
             value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($L2Banks) * GRBM_GUI_ACTIVE)))
@@ -301,6 +303,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style:
+          type: simple_multi_bar
         metric:
           Read - Remote Socket Stall:
             type: Remote Socket Stall
diff --git a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
index 999d25e19..811ebca11 100644
--- a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
@@ -18,6 +18,9 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           VALU - Vector:
             avg: AVG(((SQ_INSTS_VALU - SQ_INSTS_MFMA) / $denom))
@@ -76,6 +79,9 @@ Panel Config:
           count: Count
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_bar
+          label_txt: (# of instr + $normUnit)
         metric:
           INT32:
             count: AVG((SQ_INSTS_VALU_INT32 / $denom))
diff --git a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
index 2866f9652..301217fde 100644
--- a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
@@ -17,6 +17,11 @@ Panel Config:
           min: Min
           max: Max
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           valu_flops_pop:
             avg: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
diff --git a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml
index 4f12a2ac3..218ad2cda 100644
--- a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Utilization:
             value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU)))
diff --git a/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml b/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml
index cd0af3bfa..1a7000e93 100644
--- a/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml b/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml
index 4583c1012..91a2d6c9f 100644
--- a/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           mertic: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Bandwidth:
             value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
diff --git a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
index 917cb3aa0..7ea26db05 100644
--- a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           Buffer Coalescing:
             value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
@@ -248,6 +253,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style: 
+          type: simple_multi_bar
         metric:
           NC - Read:
             xfer: Read
diff --git a/src/omniperf_analyze/configs/gfx90a/1700_L2_cache.yaml b/src/omniperf_analyze/configs/gfx90a/1700_L2_cache.yaml
index 7be18091a..ddbaf9155 100644
--- a/src/omniperf_analyze/configs/gfx90a/1700_L2_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1700_L2_cache.yaml
@@ -16,6 +16,8 @@ Panel Config:
           value: Value
           unit: Unit
           tips: Tips
+        style:
+          type: simple_bar
         metric:
           L2 Util:
             value: AVG(((TCC_BUSY_sum * 100) / (TO_INT($L2Banks) * GRBM_GUI_ACTIVE)))
@@ -301,6 +303,8 @@ Panel Config:
           max: Max
           unit: Unit
           tips: Tips
+        style:
+          type: simple_multi_bar
         metric:
           Read - Remote Socket Stall:
             type: Remote Socket Stall

From a61d509739b5f8ff86be95c93cb1854095ee6019 Mon Sep 17 00:00:00 2001
From: colramos-amd <colramos@amd.com>
Date: Tue, 18 Jul 2023 16:04:31 -0500
Subject: [PATCH 51/81] New simple_charts utility

Signed-off-by: colramos-amd <colramos@amd.com>
---
 src/omniperf_analyze/utils/simple_charts.py | 119 ++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 src/omniperf_analyze/utils/simple_charts.py

diff --git a/src/omniperf_analyze/utils/simple_charts.py b/src/omniperf_analyze/utils/simple_charts.py
new file mode 100644
index 000000000..8a853f987
--- /dev/null
+++ b/src/omniperf_analyze/utils/simple_charts.py
@@ -0,0 +1,119 @@
+##############################################################################bl
+# MIT License
+#
+# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+##############################################################################el
+
+import plotly.express as px
+import pandas as pd
+
+
+# Notes:
+#   This file includes implementation of a few simple but common charts in CLI.
+#   We try to auto-size the layout to cover most of the cases as default. If it
+#   doesn't work, apply style config in yaml for each dashboard.
+
+
+def simple_bar(df, title: str = None, id=None, style: dict = None, orientation="h"):
+    """
+    Plot data with simple bar chart
+    """
+
+    # TODO: handle None properly
+    if "Metric" in df.columns and ("Count" in df.columns or "Value" in df.columns):
+        detected_label = "Count" if "Count" in df.columns else "Value"
+        df[detected_label] = [
+            x.astype(int) if x != "" else int(0) for x in df[detected_label]
+        ]
+    else:
+        raise NameError("simple_bar: No Metric or Count in df columns!")
+
+    # Assign figure characteristics
+    range_color = style.get("range_color", None)
+    label_txt = style.get("label_txt", None)
+    xrange = style.get("xrange", None)
+    if label_txt is not None:
+        label_txt = label_txt.strip("()")
+        try:
+            label_txt = label_txt.replace("+ $normUnit", df["Unit"][0])
+        except KeyError:
+            print("No units found in df. Auto labeling.")
+
+    # Overrides for figure characteristics
+    if id == 1701.1:
+        label_txt = "%"
+        range_color = [0, 100]
+        xrange = [0, 110]
+    if id == 1701.2:
+        label_txt = "Gb/s"
+        range_color = [0, 1638]
+        xrange = [0, 1638]
+
+    fig = px.bar(
+        df,
+        title=title,
+        x=detected_label,
+        y="Metric",
+        color=detected_label,
+        range_color=range_color,
+        labels={detected_label: label_txt},
+        orientation=orientation,
+    ).update_xaxes(range=xrange)
+
+    return fig
+
+
+def simple_multi_bar(df, title=None, id=None):
+    """
+    Plot data with simple multiple bar chart
+    """
+
+    # TODO: handle Nan and None properly
+    if "Metric" in df.columns and "Avg" in df.columns:
+        df["Avg"] = [x.astype(int) if x != "" else int(0) for x in df["Avg"]]
+    else:
+        raise NameError("simple_multi_bar: No Metric or Count in df columns!")
+
+    dfigs = []
+    nested_bar = {}
+    df_unit = df["Unit"][0]
+    if id == 1604:
+        nested_bar = {"NC": {}, "UC": {}, "RW": {}, "CC": {}}
+        for index, row in df.iterrows():
+            nested_bar[row["Coherency"]][row["Xfer"]] = row["Avg"]
+    if id == 1704:
+        nested_bar = {"Read": {}, "Write": {}}
+        for index, row in df.iterrows():
+            nested_bar[row["Transaction"]][row["Type"]] = row["Avg"]
+
+    for group, metric in nested_bar.items():
+        dfigs.append(
+            px.bar(
+                title=group,
+                x=metric.values(),
+                y=metric.keys(),
+                labels={"x": df_unit, "y": ""},
+                text=metric.values(),
+            )
+            .update_xaxes(showgrid=False, rangemode="nonnegative")
+            .update_yaxes(showgrid=False)
+        )
+    return dfigs

From 08f4684620aad6bb1203887f0ddeaa99a3747eb5 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 17 Jul 2023 12:46:05 -0500
Subject: [PATCH 52/81] Migrate to  @grafana/create-plugin

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 grafana_plugins/svg_plugin/.prettierrc.js |  5 +-
 grafana_plugins/svg_plugin/package.json   | 65 +++++++++++++++++++----
 grafana_plugins/svg_plugin/tsconfig.json  | 12 +----
 3 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/grafana_plugins/svg_plugin/.prettierrc.js b/grafana_plugins/svg_plugin/.prettierrc.js
index f60eb1d25..aaa5045c6 100644
--- a/grafana_plugins/svg_plugin/.prettierrc.js
+++ b/grafana_plugins/svg_plugin/.prettierrc.js
@@ -1,3 +1,4 @@
 module.exports = {
-    ...require("./node_modules/@grafana/toolkit/src/config/prettier.plugin.config.json"),
-  };
\ No newline at end of file
+  // Prettier configuration provided by Grafana scaffolding
+  ...require("./.config/.prettierrc.js")
+};
\ No newline at end of file
diff --git a/grafana_plugins/svg_plugin/package.json b/grafana_plugins/svg_plugin/package.json
index fb88025c1..aa445bd25 100644
--- a/grafana_plugins/svg_plugin/package.json
+++ b/grafana_plugins/svg_plugin/package.json
@@ -3,29 +3,72 @@
   "version": "1.0.0",
   "description": "",
   "scripts": {
-    "build": "grafana-toolkit plugin:build",
-    "test": "grafana-toolkit plugin:test",
-    "dev": "grafana-toolkit plugin:dev",
-    "watch": "grafana-toolkit plugin:dev --watch",
-    "sign": "grafana-toolkit plugin:sign",
-    "start": "yarn watch"
+    "build": "webpack -c ./.config/webpack/webpack.config.ts --env production",
+    "dev": "webpack -w -c ./.config/webpack/webpack.config.ts --env development",
+    "e2e": "yarn exec cypress install && yarn exec grafana-e2e run",
+    "e2e:update": "yarn exec cypress install && yarn exec grafana-e2e run --update-screenshots",
+    "lint": "eslint --cache --ignore-path ./.gitignore --ext .js,.jsx,.ts,.tsx .",
+    "lint:fix": "yarn run lint --fix",
+    "server": "docker-compose up --build",
+    "sign": "npx --yes @grafana/sign-plugin@latest",
+    "start": "yarn watch",
+    "test": "jest --watch --onlyChanged",
+    "test:ci": "jest --passWithNoTests --maxWorkers 4",
+    "typecheck": "tsc --noEmit"
   },
   "author": "Audacious Software Group",
   "license": "MIT",
   "devDependencies": {
-    "@grafana/toolkit": "latest",
+    "@babel/core": "^7.21.4",
+    "@grafana/e2e": "9.5.3",
+    "@grafana/e2e-selectors": "9.5.3",
+    "@grafana/eslint-config": "^6.0.0",
+    "@grafana/tsconfig": "^1.2.0-rc1",
+    "@swc/core": "^1.3.62",
+    "@swc/helpers": "^0.5.0",
+    "@swc/jest": "^0.2.26",
+    "@testing-library/jest-dom": "^5.16.5",
+    "@testing-library/react": "^12.1.4",
+    "@types/jest": "^29.5.0",
+    "@types/lodash": "^4.14.194",
+    "@types/node": "^18.15.11",
+    "copy-webpack-plugin": "^11.0.0",
+    "css-loader": "^6.7.3",
     "emotion": "10.0.27",
+    "eslint-webpack-plugin": "^4.0.1",
+    "fork-ts-checker-webpack-plugin": "^8.0.0",
+    "glob": "^10.2.7",
+    "identity-obj-proxy": "3.0.0",
+    "jest": "^29.5.0",
+    "jest-environment-jsdom": "^29.5.0",
+    "prettier": "^2.8.7",
     "react-monaco-editor": "^0.44.0",
-    "tslib": "^2.3.1"
+    "replace-in-file-webpack-plugin": "^1.0.6",
+    "sass": "1.63.2",
+    "sass-loader": "13.3.1",
+    "style-loader": "3.3.3",
+    "swc-loader": "^0.2.3",
+    "ts-node": "^10.9.1",
+    "tsconfig-paths": "^4.2.0",
+    "tslib": "^2.3.1",
+    "typescript": "4.8.4",
+    "webpack": "^5.86.0",
+    "webpack-cli": "^5.1.4",
+    "webpack-livereload-plugin": "^3.0.2"
   },
   "engines": {
     "node": ">=14"
   },
   "dependencies": {
-    "@grafana/runtime": "9.1.2",
+    "@emotion/css": "^11.1.3",
     "@grafana/data": "9.1.2",
+    "@grafana/runtime": "9.1.2",
     "@grafana/ui": "9.1.2",
-    "@svgdotjs/svg.js": "^3.1.1"
+    "@svgdotjs/svg.js": "^3.1.1",
+    "react": "17.0.2",
+    "react-dom": "17.0.2",
+    "tslib": "2.5.3"
   },
-  "_comments": "Dependencies are not included as part of Omniperf. It's the user's responsibility to accept any licensing implications before building the project."
+  "_comments": "Dependencies are not included as part of Omniperf. It's the user's responsibility to accept any licensing implications before building the project.",
+  "packageManager": "yarn@1.22.19"
 }
diff --git a/grafana_plugins/svg_plugin/tsconfig.json b/grafana_plugins/svg_plugin/tsconfig.json
index 7e6657d2f..d294745aa 100644
--- a/grafana_plugins/svg_plugin/tsconfig.json
+++ b/grafana_plugins/svg_plugin/tsconfig.json
@@ -1,11 +1,3 @@
 {
-  "extends": "./node_modules/@grafana/toolkit/src/config/tsconfig.plugin.json",
-  "include": ["src", "types"],
-  "compilerOptions": {
-    "types": ["@emotion/core"],
-    "rootDir": "./src",
-    "baseUrl": "./src",
-    "typeRoots": ["./node_modules/@types"],
-    "jsx": "react"
-  }
-}
+  "extends": "./.config/tsconfig.json"
+}
\ No newline at end of file

From c209583e9878de76914693f1ca8def68efc39f1a Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 17 Jul 2023 13:02:00 -0500
Subject: [PATCH 53/81] Adding config files

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 .../svg_plugin/.config/tsconfig.json          |  26 +++
 .../.config/webpack/webpack.config.ts         | 201 ++++++++++++++++++
 2 files changed, 227 insertions(+)
 create mode 100644 grafana_plugins/svg_plugin/.config/tsconfig.json
 create mode 100644 grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts

diff --git a/grafana_plugins/svg_plugin/.config/tsconfig.json b/grafana_plugins/svg_plugin/.config/tsconfig.json
new file mode 100644
index 000000000..64b376907
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/tsconfig.json
@@ -0,0 +1,26 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in
+ * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-typescript-config
+ */
+ {
+  "compilerOptions": {
+    "alwaysStrict": true,
+    "declaration": false,
+    "rootDir": "../src",
+    "baseUrl": "../src",
+    "typeRoots": ["../node_modules/@types"],
+    "resolveJsonModule": true
+  },
+  "ts-node": {
+    "compilerOptions": {
+      "module": "commonjs",
+      "target": "es5",
+      "esModuleInterop": true
+    },
+    "transpileOnly": true
+  },
+  "include": ["../src", "./types"],
+  "extends": "@grafana/tsconfig"
+}
diff --git a/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts b/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts
new file mode 100644
index 000000000..22cb86ca4
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts
@@ -0,0 +1,201 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in
+ * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-webpack-config
+ */
+
+import CopyWebpackPlugin from 'copy-webpack-plugin';
+import ESLintPlugin from 'eslint-webpack-plugin';
+import ForkTsCheckerWebpackPlugin from 'fork-ts-checker-webpack-plugin';
+import LiveReloadPlugin from 'webpack-livereload-plugin';
+import path from 'path';
+import ReplaceInFileWebpackPlugin from 'replace-in-file-webpack-plugin';
+import { Configuration } from 'webpack';
+
+import { getPackageJson, getPluginJson, hasReadme, getEntries } from './utils';
+import { SOURCE_DIR, DIST_DIR } from './constants';
+
+const pluginJson = getPluginJson();
+
+const config = async (env): Promise<Configuration> => ({
+  cache: {
+    type: 'filesystem',
+    buildDependencies: {
+      config: [__filename],
+    },
+  },
+
+  context: path.join(process.cwd(), SOURCE_DIR),
+
+  devtool: env.production ? 'source-map' : 'eval-source-map',
+
+  entry: await getEntries(),
+
+  externals: [
+    'lodash',
+    'jquery',
+    'moment',
+    'slate',
+    'emotion',
+    '@emotion/react',
+    '@emotion/css',
+    'prismjs',
+    'slate-plain-serializer',
+    '@grafana/slate-react',
+    'react',
+    'react-dom',
+    'react-redux',
+    'redux',
+    'rxjs',
+    'react-router',
+    'react-router-dom',
+    'd3',
+    'angular',
+    '@grafana/ui',
+    '@grafana/runtime',
+    '@grafana/data',
+
+    // Mark legacy SDK imports as external if their name starts with the "grafana/" prefix
+    ({ request }, callback) => {
+      const prefix = 'grafana/';
+      const hasPrefix = (request) => request.indexOf(prefix) === 0;
+      const stripPrefix = (request) => request.substr(prefix.length);
+
+      if (hasPrefix(request)) {
+        return callback(undefined, stripPrefix(request));
+      }
+
+      callback();
+    },
+  ],
+
+  mode: env.production ? 'production' : 'development',
+
+  module: {
+    rules: [
+      {
+        exclude: /(node_modules)/,
+        test: /\.[tj]sx?$/,
+        use: {
+          loader: 'swc-loader',
+          options: {
+            jsc: {
+              baseUrl: './src',
+              target: 'es2015',
+              loose: false,
+              parser: {
+                syntax: 'typescript',
+                tsx: true,
+                decorators: false,
+                dynamicImport: true,
+              },
+            },
+          },
+        },
+      },
+      {
+        test: /\.css$/,
+        use: ["style-loader", "css-loader"]
+      },
+      {
+        test: /\.s[ac]ss$/,
+        use: ['style-loader', 'css-loader', 'sass-loader'],
+      },
+      {
+        test: /\.(png|jpe?g|gif|svg)$/,
+        type: 'asset/resource',
+        generator: {
+          // Keep publicPath relative for host.com/grafana/ deployments
+          publicPath: `public/plugins/${pluginJson.id}/img/`,
+          outputPath: 'img/',
+          filename: Boolean(env.production) ? '[hash][ext]' : '[name][ext]',
+        },
+      },
+      {
+        test: /\.(woff|woff2|eot|ttf|otf)(\?v=\d+\.\d+\.\d+)?$/,
+        type: 'asset/resource',
+        generator: {
+          // Keep publicPath relative for host.com/grafana/ deployments
+          publicPath: `public/plugins/${pluginJson.id}/fonts/`,
+          outputPath: 'fonts/',
+          filename: Boolean(env.production) ? '[hash][ext]' : '[name][ext]',
+        },
+      },
+    ],
+  },
+
+  output: {
+    clean: {
+      keep: new RegExp(`.*?_(amd64|arm(64)?)(.exe)?`),
+    },
+    filename: '[name].js',
+    library: {
+      type: 'amd',
+    },
+    path: path.resolve(process.cwd(), DIST_DIR),
+    publicPath: '/',
+  },
+
+  plugins: [
+    new CopyWebpackPlugin({
+      patterns: [
+        // If src/README.md exists use it; otherwise the root README
+        // To `compiler.options.output`
+        { from: hasReadme() ? 'README.md' : '../README.md', to: '.', force: true },
+        { from: 'plugin.json', to: '.' },
+        { from: '../LICENSE', to: '.' },
+        { from: '../CHANGELOG.md', to: '.', force: true },
+        { from: '**/*.json', to: '.' }, // TODO<Add an error for checking the basic structure of the repo>
+        { from: '**/*.svg', to: '.', noErrorOnMissing: true }, // Optional
+        { from: '**/*.png', to: '.', noErrorOnMissing: true }, // Optional
+        { from: '**/*.html', to: '.', noErrorOnMissing: true }, // Optional
+        { from: 'img/**/*', to: '.', noErrorOnMissing: true }, // Optional
+        { from: 'libs/**/*', to: '.', noErrorOnMissing: true }, // Optional
+        { from: 'static/**/*', to: '.', noErrorOnMissing: true }, // Optional
+      ],
+    }),
+    // Replace certain template-variables in the README and plugin.json
+    new ReplaceInFileWebpackPlugin([
+      {
+        dir: DIST_DIR,
+        files: ['plugin.json', 'README.md'],
+        rules: [
+          {
+            search: /\%VERSION\%/g,
+            replace: getPackageJson().version,
+          },
+          {
+            search: /\%TODAY\%/g,
+            replace: new Date().toISOString().substring(0, 10),
+          },
+          {
+            search: /\%PLUGIN_ID\%/g,
+            replace: pluginJson.id,
+          },
+        ],
+      },
+    ]),
+    new ForkTsCheckerWebpackPlugin({
+      async: Boolean(env.development),
+      issue: {
+        include: [{ file: '**/*.{ts,tsx}' }],
+      },
+      typescript: { configFile: path.join(process.cwd(), 'tsconfig.json') },
+    }),
+    new ESLintPlugin({
+      extensions: ['.ts', '.tsx'],
+      lintDirtyModulesOnly: Boolean(env.development), // don't lint on start, only lint changed files
+    }),
+    ...(env.development ? [new LiveReloadPlugin()] : []),
+  ],
+
+  resolve: {
+    extensions: ['.js', '.jsx', '.ts', '.tsx'],
+    // handle resolving "rootDir" paths
+    modules: [path.resolve(process.cwd(), 'src'), 'node_modules'],
+    unsafeCache: true,
+  },
+});
+
+export default config;

From 7ed6c620aa25505da968e1db4f3f10504b4b1536 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 17 Jul 2023 13:19:03 -0500
Subject: [PATCH 54/81] Adding config files

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 grafana_plugins/svg_plugin/.config/.eslintrc  |  13 ++
 .../svg_plugin/.config/.prettierrc.js         |  16 ++
 grafana_plugins/svg_plugin/.config/Dockerfile |  16 ++
 grafana_plugins/svg_plugin/.config/README.md  | 164 ++++++++++++++++++
 .../svg_plugin/.config/jest-setup.js          |  25 +++
 .../svg_plugin/.config/jest.config.js         |  43 +++++
 .../.config/jest/mocks/react-inlinesvg.tsx    |  25 +++
 .../svg_plugin/.config/jest/utils.js          |  31 ++++
 .../svg_plugin/.config/types/custom.d.ts      |  37 ++++
 .../svg_plugin/.config/webpack/constants.ts   |   2 +
 .../svg_plugin/.config/webpack/utils.ts       |  40 +++++
 grafana_plugins/svg_plugin/.eslintrc          |   3 +
 grafana_plugins/svg_plugin/.nvmrc             |   1 +
 .../svg_plugin/docker-compose.yaml            |  15 ++
 grafana_plugins/svg_plugin/jest-setup.js      |   2 +
 grafana_plugins/svg_plugin/jest.config.js     |   8 +
 16 files changed, 441 insertions(+)
 create mode 100644 grafana_plugins/svg_plugin/.config/.eslintrc
 create mode 100644 grafana_plugins/svg_plugin/.config/.prettierrc.js
 create mode 100644 grafana_plugins/svg_plugin/.config/Dockerfile
 create mode 100644 grafana_plugins/svg_plugin/.config/README.md
 create mode 100644 grafana_plugins/svg_plugin/.config/jest-setup.js
 create mode 100644 grafana_plugins/svg_plugin/.config/jest.config.js
 create mode 100644 grafana_plugins/svg_plugin/.config/jest/mocks/react-inlinesvg.tsx
 create mode 100644 grafana_plugins/svg_plugin/.config/jest/utils.js
 create mode 100644 grafana_plugins/svg_plugin/.config/types/custom.d.ts
 create mode 100644 grafana_plugins/svg_plugin/.config/webpack/constants.ts
 create mode 100644 grafana_plugins/svg_plugin/.config/webpack/utils.ts
 create mode 100644 grafana_plugins/svg_plugin/.eslintrc
 create mode 100644 grafana_plugins/svg_plugin/.nvmrc
 create mode 100644 grafana_plugins/svg_plugin/docker-compose.yaml
 create mode 100644 grafana_plugins/svg_plugin/jest-setup.js
 create mode 100644 grafana_plugins/svg_plugin/jest.config.js

diff --git a/grafana_plugins/svg_plugin/.config/.eslintrc b/grafana_plugins/svg_plugin/.config/.eslintrc
new file mode 100644
index 000000000..3f8c381a4
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/.eslintrc
@@ -0,0 +1,13 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in
+ * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-eslint-config
+ */
+ {
+  "extends": ["@grafana/eslint-config"],
+  "root": true,
+  "rules": {
+    "react/prop-types": "off"
+  }
+}
diff --git a/grafana_plugins/svg_plugin/.config/.prettierrc.js b/grafana_plugins/svg_plugin/.config/.prettierrc.js
new file mode 100644
index 000000000..66a76ec5b
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/.prettierrc.js
@@ -0,0 +1,16 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in .config/README.md
+ */
+
+module.exports = {
+  "endOfLine": "auto",
+  "printWidth": 120,
+  "trailingComma": "es5",
+  "semi": true,
+  "jsxSingleQuote": false,
+  "singleQuote": true,
+  "useTabs": false,
+  "tabWidth": 2
+};
\ No newline at end of file
diff --git a/grafana_plugins/svg_plugin/.config/Dockerfile b/grafana_plugins/svg_plugin/.config/Dockerfile
new file mode 100644
index 000000000..35d89bd1c
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/Dockerfile
@@ -0,0 +1,16 @@
+ARG grafana_version=latest
+ARG grafana_image=grafana-enterprise
+
+FROM grafana/${grafana_image}:${grafana_version}
+
+# Make it as simple as possible to access the grafana instance for development purposes
+# Do NOT enable these settings in a public facing / production grafana instance
+ENV GF_AUTH_ANONYMOUS_ORG_ROLE "Admin"
+ENV GF_AUTH_ANONYMOUS_ENABLED "true"
+ENV GF_AUTH_BASIC_ENABLED "false"
+# Set development mode so plugins can be loaded without the need to sign
+ENV GF_DEFAULT_APP_MODE "development"
+
+# Inject livereload script into grafana index.html
+USER root
+RUN sed -i 's/<\/body><\/html>/<script src=\"http:\/\/localhost:35729\/livereload.js\"><\/script><\/body><\/html>/g' /usr/share/grafana/public/views/index.html
\ No newline at end of file
diff --git a/grafana_plugins/svg_plugin/.config/README.md b/grafana_plugins/svg_plugin/.config/README.md
new file mode 100644
index 000000000..f1ded3464
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/README.md
@@ -0,0 +1,164 @@
+# Default build configuration by Grafana
+
+**This is an auto-generated directory and is not intended to be changed! ⚠️**
+
+The `.config/` directory holds basic configuration for the different tools
+that are used to develop, test and build the project. In order to make updates easier we ask you to
+not edit files in this folder to extend configuration.
+
+## How to extend the basic configs?
+
+Bear in mind that you are doing it at your own risk, and that extending any of the basic configuration can lead
+to issues around working with the project.
+
+### Extending the ESLint config
+
+Edit the `.eslintrc` file in the project root in order to extend the ESLint configuration.
+
+**Example:**
+
+```json
+{
+  "extends": "./.config/.eslintrc",
+  "rules": {
+    "react/prop-types": "off"
+  }
+}
+```
+
+---
+
+### Extending the Prettier config
+
+Edit the `.prettierrc.js` file in the project root in order to extend the Prettier configuration.
+
+**Example:**
+
+```javascript
+module.exports = {
+  // Prettier configuration provided by Grafana scaffolding
+  ...require('./.config/.prettierrc.js'),
+
+  semi: false,
+};
+```
+
+---
+
+### Extending the Jest config
+
+There are two configuration files in the project root that belong to Jest: `jest-setup.js` and `jest.config.js`.
+
+**`jest-setup.js`:** A file that is run before each test file in the suite is executed. We are using it to
+set up the Jest DOM for the testing library and to apply some polyfills. ([link to Jest docs](https://jestjs.io/docs/configuration#setupfilesafterenv-array))
+
+**`jest.config.js`:** The main Jest configuration file that extends the Grafana recommended setup. ([link to Jest docs](https://jestjs.io/docs/configuration))
+
+#### ESM errors with Jest
+
+A common issue found with the current jest config involves importing an npm package which only offers an ESM build. These packages cause jest to error with `SyntaxError: Cannot use import statement outside a module`. To work around this we provide a list of known packages to pass to the [`transformIgnorePatterns`](https://jestjs.io/docs/configuration#transformignorepatterns-arraystring) jest configuration property. If need be this can be extended in the following way:
+
+```javascript
+process.env.TZ = 'UTC';
+const { grafanaESModules, nodeModulesToTransform } = require('./.config/jest/utils');
+
+module.exports = {
+  // Jest configuration provided by Grafana
+  ...require('./.config/jest.config'),
+  // Inform jest to only transform specific node_module packages.
+  transformIgnorePatterns: [nodeModulesToTransform([...grafanaESModules, 'packageName'])],
+};
+```
+
+---
+
+### Extending the TypeScript config
+
+Edit the `tsconfig.json` file in the project root in order to extend the TypeScript configuration.
+
+**Example:**
+
+```json
+{
+  "extends": "./.config/tsconfig.json",
+  "compilerOptions": {
+    "preserveConstEnums": true
+  }
+}
+```
+
+---
+
+### Extending the Webpack config
+
+Follow these steps to extend the basic Webpack configuration that lives under `.config/`:
+
+#### 1. Create a new Webpack configuration file
+
+Create a new config file that is going to extend the basic one provided by Grafana.
+It can live in the project root, e.g. `webpack.config.ts`.
+
+#### 2. Merge the basic config provided by Grafana and your custom setup
+
+We are going to use [`webpack-merge`](https://github.com/survivejs/webpack-merge) for this.
+
+```typescript
+// webpack.config.ts
+import type { Configuration } from 'webpack';
+import { merge } from 'webpack-merge';
+import grafanaConfig from './.config/webpack/webpack.config';
+
+const config = async (env): Promise<Configuration> => {
+  const baseConfig = await grafanaConfig(env);
+
+  return merge(baseConfig, {
+    // Add custom config here...
+    output: {
+      asyncChunks: true,
+    },
+  });
+};
+
+export default config;
+```
+
+#### 3. Update the `package.json` to use the new Webpack config
+
+We need to update the `scripts` in the `package.json` to use the extended Webpack configuration.
+
+**Update for `build`:**
+
+```diff
+-"build": "webpack -c ./.config/webpack/webpack.config.ts --env production",
++"build": "webpack -c ./webpack.config.ts --env production",
+```
+
+**Update for `dev`:**
+
+```diff
+-"dev": "webpack -w -c ./.config/webpack/webpack.config.ts --env development",
++"dev": "webpack -w -c ./webpack.config.ts --env development",
+```
+
+### Configure grafana image to use when running docker
+
+By default `grafana-enterprise` will be used as the docker image for all docker related commands. If you want to override this behaviour simply alter the `docker-compose.yaml` by adding the following build arg `grafana_image`. 
+
+**Example:**
+
+```yaml
+version: '3.7'
+
+services:
+  grafana:
+    container_name: 'myorg-basic-app'
+    build:
+      context: ./.config
+      args:
+        grafana_version: ${GRAFANA_VERSION:-9.1.2}
+        grafana_image: ${GRAFANA_IMAGE:-grafana}
+```
+
+In this example we are assigning the environment variable `GRAFANA_IMAGE` to the build arg `grafana_image` with a default value of `grafana`. This will give you the possibility to set the value while running the docker-compose commands which might be convenient in some scenarios.
+
+---
diff --git a/grafana_plugins/svg_plugin/.config/jest-setup.js b/grafana_plugins/svg_plugin/.config/jest-setup.js
new file mode 100644
index 000000000..575b354fe
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/jest-setup.js
@@ -0,0 +1,25 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in
+ * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-jest-config
+ */
+
+import '@testing-library/jest-dom';
+
+// https://jestjs.io/docs/manual-mocks#mocking-methods-which-are-not-implemented-in-jsdom
+Object.defineProperty(global, 'matchMedia', {
+  writable: true,
+  value: jest.fn().mockImplementation((query) => ({
+    matches: false,
+    media: query,
+    onchange: null,
+    addListener: jest.fn(), // deprecated
+    removeListener: jest.fn(), // deprecated
+    addEventListener: jest.fn(),
+    removeEventListener: jest.fn(),
+    dispatchEvent: jest.fn(),
+  })),
+});
+
+HTMLCanvasElement.prototype.getContext = () => {};
diff --git a/grafana_plugins/svg_plugin/.config/jest.config.js b/grafana_plugins/svg_plugin/.config/jest.config.js
new file mode 100644
index 000000000..3cb011e08
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/jest.config.js
@@ -0,0 +1,43 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in
+ * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-jest-config
+ */
+
+const path = require('path');
+const { grafanaESModules, nodeModulesToTransform } = require('./jest/utils');
+
+module.exports = {
+  moduleNameMapper: {
+    '\\.(css|scss|sass)$': 'identity-obj-proxy',
+    'react-inlinesvg': path.resolve(__dirname, 'jest', 'mocks', 'react-inlinesvg.tsx'),
+  },
+  modulePaths: ['<rootDir>/src'],
+  setupFilesAfterEnv: ['<rootDir>/jest-setup.js'],
+  testEnvironment: 'jest-environment-jsdom',
+  testMatch: [
+    '<rootDir>/src/**/__tests__/**/*.{js,jsx,ts,tsx}',
+    '<rootDir>/src/**/*.{spec,test,jest}.{js,jsx,ts,tsx}',
+    '<rootDir>/src/**/*.{spec,test,jest}.{js,jsx,ts,tsx}',
+  ],
+  transform: {
+    '^.+\\.(t|j)sx?$': [
+      '@swc/jest',
+      {
+        sourceMaps: 'inline',
+        jsc: {
+          parser: {
+            syntax: 'typescript',
+            tsx: true,
+            decorators: false,
+            dynamicImport: true,
+          },
+        },
+      },
+    ],
+  },
+  // Jest will throw `Cannot use import statement outside module` if it tries to load an
+  // ES module without it being transformed first. ./.config/README.md#esm-errors-with-jest
+  transformIgnorePatterns: [nodeModulesToTransform(grafanaESModules)],
+};
diff --git a/grafana_plugins/svg_plugin/.config/jest/mocks/react-inlinesvg.tsx b/grafana_plugins/svg_plugin/.config/jest/mocks/react-inlinesvg.tsx
new file mode 100644
index 000000000..d540f3aab
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/jest/mocks/react-inlinesvg.tsx
@@ -0,0 +1,25 @@
+// Due to the grafana/ui Icon component making fetch requests to
+// `/public/img/icon/<icon_name>.svg` we need to mock react-inlinesvg to prevent
+// the failed fetch requests from displaying errors in console.
+
+import React from 'react';
+
+type Callback = (...args: any[]) => void;
+
+export interface StorageItem {
+  content: string;
+  queue: Callback[];
+  status: string;
+}
+
+export const cacheStore: { [key: string]: StorageItem } = Object.create(null);
+
+const SVG_FILE_NAME_REGEX = /(.+)\/(.+)\.svg$/;
+
+const InlineSVG = ({ src }: { src: string }) => {
+  // testId will be the file name without extension (e.g. `public/img/icons/angle-double-down.svg` -> `angle-double-down`)
+  const testId = src.replace(SVG_FILE_NAME_REGEX, '$2');
+  return <svg xmlns="http://www.w3.org/2000/svg" data-testid={testId} viewBox="0 0 24 24" />;
+};
+
+export default InlineSVG;
diff --git a/grafana_plugins/svg_plugin/.config/jest/utils.js b/grafana_plugins/svg_plugin/.config/jest/utils.js
new file mode 100644
index 000000000..1c1088497
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/jest/utils.js
@@ -0,0 +1,31 @@
+/*
+ * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️
+ *
+ * In order to extend the configuration follow the steps in .config/README.md
+ */
+
+/*
+ * This utility function is useful in combination with jest `transformIgnorePatterns` config
+ * to transform specific packages (e.g. ES modules) in a project's node_modules folder.
+ */
+const nodeModulesToTransform = (moduleNames) => `node_modules\/(?!(${moduleNames.join('|')})\/)`;
+
+// Array of known nested grafana package dependencies that only bundle an ESM version
+const grafanaESModules = [
+  '.pnpm', // Support using pnpm symlinked packages
+  '@grafana/schema',
+  'd3',
+  'd3-color',
+  'd3-force',
+  'd3-interpolate',
+  'd3-scale-chromatic',
+  'ol',
+  'react-colorful',
+  'rxjs',
+  'uuid',
+];
+
+module.exports = {
+  nodeModulesToTransform,
+  grafanaESModules,
+};
diff --git a/grafana_plugins/svg_plugin/.config/types/custom.d.ts b/grafana_plugins/svg_plugin/.config/types/custom.d.ts
new file mode 100644
index 000000000..64e6eaa6f
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/types/custom.d.ts
@@ -0,0 +1,37 @@
+// Image declarations
+declare module '*.gif' {
+  const src: string;
+  export default src;
+}
+
+declare module '*.jpg' {
+  const src: string;
+  export default src;
+}
+
+declare module '*.jpeg' {
+  const src: string;
+  export default src;
+}
+
+declare module '*.png' {
+  const src: string;
+  export default src;
+}
+
+declare module '*.webp' {
+  const src: string;
+  export default src;
+}
+
+declare module '*.svg' {
+  const content: string;
+  export default content;
+}
+
+// Font declarations
+declare module '*.woff';
+declare module '*.woff2';
+declare module '*.eot';
+declare module '*.ttf';
+declare module '*.otf';
diff --git a/grafana_plugins/svg_plugin/.config/webpack/constants.ts b/grafana_plugins/svg_plugin/.config/webpack/constants.ts
new file mode 100644
index 000000000..071e4fd34
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/webpack/constants.ts
@@ -0,0 +1,2 @@
+export const SOURCE_DIR = 'src';
+export const DIST_DIR = 'dist';
diff --git a/grafana_plugins/svg_plugin/.config/webpack/utils.ts b/grafana_plugins/svg_plugin/.config/webpack/utils.ts
new file mode 100644
index 000000000..c2691e693
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.config/webpack/utils.ts
@@ -0,0 +1,40 @@
+import fs from 'fs';
+import path from 'path';
+import util from 'util';
+import { glob } from 'glob';
+import { SOURCE_DIR } from './constants';
+
+export function getPackageJson() {
+  return require(path.resolve(process.cwd(), 'package.json'));
+}
+
+export function getPluginJson() {
+  return require(path.resolve(process.cwd(), `${SOURCE_DIR}/plugin.json`));
+}
+
+export function hasReadme() {
+  return fs.existsSync(path.resolve(process.cwd(), SOURCE_DIR, 'README.md'));
+}
+
+// Support bundling nested plugins by finding all plugin.json files in src directory
+// then checking for a sibling module.[jt]sx? file.
+export async function getEntries(): Promise<Record<string, string>> {
+  const pluginsJson = await glob('**/src/**/plugin.json', { absolute: true });
+
+  const plugins = await Promise.all(pluginsJson.map((pluginJson) => {
+      const folder = path.dirname(pluginJson);
+      return glob(`${folder}/module.{ts,tsx,js,jsx}`, { absolute: true });
+    })
+  );
+
+  return plugins.reduce((result, modules) => {
+    return modules.reduce((result, module) => {
+      const pluginPath = path.dirname(module);
+      const pluginName = path.relative(process.cwd(), pluginPath).replace(/src\/?/i, '');
+      const entryName = pluginName === '' ? 'module' : `${pluginName}/module`;
+
+      result[entryName] = module;
+      return result;
+    }, result);
+  }, {});
+}
diff --git a/grafana_plugins/svg_plugin/.eslintrc b/grafana_plugins/svg_plugin/.eslintrc
new file mode 100644
index 000000000..01e61dfcb
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.eslintrc
@@ -0,0 +1,3 @@
+{
+  "extends": "./.config/.eslintrc"
+}
\ No newline at end of file
diff --git a/grafana_plugins/svg_plugin/.nvmrc b/grafana_plugins/svg_plugin/.nvmrc
new file mode 100644
index 000000000..19c7bdba7
--- /dev/null
+++ b/grafana_plugins/svg_plugin/.nvmrc
@@ -0,0 +1 @@
+16
\ No newline at end of file
diff --git a/grafana_plugins/svg_plugin/docker-compose.yaml b/grafana_plugins/svg_plugin/docker-compose.yaml
new file mode 100644
index 000000000..96498f050
--- /dev/null
+++ b/grafana_plugins/svg_plugin/docker-compose.yaml
@@ -0,0 +1,15 @@
+version: '3.0'
+
+services:
+  grafana:
+    container_name: 'amd-custom-svg'
+    build:
+      context: ./.config
+      args:
+        grafana_image: ${GRAFANA_IMAGE:-grafana-enterprise}
+        grafana_version: ${GRAFANA_VERSION:-9.5.3}
+    ports:
+      - 3000:3000/tcp
+    volumes:
+      - ./dist:/var/lib/grafana/plugins/amd-custom-svg
+      - ./provisioning:/etc/grafana/provisioning
diff --git a/grafana_plugins/svg_plugin/jest-setup.js b/grafana_plugins/svg_plugin/jest-setup.js
new file mode 100644
index 000000000..35a700b73
--- /dev/null
+++ b/grafana_plugins/svg_plugin/jest-setup.js
@@ -0,0 +1,2 @@
+// Jest setup provided by Grafana scaffolding
+import './.config/jest-setup';
diff --git a/grafana_plugins/svg_plugin/jest.config.js b/grafana_plugins/svg_plugin/jest.config.js
new file mode 100644
index 000000000..79fd52a1b
--- /dev/null
+++ b/grafana_plugins/svg_plugin/jest.config.js
@@ -0,0 +1,8 @@
+// force timezone to UTC to allow tests to work regardless of local timezone
+// generally used by snapshots, but can affect specific tests
+process.env.TZ = 'UTC';
+
+module.exports = {
+  // Jest configuration provided by Grafana scaffolding
+  ...require('./.config/jest.config'),
+};

From db9cc25be15b310f914843ecf53b217dbfb7ccb2 Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Wed, 2 Aug 2023 12:50:34 -0500
Subject: [PATCH 55/81] Fix incorrect ordering of args in perfagg func

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/utils/perfagg.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py
index 1c21b1736..6ec4542a9 100755
--- a/src/utils/perfagg.py
+++ b/src/utils/perfagg.py
@@ -362,7 +362,7 @@ def update_pmc_bucket(
     return pmc_list
 
 
-def perfmon_coalesce(pmc_files_list, workload_dir, soc):
+def perfmon_coalesce(pmc_files_list, soc, workload_dir):
     workload_perfmon_dir = workload_dir + "/perfmon"
 
     # match pattern for pmc counters
@@ -424,7 +424,7 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
     return pmc_list
 
 
-def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None):
+def perfmon_emit(pmc_list, soc, workload_dir=None):
     # Calculate the minimum number of iteration to save the pmc counters
     # non-TCC counters
     pmc_cnt = [
@@ -446,7 +446,7 @@ def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None):
     niter = max(math.ceil(max(pmc_cnt)), math.ceil(tcc_cnt) + math.ceil(max(tcc2_cnt)))
 
     # Emit PMC counters into pmc config file
-    if save_file:
+    if workload_dir:
         workload_perfmon_dir = workload_dir + "/perfmon"
         fd = open(workload_perfmon_dir + "/pmc_perf.txt", "w")
     else:
@@ -480,14 +480,14 @@ def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None):
 
         # TCC aggregated counters
         line = line + " " + " ".join(tcc_counters)
-        if save_file:
+        if workload_dir:
             fd.write(line + "\n")
         else:
             b = line.split()
             b.remove("pmc:")
             batches.append(b)
 
-    if save_file:
+    if workload_dir:
         fd.write("\ngpu:\n")
         fd.write("range:\n")
         fd.write("kernel:\n")
@@ -533,8 +533,8 @@ def perfmon_filter(workload_dir, perfmon_dir, args):
         pmc_files_list = ref_pmc_files_list
 
     # Coalesce and writeback workload specific perfmon
-    pmc_list = perfmon_coalesce(pmc_files_list, workload_dir, soc)
-    perfmon_emit(pmc_list, workload_dir, soc)
+    pmc_list = perfmon_coalesce(pmc_files_list, soc, workload_dir)
+    perfmon_emit(pmc_list, soc, workload_dir)
 
 
 def pmc_filter(workload_dir, perfmon_dir, soc):
@@ -551,5 +551,5 @@ def pmc_filter(workload_dir, perfmon_dir, soc):
     pmc_files_list = ref_pmc_files_list
 
     # Coalesce and writeback workload specific perfmon
-    pmc_list = perfmon_coalesce(pmc_files_list, workload_dir, soc)
-    perfmon_emit(pmc_list, workload_dir, soc)
+    pmc_list = perfmon_coalesce(pmc_files_list, soc, workload_dir)
+    perfmon_emit(pmc_list, soc, workload_dir)

From b1a7a2684896956730f6482ad5c46739fcfbba8b Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Wed, 2 Aug 2023 12:51:51 -0500
Subject: [PATCH 56/81] Only load required archs into ArchConfig datastruct
 (#144)

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/omniperf_analyze/omniperf_analyze.py | 95 ++++++++++++++++--------
 1 file changed, 63 insertions(+), 32 deletions(-)

diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index 6415ed285..87fac064d 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -46,52 +46,45 @@
 from omniperf_analyze.utils import parser, file_io
 from omniperf_analyze.utils.gui_components.roofline import get_roofline
 
+archConfigs = {}
+
 
 ################################################
 # Helper Functions
 ################################################
-def generate_configs(config_dir, list_kernels, filter_metrics):
+def generate_config(arch, config_dir, list_kernels, filter_metrics):
     from omniperf_analyze.utils import schema
 
     single_panel_config = file_io.is_single_panel_config(Path(config_dir))
     global archConfigs
-    archConfigs = {}
-    for arch in file_io.supported_arch.keys():
-        ac = schema.ArchConfig()
-        if list_kernels:
-            ac.panel_configs = file_io.top_stats_build_in_config
-        else:
-            arch_panel_config = (
-                config_dir if single_panel_config else config_dir.joinpath(arch)
-            )
-            ac.panel_configs = file_io.load_panel_configs(arch_panel_config)
 
-        # TODO: filter_metrics should/might be one per arch
-        # print(ac)
+    ac = schema.ArchConfig()
+    if list_kernels:
+        ac.panel_configs = file_io.top_stats_build_in_config
+    else:
+        arch_panel_config = (
+            config_dir if single_panel_config else config_dir.joinpath(arch)
+        )
+        ac.panel_configs = file_io.load_panel_configs(arch_panel_config)
+
+    # TODO: filter_metrics should/might be one per arch
+    # print(ac)
 
-        parser.build_dfs(ac, filter_metrics)
+    parser.build_dfs(ac, filter_metrics)
 
-        archConfigs[arch] = ac
+    archConfigs[arch] = ac
 
     return archConfigs  # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin
 
 
-################################################
-# Core Functions
-################################################
-def initialize_run(args, normalization_filter=None):
+def list_metrics(args):
     import pandas as pd
-    from collections import OrderedDict
     from tabulate import tabulate
-    from omniperf_analyze.utils import schema
-
-    # Fixme: cur_root.parent.joinpath('soc_params')
-    soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
-    soc_spec_df = file_io.load_soc_params(soc_params_dir)
-
-    generate_configs(args.config_dir, args.list_kernels, args.filter_metrics)
 
     if args.list_metrics in file_io.supported_arch.keys():
+        arch = args.list_metrics
+        if arch not in archConfigs.keys():
+            generate_config(arch, args.config_dir, args.list_kernels, args.filter_metrics)
         print(
             tabulate(
                 pd.DataFrame.from_dict(
@@ -105,7 +98,12 @@ def initialize_run(args, normalization_filter=None):
             file=output,
         )
         sys.exit(0)
+    else:
+        print("Error: Unsupported arch")
+        sys.exit(-1)
 
+
+def load_options(args, normalization_filter):
     # Use original normalization or user input from GUI
     if not normalization_filter:
         for k, v in archConfigs.items():
@@ -114,10 +112,7 @@ def initialize_run(args, normalization_filter=None):
         for k, v in archConfigs.items():
             parser.build_metric_value_string(v.dfs, v.dfs_type, normalization_filter)
 
-    runs = OrderedDict()
-
     # err checking for multiple runs and multiple gpu_kernel filter
-    # TODO: move it to util
     if args.gpu_kernel and (len(args.path) != len(args.gpu_kernel)):
         if len(args.gpu_kernel) == 1:
             for i in range(len(args.path) - 1):
@@ -129,6 +124,31 @@ def initialize_run(args, normalization_filter=None):
             )
             sys.exit(-1)
 
+
+################################################
+# Core Functions
+################################################
+def initialize_run(args, normalization_filter=None):
+    from collections import OrderedDict
+    from omniperf_analyze.utils import schema
+
+    # Fixme: cur_root.parent.joinpath('soc_params')
+    soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
+    soc_spec_df = file_io.load_soc_params(soc_params_dir)
+
+    if args.list_metrics:
+        list_metrics(args)
+
+    # Load required configs
+    for d in args.path:
+        sys_info = file_io.load_sys_info(Path(d[0], "sysinfo.csv"))
+        arch = sys_info.iloc[0]["gpu_soc"]
+        generate_config(arch, args.config_dir, args.list_kernels, args.filter_metrics)
+
+    load_options(args, normalization_filter)
+
+    runs = OrderedDict()
+
     # Todo: warning single -d with multiple dirs
     for d in args.path:
         w = schema.Workload()
@@ -215,10 +235,21 @@ def run_cli(args, runs):
         parser.load_table_data(
             runs[d[0]], d[0], is_gui, args.g, args.verbose
         )  # create the loaded table
+    # TODO: show_* functions should always assume the newest architecture so the newest configs/figures are loaded; for now the first workload's arch is used.
     if args.list_kernels:
-        tty.show_kernels(args, runs, archConfigs["gfx90a"], output)
+        tty.show_kernels(
+            args,
+            runs,
+            archConfigs[runs[args.path[0][0]].sys_info.iloc[0]["gpu_soc"]],
+            output,
+        )
     else:
-        tty.show_all(args, runs, archConfigs["gfx90a"], output)
+        tty.show_all(
+            args,
+            runs,
+            archConfigs[runs[args.path[0][0]].sys_info.iloc[0]["gpu_soc"]],
+            output,
+        )
 
 
 def roofline_only(path_to_dir, dev_id, sort_type, mem_level, kernel_names, verbose):

From 428d07a379cbcee128e0552cbc5bb1f907a6029c Mon Sep 17 00:00:00 2001
From: Nicholas Curtis <nicurtis@amd.com>
Date: Fri, 28 Jul 2023 12:26:17 -0400
Subject: [PATCH 57/81] fix max BF16 flop rate on CDNA2

Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
---
 dashboards/Omniperf_v1.0.8_pub.json                           | 2 +-
 .../configs/gfx90a/0200_system-speed-of-light.yaml            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dashboards/Omniperf_v1.0.8_pub.json b/dashboards/Omniperf_v1.0.8_pub.json
index fbebb0d44..0412ba914 100644
--- a/dashboards/Omniperf_v1.0.8_pub.json
+++ b/dashboards/Omniperf_v1.0.8_pub.json
@@ -339,7 +339,7 @@
               "hide": false,
               "rawQuery": true,
               "refId": "A",
-              "target": "${Workload1}.pmc_perf.aggregate([\n    {\"$match\": {\n        \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n        \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n        \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n      }},\n      {\"$group\": {\n          \"_id\": null,\n          \"valu_flops_val\": {\n            \"$avg\": { \"$divide\": [ { \"$add\": [\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n            ]}\n            ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n          }},\n    \n          \"valu_intOps_val\": {\n            \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n          },\n    \n          \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", 
\"&BeginNs\"] }] }  },\n          \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n    \n    \n          \"salu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"valu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"mfma_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU, 4] }] }\n          },\n\n    \n          \"lds_bconf\": {\n            \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                {\"$divide\": [\n                                    \"&SQ_LDS_BANK_CONFLICT\",\n                                    {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                ]},\n                                null\n                              ]\n                    }     \n          },\n    \n   \n          \"lds_bw\":  {\n            \"$avg\": {\"$divide\":[ \n                        { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                            4,  \n                                            {\"$toInt\": \"$L2Banks\"}\n                                        ]}, \n                        {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                    ]}\n         },\n    \n          \"lds_bw_pop\":  {\n            \"$avg\":     {\"$divide\": [\n                            {\"$divide\":[ \n                                { \"$multiply\": [ { \"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                                    4,  \n                                                    {\"$toInt\": \"$L2Banks\"}\n                                                ]}, \n                                {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                            ]},            \n                             {\"$multiply\": [$sclk, $numCU, 0.00128]}\n                         ]}\n         },\n    \n          \"unpredthreads_val\": {\n              \"$avg\": {\n                  \"$cond\": [\n                      {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n                      { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n                      null\n                  ]\n              }\n          },\n    \n          \"ipcIssue_val\": {\n              \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n          },\n          \"eaWriteLat_val\": {\n            \"$avg\": {\n                \"$cond\": [\n                  {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n                  { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n                  null                        \n                ]\n            }\n            },\n            \"eaReadLat_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                      {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n                      { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n                      null                        \n                    ]\n                }\n            },\n            \"eaWriteBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ 
\"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            }, \n            \"eaReadBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            },\n            \"l2_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n                        null                          \n                    ]\n                }\n            },\n            \"vecl1_cacheHits_val\": {\n                \"$avg\":  {\n                    \"$cond\": [\n                        {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n                        { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]  }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n                        null\n                    ]\n                }\n            },\n            \"vecl1_BW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n            },\n            \"l1k_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", 
\"&SQC_DCACHE_MISSES\"]}] },\n                        null\n                    ]\n                }\n            },\n            \"l1i_hitRate_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n            },\n            \"l1i_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            },\n            \"l1k_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            }\n            \n        }},\n      {\"$set\": {\n        \"array\": [\n            {\n              \"Metric\": \"VALU FLOPs\",\n              \"Value\": \"&valu_flops_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"VALU IOPs\",\n              \"Value\": \"&valu_intOps_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (BF16)\",\n              \"Value\": \"&mfma_flops_bf16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ 
\"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F16)\",\n              \"Value\": \"&mfma_flops_f16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F32)\",\n              \"Value\": \"&mfma_flops_f32_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F64)\",\n              \"Value\": \"&mfma_flops_f64_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA IOPs (Int8)\",\n              \"Value\": \"&mfma_flops_i8_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              
}\n            },\n            {\n              \"Metric\": \"Active CUs\",\n              \"Value\": $numActiveCUs,\n              \"Unit\": \"CUs\",\n              \"peak\": $numCU,\n              \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs] }, $numCU]}\n            },\n    \n            {\n              \"Metric\": \"SALU Util\",\n              \"Value\": \"&salu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&salu_val\"\n            },\n            {\n              \"Metric\": \"VALU Util\",\n              \"Value\": \"&valu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&valu_val\"\n            },\n            {\n              \"Metric\": \"MFMA Util\",\n              \"Value\": \"&mfma_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&mfma_val\"\n            },\n            {\n              \"Metric\": \"VALU Active Threads/Wave\",\n              \"Value\": \"&unpredthreads_val\",\n              \"Unit\": \"Threads\",\n              \"peak\": 64,\n              \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n            },\n            {\n              \"Metric\": \"IPC - Issue\",\n              \"Value\": \"&ipcIssue_val\",\n              \"Unit\": \"Instr/cycle\",\n              \"peak\": 5,\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n            },\n            {\n              \"Metric\": \"LDS BW\",\n              \"Value\": \"&lds_bw\",\n              \"Unit\": \"GB/sec\",\n              \"peak\": {\"$multiply\": [$sclk, $numCU, 0.128]},\n              \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n            },\n            {\n              \"Metric\": \"LDS Bank Conflict\",\n              \"Value\": \"&lds_bconf\",\n           
   \"Unit\": \"Conflicts/access\",\n              \"peak\": \"32\",\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n            },\n            {\n                \"Metric\": \"Instr Cache Hit Rate\",\n                \"Value\": \"&l1i_hitRate_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n            }, \n            {\n                \"Metric\": \"Instr Cache BW\",\n                \"Value\": \"&l1i_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache Hit Rate\",\n                \"Value\": \"&l1k_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache BW\",\n                \"Value\": \"&l1k_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\":  { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n\n            {\n                \"Metric\": \"Vector L1D Cache Hit Rate\",\n                \"Value\": \"&vecl1_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n            },\n            {\n         
       \"Metric\": \"Vector L1D Cache BW\",\n                \"Value\": \"&vecl1_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] }\n            },\n            {\n                \"Metric\": \"L2 Cache Hit Rate\",\n                \"Value\": \"&l2_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Read BW\",\n                \"Value\": \"&eaReadBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Write BW\",\n                \"Value\": \"&eaWriteBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Read Latency\",\n                \"Value\": \"&eaReadLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Write Latency\",\n                \"Value\": \"&eaWriteLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            }\n\n          ]\n      }},\n      {\"$unwind\": {\n        \"path\": \"&array\"\n      }},\n      {\"$replaceRoot\": 
{\n        \"newRoot\": \"&array\"\n      }},\n      {\"$unionWith\": {\n            \"coll\": \"SQ_LEVEL_WAVES\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n                }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"waveOcc_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n                    },\n                    \"waveOcc_pop\": {\n                        \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU, $numCU] }] }\n                    }\n\n                }},\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Wave Occupancy\",\n                            \"Value\": \"&waveOcc_val\",\n                            \"Unit\": \"Wavefronts\",\n                            \"peak\": { \"$multiply\": [$maxWavesPerCU, $numCU] },\n                            \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n                        }\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }},\n        {\"$unionWith\": {\n            \"coll\": \"SQ_IFETCH_LEVEL\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n        
        }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"instrFetchBW_val\": {\n                        \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n                    },\n                    \"instrFetchLat_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n                    }\n                }},\n\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Instr Fetch BW\",\n                            \"Value\": \"&instrFetchBW_val\",\n                            \"Unit\": \"GB/s\",\n                            \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }, $numSQC]},\n                            \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC, { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }] }] }\n                        },\n                        {\n                            \"Metric\": \"Instr Fetch Latency\",\n                            \"Value\": \"&instrFetchLat_val\",\n                            \"Unit\": \"Cycles\",\n                            \"peak\": \"\",\n                            \"Percent of Peak - PoP\": \"\"\n\n                        }\n\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }}\n\n    ]);",
+              "target": "${Workload1}.pmc_perf.aggregate([\n    {\"$match\": {\n        \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n        \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n        \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n      }},\n      {\"$group\": {\n          \"_id\": null,\n          \"valu_flops_val\": {\n            \"$avg\": { \"$divide\": [ { \"$add\": [\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n            ]}\n            ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n          }},\n    \n          \"valu_intOps_val\": {\n            \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n          },\n    \n          \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", 
\"&BeginNs\"] }] }  },\n          \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n    \n    \n          \"salu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"valu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"mfma_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU, 4] }] }\n          },\n\n    \n          \"lds_bconf\": {\n            \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                {\"$divide\": [\n                                    \"&SQ_LDS_BANK_CONFLICT\",\n                                    {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                ]},\n                                null\n                              ]\n                    }     \n          },\n    \n   \n          \"lds_bw\":  {\n            \"$avg\": {\"$divide\":[ \n                        { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                            4,  \n                                            {\"$toInt\": \"$L2Banks\"}\n                                        ]}, \n                        {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                    ]}\n         },\n    \n          \"lds_bw_pop\":  {\n            \"$avg\":     {\"$divide\": [\n                            {\"$divide\":[ \n                                { \"$multiply\": [ { \"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                                    4,  \n                                                    {\"$toInt\": \"$L2Banks\"}\n                                                ]}, \n                                {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                            ]},            \n                             {\"$multiply\": [$sclk, $numCU, 0.00128]}\n                         ]}\n         },\n    \n          \"unpredthreads_val\": {\n              \"$avg\": {\n                  \"$cond\": [\n                      {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n                      { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n                      null\n                  ]\n              }\n          },\n    \n          \"ipcIssue_val\": {\n              \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n          },\n          \"eaWriteLat_val\": {\n            \"$avg\": {\n                \"$cond\": [\n                  {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n                  { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n                  null                        \n                ]\n            }\n            },\n            \"eaReadLat_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                      {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n                      { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n                      null                        \n                    ]\n                }\n            },\n            \"eaWriteBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ 
\"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            }, \n            \"eaReadBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            },\n            \"l2_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n                        null                          \n                    ]\n                }\n            },\n            \"vecl1_cacheHits_val\": {\n                \"$avg\":  {\n                    \"$cond\": [\n                        {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n                        { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]  }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n                        null\n                    ]\n                }\n            },\n            \"vecl1_BW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n            },\n            \"l1k_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", 
\"&SQC_DCACHE_MISSES\"]}] },\n                        null\n                    ]\n                }\n            },\n            \"l1i_hitRate_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n            },\n            \"l1i_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            },\n            \"l1k_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            }\n            \n        }},\n      {\"$set\": {\n        \"array\": [\n            {\n              \"Metric\": \"VALU FLOPs\",\n              \"Value\": \"&valu_flops_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"VALU IOPs\",\n              \"Value\": \"&valu_intOps_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (BF16)\",\n              \"Value\": \"&mfma_flops_bf16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ 
\"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F16)\",\n              \"Value\": \"&mfma_flops_f16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F32)\",\n              \"Value\": \"&mfma_flops_f32_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F64)\",\n              \"Value\": \"&mfma_flops_f64_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA IOPs (Int8)\",\n              \"Value\": \"&mfma_flops_i8_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n             
 }\n            },\n            {\n              \"Metric\": \"Active CUs\",\n              \"Value\": $numActiveCUs,\n              \"Unit\": \"CUs\",\n              \"peak\": $numCU,\n              \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs] }, $numCU]}\n            },\n    \n            {\n              \"Metric\": \"SALU Util\",\n              \"Value\": \"&salu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&salu_val\"\n            },\n            {\n              \"Metric\": \"VALU Util\",\n              \"Value\": \"&valu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&valu_val\"\n            },\n            {\n              \"Metric\": \"MFMA Util\",\n              \"Value\": \"&mfma_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&mfma_val\"\n            },\n            {\n              \"Metric\": \"VALU Active Threads/Wave\",\n              \"Value\": \"&unpredthreads_val\",\n              \"Unit\": \"Threads\",\n              \"peak\": 64,\n              \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n            },\n            {\n              \"Metric\": \"IPC - Issue\",\n              \"Value\": \"&ipcIssue_val\",\n              \"Unit\": \"Instr/cycle\",\n              \"peak\": 5,\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n            },\n            {\n              \"Metric\": \"LDS BW\",\n              \"Value\": \"&lds_bw\",\n              \"Unit\": \"GB/sec\",\n              \"peak\": {\"$multiply\": [$sclk, $numCU, 0.128]},\n              \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n            },\n            {\n              \"Metric\": \"LDS Bank Conflict\",\n              \"Value\": \"&lds_bconf\",\n          
    \"Unit\": \"Conflicts/access\",\n              \"peak\": \"32\",\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n            },\n            {\n                \"Metric\": \"Instr Cache Hit Rate\",\n                \"Value\": \"&l1i_hitRate_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n            }, \n            {\n                \"Metric\": \"Instr Cache BW\",\n                \"Value\": \"&l1i_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache Hit Rate\",\n                \"Value\": \"&l1k_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache BW\",\n                \"Value\": \"&l1k_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\":  { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n\n            {\n                \"Metric\": \"Vector L1D Cache Hit Rate\",\n                \"Value\": \"&vecl1_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n            },\n            {\n        
        \"Metric\": \"Vector L1D Cache BW\",\n                \"Value\": \"&vecl1_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] }\n            },\n            {\n                \"Metric\": \"L2 Cache Hit Rate\",\n                \"Value\": \"&l2_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Read BW\",\n                \"Value\": \"&eaReadBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Write BW\",\n                \"Value\": \"&eaWriteBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Read Latency\",\n                \"Value\": \"&eaReadLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Write Latency\",\n                \"Value\": \"&eaWriteLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            }\n\n          ]\n      }},\n      {\"$unwind\": {\n        \"path\": \"&array\"\n      }},\n      {\"$replaceRoot\": 
{\n        \"newRoot\": \"&array\"\n      }},\n      {\"$unionWith\": {\n            \"coll\": \"SQ_LEVEL_WAVES\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n                }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"waveOcc_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n                    },\n                    \"waveOcc_pop\": {\n                        \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU, $numCU] }] }\n                    }\n\n                }},\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Wave Occupancy\",\n                            \"Value\": \"&waveOcc_val\",\n                            \"Unit\": \"Wavefronts\",\n                            \"peak\": { \"$multiply\": [$maxWavesPerCU, $numCU] },\n                            \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n                        }\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }},\n        {\"$unionWith\": {\n            \"coll\": \"SQ_IFETCH_LEVEL\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n        
        }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"instrFetchBW_val\": {\n                        \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n                    },\n                    \"instrFetchLat_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n                    }\n                }},\n\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Instr Fetch BW\",\n                            \"Value\": \"&instrFetchBW_val\",\n                            \"Unit\": \"GB/s\",\n                            \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }, $numSQC]},\n                            \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC, { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }] }] }\n                        },\n                        {\n                            \"Metric\": \"Instr Fetch Latency\",\n                            \"Value\": \"&instrFetchLat_val\",\n                            \"Unit\": \"Cycles\",\n                            \"peak\": \"\",\n                            \"Percent of Peak - PoP\": \"\"\n\n                        }\n\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }}\n\n    ]);",
               "type": "table"
             },
             {
diff --git a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
index 20721ee1f..c197c0fc5 100644
--- a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
@@ -45,9 +45,9 @@ Panel Config:
           MFMA FLOPs (BF16):
             value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs)))
             unit: GFLOP
-            peak: ((($sclk * $numCU) * 512) / 1000)
+            peak: ((($sclk * $numCU) * 1024) / 1000)
             pop: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 512) / 1000))
+              / ((($sclk * $numCU) * 1024) / 1000))
             tips: 
           MFMA FLOPs (F16):
             value: AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs)))

From ef8ef2291b17a01457c5b4a92272b4c54ee3afb8 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 7 Aug 2023 12:31:27 -0500
Subject: [PATCH 58/81] Fix syntax error in yaml

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 .../configs/gfx908/1000_compute-unit-instruction-mix.yaml   | 4 ++--
 .../configs/gfx90a/1000_compute-unit-instruction-mix.yaml   | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
index 75eca82b8..1a05a8042 100644
--- a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
@@ -30,8 +30,8 @@ Panel Config:
             tips: 
           VMEM:
             avg: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
-            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)
-            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)
+            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
+            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
             unit: (instr + $normUnit)
             tips: 
           LDS:
diff --git a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
index 811ebca11..67d3ad729 100644
--- a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
@@ -29,9 +29,9 @@ Panel Config:
             unit: (instr + $normUnit)
             tips: 
           VMEM:
-            avg: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
-            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
-            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
+            avg: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)))
+            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)))
+            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)))
             unit: (instr + $normUnit)
             tips: 
           LDS:

From d6fb0183a486c9884c06d84b9734a92388811750 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 7 Aug 2023 12:41:15 -0500
Subject: [PATCH 59/81] Fixing yaml syntax errors

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 .../configs/gfx90a/1000_compute-unit-instruction-mix.yaml   | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
index 67d3ad729..811ebca11 100644
--- a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
@@ -29,9 +29,9 @@ Panel Config:
             unit: (instr + $normUnit)
             tips: 
           VMEM:
-            avg: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)))
-            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)))
-            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)))
+            avg: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
+            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
+            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
             unit: (instr + $normUnit)
             tips: 
           LDS:

From 11b5f65c1c5c7b45600749bb9507d6a01be476e7 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Tue, 8 Aug 2023 11:04:35 -0500
Subject: [PATCH 60/81] Fix yaml syntax errors

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 .../1000_compute-unit-instruction-mix.yaml    | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
index eeec25221..c9e8edd38 100644
--- a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
@@ -12,6 +12,7 @@ Panel Config:
         id: 1001
         title: Instruction Mix
         header:
+          metric: Metric
           avg: Avg
           min: Min
           max: Max
@@ -35,8 +36,8 @@ Panel Config:
             tips: 
           LDS:
             avg: AVG((SQ_INSTS_LDS / $denom))
-            min: MIN((SQ_INSTS_LDS / $denom))
-            max: MAX((SQ_INSTS_LDS / $denom))
+            min: AVG((SQ_INSTS_LDS / $denom))
+            max: AVG((SQ_INSTS_LDS / $denom))
             unit: (instr + $normUnit)
             tips: 
           VALU - MFMA:
@@ -47,26 +48,26 @@ Panel Config:
             tips: 
           SALU:
             avg: AVG((SQ_INSTS_SALU / $denom))
-            min: MIN((SQ_INSTS_SALU / $denom))
-            max: MAX((SQ_INSTS_SALU / $denom))
+            min: AVG((SQ_INSTS_SALU / $denom))
+            max: AVG((SQ_INSTS_SALU / $denom))
             unit: (instr + $normUnit)
             tips: 
           SMEM:
             avg: AVG((SQ_INSTS_SMEM / $denom))
-            min: MIN((SQ_INSTS_SMEM / $denom))
-            max: MAX((SQ_INSTS_SMEM / $denom))
+            min: AVG((SQ_INSTS_SMEM / $denom))
+            max: AVG((SQ_INSTS_SMEM / $denom))
             unit: (instr + $normUnit)
             tips: 
           Branch:
             avg: AVG((SQ_INSTS_BRANCH / $denom))
-            min: MIN((SQ_INSTS_BRANCH / $denom))
-            max: MAX((SQ_INSTS_BRANCH / $denom))
+            min: AVG((SQ_INSTS_BRANCH / $denom))
+            max: AVG((SQ_INSTS_BRANCH / $denom))
             unit: (instr + $normUnit)
             tips: 
           GDS:
             avg: AVG((SQ_INSTS_GDS / $denom))
-            min: MIN((SQ_INSTS_GDS / $denom))
-            max: MAX((SQ_INSTS_GDS / $denom))
+            min: AVG((SQ_INSTS_GDS / $denom))
+            max: AVG((SQ_INSTS_GDS / $denom))
             unit: (instr + $normUnit)
             tips: 
 

From 4e51c122d17eb450e48699d99bde712fd5bcfc32 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <josantos@amd.com>
Date: Tue, 8 Aug 2023 12:06:39 -0500
Subject: [PATCH 61/81] removing calls to kernel_name_shortener in mongo
 shortening now in profile

Signed-off-by: JoseSantosAMD <josantos@amd.com>
---
 src/omniperf               | 104 +++++++++++++++++
 src/utils/csv_converter.py | 229 ++++++++++++++++++-------------------
 2 files changed, 215 insertions(+), 118 deletions(-)

diff --git a/src/omniperf b/src/omniperf
index 3b38e419b..7db92004d 100755
--- a/src/omniperf
+++ b/src/omniperf
@@ -53,6 +53,8 @@ from common import (
 
 from common import getVersion
 
+cache = dict()
+
 ################################################
 # Helper Functions
 ################################################
@@ -260,7 +262,76 @@ def mongo_import(args, profileAndImport):
     csv_converter.convert_folder(connectionInfo, Extractionlvl)
     print("-- Complete! --")
 
+def kernel_name_shortener(df, cache, level):
+    if level >= 5:
+        return df
+
+    columnName = ""
+    if "KernelName" in df:
+        columnName = "KernelName"
+    if "Name" in df:
+        columnName = "Name"
+
+    if columnName == "KernelName" or columnName == "Name":
+        # loop through all indices
+        for index in df.index:
+            original_name = df.loc[index, columnName]
+            if original_name in cache:
+                continue
 
+            # cache miss, add the shortened name to the dictionary
+            new_name = ""
+            matches = ""
+
+            names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
+
+            # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
+            if names_and_args.search(original_name):
+                matches = names_and_args.findall(original_name)
+            else:
+                # Works for first case  '__amd_rocclr_fillBuffer.kd'
+                # remove .kd and then parse through original regex
+                first_case = re.compile(r"([^\s]+)(.kd)")
+                Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
+                interim_name = first_case.search(original_name).group(1)
+                matches = Mod_name_and_args.findall(interim_name)
+
+            current_level = 0
+            for name in matches:
+                ##can cause errors if a function name or argument is equal to 'clone'
+                if name[0] == "clone":
+                    continue
+                if len(name) == 3:
+                    if name[2] == "::":
+                        continue
+
+                if current_level < level:
+                    new_name += name[0]
+                # closing '>' is to be taken account by the while loop
+                if name[1].count(">") == 0:
+                    if current_level < level:
+                        if not (current_level == level - 1 and name[1].count("<") > 0):
+                            new_name += name[1]
+                    current_level += name[1].count("<")
+
+                curr_index = 0
+                # cases include '>'  '> >, ' have to go in depth here to not lose account of commas and current level
+                while name[1].count(">") > 0 and curr_index < len(name[1]):
+                    if current_level < level:
+                        new_name += name[1][curr_index:]
+                        current_level -= name[1][curr_index:].count(">")
+                        curr_index = len(name[1])
+                    elif name[1][curr_index] == (">"):
+                        current_level -= 1
+                    curr_index += 1
+
+            cache[original_name] = new_name
+            if new_name == None or new_name == "":
+                cache[original_name] = original_name
+
+        df[columnName] = df[columnName].map(cache)
+
+    return df
 ################################################
 # Roofline Helpers
 ################################################
@@ -768,6 +839,7 @@ def main():
     # PROFILE MODE
     ##############
     if args.mode == "profile":
+        Extractionlvl = 3 #args.extraction_level
         print("Resolving rocprof")
         resolve_rocprof()
         # Cannot access parent directories
@@ -805,11 +877,43 @@ def main():
             roof_setup(args, my_parser, VER)
             # Generate roofline
             roofline_only(args.path, args.device, args.sort, args.mem_level, args.kernel_names, args.verbose)
+            #demangle
+            for file in os.listdir(args.path):
+                if file.endswith(".csv"):
+                    try:
+                        fileName = file[0 : file.find(".")]
+                        # Only shorten KernelNames if instructed to
+                        if Extractionlvl < 5:
+                            t1 = pd.read_csv(
+                                os.listdir(args.path) + "/" + file,
+                                on_bad_lines="skip",
+                                engine="python",
+                            )
+
+                            t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
+                    except pd.errors.EmptyDataError:
+                        print("Skipping empty csv " + file)
 
         # Profile only
         else:
             print("\n-------------\nProfile only\n-------------\n")
             omniperf_profile(args, VER)
+            #demangle
+            for file in os.listdir(args.path):
+                if file.endswith(".csv"):
+                    try:
+                        fileName = file[0 : file.find(".")]
+                        # Only shorten KernelNames if instructed to
+                        if Extractionlvl < 5:
+                            t1 = pd.read_csv(
+                                os.listdir(args.path) + "/" + file,
+                                on_bad_lines="skip",
+                                engine="python",
+                            )
+
+                            t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
+                    except pd.errors.EmptyDataError:
+                        print("Skipping empty csv " + file)
 
     ##############
     # DATABASE MODE
diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index 4f28d5388..9709ab9d8 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -32,89 +32,82 @@
 from pymongo import MongoClient
 from tqdm import tqdm
 import shutil
-import subprocess
 
-cache = dict()
+# cache = dict()
 supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"}
 MAX_SERVER_SEL_DELAY = 5000  # 5 sec connection timeout
 
 
-def kernel_name_shortener(df, cache, level):
-    if level >= 5:
-        return df
-
-    columnName = ""
-    if "KernelName" in df:
-        columnName = "KernelName"
-    if "Name" in df:
-        columnName = "Name"
-
-    if columnName == "KernelName" or columnName == "Name":
-        # loop through all indices
-        for index in df.index:
-            original_name = df.loc[index, columnName]
-            if original_name in cache:
-                continue
-
-            cmd = ["llvm-cxxfilt", original_name]
-
-            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-            demangled_name, e = proc.communicate()
-            demangled_name = str(demangled_name, "UTF-8").strip()
-
-            # cache miss, add the shortened name to the dictionary
-            new_name = ""
-            matches = ""
-
-            names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
-
-            # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
-            if names_and_args.search(demangled_name):
-                matches = names_and_args.findall(demangled_name)
-            else:
-                # Works for first case  '__amd_rocclr_fillBuffer.kd'
-                cache[original_name] = new_name
-                if new_name == None or new_name == "":
-                    cache[original_name] = demangled_name
-                continue
-
-            current_level = 0
-            for name in matches:
-                ##can cause errors if a function name or argument is equal to 'clone'
-                if name[0] == "clone":
-                    continue
-                if len(name) == 3:
-                    if name[2] == "::":
-                        continue
-
-                if current_level < level:
-                    new_name += name[0]
-                # closing '>' is to be taken account by the while loop
-                if name[1].count(">") == 0:
-                    if current_level < level:
-                        if not (current_level == level - 1 and name[1].count("<") > 0):
-                            new_name += name[1]
-                    current_level += name[1].count("<")
-
-                curr_index = 0
-                # cases include '>'  '> >, ' have to go in depth here to not lose account of commas and current level
-                while name[1].count(">") > 0 and curr_index < len(name[1]):
-                    if current_level < level:
-                        new_name += name[1][curr_index:]
-                        current_level -= name[1][curr_index:].count(">")
-                        curr_index = len(name[1])
-                    elif name[1][curr_index] == (">"):
-                        current_level -= 1
-                    curr_index += 1
-
-            cache[original_name] = new_name
-            if new_name == None or new_name == "":
-                cache[original_name] = demangled_name
-
-        df[columnName] = df[columnName].map(cache)
-
-    return df
+# def kernel_name_shortener(df, cache, level):
+#     if level >= 5:
+#         return df
+
+#     columnName = ""
+#     if "KernelName" in df:
+#         columnName = "KernelName"
+#     if "Name" in df:
+#         columnName = "Name"
+
+#     if columnName == "KernelName" or columnName == "Name":
+#         # loop through all indices
+#         for index in df.index:
+#             original_name = df.loc[index, columnName]
+#             if original_name in cache:
+#                 continue
+
+#             # cache miss, add the shortened name to the dictionary
+#             new_name = ""
+#             matches = ""
+
+#             names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
+
+#             # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
+#             if names_and_args.search(original_name):
+#                 matches = names_and_args.findall(original_name)
+#             else:
+#                 # Works for first case  '__amd_rocclr_fillBuffer.kd'
+#                 # remove .kd and then parse through original regex
+#                 first_case = re.compile(r"([^\s]+)(.kd)")
+#                 Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
+#                 interim_name = first_case.search(original_name).group(1)
+#                 matches = Mod_name_and_args.findall(interim_name)
+
+#             current_level = 0
+#             for name in matches:
+#                 ##can cause errors if a function name or argument is equal to 'clone'
+#                 if name[0] == "clone":
+#                     continue
+#                 if len(name) == 3:
+#                     if name[2] == "::":
+#                         continue
+
+#                 if current_level < level:
+#                     new_name += name[0]
+#                 # closing '>' is to be taken account by the while loop
+#                 if name[1].count(">") == 0:
+#                     if current_level < level:
+#                         if not (current_level == level - 1 and name[1].count("<") > 0):
+#                             new_name += name[1]
+#                     current_level += name[1].count("<")
+
+#                 curr_index = 0
+#                 # cases include '>'  '> >, ' have to go in depth here to not lose account of commas and current level
+#                 while name[1].count(">") > 0 and curr_index < len(name[1]):
+#                     if current_level < level:
+#                         new_name += name[1][curr_index:]
+#                         current_level -= name[1][curr_index:].count(">")
+#                         curr_index = len(name[1])
+#                     elif name[1][curr_index] == (">"):
+#                         current_level -= 1
+#                     curr_index += 1
+
+#             cache[original_name] = new_name
+#             if new_name == None or new_name == "":
+#                 cache[original_name] = original_name
+
+#         df[columnName] = df[columnName].map(cache)
+
+#     return df
 
 
 # Verify target directory and setup connection
@@ -151,12 +144,12 @@ def parse(args, profileAndExport):
 
     db = "omniperf_" + str(args.team) + "_" + str(name) + "_" + soc
 
-    if Extractionlvl >= 5:
-        print("KernelName shortening disabled")
-    else:
-        print("KernelName shortening enabled")
+    # if Extractionlvl >= 5:
+    #     print("KernelName shortening disabled")
+    # else:
+    #     print("KernelName shortening enabled")
 
-    print("Kernel name verbose level:", Extractionlvl)
+    # print("Kernel name verbose level:", Extractionlvl)
 
     if args.password == "":
         try:
@@ -203,14 +196,14 @@ def convert_folder(connectionInfo, Extractionlvl):
         print("ERROR: Unable to connect to the server")
         sys.exit(1)
     # Set up directories
-    if Extractionlvl < 5:
-        newfilepath = connectionInfo["workload"]
-        newfilepath_h = newfilepath + "/renamedFiles/"
-        if not os.path.exists(newfilepath_h):
-            os.mkdir(newfilepath_h)
-        newfilepath = newfilepath_h + connectionInfo["db"] + "/"
-        if not os.path.exists(newfilepath):
-            os.mkdir(newfilepath)
+    # if Extractionlvl < 5:
+    #     newfilepath = connectionInfo["workload"]
+    #     newfilepath_h = newfilepath + "/renamedFiles/"
+    #     if not os.path.exists(newfilepath_h):
+    #         os.mkdir(newfilepath_h)
+    #     newfilepath = newfilepath_h + connectionInfo["db"] + "/"
+    #     if not os.path.exists(newfilepath):
+    #         os.mkdir(newfilepath)
     # Upload files
     i = 0
     file = "blank"
@@ -220,30 +213,30 @@ def convert_folder(connectionInfo, Extractionlvl):
             try:
                 fileName = file[0 : file.find(".")]
                 # Only shorten KernelNames if instructed to
-                if Extractionlvl < 5:
-                    t1 = pd.read_csv(
-                        connectionInfo["workload"] + "/" + file,
-                        on_bad_lines="skip",
-                        engine="python",
-                    )
-
-                    t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
-                    df_saved_file = t2.to_csv(newfilepath + file)
-
-                    cmd = (
-                        "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
-                    ).format(
-                        connectionInfo["username"],
-                        connectionInfo["password"],
-                        connectionInfo["host"],
-                        connectionInfo["port"],
-                        connectionInfo["db"],
-                        newfilepath + file,
-                        fileName,
-                    )
-                    os.system(cmd)
-                else:
-                    cmd = (
+                # if Extractionlvl < 5:
+                #     t1 = pd.read_csv(
+                #         connectionInfo["workload"] + "/" + file,
+                #         on_bad_lines="skip",
+                #         engine="python",
+                #     )
+
+                #     t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
+                #     df_saved_file = t2.to_csv(newfilepath + file)
+
+                #     cmd = (
+                #         "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
+                #     ).format(
+                #         connectionInfo["username"],
+                #         connectionInfo["password"],
+                #         connectionInfo["host"],
+                #         connectionInfo["port"],
+                #         connectionInfo["db"],
+                #         newfilepath + file,
+                #         fileName,
+                #     )
+                #     os.system(cmd)
+                # else:
+                cmd = (
                         "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
                     ).format(
                         connectionInfo["username"],
@@ -254,7 +247,7 @@ def convert_folder(connectionInfo, Extractionlvl):
                         connectionInfo["workload"] + "/" + file,
                         fileName,
                     )
-                    os.system(cmd)
+                os.system(cmd)
                 i += 1
             except pd.errors.EmptyDataError:
                 print("Skipping empty csv " + file)
@@ -265,7 +258,7 @@ def convert_folder(connectionInfo, Extractionlvl):
     newValue = {"name": connectionInfo["db"]}
     mycol.replace_one(value, newValue, upsert=True)
     # Remove tmp directory if we shortened KernelNames
-    if Extractionlvl < 5:
-        shutil.rmtree(newfilepath_h)
+    # if Extractionlvl < 5:
+    #     shutil.rmtree(newfilepath_h)
     print("{} collections added.".format(i))
     print("Workload name uploaded")

From 2948f73ae889df82bbc78db78ab0cfa77ef45e68 Mon Sep 17 00:00:00 2001
From: josantos <josantos@amd.com>
Date: Wed, 9 Aug 2023 10:46:57 -0500
Subject: [PATCH 62/81] keep converter in csv_converter.py

Signed-off-by: josantos <josantos@amd.com>
---
 src/omniperf               |  74 +------------------
 src/utils/csv_converter.py | 142 ++++++++++++++++++-------------------
 2 files changed, 73 insertions(+), 143 deletions(-)

diff --git a/src/omniperf b/src/omniperf
index 6232ed67b..acdfbc3bf 100755
--- a/src/omniperf
+++ b/src/omniperf
@@ -262,76 +262,6 @@ def mongo_import(args, profileAndImport):
     csv_converter.convert_folder(connectionInfo, Extractionlvl)
     print("-- Complete! --")
 
-def kernel_name_shortener(df, cache, level):
-    if level >= 5:
-        return df
-
-    columnName = ""
-    if "KernelName" in df:
-        columnName = "KernelName"
-    if "Name" in df:
-        columnName = "Name"
-
-    if columnName == "KernelName" or columnName == "Name":
-        # loop through all indices
-        for index in df.index:
-            original_name = df.loc[index, columnName]
-            if original_name in cache:
-                continue
-
-            # cache miss, add the shortened name to the dictionary
-            new_name = ""
-            matches = ""
-
-            names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
-
-            # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
-            if names_and_args.search(original_name):
-                matches = names_and_args.findall(original_name)
-            else:
-                # Works for first case  '__amd_rocclr_fillBuffer.kd'
-                # remove .kd and then parse through original regex
-                first_case = re.compile(r"([^\s]+)(.kd)")
-                Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
-                interim_name = first_case.search(original_name).group(1)
-                matches = Mod_name_and_args.findall(interim_name)
-
-            current_level = 0
-            for name in matches:
-                ##can cause errors if a function name or argument is equal to 'clone'
-                if name[0] == "clone":
-                    continue
-                if len(name) == 3:
-                    if name[2] == "::":
-                        continue
-
-                if current_level < level:
-                    new_name += name[0]
-                # closing '>' is to be taken account by the while loop
-                if name[1].count(">") == 0:
-                    if current_level < level:
-                        if not (current_level == level - 1 and name[1].count("<") > 0):
-                            new_name += name[1]
-                    current_level += name[1].count("<")
-
-                curr_index = 0
-                # cases include '>'  '> >, ' have to go in depth here to not lose account of commas and current level
-                while name[1].count(">") > 0 and curr_index < len(name[1]):
-                    if current_level < level:
-                        new_name += name[1][curr_index:]
-                        current_level -= name[1][curr_index:].count(">")
-                        curr_index = len(name[1])
-                    elif name[1][curr_index] == (">"):
-                        current_level -= 1
-                    curr_index += 1
-
-            cache[original_name] = new_name
-            if new_name == None or new_name == "":
-                cache[original_name] = original_name
-
-        df[columnName] = df[columnName].map(cache)
-
-    return df
 ################################################
 # Roofline Helpers
 ################################################
@@ -860,7 +790,7 @@ def main():
                                 engine="python",
                             )
 
-                            t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
+                            t2 = csv_converter.kernel_name_shortener(t1, cache, level=Extractionlvl)
                     except pd.errors.EmptyDataError:
                         print("Skipping empty csv " + file)
 
@@ -881,7 +811,7 @@ def main():
                                 engine="python",
                             )
 
-                            t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
+                            t2 = csv_converter.kernel_name_shortener(t1, cache, level=Extractionlvl)
                     except pd.errors.EmptyDataError:
                         print("Skipping empty csv " + file)
 
diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index 9709ab9d8..34f2e8261 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -33,81 +33,81 @@
 from tqdm import tqdm
 import shutil
 
-# cache = dict()
+cache = dict()
 supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"}
 MAX_SERVER_SEL_DELAY = 5000  # 5 sec connection timeout
 
 
-# def kernel_name_shortener(df, cache, level):
-#     if level >= 5:
-#         return df
-
-#     columnName = ""
-#     if "KernelName" in df:
-#         columnName = "KernelName"
-#     if "Name" in df:
-#         columnName = "Name"
-
-#     if columnName == "KernelName" or columnName == "Name":
-#         # loop through all indices
-#         for index in df.index:
-#             original_name = df.loc[index, columnName]
-#             if original_name in cache:
-#                 continue
-
-#             # cache miss, add the shortened name to the dictionary
-#             new_name = ""
-#             matches = ""
-
-#             names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
-
-#             # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
-#             if names_and_args.search(original_name):
-#                 matches = names_and_args.findall(original_name)
-#             else:
-#                 # Works for first case  '__amd_rocclr_fillBuffer.kd'
-#                 # remove .kd and then parse through original regex
-#                 first_case = re.compile(r"([^\s]+)(.kd)")
-#                 Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
-#                 interim_name = first_case.search(original_name).group(1)
-#                 matches = Mod_name_and_args.findall(interim_name)
-
-#             current_level = 0
-#             for name in matches:
-#                 ##can cause errors if a function name or argument is equal to 'clone'
-#                 if name[0] == "clone":
-#                     continue
-#                 if len(name) == 3:
-#                     if name[2] == "::":
-#                         continue
-
-#                 if current_level < level:
-#                     new_name += name[0]
-#                 # closing '>' is to be taken account by the while loop
-#                 if name[1].count(">") == 0:
-#                     if current_level < level:
-#                         if not (current_level == level - 1 and name[1].count("<") > 0):
-#                             new_name += name[1]
-#                     current_level += name[1].count("<")
-
-#                 curr_index = 0
-#                 # cases include '>'  '> >, ' have to go in depth here to not lose account of commas and current level
-#                 while name[1].count(">") > 0 and curr_index < len(name[1]):
-#                     if current_level < level:
-#                         new_name += name[1][curr_index:]
-#                         current_level -= name[1][curr_index:].count(">")
-#                         curr_index = len(name[1])
-#                     elif name[1][curr_index] == (">"):
-#                         current_level -= 1
-#                     curr_index += 1
-
-#             cache[original_name] = new_name
-#             if new_name == None or new_name == "":
-#                 cache[original_name] = original_name
-
-#         df[columnName] = df[columnName].map(cache)
-
-#     return df
+def kernel_name_shortener(df, cache, level):
+    if level >= 5:
+        return df
+
+    columnName = ""
+    if "KernelName" in df:
+        columnName = "KernelName"
+    if "Name" in df:
+        columnName = "Name"
+
+    if columnName == "KernelName" or columnName == "Name":
+        # loop through all indices
+        for index in df.index:
+            original_name = df.loc[index, columnName]
+            if original_name in cache:
+                continue
+
+            # cache miss, add the shortened name to the dictionary
+            new_name = ""
+            matches = ""
+
+            names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
+
+            # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
+            if names_and_args.search(original_name):
+                matches = names_and_args.findall(original_name)
+            else:
+                # Works for first case  '__amd_rocclr_fillBuffer.kd'
+                # remove .kd and then parse through original regex
+                first_case = re.compile(r"([^\s]+)(.kd)")
+                Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
+                interim_name = first_case.search(original_name).group(1)
+                matches = Mod_name_and_args.findall(interim_name)
+
+            current_level = 0
+            for name in matches:
+                ##can cause errors if a function name or argument is equal to 'clone'
+                if name[0] == "clone":
+                    continue
+                if len(name) == 3:
+                    if name[2] == "::":
+                        continue
+
+                if current_level < level:
+                    new_name += name[0]
+                # closing '>' is to be taken account by the while loop
+                if name[1].count(">") == 0:
+                    if current_level < level:
+                        if not (current_level == level - 1 and name[1].count("<") > 0):
+                            new_name += name[1]
+                    current_level += name[1].count("<")
+
+                curr_index = 0
+                # cases include '>'  '> >, ' have to go in depth here to not lose account of commas and current level
+                while name[1].count(">") > 0 and curr_index < len(name[1]):
+                    if current_level < level:
+                        new_name += name[1][curr_index:]
+                        current_level -= name[1][curr_index:].count(">")
+                        curr_index = len(name[1])
+                    elif name[1][curr_index] == (">"):
+                        current_level -= 1
+                    curr_index += 1
+
+            cache[original_name] = new_name
+            if new_name == None or new_name == "":
+                cache[original_name] = original_name
+
+        df[columnName] = df[columnName].map(cache)
+
+    return df
 
 
 # Verify target directory and setup connection

From 59d77f9d8127ac6734ab11736023dbeb169e12c2 Mon Sep 17 00:00:00 2001
From: josantos <josantos@amd.com>
Date: Thu, 10 Aug 2023 11:13:27 -0500
Subject: [PATCH 63/81] Names shortened/demangled after join_prof

-  Added kernelVerbose flag in profile_group
-  Added kernelVerbose flag in analyze_group
-  Analyze rewrites each csv in place with shortened/demangled kernel names
-  csv_converter uses llvm-cxxfilt

Signed-off-by: josantos <josantos@amd.com>
---
 src/omniperf                             | 67 +++++++++++++++---------
 src/omniperf_analyze/omniperf_analyze.py | 10 ++++
 src/parser.py                            | 18 +++++++
 src/utils/csv_converter.py               | 26 +++++----
 4 files changed, 86 insertions(+), 35 deletions(-)

diff --git a/src/omniperf b/src/omniperf
index acdfbc3bf..4195ca64a 100755
--- a/src/omniperf
+++ b/src/omniperf
@@ -446,6 +446,21 @@ def characterize_app(args, VER):
     # Manually join each pmc_perf*.csv output
     if args.use_rocscope == False:
         join_prof(workload_dir, args.join_type, log, args.verbose)
+        #demangle
+        for filename in os.listdir(workload_dir):
+                try:
+                    # fileName = file[0 : file.find(".")]
+                    # Only shorten KernelNames if instructed to
+                    if args.kernelVerbose < 5:
+                        t1 = pd.read_csv(
+                            os.path.join(workload_dir, filename),
+                            on_bad_lines="skip",
+                            engine="python",
+                        )
+                        t2 = csv_converter.kernel_name_shortener(t1, cache, level=args.kernelVerbose)
+                        t2.to_csv(fname, index=False)
+                except pd.errors.EmptyDataError:
+                    print("Skipping empty csv " + filename)
 
     # Close log
     log.close()
@@ -660,6 +675,7 @@ def omniperf_profile(args, VER):
                 run_rocscope(args, fname)
             else:
                 run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose)
+                
 
         # Update timestamps
         replace_timestamps(workload_dir, log)
@@ -667,6 +683,22 @@ def omniperf_profile(args, VER):
         # Manually join each pmc_perf*.csv output
         if args.use_rocscope == False:
             join_prof(workload_dir, args.join_type, log, args.verbose)
+            #demangle
+            for filename in os.listdir(workload_dir):
+                if filename.endswith('.csv'):
+                    try:
+                        # fileName = file[0 : file.find(".")]
+                        # Only shorten KernelNames if instructed to
+                        if args.kernelVerbose < 5:
+                            t1 = pd.read_csv(
+                                os.path.join(workload_dir, filename),
+                                on_bad_lines="skip",
+                                engine="python",
+                            )
+                            t2 = csv_converter.kernel_name_shortener(t1, cache, level=args.kernelVerbose)
+                            t2.to_csv(os.path.join(workload_dir, filename), index=False)
+                    except pd.errors.EmptyDataError:
+                        print("Skipping empty csv " + filename)
 
     # Generate sysinfo
     gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof)
@@ -739,7 +771,7 @@ def main():
     # PROFILE MODE
     ##############
     if args.mode == "profile":
-        Extractionlvl = 3 #args.extraction_level
+        Extractionlvl = args.kernelVerbose
         print("Resolving rocprof")
         resolve_rocprof()
         # Cannot access parent directories
@@ -777,43 +809,28 @@ def main():
             roof_setup(args, my_parser, VER)
             # Generate roofline
             roofline_only(args.path, args.device, args.sort, args.mem_level, args.kernel_names, args.verbose)
-            #demangle
-            for file in os.listdir(args.path):
-                if file.endswith(".csv"):
-                    try:
-                        fileName = file[0 : file.find(".")]
-                        # Only shorten KernelNames if instructed to
-                        if Extractionlvl < 5:
-                            t1 = pd.read_csv(
-                                os.listdir(args.path) + "/" + file,
-                                on_bad_lines="skip",
-                                engine="python",
-                            )
-
-                            t2 = csv_converter.kernel_name_shortener(t1, cache, level=Extractionlvl)
-                    except pd.errors.EmptyDataError:
-                        print("Skipping empty csv " + file)
 
         # Profile only
         else:
             print("\n-------------\nProfile only\n-------------\n")
             omniperf_profile(args, VER)
+            workload_dir = args.path
             #demangle
-            for file in os.listdir(args.path):
-                if file.endswith(".csv"):
+            for filename in os.listdir(workload_dir):
+                if filename.endswith('.csv'):
                     try:
-                        fileName = file[0 : file.find(".")]
+                        # fileName = file[0 : file.find(".")]
                         # Only shorten KernelNames if instructed to
-                        if Extractionlvl < 5:
+                        if args.kernelVerbose < 5:
                             t1 = pd.read_csv(
-                                os.listdir(args.path) + "/" + file,
+                                os.path.join(workload_dir, filename),
                                 on_bad_lines="skip",
                                 engine="python",
                             )
-
-                            t2 = csv_converter.kernel_name_shortener(t1, cache, level=Extractionlvl)
+                            t2 = csv_converter.kernel_name_shortener(t1, cache, level=args.kernelVerbose)
+                            t2.to_csv(os.path.join(workload_dir, filename), index=False)
                     except pd.errors.EmptyDataError:
-                        print("Skipping empty csv " + file)
+                        print("Skipping empty csv " + filename)
 
     ##############
     # DATABASE MODE
diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index 87fac064d..2a657d3d3 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -45,6 +45,8 @@
 from pathlib import Path
 from omniperf_analyze.utils import parser, file_io
 from omniperf_analyze.utils.gui_components.roofline import get_roofline
+from utils import csv_converter
+import pandas as pd
 
 archConfigs = {}
 
@@ -220,7 +222,15 @@ def run_cli(args, runs):
     # If we assume the panel layout for all archs are similar, it doesn't matter
     # which archConfig passed into show_all function.
     # After decide to how to manage kernels display patterns, we can revisit it.
+    cache =dict()
     for d in args.path:
+        #demangle
+        for filename in os.listdir(d[0]):
+            if filename.endswith('.csv'):
+                df = pd.read_csv(os.path.join(d[0],filename))
+                new_df = csv_converter.kernel_name_shortener(df, cache, args.kernelVerbose)
+                new_df.to_csv(os.path.join(d[0],filename), index=False)
+            
         file_io.create_df_kernel_top_stats(
             d[0],
             runs[d[0]].filter_gpu_ids,
diff --git a/src/parser.py b/src/parser.py
index 9d6dd8f6f..8de09542f 100644
--- a/src/parser.py
+++ b/src/parser.py
@@ -204,6 +204,15 @@ def parse(my_parser):
         nargs=argparse.REMAINDER,
         help="\t\t\tProvide command for profiling after double dash.",
     )
+    profile_group.add_argument(
+        "-f",
+        "--kernelVerbose",
+        required=False,
+        metavar="",
+        help="\t\t\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5)",
+        default=2,
+        type=int,
+    )
 
     ## Roofline Command Line Options
     roofline_group.add_argument(
@@ -514,3 +523,12 @@ def parse(my_parser):
         action="store_true",
         help="\t\tRandomly generate a port to launch GUI application.\n\t\tRegistered Ports range inclusive (1024-49151).",
     )
+    analyze_group.add_argument(
+        "-f",
+        "--kernelVerbose",
+        required=False,
+        metavar="",
+        help="\t\t\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5)",
+        default=2,
+        type=int,
+    )
diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index 34f2e8261..35bc5a649 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -25,6 +25,7 @@
 import argparse
 import collections
 import os
+import subprocess
 import sys
 import re
 import pandas as pd
@@ -33,11 +34,11 @@
 from tqdm import tqdm
 import shutil
 
+
 cache = dict()
 supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"}
 MAX_SERVER_SEL_DELAY = 5000  # 5 sec connection timeout
 
-
 def kernel_name_shortener(df, cache, level):
     if level >= 5:
         return df
@@ -55,6 +56,13 @@ def kernel_name_shortener(df, cache, level):
             if original_name in cache:
                 continue
 
+            cmd = ["llvm-cxxfilt", original_name]
+
+            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+            demangled_name, e = proc.communicate()
+            demangled_name = str(demangled_name, "UTF-8").strip()
+
             # cache miss, add the shortened name to the dictionary
             new_name = ""
             matches = ""
@@ -62,15 +70,14 @@ def kernel_name_shortener(df, cache, level):
             names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
 
             # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
-            if names_and_args.search(original_name):
-                matches = names_and_args.findall(original_name)
+            if names_and_args.search(demangled_name):
+                matches = names_and_args.findall(demangled_name)
             else:
                 # Works for first case  '__amd_rocclr_fillBuffer.kd'
-                # remove .kd and then parse through original regex
-                first_case = re.compile(r"([^\s]+)(.kd)")
-                Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
-                interim_name = first_case.search(original_name).group(1)
-                matches = Mod_name_and_args.findall(interim_name)
+                cache[original_name] = new_name
+                if new_name == None or new_name == "":
+                    cache[original_name] = demangled_name
+                continue
 
             current_level = 0
             for name in matches:
@@ -103,13 +110,12 @@ def kernel_name_shortener(df, cache, level):
 
             cache[original_name] = new_name
             if new_name == None or new_name == "":
-                cache[original_name] = original_name
+                cache[original_name] = demangled_name
 
         df[columnName] = df[columnName].map(cache)
 
     return df
 
-
 # Verify target directory and setup connection
 def parse(args, profileAndExport):
     host = args.host

From bbb254ef43c0e0f01e4c52d786501ac45ed4111d Mon Sep 17 00:00:00 2001
From: josantos <josantos@amd.com>
Date: Thu, 10 Aug 2023 11:16:50 -0500
Subject: [PATCH 64/81] reformatting

Signed-off-by: josantos <josantos@amd.com>
---
 src/omniperf_analyze/omniperf_analyze.py | 16 +++++++++-------
 src/utils/csv_converter.py               | 22 ++++++++++++----------
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index 2a657d3d3..123bdd15a 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -222,15 +222,17 @@ def run_cli(args, runs):
     # If we assume the panel layout for all archs are similar, it doesn't matter
     # which archConfig passed into show_all function.
     # After decide to how to manage kernels display patterns, we can revisit it.
-    cache =dict()
+    cache = dict()
     for d in args.path:
-        #demangle
+        # demangle
         for filename in os.listdir(d[0]):
-            if filename.endswith('.csv'):
-                df = pd.read_csv(os.path.join(d[0],filename))
-                new_df = csv_converter.kernel_name_shortener(df, cache, args.kernelVerbose)
-                new_df.to_csv(os.path.join(d[0],filename), index=False)
-            
+            if filename.endswith(".csv"):
+                df = pd.read_csv(os.path.join(d[0], filename))
+                new_df = csv_converter.kernel_name_shortener(
+                    df, cache, args.kernelVerbose
+                )
+                new_df.to_csv(os.path.join(d[0], filename), index=False)
+
         file_io.create_df_kernel_top_stats(
             d[0],
             runs[d[0]].filter_gpu_ids,
diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index 35bc5a649..48726bf03 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -39,6 +39,7 @@
 supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"}
 MAX_SERVER_SEL_DELAY = 5000  # 5 sec connection timeout
 
+
 def kernel_name_shortener(df, cache, level):
     if level >= 5:
         return df
@@ -116,6 +117,7 @@ def kernel_name_shortener(df, cache, level):
 
     return df
 
+
 # Verify target directory and setup connection
 def parse(args, profileAndExport):
     host = args.host
@@ -243,16 +245,16 @@ def convert_folder(connectionInfo, Extractionlvl):
                 #     os.system(cmd)
                 # else:
                 cmd = (
-                        "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
-                    ).format(
-                        connectionInfo["username"],
-                        connectionInfo["password"],
-                        connectionInfo["host"],
-                        connectionInfo["port"],
-                        connectionInfo["db"],
-                        connectionInfo["workload"] + "/" + file,
-                        fileName,
-                    )
+                    "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
+                ).format(
+                    connectionInfo["username"],
+                    connectionInfo["password"],
+                    connectionInfo["host"],
+                    connectionInfo["port"],
+                    connectionInfo["db"],
+                    connectionInfo["workload"] + "/" + file,
+                    fileName,
+                )
                 os.system(cmd)
                 i += 1
             except pd.errors.EmptyDataError:

From 73933572afd49829c2128a49323e3db28708b2aa Mon Sep 17 00:00:00 2001
From: josantos <josantos@amd.com>
Date: Thu, 10 Aug 2023 13:15:44 -0500
Subject: [PATCH 65/81] add llvm to github action container

Signed-off-by: josantos <josantos@amd.com>
---
 .github/workflows/ubuntu-focal.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml
index d72aaad70..f945b7f13 100644
--- a/.github/workflows/ubuntu-focal.yml
+++ b/.github/workflows/ubuntu-focal.yml
@@ -30,6 +30,7 @@ jobs:
           sudo apt-get install -y git
           sudo apt-get install -y python3-pip
           sudo apt-get install -y cmake
+          sudo apt-get install llvm-7
       - name: Checkout
         uses: actions/checkout@v3
       - name: Install Python prereqs

From 802308cd28fdc30355242804c360d5c1f53a29d2 Mon Sep 17 00:00:00 2001
From: josantos <josantos@amd.com>
Date: Thu, 10 Aug 2023 13:23:52 -0500
Subject: [PATCH 66/81] use llvm-cxxfilt in /opt/rocm

Signed-off-by: josantos <josantos@amd.com>
---
 src/utils/csv_converter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index 48726bf03..bffe56cc5 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -57,7 +57,7 @@ def kernel_name_shortener(df, cache, level):
             if original_name in cache:
                 continue
 
-            cmd = ["llvm-cxxfilt", original_name]
+            cmd = ["/opt/rocm/llvm/bin/llvm-cxxfilt", original_name]
 
             proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 

From 878948e848a9ccb74d24b37cfe43891310cd7915 Mon Sep 17 00:00:00 2001
From: josantos <josantos@amd.com>
Date: Thu, 10 Aug 2023 13:25:11 -0500
Subject: [PATCH 67/81] removing broken install in gh actions container

Signed-off-by: josantos <josantos@amd.com>
---
 .github/workflows/ubuntu-focal.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml
index f945b7f13..d72aaad70 100644
--- a/.github/workflows/ubuntu-focal.yml
+++ b/.github/workflows/ubuntu-focal.yml
@@ -30,7 +30,6 @@ jobs:
           sudo apt-get install -y git
           sudo apt-get install -y python3-pip
           sudo apt-get install -y cmake
-          sudo apt-get install llvm-7
       - name: Checkout
         uses: actions/checkout@v3
       - name: Install Python prereqs

From bfbd5bbb3a4d09afefd2449c5f331ff43f627ce6 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Mon, 7 Aug 2023 11:23:25 -0500
Subject: [PATCH 68/81] Adding min/max/avg breakdown across dispatches for
 instruction mix panels

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 .../1000_compute-unit-instruction-mix.yaml    | 20 +++++++++----------
 .../1000_compute-unit-instruction-mix.yaml    |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
index c9e8edd38..7e2a536d1 100644
--- a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
@@ -36,8 +36,8 @@ Panel Config:
             tips: 
           LDS:
             avg: AVG((SQ_INSTS_LDS / $denom))
-            min: AVG((SQ_INSTS_LDS / $denom))
-            max: AVG((SQ_INSTS_LDS / $denom))
+            min: MIN((SQ_INSTS_LDS / $denom))
+            max: MAX((SQ_INSTS_LDS / $denom))
             unit: (instr + $normUnit)
             tips: 
           VALU - MFMA:
@@ -48,26 +48,26 @@ Panel Config:
             tips: 
           SALU:
             avg: AVG((SQ_INSTS_SALU / $denom))
-            min: AVG((SQ_INSTS_SALU / $denom))
-            max: AVG((SQ_INSTS_SALU / $denom))
+            min: MIN((SQ_INSTS_SALU / $denom))
+            max: MAX((SQ_INSTS_SALU / $denom))
             unit: (instr + $normUnit)
             tips: 
           SMEM:
             avg: AVG((SQ_INSTS_SMEM / $denom))
-            min: AVG((SQ_INSTS_SMEM / $denom))
-            max: AVG((SQ_INSTS_SMEM / $denom))
+            min: MIN((SQ_INSTS_SMEM / $denom))
+            max: MAX((SQ_INSTS_SMEM / $denom))
             unit: (instr + $normUnit)
             tips: 
           Branch:
             avg: AVG((SQ_INSTS_BRANCH / $denom))
-            min: AVG((SQ_INSTS_BRANCH / $denom))
-            max: AVG((SQ_INSTS_BRANCH / $denom))
+            min: MIN((SQ_INSTS_BRANCH / $denom))
+            max: MAX((SQ_INSTS_BRANCH / $denom))
             unit: (instr + $normUnit)
             tips: 
           GDS:
             avg: AVG((SQ_INSTS_GDS / $denom))
-            min: AVG((SQ_INSTS_GDS / $denom))
-            max: AVG((SQ_INSTS_GDS / $denom))
+            min: MIN((SQ_INSTS_GDS / $denom))
+            max: MAX((SQ_INSTS_GDS / $denom))
             unit: (instr + $normUnit)
             tips: 
 
diff --git a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
index 1a05a8042..75eca82b8 100644
--- a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
@@ -30,8 +30,8 @@ Panel Config:
             tips: 
           VMEM:
             avg: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
-            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
-            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
+            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)
+            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)
             unit: (instr + $normUnit)
             tips: 
           LDS:

From 5dac9042e051a704489ec0819e4897852a080a88 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <87447437+JoseSantosAMD@users.noreply.github.com>
Date: Thu, 10 Aug 2023 15:15:42 -0500
Subject: [PATCH 69/81] Restore closing parens in gfx908 VMEM min/max formulas

Signed-off-by: JoseSantosAMD <87447437+JoseSantosAMD@users.noreply.github.com>
---
 .../configs/gfx908/1000_compute-unit-instruction-mix.yaml     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
index 75eca82b8..1a05a8042 100644
--- a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
@@ -30,8 +30,8 @@ Panel Config:
             tips: 
           VMEM:
             avg: AVG(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
-            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)
-            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom)
+            min: MIN(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
+            max: MAX(((SQ_INSTS_VMEM - SQ_INSTS_FLAT_LDS_ONLY) / $denom))
             unit: (instr + $normUnit)
             tips: 
           LDS:

From b82f033bf9556a8ba675cf0c898d116446901597 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Fri, 11 Aug 2023 15:46:46 -0500
Subject: [PATCH 70/81] Added min/max/avg to VALU Arithmetic Instr Mix

- GUI now selects the arch config from the workload's sys_info (gpu_soc) instead of hard-coding gfx90a

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 .../1000_compute-unit-instruction-mix.yaml    | 64 ++++++++++++++-----
 .../1000_compute-unit-instruction-mix.yaml    | 64 ++++++++++++++-----
 .../1000_compute-unit-instruction-mix.yaml    | 64 ++++++++++++++-----
 .../1100_compute-unit-compute-pipeline.yaml   | 56 +++-------------
 src/omniperf_analyze/omniperf_analyze.py      |  2 +-
 src/omniperf_analyze/utils/gui.py             | 10 +--
 6 files changed, 158 insertions(+), 102 deletions(-)

diff --git a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
index 7e2a536d1..679acc34d 100644
--- a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml
@@ -76,7 +76,9 @@ Panel Config:
         title: VALU Arithmetic Instr Mix
         header:
           metric: Metric
-          count: Count
+          avg: Avg
+          min: Min
+          max: Max
           unit: Unit
           tips: Tips
         style: 
@@ -84,63 +86,93 @@ Panel Config:
           label_txt: (# of instr + $normUnit)
         metric:
           INT-32:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           INT-64:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F16-ADD:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F16-Mult:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F16-FMA:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F16-Trans:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F32-ADD:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F32-Mult:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F32-FMA:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F32-Trans:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F64-ADD:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F64-Mult:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F64-FMA:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           F64-Trans:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
           Conversion:
-            count: None # No perf counter
+            avg: None # No HW module
+            min: None # No HW module
+            max: None # No HW module
             unit: (instr + $normUnit)
             tips:
 
diff --git a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
index 1a05a8042..13c27dd20 100644
--- a/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1000_compute-unit-instruction-mix.yaml
@@ -76,7 +76,9 @@ Panel Config:
         title: VALU Arithmetic Instr Mix
         header:
           metric: Metric
-          count: Count
+          avg: Avg
+          min: Min
+          max: Max
           unit: Unit
           tips: Tips
         style: 
@@ -84,63 +86,93 @@ Panel Config:
           label_txt: (# of instr + $normUnit)
         metric:
           INT-32:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           INT-64:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F16-ADD:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F16-Mult:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F16-FMA:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F16-Trans:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F32-ADD:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F32-Mult:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F32-FMA:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F32-Trans:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F64-ADD:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F64-Mult:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F64-FMA:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           F64-Trans:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
           Conversion:
-            count: None # No perf counter
+            avg: None # No perf counter
+            min: None # No perf counter
+            max: None # No perf counter
             unit: (instr + $normUnit)
             tips:
 
diff --git a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
index 811ebca11..8ffd87d2c 100644
--- a/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1000_compute-unit-instruction-mix.yaml
@@ -76,7 +76,9 @@ Panel Config:
         title: VALU Arithmetic Instr Mix
         header:
           metric: Metric
-          count: Count
+          avg: Avg
+          min: Min
+          max: Max
           unit: Unit
           tips: Tips
         style: 
@@ -84,63 +86,93 @@ Panel Config:
           label_txt: (# of instr + $normUnit)
         metric:
           INT32:
-            count: AVG((SQ_INSTS_VALU_INT32 / $denom))
+            avg: AVG((SQ_INSTS_VALU_INT32 / $denom))
+            min: MIN((SQ_INSTS_VALU_INT32 / $denom))
+            max: MAX((SQ_INSTS_VALU_INT32 / $denom))
             unit: (instr + $normUnit)
             tips: 
           INT64:
-            count: AVG((SQ_INSTS_VALU_INT64 / $denom))
+            avg: AVG((SQ_INSTS_VALU_INT64 / $denom))
+            min: MIN((SQ_INSTS_VALU_INT64 / $denom))
+            max: MAX((SQ_INSTS_VALU_INT64 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F16-ADD:
-            count: AVG((SQ_INSTS_VALU_ADD_F16 / $denom))
+            avg: AVG((SQ_INSTS_VALU_ADD_F16 / $denom))
+            min: MIN((SQ_INSTS_VALU_ADD_F16 / $denom))
+            max: MAX((SQ_INSTS_VALU_ADD_F16 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F16-MUL:
-            count: AVG((SQ_INSTS_VALU_MUL_F16 / $denom))
+            avg: AVG((SQ_INSTS_VALU_MUL_F16 / $denom))
+            min: MIN((SQ_INSTS_VALU_MUL_F16 / $denom))
+            max: MAX((SQ_INSTS_VALU_MUL_F16 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F16-FMA:
-            count: AVG((SQ_INSTS_VALU_FMA_F16 / $denom))
+            avg: AVG((SQ_INSTS_VALU_FMA_F16 / $denom))
+            min: MIN((SQ_INSTS_VALU_FMA_F16 / $denom))
+            max: MAX((SQ_INSTS_VALU_FMA_F16 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F16-Trans:
-            count: AVG((SQ_INSTS_VALU_TRANS_F16 / $denom))
+            avg: AVG((SQ_INSTS_VALU_TRANS_F16 / $denom))
+            min: MIN((SQ_INSTS_VALU_TRANS_F16 / $denom))
+            max: MAX((SQ_INSTS_VALU_TRANS_F16 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F32-ADD:
-            count: AVG((SQ_INSTS_VALU_ADD_F32 / $denom))
+            avg: AVG((SQ_INSTS_VALU_ADD_F32 / $denom))
+            min: MIN((SQ_INSTS_VALU_ADD_F32 / $denom))
+            max: MAX((SQ_INSTS_VALU_ADD_F32 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F32-MUL:
-            count: AVG((SQ_INSTS_VALU_MUL_F32 / $denom))
+            avg: AVG((SQ_INSTS_VALU_MUL_F32 / $denom))
+            min: MIN((SQ_INSTS_VALU_MUL_F32 / $denom))
+            max: MAX((SQ_INSTS_VALU_MUL_F32 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F32-FMA:
-            count: AVG((SQ_INSTS_VALU_FMA_F32 / $denom))
+            avg: AVG((SQ_INSTS_VALU_FMA_F32 / $denom))
+            min: MIN((SQ_INSTS_VALU_FMA_F32 / $denom))
+            max: MAX((SQ_INSTS_VALU_FMA_F32 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F32-Trans:
-            count: AVG((SQ_INSTS_VALU_TRANS_F32 / $denom))
+            avg: AVG((SQ_INSTS_VALU_TRANS_F32 / $denom))
+            min: MIN((SQ_INSTS_VALU_TRANS_F32 / $denom))
+            max: MAX((SQ_INSTS_VALU_TRANS_F32 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F64-ADD:
-            count: AVG((SQ_INSTS_VALU_ADD_F64 / $denom))
+            avg: AVG((SQ_INSTS_VALU_ADD_F64 / $denom))
+            min: MIN((SQ_INSTS_VALU_ADD_F64 / $denom))
+            max: MAX((SQ_INSTS_VALU_ADD_F64 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F64-MUL:
-            count: AVG((SQ_INSTS_VALU_MUL_F64 / $denom))
+            avg: AVG((SQ_INSTS_VALU_MUL_F64 / $denom))
+            min: MIN((SQ_INSTS_VALU_MUL_F64 / $denom))
+            max: MAX((SQ_INSTS_VALU_MUL_F64 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F64-FMA:
-            count: AVG((SQ_INSTS_VALU_FMA_F64 / $denom))
+            avg: AVG((SQ_INSTS_VALU_FMA_F64 / $denom))
+            min: MIN((SQ_INSTS_VALU_FMA_F64 / $denom))
+            max: MAX((SQ_INSTS_VALU_FMA_F64 / $denom))
             unit: (instr + $normUnit)
             tips: 
           F64-Trans:
-            count: AVG((SQ_INSTS_VALU_TRANS_F64 / $denom))
+            avg: AVG((SQ_INSTS_VALU_TRANS_F64 / $denom))
+            min: MIN((SQ_INSTS_VALU_TRANS_F64 / $denom))
+            max: MAX((SQ_INSTS_VALU_TRANS_F64 / $denom))
             unit: (instr + $normUnit)
             tips: 
           Conversion:
-            count: AVG((SQ_INSTS_VALU_CVT / $denom))
+            avg: AVG((SQ_INSTS_VALU_CVT / $denom))
+            min: MIN((SQ_INSTS_VALU_CVT / $denom))
+            max: MAX((SQ_INSTS_VALU_CVT / $denom))
             unit: (instr + $normUnit)
             tips: 
 
diff --git a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
index 301217fde..770087569 100644
--- a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
@@ -13,30 +13,11 @@ Panel Config:
         title: Speed-of-Light
         header:
           metric: Metric
-          avg: Avg
-          min: Min
-          max: Max
+          value: Value
           tips: Tips
-        style:
-          type: simple_bar
-          range_color: [1, 100]
-          label_txt: (%)
-          xrange: [0, 110]
         metric:
           valu_flops_pop:
-            avg: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
-              + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
-              + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
-              + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
-              + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
-              * $numCU) * 64) * 2) / 1000))
-            min: ((100 * MIN(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
-              + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
-              + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
-              + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
-              + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
-              * $numCU) * 64) * 2) / 1000))
-            max: ((100 * MAX(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
+            value: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)
               + SQ_INSTS_VALU_TRANS_F16) + (2 * SQ_INSTS_VALU_FMA_F16))) + (64 * (((SQ_INSTS_VALU_ADD_F32
               + SQ_INSTS_VALU_MUL_F32) + SQ_INSTS_VALU_TRANS_F32) + (2 * SQ_INSTS_VALU_FMA_F32))))
               + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
@@ -44,43 +25,23 @@ Panel Config:
               * $numCU) * 64) * 2) / 1000))
             tips: 
           mfma_flops_bf16_pop:
-            avg: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 512) / 1000))
-            min: ((100 * MIN(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 512) / 1000))
-            max: ((100 * MAX(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
+            value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 512) / 1000))
             tips: 
           mfma_flops_f16_pop:
-            avg: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 1024) / 1000))
-            min: ((100 * MIN(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 1024) / 1000))
-            max: ((100 * MAX(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
+            value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 1024) / 1000))
             tips: 
           mfma_flops_f32_pop:
-            avg: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 256) / 1000))
-            min: ((100 * MIN(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 256) / 1000))
-            max: ((100 * MAX(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
+            value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 256) / 1000))
             tips: 
           mfma_flops_f64_pop:
-            avg: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 256) / 1000))
-            min: ((100 * MIN(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 256) / 1000))
-            max: ((100 * MAX(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
+            value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 256) / 1000))
             tips: 
           mfma_flops_i8_pop:
-            avg: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 1024) / 1000))
-            min: ((100 * MIN(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
-              / ((($sclk * $numCU) * 1024) / 1000))
-            max: ((100 * MAX(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
+            value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 1024) / 1000))
             tips: 
 
@@ -226,5 +187,4 @@ Panel Config:
             max: MAX((((64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
               + (SQ_INSTS_VALU_FMA_F64 * 2))) + (512 * SQ_INSTS_VALU_MFMA_MOPS_F64)) / $denom))
             unit: (OPs  + $normUnit)
-            tips: 
-
+            tips: 
\ No newline at end of file
diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index 87fac064d..8d94b37d0 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -195,7 +195,7 @@ def run_gui(args, runs):
         gui.build_layout(
             app,
             runs,
-            archConfigs["gfx90a"],
+            archConfigs[runs[args.path[0][0]].sys_info.iloc[0]["gpu_soc"]],
             input_filters,
             args.decimal,
             args.time_unit,
diff --git a/src/omniperf_analyze/utils/gui.py b/src/omniperf_analyze/utils/gui.py
index ca05bd3ea..4128bf5ad 100644
--- a/src/omniperf_analyze/utils/gui.py
+++ b/src/omniperf_analyze/utils/gui.py
@@ -154,17 +154,17 @@ def build_bar_chart(display_df, table_config, norm_filt):
 
     # Insr Mix bar chart
     if table_config["id"] in barchart_elements["instr_mix"]:
-        display_df["Count"] = [
-            x.astype(int) if x != "" else int(0) for x in display_df["Count"]
+        display_df["Avg"] = [
+            x.astype(int) if x != "" else int(0) for x in display_df["Avg"]
         ]
         df_unit = display_df["Unit"][0]
         d_figs.append(
             px.bar(
                 display_df,
-                x="Count",
+                x="Avg",
                 y="Metric",
-                color="Count",
-                labels={"Count": "# of {}".format(df_unit.lower())},
+                color="Avg",
+                labels={"Avg": "# of {}".format(df_unit.lower())},
                 height=400,
                 orientation="h",
             )

From e25d61dcb75ce6733bb806a32a7c1dfb091c4784 Mon Sep 17 00:00:00 2001
From: josantos <josantos@amd.com>
Date: Mon, 14 Aug 2023 13:15:15 -0500
Subject: [PATCH 71/81] add hidden min/max to instr mix panels

Signed-off-by: josantos <josantos@amd.com>
---
 dashboards/Omniperf_v1.0.8_pub.json | 689 ++++++++++++++--------------
 1 file changed, 351 insertions(+), 338 deletions(-)

diff --git a/dashboards/Omniperf_v1.0.8_pub.json b/dashboards/Omniperf_v1.0.8_pub.json
index 0412ba914..72df97ef3 100644
--- a/dashboards/Omniperf_v1.0.8_pub.json
+++ b/dashboards/Omniperf_v1.0.8_pub.json
@@ -24,8 +24,8 @@
   "editable": true,
   "fiscalYearStartMonth": 0,
   "graphTooltip": 0,
-  "id": 43,
-  "iteration": 1684189070197,
+  "id": 2,
+  "iteration": 1692036465764,
   "links": [],
   "liveNow": false,
   "panels": [
@@ -58,7 +58,8 @@
                 "mode": "absolute",
                 "steps": [
                   {
-                    "color": "green"
+                    "color": "green",
+                    "value": null
                   },
                   {
                     "color": "red",
@@ -339,7 +340,7 @@
               "hide": false,
               "rawQuery": true,
               "refId": "A",
-              "target": "${Workload1}.pmc_perf.aggregate([\n    {\"$match\": {\n        \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n        \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n        \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n      }},\n      {\"$group\": {\n          \"_id\": null,\n          \"valu_flops_val\": {\n            \"$avg\": { \"$divide\": [ { \"$add\": [\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n            ]}\n            ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n          }},\n    \n          \"valu_intOps_val\": {\n            \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n          },\n    \n          \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", 
\"&BeginNs\"] }] }  },\n          \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n    \n    \n          \"salu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"valu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"mfma_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU, 4] }] }\n          },\n\n    \n          \"lds_bconf\": {\n            \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                {\"$divide\": [\n                                    \"&SQ_LDS_BANK_CONFLICT\",\n                                    {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                ]},\n                                null\n                              ]\n                    }     \n          },\n    \n   \n          \"lds_bw\":  {\n            \"$avg\": {\"$divide\":[ \n                        { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                            4,  \n                                            {\"$toInt\": \"$L2Banks\"}\n                                        ]}, \n                        {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                    ]}\n         },\n    \n          \"lds_bw_pop\":  {\n            \"$avg\":     {\"$divide\": [\n                            {\"$divide\":[ \n                                { \"$multiply\": [ { \"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                                    4,  \n                                                    {\"$toInt\": \"$L2Banks\"}\n                                                ]}, \n                                {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                            ]},            \n                             {\"$multiply\": [$sclk, $numCU, 0.00128]}\n                         ]}\n         },\n    \n          \"unpredthreads_val\": {\n              \"$avg\": {\n                  \"$cond\": [\n                      {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n                      { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n                      null\n                  ]\n              }\n          },\n    \n          \"ipcIssue_val\": {\n              \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n          },\n          \"eaWriteLat_val\": {\n            \"$avg\": {\n                \"$cond\": [\n                  {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n                  { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n                  null                        \n                ]\n            }\n            },\n            \"eaReadLat_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                      {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n                      { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n                      null                        \n                    ]\n                }\n            },\n            \"eaWriteBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ 
\"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            }, \n            \"eaReadBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            },\n            \"l2_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n                        null                          \n                    ]\n                }\n            },\n            \"vecl1_cacheHits_val\": {\n                \"$avg\":  {\n                    \"$cond\": [\n                        {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n                        { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]  }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n                        null\n                    ]\n                }\n            },\n            \"vecl1_BW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n            },\n            \"l1k_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", 
\"&SQC_DCACHE_MISSES\"]}] },\n                        null\n                    ]\n                }\n            },\n            \"l1i_hitRate_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n            },\n            \"l1i_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            },\n            \"l1k_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            }\n            \n        }},\n      {\"$set\": {\n        \"array\": [\n            {\n              \"Metric\": \"VALU FLOPs\",\n              \"Value\": \"&valu_flops_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"VALU IOPs\",\n              \"Value\": \"&valu_intOps_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (BF16)\",\n              \"Value\": \"&mfma_flops_bf16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ 
\"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F16)\",\n              \"Value\": \"&mfma_flops_f16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F32)\",\n              \"Value\": \"&mfma_flops_f32_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F64)\",\n              \"Value\": \"&mfma_flops_f64_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA IOPs (Int8)\",\n              \"Value\": \"&mfma_flops_i8_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n             
 }\n            },\n            {\n              \"Metric\": \"Active CUs\",\n              \"Value\": $numActiveCUs,\n              \"Unit\": \"CUs\",\n              \"peak\": $numCU,\n              \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs] }, $numCU]}\n            },\n    \n            {\n              \"Metric\": \"SALU Util\",\n              \"Value\": \"&salu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&salu_val\"\n            },\n            {\n              \"Metric\": \"VALU Util\",\n              \"Value\": \"&valu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&valu_val\"\n            },\n            {\n              \"Metric\": \"MFMA Util\",\n              \"Value\": \"&mfma_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&mfma_val\"\n            },\n            {\n              \"Metric\": \"VALU Active Threads/Wave\",\n              \"Value\": \"&unpredthreads_val\",\n              \"Unit\": \"Threads\",\n              \"peak\": 64,\n              \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n            },\n            {\n              \"Metric\": \"IPC - Issue\",\n              \"Value\": \"&ipcIssue_val\",\n              \"Unit\": \"Instr/cycle\",\n              \"peak\": 5,\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n            },\n            {\n              \"Metric\": \"LDS BW\",\n              \"Value\": \"&lds_bw\",\n              \"Unit\": \"GB/sec\",\n              \"peak\": {\"$multiply\": [$sclk, $numCU, 0.128]},\n              \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n            },\n            {\n              \"Metric\": \"LDS Bank Conflict\",\n              \"Value\": \"&lds_bconf\",\n          
    \"Unit\": \"Conflicts/access\",\n              \"peak\": \"32\",\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n            },\n            {\n                \"Metric\": \"Instr Cache Hit Rate\",\n                \"Value\": \"&l1i_hitRate_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n            }, \n            {\n                \"Metric\": \"Instr Cache BW\",\n                \"Value\": \"&l1i_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache Hit Rate\",\n                \"Value\": \"&l1k_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache BW\",\n                \"Value\": \"&l1k_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\":  { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n\n            {\n                \"Metric\": \"Vector L1D Cache Hit Rate\",\n                \"Value\": \"&vecl1_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n            },\n            {\n        
        \"Metric\": \"Vector L1D Cache BW\",\n                \"Value\": \"&vecl1_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] }\n            },\n            {\n                \"Metric\": \"L2 Cache Hit Rate\",\n                \"Value\": \"&l2_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Read BW\",\n                \"Value\": \"&eaReadBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Write BW\",\n                \"Value\": \"&eaWriteBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Read Latency\",\n                \"Value\": \"&eaReadLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Write Latency\",\n                \"Value\": \"&eaWriteLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            }\n\n          ]\n      }},\n      {\"$unwind\": {\n        \"path\": \"&array\"\n      }},\n      {\"$replaceRoot\": 
{\n        \"newRoot\": \"&array\"\n      }},\n      {\"$unionWith\": {\n            \"coll\": \"SQ_LEVEL_WAVES\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n                }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"waveOcc_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n                    },\n                    \"waveOcc_pop\": {\n                        \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU, $numCU] }] }\n                    }\n\n                }},\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Wave Occupancy\",\n                            \"Value\": \"&waveOcc_val\",\n                            \"Unit\": \"Wavefronts\",\n                            \"peak\": { \"$multiply\": [$maxWavesPerCU, $numCU] },\n                            \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n                        }\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }},\n        {\"$unionWith\": {\n            \"coll\": \"SQ_IFETCH_LEVEL\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n        
        }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"instrFetchBW_val\": {\n                        \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n                    },\n                    \"instrFetchLat_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n                    }\n                }},\n\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Instr Fetch BW\",\n                            \"Value\": \"&instrFetchBW_val\",\n                            \"Unit\": \"GB/s\",\n                            \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }, $numSQC]},\n                            \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC, { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }] }] }\n                        },\n                        {\n                            \"Metric\": \"Instr Fetch Latency\",\n                            \"Value\": \"&instrFetchLat_val\",\n                            \"Unit\": \"Cycles\",\n                            \"peak\": \"\",\n                            \"Percent of Peak - PoP\": \"\"\n\n                        }\n\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }}\n\n    ]);",
+              "target": "${Workload1}.pmc_perf.aggregate([\n    {\"$match\": {\n        \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n        \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n        \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n      }},\n      {\"$group\": {\n          \"_id\": null,\n          \"valu_flops_val\": {\n            \"$avg\": { \"$divide\": [ { \"$add\": [\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F16\", \"&SQ_INSTS_VALU_MUL_F16\", \"&SQ_INSTS_VALU_TRANS_F16\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F16\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F32\", \"&SQ_INSTS_VALU_MUL_F32\", \"&SQ_INSTS_VALU_TRANS_F32\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F32\"] }] }] },\n              { \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_ADD_F64\", \"&SQ_INSTS_VALU_MUL_F64\", \"&SQ_INSTS_VALU_TRANS_F64\", { \"$multiply\": [2, \"&SQ_INSTS_VALU_FMA_F64\"] }] }] }\n            ]}\n            ,{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }]\n          }},\n    \n          \"valu_intOps_val\": {\n            \"$avg\": { \"$divide\": [{ \"$multiply\": [64, { \"$add\": [\"&SQ_INSTS_VALU_INT32\", \"&SQ_INSTS_VALU_INT64\"] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n          },\n    \n          \"mfma_flops_f16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_bf16_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_BF16\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f32_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F32\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n          \"mfma_flops_f64_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_F64\", 512] }, { \"$subtract\": [\"&EndNs\", 
\"&BeginNs\"] }] }  },\n          \"mfma_flops_i8_val\": { \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&SQ_INSTS_VALU_MFMA_MOPS_I8\", 512] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }  },\n    \n    \n          \"salu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_SCA\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"valu_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_ACTIVE_INST_VALU\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU] }] }\n          },\n    \n          \"mfma_val\": {\n              \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQ_VALU_MFMA_BUSY_CYCLES\"] }, { \"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU, 4] }] }\n          },\n\n    \n          \"lds_bconf\": {\n            \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                {\"$divide\": [\n                                    \"&SQ_LDS_BANK_CONFLICT\",\n                                    {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                ]},\n                                null\n                              ]\n                    }     \n          },\n    \n   \n          \"lds_bw\":  {\n            \"$avg\": {\"$divide\":[ \n                        { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                            4,  \n                                            {\"$toInt\": \"$L2Banks\"}\n                                        ]}, \n                        {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                    ]}\n         },\n    \n          \"lds_bw_pop\":  {\n            \"$avg\":     {\"$divide\": [\n                            {\"$divide\":[ \n                                { \"$multiply\": [ { \"$subtract\": 
[\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                                    4,  \n                                                    {\"$toInt\": \"$L2Banks\"}\n                                                ]}, \n                                {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                            ]},            \n                             {\"$multiply\": [$sclk, $numCU, 0.00128]}\n                         ]}\n         },\n    \n          \"unpredthreads_val\": {\n              \"$avg\": {\n                  \"$cond\": [\n                      {\"$ne\":[\"&SQ_ACTIVE_INST_VALU\" , 0]},\n                      { \"$divide\": [\"&SQ_THREAD_CYCLES_VALU\", \"&SQ_ACTIVE_INST_VALU\"] },\n                      null\n                  ]\n              }\n          },\n    \n          \"ipcIssue_val\": {\n              \"$avg\": { \"$divide\": [{ \"$add\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_VMEM\", \"&SQ_INSTS_SALU\", \"&SQ_INSTS_SMEM\", \"&SQ_INSTS_GDS\", \"&SQ_INSTS_BRANCH\", \"&SQ_INSTS_SENDMSG\", \"&SQ_INSTS_VSKIPPED\"] }, \"&SQ_ACTIVE_INST_ANY\"] }\n          },\n          \"eaWriteLat_val\": {\n            \"$avg\": {\n                \"$cond\": [\n                  {\"$ne\": [\"&TCC_EA_WRREQ_sum\" , 0]},\n                  { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n                  null                        \n                ]\n            }\n            },\n            \"eaReadLat_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                      {\"$ne\": [\"&TCC_EA_RDREQ_sum\" , 0]},\n                      { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n                      null                        \n                    ]\n                }\n            },\n            \"eaWriteBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_WRREQ_64B_sum\", 64] }, { \"$multiply\": [{ 
\"$subtract\": [\"&TCC_EA_WRREQ_sum\", \"&TCC_EA_WRREQ_64B_sum\"] }, 32] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            }, \n            \"eaReadBW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$add\": [{ \"$multiply\": [\"&TCC_EA_RDREQ_32B_sum\", 32] }, { \"$multiply\": [{ \"$subtract\": [\"&TCC_EA_RDREQ_sum\", \"&TCC_EA_RDREQ_32B_sum\"] }, 64] }] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }\n            },\n            \"l2_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n                        null                          \n                    ]\n                }\n            },\n            \"vecl1_cacheHits_val\": {\n                \"$avg\":  {\n                    \"$cond\": [\n                        {\"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0]},\n                        { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }]  }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n                        null\n                    ]\n                }\n            },\n            \"vecl1_BW_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 64 ] }, { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }\n            },\n            \"l1k_cacheHits_val\": {\n                \"$avg\": {\n                    \"$cond\": [\n                        {\"$ne\": [{ \"$add\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_MISSES\"]} , 0]},\n                        { \"$divide\": [{ \"$multiply\": [100, \"&SQC_DCACHE_HITS\"] }, { \"$add\": [\"&SQC_DCACHE_HITS\", 
\"&SQC_DCACHE_MISSES\"]}] },\n                        null\n                    ]\n                }\n            },\n            \"l1i_hitRate_val\": {\n                \"$avg\": { \"$divide\": [{ \"$multiply\": [100, \"&SQC_ICACHE_HITS\"] }, { \"$add\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_MISSES\"]}] }\n            },\n            \"l1i_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_ICACHE_REQ\",{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            },\n            \"l1k_BW_val\": {\n                \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQC_DCACHE_REQ\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] } ] }, 64] }\n            }\n            \n        }},\n      {\"$set\": {\n        \"array\": [\n            {\n              \"Metric\": \"VALU FLOPs\",\n              \"Value\": \"&valu_flops_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_flops_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"VALU IOPs\",\n              \"Value\": \"&valu_intOps_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&valu_intOps_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 64, 2 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (BF16)\",\n              \"Value\": \"&mfma_flops_bf16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ 
\"$multiply\": [100, \"&mfma_flops_bf16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 512 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F16)\",\n              \"Value\": \"&mfma_flops_f16_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f16_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F32)\",\n              \"Value\": \"&mfma_flops_f32_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f32_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA FLOPs (F64)\",\n              \"Value\": \"&mfma_flops_f64_val\",\n              \"Unit\": \"GFLOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_f64_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 256 ]}, 1000] }]\n              }\n            },\n            {\n              \"Metric\": \"MFMA IOPs (Int8)\",\n              \"Value\": \"&mfma_flops_i8_val\",\n              \"Unit\": \"GIOP\",\n              \"peak\": { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] },\n              \"Percent of Peak - PoP\": {\n                \"$divide\": [{ \"$multiply\": [100, \"&mfma_flops_i8_val\"] }, { \"$divide\": [{ \"$multiply\": [$sclk, $numCU, 1024 ]}, 1000] }]\n              
}\n            },\n            {\n              \"Metric\": \"Active CUs\",\n              \"Value\": $numActiveCUs,\n              \"Unit\": \"CUs\",\n              \"peak\": $numCU,\n              \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, $numActiveCUs] }, $numCU]}\n            },\n    \n            {\n              \"Metric\": \"SALU Util\",\n              \"Value\": \"&salu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&salu_val\"\n            },\n            {\n              \"Metric\": \"VALU Util\",\n              \"Value\": \"&valu_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&valu_val\"\n            },\n            {\n              \"Metric\": \"MFMA Util\",\n              \"Value\": \"&mfma_val\",\n              \"Unit\": \"pct\",\n              \"peak\": 100,\n              \"Percent of Peak - PoP\": \"&mfma_val\"\n            },\n            {\n              \"Metric\": \"VALU Active Threads/Wave\",\n              \"Value\": \"&unpredthreads_val\",\n              \"Unit\": \"Threads\",\n              \"peak\": 64,\n              \"Percent of Peak - PoP\": { \"$multiply\": [\"&unpredthreads_val\", 1.5625]}\n            },\n            {\n              \"Metric\": \"IPC - Issue\",\n              \"Value\": \"&ipcIssue_val\",\n              \"Unit\": \"Instr/cycle\",\n              \"peak\": 5,\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&ipcIssue_val\"] }, 5] }\n            },\n            {\n              \"Metric\": \"LDS BW\",\n              \"Value\": \"&lds_bw\",\n              \"Unit\": \"GB/sec\",\n              \"peak\": {\"$multiply\": [$sclk, $numCU, 0.128]},\n              \"Percent of Peak - PoP\": \"&lds_bw_pop\"\n            },\n            {\n              \"Metric\": \"LDS Bank Conflict\",\n              \"Value\": \"&lds_bconf\",\n           
   \"Unit\": \"Conflicts/access\",\n              \"peak\": \"32\",\n              \"Percent of Peak - PoP\": {\"$divide\": [{ \"$multiply\": [100, \"&lds_bconf\"] }, 32] }\n            },\n            {\n                \"Metric\": \"Instr Cache Hit Rate\",\n                \"Value\": \"&l1i_hitRate_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1i_hitRate_val\"\n            }, \n            {\n                \"Metric\": \"Instr Cache BW\",\n                \"Value\": \"&l1i_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&l1i_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache Hit Rate\",\n                \"Value\": \"&l1k_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l1k_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"Scalar L1D Cache BW\",\n                \"Value\": \"&l1k_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]},\n                \"Percent of Peak - PoP\":  { \"$divide\": [{ \"$multiply\": [100, \"&l1k_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numSQC]}] }\n            },\n\n            {\n                \"Metric\": \"Vector L1D Cache Hit Rate\",\n                \"Value\": \"&vecl1_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&vecl1_cacheHits_val\"\n            },\n            {\n         
       \"Metric\": \"Vector L1D Cache BW\",\n                \"Value\": \"&vecl1_BW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]},\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&vecl1_BW_val\"] }, { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 64] }, $numCU]}] }\n            },\n            {\n                \"Metric\": \"L2 Cache Hit Rate\",\n                \"Value\": \"&l2_cacheHits_val\",\n                \"Unit\": \"pct\",\n                \"peak\": \"100\",\n                \"Percent of Peak - PoP\": \"&l2_cacheHits_val\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Read BW\",\n                \"Value\": \"&eaReadBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaReadBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Write BW\",\n                \"Value\": \"&eaWriteBW_val\",\n                \"Unit\": \"GB/s\",\n                \"peak\": \"$hbmBW\",\n                \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&eaWriteBW_val\"] }, $hbmBW] }\n            },\n            {\n                \"Metric\": \"L2-Fabric Read Latency\",\n                \"Value\": \"&eaReadLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            },\n            {\n                \"Metric\": \"L2-Fabric Write Latency\",\n                \"Value\": \"&eaWriteLat_val\",\n                \"Unit\": \"Cycles\",\n                \"peak\": \"\",\n                \"Percent of Peak - PoP\": \"\"\n            }\n\n          ]\n      }},\n      {\"$unwind\": {\n        \"path\": \"&array\"\n      }},\n      {\"$replaceRoot\": 
{\n        \"newRoot\": \"&array\"\n      }},\n      {\"$unionWith\": {\n            \"coll\": \"SQ_LEVEL_WAVES\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n                }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"waveOcc_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] }\n                    },\n                    \"waveOcc_pop\": {\n                        \"$avg\": { \"$divide\": [{ \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&GRBM_GUI_ACTIVE\"] },{ \"$multiply\": [$maxWavesPerCU, $numCU] }] }\n                    }\n\n                }},\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Wave Occupancy\",\n                            \"Value\": \"&waveOcc_val\",\n                            \"Unit\": \"Wavefronts\",\n                            \"peak\": { \"$multiply\": [$maxWavesPerCU, $numCU] },\n                            \"Percent of Peak - PoP\": { \"$multiply\": [100, \"&waveOcc_pop\"] }\n                        }\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }},\n        {\"$unionWith\": {\n            \"coll\": \"SQ_IFETCH_LEVEL\",\n            \"pipeline\": [\n                {\"$match\": {\n                    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n        
        }},\n    \n                {\"$group\": {\n                    \"_id\": null,\n                    \"instrFetchBW_val\": {\n                        \"$avg\": { \"$multiply\": [{ \"$divide\": [\"&SQ_IFETCH\", { \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }] }, 32] }\n                    },\n                    \"instrFetchLat_val\": {\n                        \"$avg\": { \"$divide\": [\"&SQ_ACCUM_PREV_HIRES\", \"&SQ_IFETCH\"] }\n                    }\n                }},\n\n                {\"$set\": {\n                    \"array\": [\n                        {\n                            \"Metric\": \"Instr Fetch BW\",\n                            \"Value\": \"&instrFetchBW_val\",\n                            \"Unit\": \"GB/s\",\n                            \"peak\": { \"$multiply\": [ { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }, $numSQC]},\n                            \"Percent of Peak - PoP\": { \"$divide\": [{ \"$multiply\": [100, \"&instrFetchBW_val\"]}, { \"$multiply\": [$numSQC, { \"$multiply\": [{ \"$divide\": [$sclk, 1000] }, 32] }] }] }\n                        },\n                        {\n                            \"Metric\": \"Instr Fetch Latency\",\n                            \"Value\": \"&instrFetchLat_val\",\n                            \"Unit\": \"Cycles\",\n                            \"peak\": \"\",\n                            \"Percent of Peak - PoP\": \"\"\n\n                        }\n\n                    ]\n                }},\n                {\"$unwind\": {\n                    \"path\": \"&array\"\n                }},\n                {\"$replaceRoot\": {\n                    \"newRoot\": \"&array\"\n                }}\n\n            ]\n        }}\n\n    ]);",
               "type": "table"
             },
             {
@@ -1539,8 +1540,7 @@
                 "mode": "absolute",
                 "steps": [
                   {
-                    "color": "green",
-                    "value": null
+                    "color": "green"
                   },
                   {
                     "color": "red",
@@ -3699,7 +3699,8 @@
                 "mode": "absolute",
                 "steps": [
                   {
-                    "color": "green"
+                    "color": "green",
+                    "value": null
                   },
                   {
                     "color": "red",
@@ -3906,7 +3907,8 @@
                 "mode": "absolute",
                 "steps": [
                   {
-                    "color": "green"
+                    "color": "green",
+                    "value": null
                   },
                   {
                     "color": "red",
@@ -4592,7 +4594,6 @@
       "id": 209,
       "panels": [
         {
-          "datasource": {},
           "fieldConfig": {
             "defaults": {
               "color": {
@@ -4604,7 +4605,8 @@
                 "mode": "absolute",
                 "steps": [
                   {
-                    "color": "green"
+                    "color": "green",
+                    "value": null
                   },
                   {
                     "color": "red",
@@ -4649,7 +4651,7 @@
               },
               "rawQuery": true,
               "refId": "A",
-              "target": "$Workload1.pmc_perf.aggregate([\n    {\"$match\": {\n          \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n          \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n          \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n        }},\n        {\"$addFields\": {\n            \"denom\": {\n                   \"$switch\" : {\n                      \"branches\": [\n                         {\n                              \"case\":  { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n                              \"then\":  \"&SQ_WAVES\"\n                         },\n                         {\n                              \"case\":  { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n                              \"then\":  \"&GRBM_GUI_ACTIVE\"\n                         },\n                         {\n                              \"case\":  { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n                              \"then\":  {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n                         }\n                      ],\n                     \"default\": 1\n                   }         \n            } \n        }},\n        {\"$group\": {\n            \"_id\": null,\n            \"valu\": {\n              \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n            },\n            \"mfma\": {\n                \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n            },\n            \"vmem\": {\n              \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n            },\n            \"lds\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n            },\n            \"salu\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n            },\n            \"smem\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", 
\"&denom\" ] }\n            },\n            \"branch\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n            },\n            \"gds\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n            }\n          }\n        },\n        {\"$set\": {\n          \"array\": [\n              {\n                \"metric\": \"VALU - Vector\",\n                \"count\": \"&valu\"\n              },\n              {\n                \"metric\": \"VMEM\",\n                \"count\": \"&vmem\"\n              },\n              {\n                \"metric\": \"LDS\",\n                \"count\": \"&lds\"\n              },\n              {\n                \"metric\": \"VALU - MFMA\",\n                \"count\": \"&mfma\"\n              },\n              {\n                \"metric\": \"SALU\",\n                \"count\": \"&salu\"\n              },\n              {\n                \"metric\": \"SMEM\",\n                \"count\": \"&smem\"\n              },\n              {\n                \"metric\": \"Branch\",\n                \"count\": \"&branch\"\n              },\n              {\n                \"metric\": \"GDS\",\n                \"count\": \"&gds\"\n              }\n            ]\n        }},\n        {\"$unwind\": {\n          \"path\": \"&array\"\n        }},\n        {\"$replaceRoot\": {\n          \"newRoot\": \"&array\"\n        }}\n      ]);",
+              "target": "$Workload1.pmc_perf.aggregate([\n    {\"$match\": {\n          \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n          \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n          \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n        }},\n        {\"$addFields\": {\n            \"denom\": {\n                   \"$switch\" : {\n                      \"branches\": [\n                         {\n                              \"case\":  { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n                              \"then\":  \"&SQ_WAVES\"\n                         },\n                         {\n                              \"case\":  { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n                              \"then\":  \"&GRBM_GUI_ACTIVE\"\n                         },\n                         {\n                              \"case\":  { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n                              \"then\":  {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n                         }\n                      ],\n                     \"default\": 1\n                   }         \n            } \n        }},\n        {\"$group\": {\n            \"_id\": null,\n            \"valu_avg\": {\n              \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n            },\n            \"mfma_avg\": {\n                \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n            },\n            \"vmem_avg\": {\n              \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n            },\n            \"lds_avg\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n            },\n            \"salu_avg\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n            },\n            \"smem_avg\": {\n              \"$avg\": { \"$divide\": [ 
\"&SQ_INSTS_SMEM\", \"&denom\" ] }\n            },\n            \"branch_avg\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n            },\n            \"gds_avg\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n            },\n            \"valu_min\": {\n                \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n              },\n              \"mfma_min\": {\n                  \"$min\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n              },\n              \"vmem_min\": {\n                \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n              },\n              \"lds_min\": {\n                \"$min\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n              },\n              \"salu_min\": {\n                \"$min\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n              },\n              \"smem_min\": {\n                \"$min\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n              },\n              \"branch_min\": {\n                \"$min\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n              },\n              \"gds_min\": {\n                \"$min\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n              },\n              \"valu_max\": {\n                \"$max\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n              },\n              \"mfma_max\": {\n                  \"$max\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n              },\n              \"vmem_max\": {\n                \"$max\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n              },\n              \"lds_max\": {\n                \"$max\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n      
        },\n              \"salu_max\": {\n                \"$max\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n              },\n              \"smem_max\": {\n                \"$max\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n              },\n              \"branch_max\": {\n                \"$max\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n              },\n              \"gds_max\": {\n                \"$max\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n              }\n\n          }\n        },\n        {\"$set\": {\n          \"array\": [\n              {\n                \"metric\": \"VALU - Vector\",\n                \"avg\": \"&valu_avg\",\n                \"min\": \"&valu_min\",\n                \"max\": \"&valu_max\"\n              },\n              {\n                \"metric\": \"VMEM\",\n                \"avg\": \"&vmem_avg\",\n                \"min\": \"&vmem_min\",\n                \"max\": \"&vmem_max\"\n              },\n              {\n                \"metric\": \"LDS\",\n                \"avg\": \"&lds_avg\",\n                \"min\": \"&lds_min\",\n                \"max\": \"&lds_max\"\n              },\n              {\n                \"metric\": \"VALU - MFMA\",\n                \"avg\": \"&mfma_avg\",\n                \"min\": \"&mfma_min\",\n                \"max\": \"&mfma_max\"\n              },\n              {\n                \"metric\": \"SALU\",\n                \"avg\": \"&salu_avg\",\n                \"min\": \"&salu_min\",\n                \"max\": \"&salu_max\"\n              },\n              {\n                \"metric\": \"SMEM\",\n                \"avg\": \"&smem_avg\",\n                \"min\": \"&smem_min\",\n                \"max\": \"&smem_max\"\n              },\n              {\n                \"metric\": \"Branch\",\n                \"avg\": \"&branch_avg\",\n                \"min\": \"&branch_min\",\n                \"max\": \"&branch_max\"\n              },\n          
    {\n                \"metric\": \"GDS\",\n                \"avg\": \"&gds_avg\",\n                \"min\": \"&gds_min\",\n                \"max\": \"&gds_max\"\n              }\n            ]\n        }},\n        {\"$unwind\": {\n          \"path\": \"&array\"\n        }},\n        {\"$replaceRoot\": {\n          \"newRoot\": \"&array\"\n        }}\n      ]);",
               "type": "table"
             },
             {
@@ -4660,7 +4662,7 @@
               "hide": false,
               "rawQuery": true,
               "refId": "B",
-              "target": "$Workload2.pmc_perf.aggregate([\n    {\"$match\": {\n      \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n      \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n      \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n      \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n    }},\n    {\"$addFields\": {\n      \"denom\": {\n             \"$switch\" : {\n                \"branches\": [\n                   {\n                        \"case\":  { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n                        \"then\":  \"&SQ_WAVES\"\n                   },\n                   {\n                        \"case\":  { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n                        \"then\":  \"&GRBM_GUI_ACTIVE\"\n                   },\n                   {\n                        \"case\":  { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n                        \"then\":  {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n                   }\n                ],\n               \"default\": 1\n             }         \n      } \n  }},\n  \n    {\"$group\": {\n        \"_id\": null,\n        \"valu\": {\n        \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n        },\n        \"mfma\": {\n            \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n        },\n        \"vmem\": {\n        \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n        },\n        \"lds\": {\n        \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n        },\n        \"salu\": {\n        \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n        },\n        \"smem\": {\n        \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n        },\n        \"branch\": {\n        \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n    
    },\n        \"gds\": {\n        \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n        }\n    }},\n      {\"$set\": {\n        \"array\": [\n            {\n              \"metric\": \"VALU - Vector (Baseline)\",\n              \"count\": \"&valu\"\n            },\n            {\n              \"metric\": \"VMEM (Baseline)\",\n              \"count\": \"&vmem\"\n            },\n            {\n              \"metric\": \"LDS (Baseline)\",\n              \"count\": \"&lds\"\n            },\n            {\n              \"metric\": \"VALU - MFMA (Baseline)\",\n              \"count\": \"&mfma\"\n            },\n            {\n              \"metric\": \"SALU (Baseline)\",\n              \"count\": \"&salu\"\n            },\n            {\n              \"metric\": \"SMEM (Baseline)\",\n              \"count\": \"&smem\"\n            },\n            {\n              \"metric\": \"Branch (Baseline)\",\n              \"count\": \"&branch\"\n            },\n            {\n              \"metric\": \"GDS (Baseline)\",\n              \"count\": \"&gds\"\n            }\n          ]\n      }},\n      {\"$unwind\": {\n        \"path\": \"&array\"\n      }},\n      {\"$replaceRoot\": {\n        \"newRoot\": \"&array\"\n      }}\n    ]);",
+              "target": "$Workload2.pmc_perf.aggregate([\n    {\"$match\": {\n      \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n      \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n      \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n      \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n    }},\n        {\"$addFields\": {\n            \"denom\": {\n                   \"$switch\" : {\n                      \"branches\": [\n                         {\n                              \"case\":  { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n                              \"then\":  \"&SQ_WAVES\"\n                         },\n                         {\n                              \"case\":  { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n                              \"then\":  \"&GRBM_GUI_ACTIVE\"\n                         },\n                         {\n                              \"case\":  { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n                              \"then\":  {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n                         }\n                      ],\n                     \"default\": 1\n                   }         \n            } \n        }},\n        {\"$group\": {\n            \"_id\": null,\n            \"valu_avg\": {\n              \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n            },\n            \"mfma_avg\": {\n                \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n            },\n            \"vmem_avg\": {\n              \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n            },\n            \"lds_avg\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n            },\n            \"salu_avg\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n    
        },\n            \"smem_avg\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n            },\n            \"branch_avg\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n            },\n            \"gds_avg\": {\n              \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n            },\n            \"valu_min\": {\n                \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n              },\n              \"mfma_min\": {\n                  \"$min\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n              },\n              \"vmem_min\": {\n                \"$min\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n              },\n              \"lds_min\": {\n                \"$min\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n              },\n              \"salu_min\": {\n                \"$min\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n              },\n              \"smem_min\": {\n                \"$min\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n              },\n              \"branch_min\": {\n                \"$min\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n              },\n              \"gds_min\": {\n                \"$min\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n              },\n              \"valu_max\": {\n                \"$max\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n              },\n              \"mfma_max\": {\n                  \"$max\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n              },\n              \"vmem_max\": {\n                \"$max\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n              },\n              \"lds_max\": {\n  
              \"$max\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n              },\n              \"salu_max\": {\n                \"$max\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n              },\n              \"smem_max\": {\n                \"$max\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n              },\n              \"branch_max\": {\n                \"$max\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n              },\n              \"gds_max\": {\n                \"$max\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n              }\n\n          }\n        },\n        {\"$set\": {\n          \"array\": [\n              {\n                \"metric\": \"VALU - Vector (Baseline)\",\n                \"avg\": \"&valu_avg\",\n                \"min\": \"&valu_min\",\n                \"max\": \"&valu_max\"\n              },\n              {\n                \"metric\": \"VMEM (Baseline)\",\n                \"avg\": \"&vmem_avg\",\n                \"min\": \"&vmem_min\",\n                \"max\": \"&vmem_max\"\n              },\n              {\n                \"metric\": \"LDS (Baseline)\",\n                \"avg\": \"&lds_avg\",\n                \"min\": \"&lds_min\",\n                \"max\": \"&lds_max\"\n              },\n              {\n                \"metric\": \"VALU - MFMA (Baseline)\",\n                \"avg\": \"&mfma_avg\",\n                \"min\": \"&mfma_min\",\n                \"max\": \"&mfma_max\"\n              },\n              {\n                \"metric\": \"SALU (Baseline)\",\n                \"avg\": \"&salu_avg\",\n                \"min\": \"&salu_min\",\n                \"max\": \"&salu_max\"\n              },\n              {\n                \"metric\": \"SMEM (Baseline)\",\n                \"avg\": \"&smem_avg\",\n                \"min\": \"&smem_min\",\n                \"max\": \"&smem_max\"\n              },\n              {\n                \"metric\": \"Branch 
(Baseline)\",\n                \"avg\": \"&branch_avg\",\n                \"min\": \"&branch_min\",\n                \"max\": \"&branch_max\"\n              },\n              {\n                \"metric\": \"GDS (Baseline)\",\n                \"avg\": \"&gds_avg\",\n                \"min\": \"&gds_min\",\n                \"max\": \"&gds_max\"\n              }\n            ]\n        }},\n        {\"$unwind\": {\n          \"path\": \"&array\"\n        }},\n        {\"$replaceRoot\": {\n          \"newRoot\": \"&array\"\n        }}\n      ]);",
               "type": "table"
             }
           ],
@@ -4680,6 +4682,19 @@
                   }
                 ]
               }
+            },
+            {
+              "id": "organize",
+              "options": {
+                "excludeByName": {
+                  "max": true,
+                  "min": true
+                },
+                "indexByName": {},
+                "renameByName": {
+                  "avg": ""
+                }
+              }
             }
           ],
           "transparent": true,
@@ -4697,7 +4712,8 @@
                 "mode": "percentage",
                 "steps": [
                   {
-                    "color": "green"
+                    "color": "green",
+                    "value": null
                   },
                   {
                     "color": "red",
@@ -4792,7 +4808,8 @@
                 "mode": "absolute",
                 "steps": [
                   {
-                    "color": "green"
+                    "color": "green",
+                    "value": null
                   },
                   {
                     "color": "red",
@@ -4885,7 +4902,8 @@
                 "mode": "absolute",
                 "steps": [
                   {
-                    "color": "green"
+                    "color": "green",
+                    "value": null
                   },
                   {
                     "color": "red",
@@ -5608,7 +5626,7 @@
       "type": "row"
     },
     {
-      "collapsed": false,
+      "collapsed": true,
       "datasource": {
         "type": "amd-miperf-data-plugin",
         "uid": "oVK0I__nk"
@@ -5620,333 +5638,332 @@
         "y": 10
       },
       "id": 98,
-      "panels": [],
-      "targets": [
+      "panels": [
         {
-          "datasource": {
-            "type": "amd-miperf-data-plugin",
-            "uid": "oVK0I__nk"
-          },
-          "refId": "A"
-        }
-      ],
-      "title": "Local Data Share (LDS)",
-      "type": "row"
-    },
-    {
-      "datasource": {},
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "thresholds"
-          },
-          "decimals": 1,
-          "mappings": [],
-          "max": 100,
-          "min": 0,
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": null
+          "datasource": {},
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "thresholds"
               },
-              {
-                "color": "#EAB839",
-                "value": 50
+              "decimals": 1,
+              "mappings": [],
+              "max": 100,
+              "min": 0,
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "#EAB839",
+                    "value": 50
+                  },
+                  {
+                    "color": "red",
+                    "value": 90
+                  }
+                ]
               },
-              {
-                "color": "red",
-                "value": 90
-              }
-            ]
-          },
-          "unit": "percent"
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 12,
-        "x": 0,
-        "y": 11
-      },
-      "id": 205,
-      "options": {
-        "displayMode": "gradient",
-        "minVizHeight": 10,
-        "minVizWidth": 0,
-        "orientation": "horizontal",
-        "reduceOptions": {
-          "calcs": [
-            "mean"
-          ],
-          "fields": "/.*/",
-          "values": true
-        },
-        "showUnfilled": true,
-        "text": {
-          "titleSize": 14,
-          "valueSize": 16
-        }
-      },
-      "pluginVersion": "8.3.4",
-      "targets": [
-        {
-          "datasource": {
-            "type": "amd-miperf-data-plugin",
-            "uid": "Zzw1yR27k"
+              "unit": "percent"
+            },
+            "overrides": []
           },
-          "rawQuery": true,
-          "refId": "A",
-          "target": "${Workload1}.pmc_perf.aggregate([\n    {\"$match\": {\n        \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n        \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n        \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n      }},\n      {\"$group\": {\n          \"_id\": null,\n    \n          \"bconf_rate\": {\n            \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                {\"$divide\": [\n                                    {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n                                    {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                ]},\n                               null\n                              ]\n                    }     \n          },\n    \n          \"bw_pop\":  {\n            \"$avg\":     {\"$divide\": [\n                            {\"$divide\":[ \n                                { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                                 4,  \n                                                 {\"$toInt\": \"$L2Banks\"}\n                                               ]}, \n                                {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                            ]},\n     \n                            {\"$multiply\": [$sclk, $numCU, 0.00128]}\n                         ]}\n         },\n\n         \"lds_util\":  {\n            \"$avg\":    {\"$divide\":[ \n                            { \"$multiply\": [ 100,  \"&SQ_LDS_IDX_ACTIVE\" ]}, \n                            {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n                       ]}\n         },\n\n         \"lds_access_rate\":  {\n            \"$avg\":    {\"$divide\":[ \n                            { \"$multiply\": [200,  \"&SQ_ACTIVE_INST_LDS\" ]}, \n                            {\"$multiply\": 
[\"&GRBM_GUI_ACTIVE\", $numCU]}\n                       ]}\n         }\n    \n      }},\n    \n      {\"$set\": {\n        \"array\": [\n            {\n              \"Utilization\": \"&lds_util\",\n              \"Access Rate\": \"&lds_access_rate\",\n              \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n              \"Bank Conflict Rate\": \"&bconf_rate\"\n            }\n    \n          ]\n      }},\n    \n      {\"$unwind\": {\n        \"path\": \"&array\"\n      }},\n      \n      {\"$replaceRoot\": {\n        \"newRoot\": \"&array\"\n      }}\n    \n    ]);",
-          "type": "table"
-        },
-        {
-          "datasource": {
-            "type": "amd-miperf-data-plugin",
-            "uid": "Zzw1yR27k"
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 45
           },
-          "hide": false,
-          "rawQuery": true,
-          "refId": "B",
-          "target": "${Workload2}.pmc_perf.aggregate([\n    {\"$match\": {\n        \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n        \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n        \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n        \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n    }},\n  {\"$group\": {\n      \"_id\": null,\n\n      \"bconf_rate\": {\n        \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                            {\"$divide\": [\n                                {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n                                {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                            ]},\n                           null\n                          ]\n                }     \n      },\n\n      \"bw_pop\":  {\n        \"$avg\":     {\"$divide\": [\n                        {\"$divide\":[ \n                            { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                             4,  \n                                             {\"$toInt\": \"$L2Banks2\"}\n                                           ]}, \n                            {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                        ]},\n \n                        {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n                     ]}\n     },\n     \"lds_util\":  {\n        \"$avg\":    {\"$divide\":[ \n                        { \"$multiply\": [ 100,  \"&SQ_LDS_IDX_ACTIVE\" ]}, \n                        {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n                   ]}\n     },\n\n     \"lds_access_rate\":  {\n        \"$avg\":    {\"$divide\":[ \n                        { \"$multiply\": [200,  \"&SQ_ACTIVE_INST_LDS\" ]}, \n                        {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n               
    ]}\n     }\n\n\n  }},\n\n  {\"$set\": {\n    \"array\": [\n        {\n          \"Utilization\": \"&lds_util\",\n          \"Access Rate\": \"&lds_access_rate\",\n          \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n          \"Bank Conflict Rate\": \"&bconf_rate\"\n        }\n\n      ]\n  }},\n\n  {\"$unwind\": {\n    \"path\": \"&array\"\n  }},\n  \n  {\"$replaceRoot\": {\n    \"newRoot\": \"&array\"\n  }}\n\n]);",
-          "type": "table"
-        }
-      ],
-      "title": "Speed-of-Light: LDS",
-      "transformations": [
-        {
-          "id": "concatenate",
-          "options": {}
-        },
-        {
-          "id": "organize",
+          "id": 205,
           "options": {
-            "excludeByName": {},
-            "indexByName": {
-              "Access Rate 1": 6,
-              "Access Rate 2": 7,
-              "Bandwith (Pct-of-Peak) 1": 0,
-              "Bandwith (Pct-of-Peak) 2": 1,
-              "Bank Conflict Rate 1": 2,
-              "Bank Conflict Rate 2": 3,
-              "Utilization 1": 4,
-              "Utilization 2": 5
-            },
-            "renameByName": {
-              "Access Rate 1": "Access Rate (Current)",
-              "Access Rate 2": "Access Rate (Baseline)",
-              "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)",
-              "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)",
-              "Bandwith (Pct-of-Peak)": "",
-              "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)",
-              "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ",
-              "Bank Conflict Rate 1": "Bank Conflict Rate (Current)",
-              "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)",
-              "Cache Hit 1": "Cache Hit (Current)",
-              "Cache Hit 2": "Cache Hit (Baseline)",
-              "Latency (Cycles) 1": "Latency  (Current) [Cycles]",
-              "Latency (Cycles) 2": "Latency (Baseline) [Cycles]",
-              "Stall 1": "Stall (Current)",
-              "Stall 2": "Stall (Baseline)",
-              "Util 1": "Util (Current)",
-              "Util 2": "Util (Baseline)",
-              "Utilization 1": "Util (Current)",
-              "Utilization 2": "Util (Baseline)"
+            "displayMode": "gradient",
+            "minVizHeight": 10,
+            "minVizWidth": 0,
+            "orientation": "horizontal",
+            "reduceOptions": {
+              "calcs": [
+                "mean"
+              ],
+              "fields": "/.*/",
+              "values": true
+            },
+            "showUnfilled": true,
+            "text": {
+              "titleSize": 14,
+              "valueSize": 16
             }
-          }
-        }
-      ],
-      "transparent": true,
-      "type": "bargauge"
-    },
-    {
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "thresholds"
-          },
-          "custom": {
-            "align": "auto",
-            "displayMode": "auto"
           },
-          "decimals": 0,
-          "mappings": [],
-          "min": -100000000000000000000,
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": null
+          "pluginVersion": "8.3.4",
+          "targets": [
+            {
+              "datasource": {
+                "type": "amd-miperf-data-plugin",
+                "uid": "Zzw1yR27k"
               },
-              {
-                "color": "red",
-                "value": 80
+              "rawQuery": true,
+              "refId": "A",
+              "target": "${Workload1}.pmc_perf.aggregate([\n    {\"$match\": {\n        \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n        \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n        \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n      }},\n      {\"$group\": {\n          \"_id\": null,\n    \n          \"bconf_rate\": {\n            \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                {\"$divide\": [\n                                    {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n                                    {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                ]},\n                               null\n                              ]\n                    }     \n          },\n    \n          \"bw_pop\":  {\n            \"$avg\":     {\"$divide\": [\n                            {\"$divide\":[ \n                                { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                                 4,  \n                                                 {\"$toInt\": \"$L2Banks\"}\n                                               ]}, \n                                {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                            ]},\n     \n                            {\"$multiply\": [$sclk, $numCU, 0.00128]}\n                         ]}\n         },\n\n         \"lds_util\":  {\n            \"$avg\":    {\"$divide\":[ \n                            { \"$multiply\": [ 100,  \"&SQ_LDS_IDX_ACTIVE\" ]}, \n                            {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n                       ]}\n         },\n\n         \"lds_access_rate\":  {\n            \"$avg\":    {\"$divide\":[ \n                            { \"$multiply\": [200,  \"&SQ_ACTIVE_INST_LDS\" ]}, \n                            
{\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n                       ]}\n         }\n    \n      }},\n    \n      {\"$set\": {\n        \"array\": [\n            {\n              \"Utilization\": \"&lds_util\",\n              \"Access Rate\": \"&lds_access_rate\",\n              \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n              \"Bank Conflict Rate\": \"&bconf_rate\"\n            }\n    \n          ]\n      }},\n    \n      {\"$unwind\": {\n        \"path\": \"&array\"\n      }},\n      \n      {\"$replaceRoot\": {\n        \"newRoot\": \"&array\"\n      }}\n    \n    ]);",
+              "type": "table"
+            },
+            {
+              "datasource": {
+                "type": "amd-miperf-data-plugin",
+                "uid": "Zzw1yR27k"
+              },
+              "hide": false,
+              "rawQuery": true,
+              "refId": "B",
+              "target": "${Workload2}.pmc_perf.aggregate([\n    {\"$match\": {\n        \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n        \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n        \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n        \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n    }},\n  {\"$group\": {\n      \"_id\": null,\n\n      \"bconf_rate\": {\n        \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                            {\"$divide\": [\n                                {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n                                {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                            ]},\n                           null\n                          ]\n                }     \n      },\n\n      \"bw_pop\":  {\n        \"$avg\":     {\"$divide\": [\n                        {\"$divide\":[ \n                            { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]},  \n                                             4,  \n                                             {\"$toInt\": \"$L2Banks2\"}\n                                           ]}, \n                            {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n                        ]},\n \n                        {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n                     ]}\n     },\n     \"lds_util\":  {\n        \"$avg\":    {\"$divide\":[ \n                        { \"$multiply\": [ 100,  \"&SQ_LDS_IDX_ACTIVE\" ]}, \n                        {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n                   ]}\n     },\n\n     \"lds_access_rate\":  {\n        \"$avg\":    {\"$divide\":[ \n                        { \"$multiply\": [200,  \"&SQ_ACTIVE_INST_LDS\" ]}, \n                        {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n           
        ]}\n     }\n\n\n  }},\n\n  {\"$set\": {\n    \"array\": [\n        {\n          \"Utilization\": \"&lds_util\",\n          \"Access Rate\": \"&lds_access_rate\",\n          \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n          \"Bank Conflict Rate\": \"&bconf_rate\"\n        }\n\n      ]\n  }},\n\n  {\"$unwind\": {\n    \"path\": \"&array\"\n  }},\n  \n  {\"$replaceRoot\": {\n    \"newRoot\": \"&array\"\n  }}\n\n]);",
+              "type": "table"
+            }
+          ],
+          "title": "Speed-of-Light: LDS",
+          "transformations": [
+            {
+              "id": "concatenate",
+              "options": {}
+            },
+            {
+              "id": "organize",
+              "options": {
+                "excludeByName": {},
+                "indexByName": {
+                  "Access Rate 1": 6,
+                  "Access Rate 2": 7,
+                  "Bandwith (Pct-of-Peak) 1": 0,
+                  "Bandwith (Pct-of-Peak) 2": 1,
+                  "Bank Conflict Rate 1": 2,
+                  "Bank Conflict Rate 2": 3,
+                  "Utilization 1": 4,
+                  "Utilization 2": 5
+                },
+                "renameByName": {
+                  "Access Rate 1": "Access Rate (Current)",
+                  "Access Rate 2": "Access Rate (Baseline)",
+                  "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)",
+                  "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)",
+                  "Bandwith (Pct-of-Peak)": "",
+                  "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)",
+                  "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ",
+                  "Bank Conflict Rate 1": "Bank Conflict Rate (Current)",
+                  "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)",
+                  "Cache Hit 1": "Cache Hit (Current)",
+                  "Cache Hit 2": "Cache Hit (Baseline)",
+                  "Latency (Cycles) 1": "Latency  (Current) [Cycles]",
+                  "Latency (Cycles) 2": "Latency (Baseline) [Cycles]",
+                  "Stall 1": "Stall (Current)",
+                  "Stall 2": "Stall (Baseline)",
+                  "Util 1": "Util (Current)",
+                  "Util 2": "Util (Baseline)",
+                  "Utilization 1": "Util (Current)",
+                  "Utilization 2": "Util (Baseline)"
+                }
               }
-            ]
-          },
-          "unit": "locale"
+            }
+          ],
+          "transparent": true,
+          "type": "bargauge"
         },
-        "overrides": [
-          {
-            "matcher": {
-              "id": "byName",
-              "options": "Avg (Current)"
+        {
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "thresholds"
+              },
+              "custom": {
+                "align": "auto",
+                "displayMode": "auto"
+              },
+              "decimals": 0,
+              "mappings": [],
+              "min": -100000000000000000000,
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "locale"
             },
-            "properties": [
+            "overrides": [
               {
-                "id": "custom.width",
-                "value": 114
-              }
-            ]
-          },
-          {
-            "matcher": {
-              "id": "byName",
-              "options": "Min (Current)"
-            },
-            "properties": [
+                "matcher": {
+                  "id": "byName",
+                  "options": "Avg (Current)"
+                },
+                "properties": [
+                  {
+                    "id": "custom.width",
+                    "value": 114
+                  }
+                ]
+              },
               {
-                "id": "custom.width",
-                "value": 107
-              }
-            ]
-          },
-          {
-            "matcher": {
-              "id": "byName",
-              "options": "Min (Baseline)"
-            },
-            "properties": [
+                "matcher": {
+                  "id": "byName",
+                  "options": "Min (Current)"
+                },
+                "properties": [
+                  {
+                    "id": "custom.width",
+                    "value": 107
+                  }
+                ]
+              },
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "Min (Baseline)"
+                },
+                "properties": [
+                  {
+                    "id": "custom.width",
+                    "value": 128
+                  }
+                ]
+              },
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "Max (Current)"
+                },
+                "properties": [
+                  {
+                    "id": "custom.width",
+                    "value": 115
+                  }
+                ]
+              },
               {
-                "id": "custom.width",
-                "value": 128
+                "matcher": {
+                  "id": "byName",
+                  "options": "Unit"
+                },
+                "properties": [
+                  {
+                    "id": "custom.width",
+                    "value": 138
+                  }
+                ]
+              },
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "Avg (Baseline)"
+                },
+                "properties": [
+                  {
+                    "id": "custom.width",
+                    "value": 141
+                  }
+                ]
               }
             ]
           },
-          {
-            "matcher": {
-              "id": "byName",
-              "options": "Max (Current)"
+          "gridPos": {
+            "h": 12,
+            "w": 12,
+            "x": 12,
+            "y": 45
+          },
+          "id": 100,
+          "options": {
+            "footer": {
+              "fields": "",
+              "reducer": [
+                "sum"
+              ],
+              "show": false
             },
-            "properties": [
-              {
-                "id": "custom.width",
-                "value": 115
-              }
-            ]
+            "showHeader": true,
+            "sortBy": []
           },
-          {
-            "matcher": {
-              "id": "byName",
-              "options": "Unit"
+          "pluginVersion": "8.3.4",
+          "targets": [
+            {
+              "datasource": {
+                "type": "amd-miperf-data-plugin",
+                "uid": "Zzw1yR27k"
+              },
+              "hide": false,
+              "rawQuery": true,
+              "refId": "A",
+              "target": "${Workload1}.pmc_perf.aggregate([\n    {\"$match\": {\n      \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n      \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n      \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n    }},\n    {\"$addFields\": {\n        \"denom\": {\n               \"$switch\" : {\n                  \"branches\": [\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n                          \"then\":  \"&SQ_WAVES\"\n                     },\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n                          \"then\":  \"&GRBM_GUI_ACTIVE\"\n                     },\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n                          \"then\":  {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n                     }\n                  ],\n                 \"default\": 1\n               }         \n        } \n    }},\n    {\"$group\": {\n        \"_id\": null,\n            \"avg_ldsInstrs\": {\n              \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n            \"min_ldsInstrs\": {\n              \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n            \"max_ldsInstrs\": {\n              \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n  \n            \"avg_indexAccesses\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  \"&denom\"] }\n            },\n            \"min_indexAccesses\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  \"&denom\"] }\n            },\n            \"max_indexAccesses\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  \"&denom\"] }\n            },\n  \n            \"avg_atomicCycles\": {\n              \"$avg\": { \"$divide\": 
[\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n            \"min_atomicCycles\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n            \"max_atomicCycles\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n  \n            \"avg_bankConflicts\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n            \"min_bankConflicts\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n            \"max_bankConflicts\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n  \n            \"avg_addrConflicts\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n            \"min_addrConflicts\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n            \"max_addrConflicts\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n  \n  \n            \"avg_unalignedStall\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n            \"min_unalignedStall\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n            \"max_unalignedStall\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n  \n            \"avg_memViolations\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n            },\n            \"min_memViolations\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n            },\n            \"max_memViolations\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , 
\"&denom\" ] }\n            },\n  \n            \"avg_bconf_per_op\": {\n              \"$avg\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n            \"min_bconf_per_op\": {\n              \"$min\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n            \"max_bconf_per_op\": {\n              \"$max\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n  \n            \"avg_bw\":  {\n              \"$avg\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n      
            ]\n              }\n            },\n            \"min_bw\":  {\n              \"$min\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n                  ]\n              }\n            },\n            \"max_bw\":  {\n              \"$max\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n                  ]\n              }\n            }\n    }},\n    {\"$set\": {\n      \"array\": [\n          {\n            \"metric\": \"LDS Instrs\",\n            \"avg\": \"&avg_ldsInstrs\",\n            \"min\": \"&min_ldsInstrs\",\n            \"max\": \"&max_ldsInstrs\",\n            \"Unit\":{\"$concat\": [\"Instr \", $normUnit]}  \n          },\n          {\n            \"metric\": \"Bandwidth\",\n            \"avg\": \"&avg_bw\",\n            \"min\": \"&min_bw\",\n            \"max\": \"&max_bw\",\n            \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n          },\n          {\n            \"metric\": \"Bank Conficts/Access\",\n            \"avg\": \"&avg_bconf_per_op\",\n            \"min\": \"&min_bconf_per_op\",\n            \"max\": \"&max_bconf_per_op\",\n            \"Unit\": \"Conflicts/Access\"\n          },\n          {\n            \"metric\": \"Index Accesses\",\n            \"avg\": \"&avg_indexAccesses\",\n            \"min\": \"&min_indexAccesses\",\n            \"max\": \"&max_indexAccesses\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Atomic Cycles\",\n            \"avg\": \"&avg_atomicCycles\",\n            \"min\": \"&min_atomicCycles\",\n            \"max\": \"&max_atomicCycles\",\n            
\"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Bank Conflict\",\n            \"avg\": \"&avg_bankConflicts\",\n            \"min\": \"&min_bankConflicts\",\n            \"max\": \"&max_bankConflicts\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Addr Conflict\",\n            \"avg\": \"&avg_addrConflicts\",\n            \"min\": \"&min_addrConflicts\",\n            \"max\": \"&max_addrConflicts\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Unaligned Stall\",\n            \"avg\": \"&avg_unalignedStall\",\n            \"min\": \"&min_unalignedStall\",\n            \"max\": \"&max_unalignedStall\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Mem Violations\",\n            \"avg\": \"&avg_memViolations\",\n            \"min\": \"&min_memViolations\",\n            \"max\": \"&max_memViolations\",\n            \"Unit\": {\"$concat\": [\"\", $normUnit]}\n          }\n        ]\n    }},\n    {\"$unwind\": {\n      \"path\": \"&array\"\n    }},\n    {\"$replaceRoot\": {\n      \"newRoot\": \"&array\"\n    }},\n    {\"$unionWith\": {\n          \"coll\": \"SQ_INST_LEVEL_LDS\",\n          \"pipeline\": [\n              {\"$match\": {\n                  \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                  \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                  \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n              }},\n  \n            {\"$group\": {\n                \"_id\": null,\n                \"avg_ldsLatency\": {\n                  \"$avg\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                  
  ] \n                  }\n                },\n                \"min_ldsLatency\": {\n                  \"$min\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                    ] \n                  }\n                },\n                \"max_ldsLatency\": {\n                  \"$max\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                    ] \n                  }\n                }\n            }},\n            {\"$project\": {\n                \"_id\": 0,\n                \"metric\": \"LDS Latency\",\n                \"avg\": \"&avg_ldsLatency\",\n                \"min\": \"&min_ldsLatency\",\n                \"max\": \"&max_ldsLatency\",\n                \"Unit\": \"Cycles\"\n            }}\n          ]\n      }}\n  ]);",
+              "type": "table"
             },
-            "properties": [
-              {
-                "id": "custom.width",
-                "value": 138
+            {
+              "datasource": {
+                "type": "amd-miperf-data-plugin",
+                "uid": "Zzw1yR27k"
+              },
+              "hide": false,
+              "rawQuery": true,
+              "refId": "B",
+              "target": "${Workload2}.pmc_perf.aggregate([\n    {\"$match\": {\n      \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n      \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n      \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},        \n     \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n  \n    }},\n    {\"$addFields\": {\n        \"denom\": {\n               \"$switch\" : {\n                  \"branches\": [\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n                          \"then\":  \"&SQ_WAVES\"\n                     },\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n                          \"then\":  \"&GRBM_GUI_ACTIVE\"\n                     },\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n                          \"then\":  {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n                     }\n                  ],\n                 \"default\": 1\n               }         \n        } \n    }},\n    {\"$group\": {\n        \"_id\": null,\n            \"avg_ldsInstrs\": {\n              \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n            \"min_ldsInstrs\": {\n              \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n            \"max_ldsInstrs\": {\n              \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n  \n            \"avg_indexAccesses\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  \"&denom\"] }\n            },\n            \"min_indexAccesses\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  \"&denom\"] }\n            },\n            \"max_indexAccesses\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  
\"&denom\"] }\n            },\n  \n            \"avg_atomicCycles\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n            \"min_atomicCycles\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n            \"max_atomicCycles\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n  \n            \"avg_bankConflicts\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n            \"min_bankConflicts\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n            \"max_bankConflicts\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n  \n            \"avg_addrConflicts\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n            \"min_addrConflicts\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n            \"max_addrConflicts\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n            \"avg_unalignedStall\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n            \"min_unalignedStall\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n            \"max_unalignedStall\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n  \n            \"avg_memViolations\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n            },\n            \"min_memViolations\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n            },\n            
\"max_memViolations\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n            },\n  \n \n            \"avg_bconf_per_op\": {\n              \"$avg\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n            \"min_bconf_per_op\": {\n              \"$min\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n            \"max_bconf_per_op\": {\n              \"$max\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n  \n            \"avg_bw\":  {\n              \"$avg\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", 
\"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n                  ]\n              }\n            },\n            \"min_bw\":  {\n              \"$min\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n                  ]\n              }\n            },\n            \"max_bw\":  {\n              \"$max\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n                  ]\n              }\n            }\n    }},\n    {\"$set\": {\n      \"array\": [\n          {\n            \"metric\": \"LDS Instrs\",\n            \"avg\": \"&avg_ldsInstrs\",\n            \"min\": \"&min_ldsInstrs\",\n            \"max\": \"&max_ldsInstrs\",\n            \"Unit\":{\"$concat\": [\"Instr \", $normUnit]}  \n          },\n          {\n            \"metric\": \"Bandwidth\",\n            \"avg\": \"&avg_bw\",\n            \"min\": \"&min_bw\",\n            \"max\": \"&max_bw\",\n            \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n          },\n          {\n            \"metric\": \"Bank Conficts/Access\",\n            \"avg\": \"&avg_bconf_per_op\",\n            \"min\": \"&min_bconf_per_op\",\n            \"max\": \"&max_bconf_per_op\",\n            \"Unit\": \"Conflicts/Access\"\n          },\n          {\n            \"metric\": \"Index Accesses\",\n            \"avg\": \"&avg_indexAccesses\",\n            \"min\": \"&min_indexAccesses\",\n            \"max\": \"&max_indexAccesses\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Atomic Cycles\",\n            \"avg\": 
\"&avg_atomicCycles\",\n            \"min\": \"&min_atomicCycles\",\n            \"max\": \"&max_atomicCycles\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Bank Conflict\",\n            \"avg\": \"&avg_bankConflicts\",\n            \"min\": \"&min_bankConflicts\",\n            \"max\": \"&max_bankConflicts\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Addr Conflict\",\n            \"avg\": \"&avg_addrConflicts\",\n            \"min\": \"&min_addrConflicts\",\n            \"max\": \"&max_addrConflicts\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Unaligned Stall\",\n            \"avg\": \"&avg_unalignedStall\",\n            \"min\": \"&min_unalignedStall\",\n            \"max\": \"&max_unalignedStall\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Mem Violations\",\n            \"avg\": \"&avg_memViolations\",\n            \"min\": \"&min_memViolations\",\n            \"max\": \"&max_memViolations\",\n            \"Unit\": {\"$concat\": [\"\", $normUnit]}\n          }\n        ]\n    }},\n    {\"$unwind\": {\n      \"path\": \"&array\"\n    }},\n    {\"$replaceRoot\": {\n      \"newRoot\": \"&array\"\n    }},\n    {\"$unionWith\": {\n          \"coll\": \"SQ_INST_LEVEL_LDS\",\n          \"pipeline\": [\n              {\"$match\": {\n                  \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n                  \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n                  \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n                  \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n              }},\n  \n            {\"$group\": {\n                \"_id\": null,\n                \"avg_ldsLatency\": {\n               
   \"$avg\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                    ] \n                  }\n                },\n                \"min_ldsLatency\": {\n                  \"$min\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                    ] \n                  }\n                },\n                \"max_ldsLatency\": {\n                  \"$max\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                    ] \n                  }\n                }\n            }},\n            {\"$project\": {\n                \"_id\": 0,\n                \"metric\": \"LDS Latency\",\n                \"avg\": \"&avg_ldsLatency\",\n                \"min\": \"&min_ldsLatency\",\n                \"max\": \"&max_ldsLatency\",\n                \"Unit\": \"Cycles\"\n            }}\n          ]\n      }}\n  ]);",
+              "type": "table"
+            }
+          ],
+          "title": "LDS Stats",
+          "transformations": [
+            {
+              "id": "concatenate",
+              "options": {
+                "frameNameLabel": "frame",
+                "frameNameMode": "field"
               }
-            ]
-          },
-          {
-            "matcher": {
-              "id": "byName",
-              "options": "Avg (Baseline)"
             },
-            "properties": [
-              {
-                "id": "custom.width",
-                "value": 141
+            {
+              "id": "organize",
+              "options": {
+                "excludeByName": {
+                  "Unit 2": true,
+                  "metric 2": true
+                },
+                "indexByName": {
+                  "Unit 1": 9,
+                  "Unit 2": 8,
+                  "avg 1": 1,
+                  "avg 2": 2,
+                  "max 1": 5,
+                  "max 2": 6,
+                  "metric 1": 0,
+                  "metric 2": 7,
+                  "min 1": 3,
+                  "min 2": 4
+                },
+                "renameByName": {
+                  "avg 1": "Avg (Current)",
+                  "avg 2": "Avg (Baseline)",
+                  "max 1": "Max (Current)",
+                  "max 2": "Max (Baseline)",
+                  "min 1": "Min (Current)",
+                  "min 2": "Min (Baseline)"
+                }
               }
-            ]
-          }
-        ]
-      },
-      "gridPos": {
-        "h": 12,
-        "w": 12,
-        "x": 12,
-        "y": 11
-      },
-      "id": 100,
-      "options": {
-        "footer": {
-          "fields": "",
-          "reducer": [
-            "sum"
-          ],
-          "show": false
-        },
-        "showHeader": true,
-        "sortBy": []
-      },
-      "pluginVersion": "8.3.4",
-      "targets": [
-        {
-          "datasource": {
-            "type": "amd-miperf-data-plugin",
-            "uid": "Zzw1yR27k"
-          },
-          "hide": false,
-          "rawQuery": true,
-          "refId": "A",
-          "target": "${Workload1}.pmc_perf.aggregate([\n    {\"$match\": {\n      \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n      \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n      \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n    }},\n    {\"$addFields\": {\n        \"denom\": {\n               \"$switch\" : {\n                  \"branches\": [\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n                          \"then\":  \"&SQ_WAVES\"\n                     },\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n                          \"then\":  \"&GRBM_GUI_ACTIVE\"\n                     },\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n                          \"then\":  {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n                     }\n                  ],\n                 \"default\": 1\n               }         \n        } \n    }},\n    {\"$group\": {\n        \"_id\": null,\n            \"avg_ldsInstrs\": {\n              \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n            \"min_ldsInstrs\": {\n              \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n            \"max_ldsInstrs\": {\n              \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n  \n            \"avg_indexAccesses\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  \"&denom\"] }\n            },\n            \"min_indexAccesses\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  \"&denom\"] }\n            },\n            \"max_indexAccesses\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  \"&denom\"] }\n            },\n  \n            \"avg_atomicCycles\": {\n              \"$avg\": { \"$divide\": 
[\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n            \"min_atomicCycles\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n            \"max_atomicCycles\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n  \n            \"avg_bankConflicts\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n            \"min_bankConflicts\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n            \"max_bankConflicts\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n  \n            \"avg_addrConflicts\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n            \"min_addrConflicts\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n            \"max_addrConflicts\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n  \n  \n            \"avg_unalignedStall\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n            \"min_unalignedStall\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n            \"max_unalignedStall\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n  \n            \"avg_memViolations\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n            },\n            \"min_memViolations\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n            },\n            \"max_memViolations\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , 
\"&denom\" ] }\n            },\n  \n            \"avg_bconf_per_op\": {\n              \"$avg\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n            \"min_bconf_per_op\": {\n              \"$min\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n            \"max_bconf_per_op\": {\n              \"$max\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n  \n            \"avg_bw\":  {\n              \"$avg\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n      
            ]\n              }\n            },\n            \"min_bw\":  {\n              \"$min\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n                  ]\n              }\n            },\n            \"max_bw\":  {\n              \"$max\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n                  ]\n              }\n            }\n    }},\n    {\"$set\": {\n      \"array\": [\n          {\n            \"metric\": \"LDS Instrs\",\n            \"avg\": \"&avg_ldsInstrs\",\n            \"min\": \"&min_ldsInstrs\",\n            \"max\": \"&max_ldsInstrs\",\n            \"Unit\":{\"$concat\": [\"Instr \", $normUnit]}  \n          },\n          {\n            \"metric\": \"Bandwidth\",\n            \"avg\": \"&avg_bw\",\n            \"min\": \"&min_bw\",\n            \"max\": \"&max_bw\",\n            \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n          },\n          {\n            \"metric\": \"Bank Conficts/Access\",\n            \"avg\": \"&avg_bconf_per_op\",\n            \"min\": \"&min_bconf_per_op\",\n            \"max\": \"&max_bconf_per_op\",\n            \"Unit\": \"Conflicts/Access\"\n          },\n          {\n            \"metric\": \"Index Accesses\",\n            \"avg\": \"&avg_indexAccesses\",\n            \"min\": \"&min_indexAccesses\",\n            \"max\": \"&max_indexAccesses\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Atomic Cycles\",\n            \"avg\": \"&avg_atomicCycles\",\n            \"min\": \"&min_atomicCycles\",\n            \"max\": \"&max_atomicCycles\",\n            
\"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Bank Conflict\",\n            \"avg\": \"&avg_bankConflicts\",\n            \"min\": \"&min_bankConflicts\",\n            \"max\": \"&max_bankConflicts\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Addr Conflict\",\n            \"avg\": \"&avg_addrConflicts\",\n            \"min\": \"&min_addrConflicts\",\n            \"max\": \"&max_addrConflicts\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Unaligned Stall\",\n            \"avg\": \"&avg_unalignedStall\",\n            \"min\": \"&min_unalignedStall\",\n            \"max\": \"&max_unalignedStall\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Mem Violations\",\n            \"avg\": \"&avg_memViolations\",\n            \"min\": \"&min_memViolations\",\n            \"max\": \"&max_memViolations\",\n            \"Unit\": {\"$concat\": [\"\", $normUnit]}\n          }\n        ]\n    }},\n    {\"$unwind\": {\n      \"path\": \"&array\"\n    }},\n    {\"$replaceRoot\": {\n      \"newRoot\": \"&array\"\n    }},\n    {\"$unionWith\": {\n          \"coll\": \"SQ_INST_LEVEL_LDS\",\n          \"pipeline\": [\n              {\"$match\": {\n                  \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n                  \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n                  \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n              }},\n  \n            {\"$group\": {\n                \"_id\": null,\n                \"avg_ldsLatency\": {\n                  \"$avg\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                  
  ] \n                  }\n                },\n                \"min_ldsLatency\": {\n                  \"$min\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                    ] \n                  }\n                },\n                \"max_ldsLatency\": {\n                  \"$max\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                    ] \n                  }\n                }\n            }},\n            {\"$project\": {\n                \"_id\": 0,\n                \"metric\": \"LDS Latency\",\n                \"avg\": \"&avg_ldsLatency\",\n                \"min\": \"&min_ldsLatency\",\n                \"max\": \"&max_ldsLatency\",\n                \"Unit\": \"Cycles\"\n            }}\n          ]\n      }}\n  ]);",
+            }
+          ],
           "type": "table"
-        },
+        }
+      ],
+      "targets": [
         {
           "datasource": {
             "type": "amd-miperf-data-plugin",
-            "uid": "Zzw1yR27k"
+            "uid": "oVK0I__nk"
           },
-          "hide": false,
-          "rawQuery": true,
-          "refId": "B",
-          "target": "${Workload2}.pmc_perf.aggregate([\n    {\"$match\": {\n      \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n      \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n      \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},        \n     \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n  \n    }},\n    {\"$addFields\": {\n        \"denom\": {\n               \"$switch\" : {\n                  \"branches\": [\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n                          \"then\":  \"&SQ_WAVES\"\n                     },\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n                          \"then\":  \"&GRBM_GUI_ACTIVE\"\n                     },\n                     {\n                          \"case\":  { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n                          \"then\":  {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n                     }\n                  ],\n                 \"default\": 1\n               }         \n        } \n    }},\n    {\"$group\": {\n        \"_id\": null,\n            \"avg_ldsInstrs\": {\n              \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n            \"min_ldsInstrs\": {\n              \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n            \"max_ldsInstrs\": {\n              \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" ,  \"&denom\"] }\n            },\n  \n            \"avg_indexAccesses\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  \"&denom\"] }\n            },\n            \"min_indexAccesses\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  \"&denom\"] }\n            },\n            \"max_indexAccesses\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" ,  \"&denom\"] 
}\n            },\n  \n            \"avg_atomicCycles\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n            \"min_atomicCycles\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n            \"max_atomicCycles\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n            },\n  \n            \"avg_bankConflicts\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n            \"min_bankConflicts\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n            \"max_bankConflicts\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n            },\n  \n            \"avg_addrConflicts\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n            \"min_addrConflicts\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n            \"max_addrConflicts\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n            },\n            \"avg_unalignedStall\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n            \"min_unalignedStall\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n            \"max_unalignedStall\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n            },\n  \n            \"avg_memViolations\": {\n              \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n            },\n            \"min_memViolations\": {\n              \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n            },\n            
\"max_memViolations\": {\n              \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n            },\n  \n \n            \"avg_bconf_per_op\": {\n              \"$avg\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n            \"min_bconf_per_op\": {\n              \"$min\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n            \"max_bconf_per_op\": {\n              \"$max\": {\n                \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n                                    {\"$divide\": [\n                                        \"&SQ_LDS_BANK_CONFLICT\",\n                                        {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n                                    ]},\n                                    null\n                          ]\n              }\n            },\n  \n            \"avg_bw\":  {\n              \"$avg\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", 
\"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n                  ]\n              }\n            },\n            \"min_bw\":  {\n              \"$min\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n                  ]\n              }\n            },\n            \"max_bw\":  {\n              \"$max\": {\n                \"$divide\":\n                  [ { \"$multiply\": [{ \"$multiply\":  [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} ,  4]},  {\"$toInt\": \"$L2Banks\"}]}, \n                    \"&denom\"\n                  ]\n              }\n            }\n    }},\n    {\"$set\": {\n      \"array\": [\n          {\n            \"metric\": \"LDS Instrs\",\n            \"avg\": \"&avg_ldsInstrs\",\n            \"min\": \"&min_ldsInstrs\",\n            \"max\": \"&max_ldsInstrs\",\n            \"Unit\":{\"$concat\": [\"Instr \", $normUnit]}  \n          },\n          {\n            \"metric\": \"Bandwidth\",\n            \"avg\": \"&avg_bw\",\n            \"min\": \"&min_bw\",\n            \"max\": \"&max_bw\",\n            \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n          },\n          {\n            \"metric\": \"Bank Conficts/Access\",\n            \"avg\": \"&avg_bconf_per_op\",\n            \"min\": \"&min_bconf_per_op\",\n            \"max\": \"&max_bconf_per_op\",\n            \"Unit\": \"Conflicts/Access\"\n          },\n          {\n            \"metric\": \"Index Accesses\",\n            \"avg\": \"&avg_indexAccesses\",\n            \"min\": \"&min_indexAccesses\",\n            \"max\": \"&max_indexAccesses\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Atomic Cycles\",\n            \"avg\": 
\"&avg_atomicCycles\",\n            \"min\": \"&min_atomicCycles\",\n            \"max\": \"&max_atomicCycles\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Bank Conflict\",\n            \"avg\": \"&avg_bankConflicts\",\n            \"min\": \"&min_bankConflicts\",\n            \"max\": \"&max_bankConflicts\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Addr Conflict\",\n            \"avg\": \"&avg_addrConflicts\",\n            \"min\": \"&min_addrConflicts\",\n            \"max\": \"&max_addrConflicts\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Unaligned Stall\",\n            \"avg\": \"&avg_unalignedStall\",\n            \"min\": \"&min_unalignedStall\",\n            \"max\": \"&max_unalignedStall\",\n            \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n          },\n          {\n            \"metric\": \"Mem Violations\",\n            \"avg\": \"&avg_memViolations\",\n            \"min\": \"&min_memViolations\",\n            \"max\": \"&max_memViolations\",\n            \"Unit\": {\"$concat\": [\"\", $normUnit]}\n          }\n        ]\n    }},\n    {\"$unwind\": {\n      \"path\": \"&array\"\n    }},\n    {\"$replaceRoot\": {\n      \"newRoot\": \"&array\"\n    }},\n    {\"$unionWith\": {\n          \"coll\": \"SQ_INST_LEVEL_LDS\",\n          \"pipeline\": [\n              {\"$match\": {\n                  \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n                  \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n                  \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n                  \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n              }},\n  \n            {\"$group\": {\n                \"_id\": null,\n                \"avg_ldsLatency\": {\n               
   \"$avg\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                    ] \n                  }\n                },\n                \"min_ldsLatency\": {\n                  \"$min\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                    ] \n                  }\n                },\n                \"max_ldsLatency\": {\n                  \"$max\": { \n                    \"$cond\": [\n                    {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n                    {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n                     null\n                    ] \n                  }\n                }\n            }},\n            {\"$project\": {\n                \"_id\": 0,\n                \"metric\": \"LDS Latency\",\n                \"avg\": \"&avg_ldsLatency\",\n                \"min\": \"&min_ldsLatency\",\n                \"max\": \"&max_ldsLatency\",\n                \"Unit\": \"Cycles\"\n            }}\n          ]\n      }}\n  ]);",
-          "type": "table"
-        }
-      ],
-      "title": "LDS Stats",
-      "transformations": [
-        {
-          "id": "concatenate",
-          "options": {
-            "frameNameLabel": "frame",
-            "frameNameMode": "field"
-          }
-        },
-        {
-          "id": "organize",
-          "options": {
-            "excludeByName": {
-              "Unit 2": true,
-              "metric 2": true
-            },
-            "indexByName": {
-              "Unit 1": 9,
-              "Unit 2": 8,
-              "avg 1": 1,
-              "avg 2": 2,
-              "max 1": 5,
-              "max 2": 6,
-              "metric 1": 0,
-              "metric 2": 7,
-              "min 1": 3,
-              "min 2": 4
-            },
-            "renameByName": {
-              "avg 1": "Avg (Current)",
-              "avg 2": "Avg (Baseline)",
-              "max 1": "Max (Current)",
-              "max 2": "Max (Baseline)",
-              "min 1": "Min (Current)",
-              "min 2": "Min (Baseline)"
-            }
-          }
+          "refId": "A"
         }
       ],
-      "type": "table"
+      "title": "Local Data Share (LDS)",
+      "type": "row"
     },
     {
       "collapsed": true,
@@ -5958,7 +5975,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 23
+        "y": 11
       },
       "id": 44,
       "panels": [
@@ -6206,7 +6223,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 24
+        "y": 12
       },
       "id": 203,
       "panels": [
@@ -6627,7 +6644,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 25
+        "y": 13
       },
       "id": 130,
       "panels": [
@@ -6970,7 +6987,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 26
+        "y": 14
       },
       "id": 112,
       "panels": [
@@ -7713,7 +7730,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 27
+        "y": 15
       },
       "id": 56,
       "panels": [
@@ -8435,7 +8452,7 @@
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 28
+        "y": 16
       },
       "id": 66,
       "panels": [
@@ -12791,9 +12808,9 @@
       },
       {
         "current": {
-          "selected": false,
-          "text": "omniperf_gfxWL_test_gemm_BF16_profile10k_mi200",
-          "value": "omniperf_gfxWL_test_gemm_BF16_profile10k_mi200"
+          "selected": true,
+          "text": "omniperf_asw_mixbench_mi200",
+          "value": "omniperf_asw_mixbench_mi200"
         },
         "definition": "workload_names.names.aggregate([\n  {\"$group\": {\n    \"_id\": \"&name\"\n  }}\n]);",
         "hide": 0,
@@ -12812,8 +12829,8 @@
       {
         "current": {
           "selected": false,
-          "text": "148579",
-          "value": "148579"
+          "text": "435646",
+          "value": "435646"
         },
         "definition": "$Workload1.pmc_perf.aggregate([\n{\"$match\": {\n    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n  }},\n\n  {\"$group\": {\n      \"_id\": null,\n      \"myAvg\": {\n        \"$avg\": { \"$multiply\": [{ \"$divide\": [{ \"$subtract\": [\"&EndNs\", \"&BeginNs\"] }, 1000] }, $sclk] }\n      }\n  }},\n  {\"$set\": {\n    \"array\": [\n        {\n          \"_id\": { \"$round\": [\"&myAvg\", 0] }\n        }\n      ]\n  }},\n  {\"$unwind\": {\n    \"path\": \"&array\"\n  }},\n  {\"$replaceRoot\": {\n    \"newRoot\": \"&array\"\n  }}\n]);",
         "hide": 2,
@@ -12832,8 +12849,8 @@
       {
         "current": {
           "selected": false,
-          "text": "63",
-          "value": "63"
+          "text": "103",
+          "value": "103"
         },
         "definition": "$Workload1.pmc_perf.aggregate([\n  {\"$match\": {\n    \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n    \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n    \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n  }},\n  {\"$group\": {\n    \"_id\": null,\n    \"theAvg\": {\n      \"$avg\": {\n        \"$cond\":[\n          {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n          {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n          \"\"\n        ]\n      }\n    }\n  }},\n  {\"$set\": {\n    \"array\": [\n        {\n          \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU] }, 8] }] }, $numCU] }}\n        }\n    ]\n  }},\n  {\"$unwind\": {\n    \"path\": \"$array\"\n  }},\n  {\"$replaceRoot\": {\n    \"newRoot\": \"$array\"\n  }}\n]);",
         "hide": 2,
@@ -12937,9 +12954,9 @@
       },
       {
         "current": {
-          "selected": false,
-          "text": "miperf_asw_vcopy_mi200",
-          "value": "miperf_asw_vcopy_mi200"
+          "selected": true,
+          "text": "omniperf_asw_mixbench_mi200",
+          "value": "omniperf_asw_mixbench_mi200"
         },
         "definition": "workload_names.names.aggregate([\n  {\"$group\": {\n    \"_id\": \"&name\"\n  }}\n]);",
         "hide": 0,
@@ -12958,8 +12975,8 @@
       {
         "current": {
           "selected": false,
-          "text": "84",
-          "value": "84"
+          "text": "103",
+          "value": "103"
         },
         "definition": "$Workload2.pmc_perf.aggregate([\n{\"$match\": {\n    \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n    \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n    \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}\n  }},\n\n  {\"$group\": {\n    \"_id\": null,\n    \"theAvg\": {\n      \"$avg\": {\n        \"$cond\":[\n          {\"$ne\": [\"&GRBM_GUI_ACTIVE\", 0]},\n          {\"$divide\": [{ \"$multiply\": [4, \"&SQ_BUSY_CU_CYCLES\"] }, \"&GRBM_GUI_ACTIVE\"]},\n          \"\"\n        ]\n      }\n    }\n  }},\n  {\"$set\": {\n    \"array\": [\n        {\n          \"_id\": {\"$toInt\": { \"$min\": [{ \"$add\": [{ \"$multiply\": [{ \"$divide\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] },8] }, { \"$min\": [{ \"$mod\": [{ \"$round\": [\"$theAvg\", 0] }, $maxWavesPerCU2] }, 8] }] }, $numCU2] }}\n        }\n    ]\n  }},\n  {\"$unwind\": {\n    \"path\": \"&array\"\n  }},\n  {\"$replaceRoot\": {\n    \"newRoot\": \"&array\"\n  }}\n]);",
         "hide": 2,
@@ -13047,10 +13064,6 @@
             "$__all"
           ]
         },
-        "datasource": {
-          "type": "amd-miperf-data-plugin",
-          "uid": "oVK0I__nk"
-        },
         "definition": "$Workload2.pmc_perf.aggregate([\n  {\"$group\": {\n    \"_id\": \"&KernelName\"\n  }}\n]);",
         "hide": 0,
         "includeAll": true,
@@ -13200,8 +13213,8 @@
       {
         "current": {
           "selected": false,
-          "text": "110",
-          "value": "110"
+          "text": "104",
+          "value": "104"
         },
         "definition": "$Workload2.sysinfo.aggregate([\n    {\"$group\": {\n      \"_id\": \"&numCU\"\n    }}\n]);",
         "hide": 2,
@@ -13378,7 +13391,7 @@
   "timepicker": {},
   "timezone": "",
   "title": "Omniperf_v1.0.8_pub",
-  "uid": "MIPerf_v1_0_063020221121",
-  "version": 12,
+  "uid": "MIPerf_v1_0_0630202211210",
+  "version": 4,
   "weekStart": ""
 }
\ No newline at end of file

From 4aa33848de02b684ade6e2bb67a83bff59f24993 Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Tue, 15 Aug 2023 14:00:36 -0500
Subject: [PATCH 72/81] Fixing several bugs on original PR

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/omniperf                             |  65 ++------
 src/omniperf_analyze/omniperf_analyze.py |  13 +-
 src/parser.py                            |  17 +--
 src/utils/csv_converter.py               | 185 ++++++++++-------------
 4 files changed, 95 insertions(+), 185 deletions(-)

diff --git a/src/omniperf b/src/omniperf
index 4195ca64a..4689b02ac 100755
--- a/src/omniperf
+++ b/src/omniperf
@@ -53,8 +53,6 @@ from common import (
 
 from common import getVersion
 
-cache = dict()
-
 ################################################
 # Helper Functions
 ################################################
@@ -443,26 +441,12 @@ def characterize_app(args, VER):
     # Update timestamps
     replace_timestamps(workload_dir, log)
 
-    # Manually join each pmc_perf*.csv output
     if args.use_rocscope == False:
+        # Manually join each pmc_perf*.csv output
         join_prof(workload_dir, args.join_type, log, args.verbose)
-        #demangle
-        for filename in os.listdir(workload_dir):
-                try:
-                    # fileName = file[0 : file.find(".")]
-                    # Only shorten KernelNames if instructed to
-                    if args.kernelVerbose < 5:
-                        t1 = pd.read_csv(
-                            os.path.join(workload_dir, filename),
-                            on_bad_lines="skip",
-                            engine="python",
-                        )
-                        t2 = csv_converter.kernel_name_shortener(t1, cache, level=args.kernelVerbose)
-                        t2.to_csv(fname, index=False)
-                except pd.errors.EmptyDataError:
-                    print("Skipping empty csv " + filename)
-
-    # Close log
+        # Demangle and overwrite original KernelNames
+        csv_converter.kernel_name_shortener(workload_dir, args.kernelVerbose)
+        
     log.close()
 
 
@@ -559,6 +543,10 @@ def omniperf_profile(args, VER):
         print("IP Blocks: All")
     else:
         print("IP Blocks: ", args.ipblocks)
+    if args.kernelVerbose > 5:
+        print("KernelName verbose level: DISABLED")
+    else:
+        print("KernelName verbose level: ", str(args.kernelVerbose))
 
     # Set up directories
     workload_dir = args.path + "/" + args.name + "/" + args.target
@@ -680,25 +668,11 @@ def omniperf_profile(args, VER):
         # Update timestamps
         replace_timestamps(workload_dir, log)
         
-        # Manually join each pmc_perf*.csv output
         if args.use_rocscope == False:
+            # Manually join each pmc_perf*.csv output
             join_prof(workload_dir, args.join_type, log, args.verbose)
-            #demangle
-            for filename in os.listdir(workload_dir):
-                if filename.endswith('.csv'):
-                    try:
-                        # fileName = file[0 : file.find(".")]
-                        # Only shorten KernelNames if instructed to
-                        if args.kernelVerbose < 5:
-                            t1 = pd.read_csv(
-                                os.path.join(workload_dir, filename),
-                                on_bad_lines="skip",
-                                engine="python",
-                            )
-                            t2 = csv_converter.kernel_name_shortener(t1, cache, level=args.kernelVerbose)
-                            t2.to_csv(os.path.join(workload_dir, filename), index=False)
-                    except pd.errors.EmptyDataError:
-                        print("Skipping empty csv " + filename)
+            # Demangle and overwrite original KernelNames
+            csv_converter.kernel_name_shortener(workload_dir, args.kernelVerbose)
 
     # Generate sysinfo
     gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof)
@@ -814,23 +788,6 @@ def main():
         else:
             print("\n-------------\nProfile only\n-------------\n")
             omniperf_profile(args, VER)
-            workload_dir = args.path
-            #demangle
-            for filename in os.listdir(workload_dir):
-                if filename.endswith('.csv'):
-                    try:
-                        # fileName = file[0 : file.find(".")]
-                        # Only shorten KernelNames if instructed to
-                        if args.kernelVerbose < 5:
-                            t1 = pd.read_csv(
-                                os.path.join(workload_dir, filename),
-                                on_bad_lines="skip",
-                                engine="python",
-                            )
-                            t2 = csv_converter.kernel_name_shortener(t1, cache, level=args.kernelVerbose)
-                            t2.to_csv(os.path.join(workload_dir, filename), index=False)
-                    except pd.errors.EmptyDataError:
-                        print("Skipping empty csv " + filename)
 
     ##############
     # DATABASE MODE
diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index 123bdd15a..099618e8a 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -46,11 +46,9 @@
 from omniperf_analyze.utils import parser, file_io
 from omniperf_analyze.utils.gui_components.roofline import get_roofline
 from utils import csv_converter
-import pandas as pd
 
 archConfigs = {}
 
-
 ################################################
 # Helper Functions
 ################################################
@@ -222,16 +220,9 @@ def run_cli(args, runs):
     # If we assume the panel layout for all archs are similar, it doesn't matter
     # which archConfig passed into show_all function.
     # After decide to how to manage kernels display patterns, we can revisit it.
-    cache = dict()
     for d in args.path:
-        # demangle
-        for filename in os.listdir(d[0]):
-            if filename.endswith(".csv"):
-                df = pd.read_csv(os.path.join(d[0], filename))
-                new_df = csv_converter.kernel_name_shortener(
-                    df, cache, args.kernelVerbose
-                )
-                new_df.to_csv(os.path.join(d[0], filename), index=False)
+        # Demangle and overwrite original KernelNames
+        csv_converter.kernel_name_shortener(d[0], args.kernelVerbose)
 
         file_io.create_df_kernel_top_stats(
             d[0],
diff --git a/src/parser.py b/src/parser.py
index 8de09542f..e8eb28940 100644
--- a/src/parser.py
+++ b/src/parser.py
@@ -205,11 +205,10 @@ def parse(my_parser):
         help="\t\t\tProvide command for profiling after double dash.",
     )
     profile_group.add_argument(
-        "-f",
         "--kernelVerbose",
         required=False,
         metavar="",
-        help="\t\t\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5)",
+        help="\t\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5)",
         default=2,
         type=int,
     )
@@ -351,15 +350,6 @@ def parse(my_parser):
         dest="workload",
         help="\t\t\t\tSpecify name of workload (to remove) or path to workload (to import)",
     )
-    connection_group.add_argument(
-        "-k",
-        "--kernelVerbose",
-        required=False,
-        metavar="",
-        help="\t\t\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5)",
-        default=2,
-        type=int,
-    )
 
     ## Analyze Command Line Options
     ## ----------------------------
@@ -524,11 +514,10 @@ def parse(my_parser):
         help="\t\tRandomly generate a port to launch GUI application.\n\t\tRegistered Ports range inclusive (1024-49151).",
     )
     analyze_group.add_argument(
-        "-f",
         "--kernelVerbose",
         required=False,
         metavar="",
-        help="\t\t\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 2) (DISABLE: 5)",
-        default=2,
+        help="\t\tSpecify Kernel Name verbose level 1-5. Lower the level, shorter the kernel name. (DEFAULT: 5) (DISABLE: 5)",
+        default=5,
         type=int,
     )
diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index bffe56cc5..428b4de91 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -32,90 +32,106 @@
 import getpass
 from pymongo import MongoClient
 from tqdm import tqdm
-import shutil
-
+import glob
 
 cache = dict()
+
 supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"}
 MAX_SERVER_SEL_DELAY = 5000  # 5 sec connection timeout
 
 
-def kernel_name_shortener(df, cache, level):
-    if level >= 5:
-        return df
+def kernel_name_shortener(workload_dir, level):
 
-    columnName = ""
-    if "KernelName" in df:
-        columnName = "KernelName"
-    if "Name" in df:
-        columnName = "Name"
+    def shorten_file(df, level):
+        global cache
 
-    if columnName == "KernelName" or columnName == "Name":
-        # loop through all indices
-        for index in df.index:
-            original_name = df.loc[index, columnName]
-            if original_name in cache:
-                continue
+        columnName = ""
+        if "KernelName" in df:
+            columnName = "KernelName"
+        if "Name" in df:
+            columnName = "Name"
 
-            cmd = ["/opt/rocm/llvm/bin/llvm-cxxfilt", original_name]
+        if columnName == "KernelName" or columnName == "Name":
+            # loop through all indices
+            for index in df.index:
+                original_name = df.loc[index, columnName]
+                if original_name in cache:
+                    continue
 
-            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                cmd = ["/opt/rocm/llvm/bin/llvm-cxxfilt", original_name]
 
-            demangled_name, e = proc.communicate()
-            demangled_name = str(demangled_name, "UTF-8").strip()
+                proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
-            # cache miss, add the shortened name to the dictionary
-            new_name = ""
-            matches = ""
+                demangled_name, e = proc.communicate()
+                demangled_name = str(demangled_name, "UTF-8").strip()
 
-            names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
+                # cache miss, add the shortened name to the dictionary
+                new_name = ""
+                matches = ""
 
-            # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
-            if names_and_args.search(demangled_name):
-                matches = names_and_args.findall(demangled_name)
-            else:
-                # Works for first case  '__amd_rocclr_fillBuffer.kd'
-                cache[original_name] = new_name
-                if new_name == None or new_name == "":
-                    cache[original_name] = demangled_name
-                continue
+                names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
 
-            current_level = 0
-            for name in matches:
-                ##can cause errors if a function name or argument is equal to 'clone'
-                if name[0] == "clone":
+                # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
+                if names_and_args.search(demangled_name):
+                    matches = names_and_args.findall(demangled_name)
+                else:
+                    # Works for first case  '__amd_rocclr_fillBuffer.kd'
+                    cache[original_name] = new_name
+                    if new_name == None or new_name == "":
+                        cache[original_name] = demangled_name
                     continue
-                if len(name) == 3:
-                    if name[2] == "::":
-                        continue
 
-                if current_level < level:
-                    new_name += name[0]
-                # closing '>' is to be taken account by the while loop
-                if name[1].count(">") == 0:
-                    if current_level < level:
-                        if not (current_level == level - 1 and name[1].count("<") > 0):
-                            new_name += name[1]
-                    current_level += name[1].count("<")
+                current_level = 0
+                for name in matches:
+                    ##can cause errors if a function name or argument is equal to 'clone'
+                    if name[0] == "clone":
+                        continue
+                    if len(name) == 3:
+                        if name[2] == "::":
+                            continue
 
-                curr_index = 0
-                # cases include '>'  '> >, ' have to go in depth here to not lose account of commas and current level
-                while name[1].count(">") > 0 and curr_index < len(name[1]):
                     if current_level < level:
-                        new_name += name[1][curr_index:]
-                        current_level -= name[1][curr_index:].count(">")
-                        curr_index = len(name[1])
-                    elif name[1][curr_index] == (">"):
-                        current_level -= 1
-                    curr_index += 1
+                        new_name += name[0]
+                    # closing '>' is to be taken account by the while loop
+                    if name[1].count(">") == 0:
+                        if current_level < level:
+                            if not (current_level == level - 1 and name[1].count("<") > 0):
+                                new_name += name[1]
+                        current_level += name[1].count("<")
+
+                    curr_index = 0
+                    # cases include '>'  '> >, ' have to go in depth here to not lose account of commas and current level
+                    while name[1].count(">") > 0 and curr_index < len(name[1]):
+                        if current_level < level:
+                            new_name += name[1][curr_index:]
+                            current_level -= name[1][curr_index:].count(">")
+                            curr_index = len(name[1])
+                        elif name[1][curr_index] == (">"):
+                            current_level -= 1
+                        curr_index += 1
 
-            cache[original_name] = new_name
-            if new_name == None or new_name == "":
-                cache[original_name] = demangled_name
+                cache[original_name] = new_name
+                if new_name == None or new_name == "":
+                    cache[original_name] = demangled_name
 
-        df[columnName] = df[columnName].map(cache)
+            df[columnName] = df[columnName].map(cache)
 
-    return df
+        return df
+    
+    # Only shorten if valid shortening level
+    if level < 5:
+        for fpath in glob.glob(workload_dir + "/*.csv"):
+            try:
+                orig_df = pd.read_csv(
+                    fpath,
+                    on_bad_lines="skip",
+                    engine="python",
+                )
+                modified_df = shorten_file(orig_df, level)
+                modified_df.to_csv(fpath, index=False)
+            except pd.errors.EmptyDataError:
+                print("Skipping empty csv " + str(fpath))
+            print("hi")
 
 
 # Verify target directory and setup connection
@@ -152,13 +168,6 @@ def parse(args, profileAndExport):
 
     db = "omniperf_" + str(args.team) + "_" + str(name) + "_" + soc
 
-    # if Extractionlvl >= 5:
-    #     print("KernelName shortening disabled")
-    # else:
-    #     print("KernelName shortening enabled")
-
-    # print("Kernel name verbose level:", Extractionlvl)
-
     if args.password == "":
         try:
             password = getpass.getpass()
@@ -203,16 +212,7 @@ def convert_folder(connectionInfo, Extractionlvl):
     except:
         print("ERROR: Unable to connect to the server")
         sys.exit(1)
-    # Set up directories
-    # if Extractionlvl < 5:
-    #     newfilepath = connectionInfo["workload"]
-    #     newfilepath_h = newfilepath + "/renamedFiles/"
-    #     if not os.path.exists(newfilepath_h):
-    #         os.mkdir(newfilepath_h)
-    #     newfilepath = newfilepath_h + connectionInfo["db"] + "/"
-    #     if not os.path.exists(newfilepath):
-    #         os.mkdir(newfilepath)
-    # Upload files
+
     i = 0
     file = "blank"
     for file in tqdm(os.listdir(connectionInfo["workload"])):
@@ -220,30 +220,6 @@ def convert_folder(connectionInfo, Extractionlvl):
             print(connectionInfo["workload"] + "/" + file)
             try:
                 fileName = file[0 : file.find(".")]
-                # Only shorten KernelNames if instructed to
-                # if Extractionlvl < 5:
-                #     t1 = pd.read_csv(
-                #         connectionInfo["workload"] + "/" + file,
-                #         on_bad_lines="skip",
-                #         engine="python",
-                #     )
-
-                #     t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
-                #     df_saved_file = t2.to_csv(newfilepath + file)
-
-                #     cmd = (
-                #         "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
-                #     ).format(
-                #         connectionInfo["username"],
-                #         connectionInfo["password"],
-                #         connectionInfo["host"],
-                #         connectionInfo["port"],
-                #         connectionInfo["db"],
-                #         newfilepath + file,
-                #         fileName,
-                #     )
-                #     os.system(cmd)
-                # else:
                 cmd = (
                     "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
                 ).format(
@@ -265,8 +241,5 @@ def convert_folder(connectionInfo, Extractionlvl):
     value = {"name": connectionInfo["db"]}
     newValue = {"name": connectionInfo["db"]}
     mycol.replace_one(value, newValue, upsert=True)
-    # Remove tmp directory if we shortened KernelNames
-    # if Extractionlvl < 5:
-    #     shutil.rmtree(newfilepath_h)
     print("{} collections added.".format(i))
     print("Workload name uploaded")

From dc849b264c51181547a2917abc38a24aa032754d Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Tue, 15 Aug 2023 14:09:38 -0500
Subject: [PATCH 73/81] Conform to Python formatting

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/omniperf_analyze/omniperf_analyze.py |  1 +
 src/utils/csv_converter.py               | 15 ++++++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index 099618e8a..0b54a696c 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -49,6 +49,7 @@
 
 archConfigs = {}
 
+
 ################################################
 # Helper Functions
 ################################################
diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index 428b4de91..bd199ac3a 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -41,7 +41,6 @@
 
 
 def kernel_name_shortener(workload_dir, level):
-
     def shorten_file(df, level):
         global cache
 
@@ -60,7 +59,9 @@ def shorten_file(df, level):
 
                 cmd = ["/opt/rocm/llvm/bin/llvm-cxxfilt", original_name]
 
-                proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                proc = subprocess.Popen(
+                    cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+                )
 
                 demangled_name, e = proc.communicate()
                 demangled_name = str(demangled_name, "UTF-8").strip()
@@ -69,7 +70,9 @@ def shorten_file(df, level):
                 new_name = ""
                 matches = ""
 
-                names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
+                names_and_args = re.compile(
+                    r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?"
+                )
 
                 # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
                 if names_and_args.search(demangled_name):
@@ -95,7 +98,9 @@ def shorten_file(df, level):
                     # closing '>' is to be taken account by the while loop
                     if name[1].count(">") == 0:
                         if current_level < level:
-                            if not (current_level == level - 1 and name[1].count("<") > 0):
+                            if not (
+                                current_level == level - 1 and name[1].count("<") > 0
+                            ):
                                 new_name += name[1]
                         current_level += name[1].count("<")
 
@@ -117,7 +122,7 @@ def shorten_file(df, level):
             df[columnName] = df[columnName].map(cache)
 
         return df
-    
+
     # Only shorten if valid shortening level
     if level < 5:
         for fpath in glob.glob(workload_dir + "/*.csv"):

From 8ddc1a3fe1d1ffb940f538d5adad484230354514 Mon Sep 17 00:00:00 2001
From: Cole Ramos <colramos@amd.com>
Date: Wed, 19 Jul 2023 10:47:12 -0500
Subject: [PATCH 74/81] Update issue templates

---
 .github/ISSUE_TEMPLATE/bug_report.md | 33 ++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 000000000..4dec4e1b1
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,33 @@
+---
+name: Bug report
+about: Report a bug you've encountered for further investigation
+title: ''
+labels: bug
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**Development Environment:**
+ - Linux Distribution: [e.g. Ubuntu20.04, RHEL8]
+ - Omniperf Version: [e.g. try `omniperf --version`]
+ - GPU: [e.g. Mi100, Mi200]
+ - Cluster (if applicable): [e.g. Crusher]
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Run '...'
+2. Go to '...'
+3. Click on '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots**
+If applicable, add screenshots to help explain your problem.
+
+**Additional context**
+Add any other context about the problem here.

From d4c4c733678ad8c1e29602f26d2b750b66743311 Mon Sep 17 00:00:00 2001
From: Cole Ramos <colramos@amd.com>
Date: Wed, 19 Jul 2023 10:49:01 -0500
Subject: [PATCH 75/81] Update issue templates

---
 .github/ISSUE_TEMPLATE/feature_request.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md

diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 000000000..11fc491ef
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: enhancement
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.

From 132cb37d692efc108411b18f56ac8a6132c8d7c6 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Tue, 15 Aug 2023 15:04:23 -0500
Subject: [PATCH 76/81] add pct of peak to tables

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 src/omniperf_analyze/configs/gfx906/1200_lds.yaml          | 5 +++++
 .../configs/gfx906/1300_instruction-cache.yaml             | 3 +++
 .../configs/gfx906/1400_constant-cache.yaml                | 3 +++
 src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml     | 5 +++++
 src/omniperf_analyze/configs/gfx908/1200_lds.yaml          | 5 +++++
 .../configs/gfx908/1300_instruction-cache.yaml             | 3 +++
 .../configs/gfx908/1400_constant-cache.yaml                | 3 +++
 src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml     | 5 +++++
 .../configs/gfx90a/1100_compute-unit-compute-pipeline.yaml | 7 +++++++
 src/omniperf_analyze/configs/gfx90a/1200_lds.yaml          | 5 +++++
 .../configs/gfx90a/1300_instruction-cache.yaml             | 3 +++
 .../configs/gfx90a/1400_constant-cache.yaml                | 3 +++
 src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml     | 5 +++++
 13 files changed, 55 insertions(+)

diff --git a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml
index 218ad2cda..3fd52c3b1 100644
--- a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           metric: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -23,17 +24,21 @@ Panel Config:
         metric:
           Utilization:
             value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU)))
+            unit: Pct of Peak
             tips: 
           Access Rate:
             value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU)))
+            unit: Pct of Peak
             tips: 
           Bandwidth (Pct-of-Peak):
             value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
               / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
+            unit: Pct of Peak
             tips: 
           Bank Conflict Rate:
             value: AVG((((SQ_LDS_BANK_CONFLICT * 3.125) / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
               if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
+            unit: Pct of Peak
             tips:
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml b/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml
index fb9f384e1..361cb9ae2 100644
--- a/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1300_instruction-cache.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           metric: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -24,10 +25,12 @@ Panel Config:
           Bandwidth:
             value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
               * (EndNs - BeginNs))))
+            unit: Pct of Peak
             tips: 
           Cache Hit:
             value: AVG(((SQC_ICACHE_HITS * 100) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES)
               + SQC_ICACHE_MISSES_DUPLICATE)))
+            unit: Pct of Peak
             tips: 
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml b/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml
index 91a2d6c9f..eec636b38 100644
--- a/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1400_constant-cache.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           mertic: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -24,11 +25,13 @@ Panel Config:
           Bandwidth:
             value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
               * (EndNs - BeginNs))))
+            unit: Pct of Peak
             tips:
           Cache Hit:
             value:
               AVG((((SQC_DCACHE_HITS * 100) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE))
               if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE) != 0) else None))
+            unit: Pct of Peak
             tips:
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
index c734e21c8..e1a7e29cc 100644
--- a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           metric: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -24,20 +25,24 @@ Panel Config:
           Buffer Coalescing:
             value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
               * 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None))
+            unit: Pct of Peak
             tips: 
           Cache Util:
             value: AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
               != 0) else None))
+            unit: Pct of Peak
             tips: 
           Cache BW:
             value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
               / ((($sclk / 1000) * 64) * $numCU))
+            unit: Pct of Peak
             tips: 
           Cache Hit:
             value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
               + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
               / TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
               None))
+            unit: Pct of Peak
             tips: 
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml
index 218ad2cda..3fd52c3b1 100644
--- a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           metric: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -23,17 +24,21 @@ Panel Config:
         metric:
           Utilization:
             value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU)))
+            unit: Pct of Peak
             tips: 
           Access Rate:
             value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU)))
+            unit: Pct of Peak
             tips: 
           Bandwidth (Pct-of-Peak):
             value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
               / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
+            unit: Pct of Peak
             tips: 
           Bank Conflict Rate:
             value: AVG((((SQ_LDS_BANK_CONFLICT * 3.125) / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
               if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
+            unit: Pct of Peak
             tips:
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml b/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml
index fb9f384e1..361cb9ae2 100644
--- a/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1300_instruction-cache.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           metric: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -24,10 +25,12 @@ Panel Config:
           Bandwidth:
             value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
               * (EndNs - BeginNs))))
+            unit: Pct of Peak
             tips: 
           Cache Hit:
             value: AVG(((SQC_ICACHE_HITS * 100) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES)
               + SQC_ICACHE_MISSES_DUPLICATE)))
+            unit: Pct of Peak
             tips: 
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml b/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml
index 91a2d6c9f..eec636b38 100644
--- a/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1400_constant-cache.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           mertic: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -24,11 +25,13 @@ Panel Config:
           Bandwidth:
             value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
               * (EndNs - BeginNs))))
+            unit: Pct of Peak
             tips:
           Cache Hit:
             value:
               AVG((((SQC_DCACHE_HITS * 100) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE))
               if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE) != 0) else None))
+            unit: Pct of Peak
             tips:
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
index 7eeed0477..22efba955 100644
--- a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           metric: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -24,20 +25,24 @@ Panel Config:
           Buffer Coalescing:
             value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
               * 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None))
+            unit: Pct of Peak
             tips: 
           Cache Util:
             value: AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
               != 0) else None))
+            unit: Pct of Peak
             tips: 
           Cache BW:
             value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
               / ((($sclk / 1000) * 64) * $numCU))
+            unit: Pct of Peak
             tips: 
           Cache Hit:
             value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
               + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
               / TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
               None))
+            unit: Pct of Peak
             tips: 
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
index 3e29bc4a2..769212ed5 100644
--- a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           metric: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -28,26 +29,32 @@ Panel Config:
               + (64 * (((SQ_INSTS_VALU_ADD_F64 + SQ_INSTS_VALU_MUL_F64) + SQ_INSTS_VALU_TRANS_F64)
               + (2 * SQ_INSTS_VALU_FMA_F64)))) / (EndNs - BeginNs)))) / (((($sclk
               * $numCU) * 64) * 2) / 1000))
+            unit: Pct of Peak
             tips: 
           mfma_flops_bf16_pop:
             value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_BF16 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 512) / 1000))
+            unit: Pct of Peak
             tips: 
           mfma_flops_f16_pop:
             value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F16 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 1024) / 1000))
+            unit: Pct of Peak
             tips: 
           mfma_flops_f32_pop:
             value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F32 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 256) / 1000))
+            unit: Pct of Peak
             tips: 
           mfma_flops_f64_pop:
             value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_F64 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 256) / 1000))
+            unit: Pct of Peak
             tips: 
           mfma_flops_i8_pop:
             value: ((100 * AVG(((SQ_INSTS_VALU_MFMA_MOPS_I8 * 512) / (EndNs - BeginNs))))
               / ((($sclk * $numCU) * 1024) / 1000))
+            unit: Pct of Peak
             tips: 
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml
index 218ad2cda..3fd52c3b1 100644
--- a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           metric: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -23,17 +24,21 @@ Panel Config:
         metric:
           Utilization:
             value: AVG(((100 * SQ_LDS_IDX_ACTIVE) / (GRBM_GUI_ACTIVE * $numCU)))
+            unit: Pct of Peak
             tips: 
           Access Rate:
             value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU)))
+            unit: Pct of Peak
             tips: 
           Bandwidth (Pct-of-Peak):
             value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks))
               / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128)))
+            unit: Pct of Peak
             tips: 
           Bank Conflict Rate:
             value: AVG((((SQ_LDS_BANK_CONFLICT * 3.125) / (SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT))
               if ((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) != 0) else None))
+            unit: Pct of Peak
             tips:
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml b/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml
index 1a7000e93..c287c13bd 100644
--- a/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1300_instruction-cache.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           metric: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -24,10 +25,12 @@ Panel Config:
           Bandwidth:
             value: AVG(((SQC_ICACHE_REQ * 100000) / (($sclk * $numSQC)
               * (EndNs - BeginNs))))
+            unit: Pct of Peak
             tips: 
           Cache Hit:
             value: AVG(((SQC_ICACHE_HITS * 100) / ((SQC_ICACHE_HITS + SQC_ICACHE_MISSES)
               + SQC_ICACHE_MISSES_DUPLICATE)))
+            unit: Pct of Peak
             tips: 
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml b/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml
index 91a2d6c9f..eec636b38 100644
--- a/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1400_constant-cache.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           mertic: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -24,11 +25,13 @@ Panel Config:
           Bandwidth:
             value: AVG(((SQC_DCACHE_REQ * 100000) / (($sclk * $numSQC)
               * (EndNs - BeginNs))))
+            unit: Pct of Peak
             tips:
           Cache Hit:
             value:
               AVG((((SQC_DCACHE_HITS * 100) / (SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE))
               if ((SQC_DCACHE_HITS + SQC_DCACHE_MISSES + SQC_DCACHE_MISSES_DUPLICATE) != 0) else None))
+            unit: Pct of Peak
             tips:
 
     - metric_table:
diff --git a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
index 7ea26db05..28450f2ae 100644
--- a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml
@@ -14,6 +14,7 @@ Panel Config:
         header:
           metric: Metric
           value: Value
+          unit: Unit
           tips: Tips
         style:
           type: simple_bar
@@ -24,20 +25,24 @@ Panel Config:
           Buffer Coalescing:
             value: AVG(((((TA_TOTAL_WAVEFRONTS_sum * 64) * 100) / (TCP_TOTAL_ACCESSES_sum
               * 4)) if (TCP_TOTAL_ACCESSES_sum != 0) else None))
+            unit: Pct of Peak
             tips: 
           Cache Util:
             value: AVG((((TCP_GATE_EN2_sum * 100) / TCP_GATE_EN1_sum) if (TCP_GATE_EN1_sum
               != 0) else None))
+            unit: Pct of Peak
             tips: 
           Cache BW:
             value: ((100 * AVG(((TCP_TOTAL_CACHE_ACCESSES_sum * 64) / (EndNs - BeginNs))))
               / ((($sclk / 1000) * 64) * $numCU))
+            unit: Pct of Peak
             tips: 
           Cache Hit:
             value: AVG(((100 - ((100 * (((TCP_TCC_READ_REQ_sum + TCP_TCC_WRITE_REQ_sum)
               + TCP_TCC_ATOMIC_WITH_RET_REQ_sum) + TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum))
               / TCP_TOTAL_CACHE_ACCESSES_sum)) if (TCP_TOTAL_CACHE_ACCESSES_sum != 0) else
                None))
+            unit: Pct of Peak
             tips: 
 
     - metric_table:

From 6d3995570802959bf0c49e41ce731f64293f6a04 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD <Jose.Santos@amd.com>
Date: Tue, 15 Aug 2023 15:14:20 -0500
Subject: [PATCH 77/81] add simple_bar styling

Signed-off-by: JoseSantosAMD <Jose.Santos@amd.com>
---
 .../configs/gfx90a/1100_compute-unit-compute-pipeline.yaml   | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
index 770087569..be1ece043 100644
--- a/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/1100_compute-unit-compute-pipeline.yaml
@@ -15,6 +15,11 @@ Panel Config:
           metric: Metric
           value: Value
           tips: Tips
+        style:
+          type: simple_bar
+          range_color: [1, 100]
+          label_txt: (%)
+          xrange: [0, 110]
         metric:
           valu_flops_pop:
             value: ((100 * AVG(((((64 * (((SQ_INSTS_VALU_ADD_F16 + SQ_INSTS_VALU_MUL_F16)

From 74f816ed47d09ef0b8e3a12eec7b6c54e9b4e766 Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Wed, 16 Aug 2023 13:48:31 -0500
Subject: [PATCH 78/81] Add a -d option to analyze mode for consistency with
 profile

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/parser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/parser.py b/src/parser.py
index e8eb28940..1a692cc16 100644
--- a/src/parser.py
+++ b/src/parser.py
@@ -429,6 +429,7 @@ def parse(my_parser):
         help="\t\tSpecify kernel id(s) from --list-kernels for filtering.",
     )
     analyze_group.add_argument(
+        "-d",
         "--dispatch",
         dest="gpu_dispatch_id",
         metavar="",

From 5d74b142b6434cfeb2f0f1f75c42bee51bae375c Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Wed, 16 Aug 2023 14:16:50 -0500
Subject: [PATCH 79/81] Prevent users from ipblock filtering in standalone GUI

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/omniperf                             | 32 +++++++++++++++++-------
 src/omniperf_analyze/omniperf_analyze.py |  2 +-
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/src/omniperf b/src/omniperf
index 4689b02ac..fc7c7cfa9 100755
--- a/src/omniperf
+++ b/src/omniperf
@@ -856,6 +856,20 @@ def main():
                         my_parser,
                         "Access denied. Cannot access parent directories in path ../",
                     )
+                if args.filter_metrics and args.gui:
+                    throw_parse_error(
+                        my_parser,
+                        """
+                        omniperf analyze --path <workload_path> [analyze options]
+                        \n\n-------------------------------------------------------------------------------
+                        \nExamples:
+                        \n\tomniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a
+                        \n\tomniperf analyze -p workloads/mixbench/mi200/ --filter-dispatch-ids 12 34 --decimal 3
+                        \n\tomniperf analyze -p workloads/mixbench/mi200/ --gui
+                        \n-------------------------------------------------------------------------------\n
+                        \ntool: error: --gui cannot be used in combination with: -b/--metric
+                        """
+                    )
                 print("\n--------\nAnalyze\n--------\n")
                 # Ensure absolute path
                 for dir in args.path:
@@ -877,15 +891,15 @@ def main():
                 throw_parse_error(
                     my_parser,
                     """
-                                        omniperf analyze --path <workload_path> [analyze options]
-                                        \n\n-------------------------------------------------------------------------------
-                                        \nExamples:
-                                        \n\tomniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a
-                                        \n\tomniperf analyze -p workloads/mixbench/mi200/ --filter-dispatch-ids 12 34 --decimal 3
-                                        \n\tomniperf analyze -p workloads/mixbench/mi200/ --gui
-                                        \n-------------------------------------------------------------------------------\n
-                                        \ntool: error: the following arguments are required: -p/--path
-                                        """,
+                    omniperf analyze --path <workload_path> [analyze options]
+                    \n\n-------------------------------------------------------------------------------
+                    \nExamples:
+                    \n\tomniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a
+                    \n\tomniperf analyze -p workloads/mixbench/mi200/ --filter-dispatch-ids 12 34 --decimal 3
+                    \n\tomniperf analyze -p workloads/mixbench/mi200/ --gui
+                    \n-------------------------------------------------------------------------------\n
+                    \ntool: error: the following arguments are required: -p/--path
+                    """,
                 )
 
     sys.exit(0)  # Indicate successful on exit
diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index 7e62556b1..ac06a5c8a 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -268,7 +268,7 @@ def roofline_only(path_to_dir, dev_id, sort_type, mem_level, kernel_names, verbo
     app_path = path_to_dir + "/pmc_perf.csv"
     roofline_exists = os.path.isfile(app_path)
     if not roofline_exists:
-        print("Error: {} does not exist")
+        print("Error: {} does not exist".format(app_path))
         sys.exit(0)
     t_df = OrderedDict()
     t_df["pmc_perf"] = pd.read_csv(app_path)

From f27142160cf68e16a54d134e5a24ce56c93f2365 Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Wed, 16 Aug 2023 15:34:55 -0500
Subject: [PATCH 80/81] Remove debug logging

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/utils/csv_converter.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index bd199ac3a..cc3880446 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -136,7 +136,6 @@ def shorten_file(df, level):
                 modified_df.to_csv(fpath, index=False)
             except pd.errors.EmptyDataError:
                 print("Skipping empty csv " + str(fpath))
-            print("hi")
 
 
 # Verify target directory and setup connection

From 3ae0198f8e0559c957536c1f33e1d5a7d7c21248 Mon Sep 17 00:00:00 2001
From: coleramos425 <colramos@amd.com>
Date: Wed, 16 Aug 2023 16:16:46 -0500
Subject: [PATCH 81/81] Rename csv_converter to csv_processor to make the name more succinct

Signed-off-by: coleramos425 <colramos@amd.com>
---
 src/omniperf                                     | 10 +++++-----
 src/omniperf_analyze/omniperf_analyze.py         |  4 ++--
 src/utils/{csv_converter.py => csv_processor.py} |  0
 3 files changed, 7 insertions(+), 7 deletions(-)
 rename src/utils/{csv_converter.py => csv_processor.py} (100%)

diff --git a/src/omniperf b/src/omniperf
index fc7c7cfa9..eb7a75ff8 100755
--- a/src/omniperf
+++ b/src/omniperf
@@ -40,7 +40,7 @@ from parser import parse
 from utils import specs
 from utils.perfagg import perfmon_filter, pmc_filter, pmc_perf_split, join_prof
 from utils import remove_workload
-from utils import csv_converter  # Import workload
+from utils import csv_processor  # Import workload
 from omniperf_analyze.omniperf_analyze import roofline_only  # Standalone roofline
 from omniperf_analyze.omniperf_analyze import analyze  # CLI analysis
 
@@ -254,10 +254,10 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof):
 
 def mongo_import(args, profileAndImport):
     # Validate target directory
-    connectionInfo, Extractionlvl = csv_converter.parse(args, profileAndImport)
+    connectionInfo, Extractionlvl = csv_processor.parse(args, profileAndImport)
     # Convert and upload data
     print("-- Conversion & Upload in Progress --")
-    csv_converter.convert_folder(connectionInfo, Extractionlvl)
+    csv_processor.convert_folder(connectionInfo, Extractionlvl)
     print("-- Complete! --")
 
 ################################################
@@ -445,7 +445,7 @@ def characterize_app(args, VER):
         # Manually join each pmc_perf*.csv output
         join_prof(workload_dir, args.join_type, log, args.verbose)
         # Demangle and overwrite original KernelNames
-        csv_converter.kernel_name_shortener(workload_dir, args.kernelVerbose)
+        csv_processor.kernel_name_shortener(workload_dir, args.kernelVerbose)
         
     log.close()
 
@@ -672,7 +672,7 @@ def omniperf_profile(args, VER):
             # Manually join each pmc_perf*.csv output
             join_prof(workload_dir, args.join_type, log, args.verbose)
             # Demangle and overwrite original KernelNames
-            csv_converter.kernel_name_shortener(workload_dir, args.kernelVerbose)
+            csv_processor.kernel_name_shortener(workload_dir, args.kernelVerbose)
 
     # Generate sysinfo
     gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof)
diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index ac06a5c8a..3485d33a5 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -45,7 +45,7 @@
 from pathlib import Path
 from omniperf_analyze.utils import parser, file_io
 from omniperf_analyze.utils.gui_components.roofline import get_roofline
-from utils import csv_converter
+from utils import csv_processor
 
 archConfigs = {}
 
@@ -223,7 +223,7 @@ def run_cli(args, runs):
     # After decide to how to manage kernels display patterns, we can revisit it.
     for d in args.path:
         # Demangle and overwrite original KernelNames
-        csv_converter.kernel_name_shortener(d[0], args.kernelVerbose)
+        csv_processor.kernel_name_shortener(d[0], args.kernelVerbose)
 
         file_io.create_df_kernel_top_stats(
             d[0],
diff --git a/src/utils/csv_converter.py b/src/utils/csv_processor.py
similarity index 100%
rename from src/utils/csv_converter.py
rename to src/utils/csv_processor.py