Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/AMDResearch/omniperf into en…
Browse files Browse the repository at this point in the history
…hancement_133
  • Loading branch information
JoseSantosAMD committed Jul 10, 2023
2 parents ecc3986 + 8edba71 commit 2d28360
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 50 deletions.
2 changes: 1 addition & 1 deletion docker/rhel8/rocm.repo
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key

[amdgpu]
name=amdgpu
baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.5/main/x86_64
baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.8/main/x86_64
enabled=1
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
34 changes: 2 additions & 32 deletions src/omniperf
Original file line number Diff line number Diff line change
Expand Up @@ -439,23 +439,7 @@ def characterize_app(args, VER):
else:
run_prof(fname, workload_dir, perfmon_dir, app_cmd, args.target, log, args.verbose)



# run again with timestamps
success, output = capture_subprocess_output(
[
rocprof_cmd,
# "-i", fname,
# "-m", perfmon_dir + "/" + "metrics.xml",
"--timestamp",
"on",
"-o",
workload_dir + "/" + "timestamps.csv",
'"' + app_cmd + '"',
]
)
log.write(output)
# Update pmc_perf.csv timestamps
# Update timestamps
replace_timestamps(workload_dir, log)

# Manually join each pmc_perf*.csv output
Expand Down Expand Up @@ -676,21 +660,7 @@ def omniperf_profile(args, VER):
else:
run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose)

# run again with timestamps
success, output = capture_subprocess_output(
[
rocprof_cmd,
# "-i", fname,
# "-m", perfmon_dir + "/" + "metrics.xml",
"--timestamp",
"on",
"-o",
workload_dir + "/" + "timestamps.csv",
'"' + args.remaining + '"',
]
)
log.write(output)
# Update pmc_perf.csv timestamps
# Update timestamps
replace_timestamps(workload_dir, log)

# Manually join each pmc_perf*.csv output
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Panel Config:
data source:
- metric_table:
id: 201
title: Speed-of-Light
header:
metric: Metric
value: Value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Panel Config:
data source:
- metric_table:
id: 201
title: Speed-of-Light
header:
metric: Metric
value: Value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Panel Config:
data source:
- metric_table:
id: 201
title: Speed-of-Light
header:
metric: Metric
value: Value
Expand Down
38 changes: 26 additions & 12 deletions src/omniperf_analyze/omniperf_analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,36 +47,50 @@
from omniperf_analyze.utils.gui_components.roofline import get_roofline


def initialize_run(args, normalization_filter=None):
import pandas as pd
from collections import OrderedDict
################################################
# Helper Functions
################################################
def generate_configs(config_dir, list_kernels, filter_metrics):
from omniperf_analyze.utils import schema
from tabulate import tabulate

# Fixme: cur_root.parent.joinpath('soc_params')
soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
soc_spec_df = file_io.load_soc_params(soc_params_dir)

single_panel_config = file_io.is_single_panel_config(Path(args.config_dir))
single_panel_config = file_io.is_single_panel_config(Path(config_dir))
global archConfigs
archConfigs = {}
for arch in file_io.supported_arch.keys():
ac = schema.ArchConfig()
if args.list_kernels:
if list_kernels:
ac.panel_configs = file_io.top_stats_build_in_config
else:
arch_panel_config = (
args.config_dir if single_panel_config else args.config_dir.joinpath(arch)
config_dir if single_panel_config else config_dir.joinpath(arch)
)
ac.panel_configs = file_io.load_panel_configs(arch_panel_config)

# TODO: filter_metrics should/might be one per arch
# print(ac)

parser.build_dfs(ac, args.filter_metrics)
parser.build_dfs(ac, filter_metrics)

archConfigs[arch] = ac

return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin


################################################
# Core Functions
################################################
def initialize_run(args, normalization_filter=None):
import pandas as pd
from collections import OrderedDict
from tabulate import tabulate
from omniperf_analyze.utils import schema

# Fixme: cur_root.parent.joinpath('soc_params')
soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
soc_spec_df = file_io.load_soc_params(soc_params_dir)

generate_configs(args.config_dir, args.list_kernels, args.filter_metrics)

if args.list_metrics in file_io.supported_arch.keys():
print(
tabulate(
Expand Down
66 changes: 63 additions & 3 deletions src/omniperf_analyze/utils/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,47 @@ def update_normUnit_string(equation, unit):
).capitalize()


def gen_counter_list(formula):
    """Extract the hardware-counter names referenced by a metric formula.

    Parses *formula* as a Python expression (after substituting the
    ``$normUnit``/``$denom`` placeholders with ``SQ_WAVES`` and dropping any
    remaining ``$`` sigils) and collects every all-uppercase identifier,
    with an optional ``_sum`` suffix removed.

    Parameters
    ----------
    formula : str
        Metric equation, e.g. ``"SQ_WAVES_sum / $denom"``. Non-string
        input is tolerated and yields an empty list.

    Returns
    -------
    list of str
        Counter base names in the order they appear in the parsed AST.
        May contain duplicates if a counter is referenced more than once.
    """
    # All-caps tokens that appear in formulas but are functions/units,
    # not hardware counters; they must not be reported as counters.
    function_filter = {
        "MIN": None,
        "MAX": None,
        "AVG": None,
        "ROUND": None,
        "TO_INT": None,
        "GB": None,
        "STD": None,
        "GFLOP": None,
        "GOP": None,
        "OP": None,
        "CU": None,
        "NC": None,
        "UC": None,
        "CC": None,
        "RW": None,
        "GIOP": None,
    }

    counters = []
    if not isinstance(formula, str):
        return counters
    try:
        tree = ast.parse(
            formula.replace("$normUnit", "SQ_WAVES")
            .replace("$denom", "SQ_WAVES")
            .replace("$", "")
        )
        for node in ast.walk(tree):
            if isinstance(node, ast.Name) and node.id not in function_filter:
                # Strip an optional "_sum" suffix. NOTE: str.rstrip("_sum")
                # would remove any trailing '_', 's', 'u', 'm' characters —
                # not the literal suffix — so test the suffix explicitly.
                base = node.id[: -len("_sum")] if node.id.endswith("_sum") else node.id
                if base.isupper():
                    counters.append(base)
    except (SyntaxError, ValueError):
        # Formula is not a parseable Python expression (e.g. free-form
        # text or an empty string with null bytes): report no counters.
        pass
    return counters


def build_dfs(archConfigs, filter_metrics):
"""
- Build dataframe for each type of data source within each panel.
Expand All @@ -338,6 +379,7 @@ def build_dfs(archConfigs, filter_metrics):
d = {}
metric_list = {}
dfs_type = {}
metric_counters = {}
for panel_id, panel in archConfigs.panel_configs.items():
for data_source in panel["data source"]:
for type, data_cofig in data_source.items():
Expand All @@ -362,6 +404,7 @@ def build_dfs(archConfigs, filter_metrics):
)
metric_idx = data_source_idx + "." + str(i)
values = []
eqn_content = []

if (
(not filter_metrics)
Expand All @@ -378,6 +421,7 @@ def build_dfs(archConfigs, filter_metrics):
for k, v in entries.items():
if k != "tips" and k != "coll_level" and k != "alias":
values.append(v)
eqn_content.append(v)

if "alias" in entries.keys():
values.append(entries["alias"])
Expand All @@ -396,6 +440,15 @@ def build_dfs(archConfigs, filter_metrics):

# collect metric_list
metric_list[metric_idx] = key.replace(" ", "_")
# generate mapping of counters and metrics
filter = {}
for formula in eqn_content:
if formula is not None and formula != "None":
for k in gen_counter_list(formula):
filter[k] = None
if len(filter) > 0:
metric_counters[key] = list(filter)

i += 1

df.set_index("Index", inplace=True)
Expand Down Expand Up @@ -431,6 +484,7 @@ def build_dfs(archConfigs, filter_metrics):
setattr(archConfigs, "dfs", d)
setattr(archConfigs, "metric_list", metric_list)
setattr(archConfigs, "dfs_type", dfs_type)
setattr(archConfigs, "metric_counters", metric_counters)


def build_metric_value_string(dfs, dfs_type, normal_unit):
Expand Down Expand Up @@ -469,7 +523,12 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug):

# confirm no illogical counter values (only consider non-roofline runs)
roof_only_run = sys_info.ip_blocks == "roofline"
if not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any():
rocscope_run = sys_info.ip_blocks == "rocscope"
if (
not rocscope_run
and not roof_only_run
and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any()
):
print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.")
sys.exit(1)

Expand Down Expand Up @@ -711,12 +770,13 @@ def load_kernel_top(workload, dir):
workload.dfs.update(tmp)


def load_table_data(workload, dir, is_gui, debug, verbose):
def load_table_data(workload, dir, is_gui, debug, verbose, skipKernelTop=False):
"""
Load data for all "raw_csv_table".
Calculate mertric value for all "metric_table".
"""
load_kernel_top(workload, dir)
if not skipKernelTop:
load_kernel_top(workload, dir)

eval_metric(
workload.dfs,
Expand Down
3 changes: 3 additions & 0 deletions src/omniperf_analyze/utils/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ class ArchConfig:
# [Index: Metric name] pairs
metric_list: Dict[str, str] = field(default_factory=dict)

# [Metric name: Counters] pairs
metric_counters: Dict[str, list] = field(default_factory=dict)


@dataclass
class Workload:
Expand Down
18 changes: 16 additions & 2 deletions src/utils/perfagg.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,17 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None):
"wgr": [col for col in df.columns if "wgr" in col],
"lds": [col for col in df.columns if "lds" in col],
"scr": [col for col in df.columns if "scr" in col],
"arch_vgpr": [col for col in df.columns if "arch_vgpr" in col],
"accum_vgpr": [col for col in df.columns if "accum_vgpr" in col],
"spgr": [col for col in df.columns if "sgpr" in col],
}
# Check for vgpr counter in ROCm < 5.3
if "vgpr" in df.columns:
duplicate_cols["vgpr"] = [col for col in df.columns if "vgpr" in col]
# Check for vgpr counter in ROCm >= 5.3
else:
duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col]
duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col]
for key, cols in duplicate_cols.items():
print("Key is ", key)
_df = df[cols]
if not test_df_column_equality(_df):
msg = (
Expand Down Expand Up @@ -339,6 +345,14 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc):
# initial counter in this channel
pmc_list["TCC2"][str(ch)] = [counter]

# add a timestamp file
fd = open(workload_perfmon_dir + "/timestamps.txt", "w")
fd.write("pmc:\n\n")
fd.write("gpu:\n")
fd.write("range:\n")
fd.write("kernel:\n")
fd.close()

# sort the per channel counter, so that same counter in all channels can be aligned
for ch in range(perfmon_config[soc]["TCC_channels"]):
pmc_list["TCC2"][str(ch)].sort()
Expand Down

0 comments on commit 2d28360

Please sign in to comment.