diff --git a/docker/rhel8/rocm.repo b/docker/rhel8/rocm.repo
index 17171d755..8b2048978 100644
--- a/docker/rhel8/rocm.repo
+++ b/docker/rhel8/rocm.repo
@@ -7,7 +7,7 @@ gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
 
 [amdgpu]
 name=amdgpu
-baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.5/main/x86_64
+baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.8/main/x86_64
 enabled=1
 gpgcheck=1
 gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
\ No newline at end of file
diff --git a/src/omniperf b/src/omniperf
index 3b38e419b..e611547d4 100755
--- a/src/omniperf
+++ b/src/omniperf
@@ -439,23 +439,7 @@ def characterize_app(args, VER):
         else:
             run_prof(fname, workload_dir, perfmon_dir, app_cmd, args.target, log, args.verbose)
 
-
-
-    # run again with timestamps
-    success, output = capture_subprocess_output(
-        [
-            rocprof_cmd,
-            # "-i", fname,
-            # "-m", perfmon_dir + "/" + "metrics.xml",
-            "--timestamp",
-            "on",
-            "-o",
-            workload_dir + "/" + "timestamps.csv",
-            '"' + app_cmd + '"',
-        ]
-    )
-    log.write(output)
-    # Update pmc_perf.csv timestamps
+    # Update timestamps
     replace_timestamps(workload_dir, log)
 
     # Manually join each pmc_perf*.csv output
@@ -676,21 +660,7 @@ def omniperf_profile(args, VER):
         else:
             run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose)
 
-    # run again with timestamps
-    success, output = capture_subprocess_output(
-        [
-            rocprof_cmd,
-            # "-i", fname,
-            # "-m", perfmon_dir + "/" + "metrics.xml",
-            "--timestamp",
-            "on",
-            "-o",
-            workload_dir + "/" + "timestamps.csv",
-            '"' + args.remaining + '"',
-        ]
-    )
-    log.write(output)
-    # Update pmc_perf.csv timestamps
+    # Update timestamps
     replace_timestamps(workload_dir, log)
 
     # Manually join each pmc_perf*.csv output
diff --git a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml
index 74de040b2..986b2f0ae 100644
--- a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml
+++ b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml
@@ -11,6 +11,7 @@ Panel Config:
   data source:
     - metric_table:
         id: 201
+        title: Speed-of-Light
         header:
           metric: Metric
           value: Value
diff --git a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml
index 74de040b2..986b2f0ae 100644
--- a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml
+++ b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml
@@ -11,6 +11,7 @@ Panel Config:
   data source:
     - metric_table:
         id: 201
+        title: Speed-of-Light
         header:
           metric: Metric
           value: Value
diff --git a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
index f10d7630f..20721ee1f 100644
--- a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
+++ b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml
@@ -11,6 +11,7 @@ Panel Config:
   data source:
     - metric_table:
         id: 201
+        title: Speed-of-Light
         header:
           metric: Metric
           value: Value
diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py
index 58991e8b3..6415ed285 100644
--- a/src/omniperf_analyze/omniperf_analyze.py
+++ b/src/omniperf_analyze/omniperf_analyze.py
@@ -47,36 +47,50 @@
 from omniperf_analyze.utils.gui_components.roofline import get_roofline
 
 
-def initialize_run(args, normalization_filter=None):
-    import pandas as pd
-    from collections import OrderedDict
+################################################
+# Helper Functions
+################################################
+def generate_configs(config_dir, list_kernels, filter_metrics):
     from omniperf_analyze.utils import schema
-    from tabulate import tabulate
 
-    # Fixme: cur_root.parent.joinpath('soc_params')
-    soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
-    soc_spec_df = file_io.load_soc_params(soc_params_dir)
-
-    single_panel_config = file_io.is_single_panel_config(Path(args.config_dir))
+    single_panel_config = file_io.is_single_panel_config(Path(config_dir))
 
     global archConfigs
     archConfigs = {}
 
     for arch in file_io.supported_arch.keys():
         ac = schema.ArchConfig()
-        if args.list_kernels:
+        if list_kernels:
             ac.panel_configs = file_io.top_stats_build_in_config
         else:
             arch_panel_config = (
-                args.config_dir if single_panel_config else args.config_dir.joinpath(arch)
+                config_dir if single_panel_config else config_dir.joinpath(arch)
             )
             ac.panel_configs = file_io.load_panel_configs(arch_panel_config)
         # TODO: filter_metrics should/might be one per arch
         # print(ac)
-        parser.build_dfs(ac, args.filter_metrics)
+        parser.build_dfs(ac, filter_metrics)
 
         archConfigs[arch] = ac
 
+    return archConfigs  # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin
+
+
+################################################
+# Core Functions
+################################################
+def initialize_run(args, normalization_filter=None):
+    import pandas as pd
+    from collections import OrderedDict
+    from tabulate import tabulate
+    from omniperf_analyze.utils import schema
+
+    # Fixme: cur_root.parent.joinpath('soc_params')
+    soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params")
+    soc_spec_df = file_io.load_soc_params(soc_params_dir)
+
+    generate_configs(args.config_dir, args.list_kernels, args.filter_metrics)
+
     if args.list_metrics in file_io.supported_arch.keys():
         print(
             tabulate(
diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py
index d11cbbbfc..b6573566b 100644
--- a/src/omniperf_analyze/utils/parser.py
+++ b/src/omniperf_analyze/utils/parser.py
@@ -321,6 +321,47 @@ def update_normUnit_string(equation, unit):
     ).capitalize()
 
 
+def gen_counter_list(formula):
+    function_filter = {
+        "MIN": None,
+        "MAX": None,
+        "AVG": None,
+        "ROUND": None,
+        "TO_INT": None,
+        "GB": None,
+        "STD": None,
+        "GFLOP": None,
+        "GOP": None,
+        "OP": None,
+        "CU": None,
+        "NC": None,
+        "UC": None,
+        "CC": None,
+        "RW": None,
+        "GIOP": None,
+    }
+
+    counters = []
+    if not isinstance(formula, str):
+        return counters
+    try:
+        tree = ast.parse(
+            formula.replace("$normUnit", "SQ_WAVES")
+            .replace("$denom", "SQ_WAVES")
+            .replace("$", "")
+        )
+        for node in ast.walk(tree):
+            if (
+                isinstance(node, ast.Name)
+                and node.id.rstrip("_sum").isupper()
+                and node.id not in function_filter
+            ):
+                counters.append(node.id.rstrip("_sum"))
+    except:
+        pass
+    return counters
+
+
 def build_dfs(archConfigs, filter_metrics):
     """
     - Build dataframe for each type of data source within each panel.
@@ -338,6 +379,7 @@ def build_dfs(archConfigs, filter_metrics):
     d = {}
     metric_list = {}
     dfs_type = {}
+    metric_counters = {}
     for panel_id, panel in archConfigs.panel_configs.items():
         for data_source in panel["data source"]:
             for type, data_cofig in data_source.items():
@@ -362,6 +404,7 @@ def build_dfs(archConfigs, filter_metrics):
                     )
                     metric_idx = data_source_idx + "." + str(i)
                     values = []
+                    eqn_content = []
 
                     if (
                         (not filter_metrics)
@@ -378,6 +421,7 @@ def build_dfs(archConfigs, filter_metrics):
                         for k, v in entries.items():
                             if k != "tips" and k != "coll_level" and k != "alias":
                                 values.append(v)
+                                eqn_content.append(v)
 
                         if "alias" in entries.keys():
                             values.append(entries["alias"])
@@ -396,6 +440,15 @@ def build_dfs(archConfigs, filter_metrics):
 
                         # collect metric_list
                         metric_list[metric_idx] = key.replace(" ", "_")
+                        # generate mapping of counters and metrics
+                        filter = {}
+                        for formula in eqn_content:
+                            if formula is not None and formula != "None":
+                                for k in gen_counter_list(formula):
+                                    filter[k] = None
+                        if len(filter) > 0:
+                            metric_counters[key] = list(filter)
+
                     i += 1
 
             df.set_index("Index", inplace=True)
@@ -431,6 +484,7 @@ def build_dfs(archConfigs, filter_metrics):
     setattr(archConfigs, "dfs", d)
     setattr(archConfigs, "metric_list", metric_list)
     setattr(archConfigs, "dfs_type", dfs_type)
+    setattr(archConfigs, "metric_counters", metric_counters)
 
 
 def build_metric_value_string(dfs, dfs_type, normal_unit):
@@ -469,7 +523,12 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug):
 
     # confirm no illogical counter values (only consider non-roofline runs)
     roof_only_run = sys_info.ip_blocks == "roofline"
-    if not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any():
+    rocscope_run = sys_info.ip_blocks == "rocscope"
+    if (
+        not rocscope_run
+        and not roof_only_run
+        and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any()
+    ):
         print("WARNING: Detected GRBM_GUI_ACTIVE == 0\nHalting execution.")
         sys.exit(1)
 
@@ -711,12 +770,13 @@ def load_kernel_top(workload, dir):
     workload.dfs.update(tmp)
 
 
-def load_table_data(workload, dir, is_gui, debug, verbose):
+def load_table_data(workload, dir, is_gui, debug, verbose, skipKernelTop=False):
     """
     Load data for all "raw_csv_table".
     Calculate metric value for all "metric_table".
""" - load_kernel_top(workload, dir) + if not skipKernelTop: + load_kernel_top(workload, dir) eval_metric( workload.dfs, diff --git a/src/omniperf_analyze/utils/schema.py b/src/omniperf_analyze/utils/schema.py index bcfc0bff5..f9b59868f 100644 --- a/src/omniperf_analyze/utils/schema.py +++ b/src/omniperf_analyze/utils/schema.py @@ -52,6 +52,9 @@ class ArchConfig: # [Index: Metric name] pairs metric_list: Dict[str, str] = field(default_factory=dict) + # [Metric name: Counters] pairs + metric_counters: Dict[str, list] = field(default_factory=dict) + @dataclass class Workload: diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index e965b784c..59460bc80 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -127,11 +127,17 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): "wgr": [col for col in df.columns if "wgr" in col], "lds": [col for col in df.columns if "lds" in col], "scr": [col for col in df.columns if "scr" in col], - "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col], - "accum_vgpr": [col for col in df.columns if "accum_vgpr" in col], "spgr": [col for col in df.columns if "sgpr" in col], } + # Check for vgpr counter in ROCm < 5.3 + if "vgpr" in df.columns: + duplicate_cols["vgpr"] = [col for col in df.columns if "vgpr" in col] + # Check for vgpr counter in ROCm >= 5.3 + else: + duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col] + duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] for key, cols in duplicate_cols.items(): + print("Key is ", key) _df = df[cols] if not test_df_column_equality(_df): msg = ( @@ -339,6 +345,14 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): # initial counter in this channel pmc_list["TCC2"][str(ch)] = [counter] + # add a timestamp file + fd = open(workload_perfmon_dir + "/timestamps.txt", "w") + fd.write("pmc:\n\n") + fd.write("gpu:\n") + fd.write("range:\n") + fd.write("kernel:\n") + fd.close() + # sort the per channel counter, so that same counter in all channels can be aligned for ch in range(perfmon_config[soc]["TCC_channels"]): pmc_list["TCC2"][str(ch)].sort()