From acb972954091d14bf91797383ebf0017810a4168 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Fri, 9 Jun 2023 10:00:56 -0500 Subject: [PATCH 01/81] Fix VGPR issue (#139) Signed-off-by: colramos-amd --- src/utils/perfagg.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index e965b784c..651bcb86d 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -127,11 +127,17 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): "wgr": [col for col in df.columns if "wgr" in col], "lds": [col for col in df.columns if "lds" in col], "scr": [col for col in df.columns if "scr" in col], - "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col], - "accum_vgpr": [col for col in df.columns if "accum_vgpr" in col], "spgr": [col for col in df.columns if "sgpr" in col], } + # Check for vgpr counter in ROCm < 5.3 + if "vgpr" in df.columns: + duplicate_cols["vgpr"] = [col for col in df.columns if "vgpr" in col] + # Check for vgpr counter in ROCm >= 5.3 + else: + duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col] + duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] for key, cols in duplicate_cols.items(): + print("Key is ", key) _df = df[cols] if not test_df_column_equality(_df): msg = ( From 5f6c776170f01bd62c4eac16a0ec4257583c32c3 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Fri, 9 Jun 2023 10:01:37 -0500 Subject: [PATCH 02/81] Omniperf rocomni changes Signed-off-by: colramos-amd --- src/omniperf_analyze/omniperf_analyze.py | 38 +++++++++++++++------- src/omniperf_analyze/utils/parser.py | 41 ++++++++++++++++++++++-- src/omniperf_analyze/utils/schema.py | 2 ++ 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py index 58991e8b3..c15181c6f 100644 --- a/src/omniperf_analyze/omniperf_analyze.py +++ 
b/src/omniperf_analyze/omniperf_analyze.py @@ -47,36 +47,50 @@ from omniperf_analyze.utils.gui_components.roofline import get_roofline -def initialize_run(args, normalization_filter=None): - import pandas as pd - from collections import OrderedDict +################################################ +# Helper Functions +################################################ +def generate_configs(config_dir, list_kernels, filter_metrics): from omniperf_analyze.utils import schema - from tabulate import tabulate - # Fixme: cur_root.parent.joinpath('soc_params') - soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params") - soc_spec_df = file_io.load_soc_params(soc_params_dir) - - single_panel_config = file_io.is_single_panel_config(Path(args.config_dir)) + single_panel_config = file_io.is_single_panel_config(Path(config_dir)) global archConfigs archConfigs = {} for arch in file_io.supported_arch.keys(): ac = schema.ArchConfig() - if args.list_kernels: + if list_kernels: ac.panel_configs = file_io.top_stats_build_in_config else: arch_panel_config = ( - args.config_dir if single_panel_config else args.config_dir.joinpath(arch) + config_dir if single_panel_config else config_dir.joinpath(arch) ) ac.panel_configs = file_io.load_panel_configs(arch_panel_config) # TODO: filter_metrics should/might be one per arch # print(ac) - parser.build_dfs(ac, args.filter_metrics) + parser.build_dfs(ac, filter_metrics) archConfigs[arch] = ac + return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin + + +################################################ +# Core Functions +################################################ +def initialize_run(args, normalization_filter=None): + import pandas as pd + from collections import OrderedDict + from tabulate import tabulate + from omniperf_analyze.utils import schema + + # Fixme: cur_root.parent.joinpath('soc_params') + soc_params_dir = 
os.path.join(os.path.dirname(__file__), "..", "soc_params") + soc_spec_df = file_io.load_soc_params(soc_params_dir) + + generate_configs(args.config_dir, args.list_kernels, args.filter_metrics) + if args.list_metrics in file_io.supported_arch.keys(): print( tabulate( diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index d11cbbbfc..5fb03c39a 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -320,6 +320,26 @@ def update_normUnit_string(equation, unit): str(equation), ).capitalize() +def gen_counter_list(formula): + function_filter = {"MIN": None, "MAX": None, "AVG": None, "ROUND": None, "TO_INT": None, "GB": None, "STD": None, "GFLOP": None, "GOP": None, "OP": None, "CU": None, "NC": None, "UC": None, "CC": None, "RW": None, "GIOP": None} + + counters = [] + if not isinstance(formula,str): + return counters + try: + tree = ast.parse( + formula + .replace("$normUnit", "SQ_WAVES") + .replace("$denom", "SQ_WAVES") + .replace("$","") + ) + for node in ast.walk(tree): + if isinstance(node, ast.Name) and node.id.rstrip("_sum").isupper() and node.id not in function_filter: + counters.append(node.id.rstrip("_sum")) + except: + pass + return counters + def build_dfs(archConfigs, filter_metrics): """ @@ -338,6 +358,7 @@ def build_dfs(archConfigs, filter_metrics): d = {} metric_list = {} dfs_type = {} + metric_counters = {} for panel_id, panel in archConfigs.panel_configs.items(): for data_source in panel["data source"]: for type, data_cofig in data_source.items(): @@ -362,6 +383,7 @@ def build_dfs(archConfigs, filter_metrics): ) metric_idx = data_source_idx + "." 
+ str(i) values = [] + eqn_content = [] if ( (not filter_metrics) @@ -378,6 +400,7 @@ def build_dfs(archConfigs, filter_metrics): for k, v in entries.items(): if k != "tips" and k != "coll_level" and k != "alias": values.append(v) + eqn_content.append(v) if "alias" in entries.keys(): values.append(entries["alias"]) @@ -396,6 +419,15 @@ def build_dfs(archConfigs, filter_metrics): # collect metric_list metric_list[metric_idx] = key.replace(" ", "_") + # generate mapping of counters and metrics + filter = {} + for formula in eqn_content: + if formula is not None and formula != "None": + for k in gen_counter_list(formula): + filter[k] = None + if len(filter) > 0: + metric_counters[key] = list(filter) + i += 1 df.set_index("Index", inplace=True) @@ -431,6 +463,7 @@ def build_dfs(archConfigs, filter_metrics): setattr(archConfigs, "dfs", d) setattr(archConfigs, "metric_list", metric_list) setattr(archConfigs, "dfs_type", dfs_type) + setattr(archConfigs, "metric_counters", metric_counters) def build_metric_value_string(dfs, dfs_type, normal_unit): @@ -469,7 +502,8 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): # confirm no illogical counter values (only consider non-roofline runs) roof_only_run = sys_info.ip_blocks == "roofline" - if not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): + rocscope_run = sys_info.ip_blocks == "rocscope" + if not rocscope_run and not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.") sys.exit(1) @@ -711,12 +745,13 @@ def load_kernel_top(workload, dir): workload.dfs.update(tmp) -def load_table_data(workload, dir, is_gui, debug, verbose): +def load_table_data(workload, dir, is_gui, debug, verbose, skipKernelTop=False): """ Load data for all "raw_csv_table". Calculate mertric value for all "metric_table". 
""" - load_kernel_top(workload, dir) + if not skipKernelTop: + load_kernel_top(workload, dir) eval_metric( workload.dfs, diff --git a/src/omniperf_analyze/utils/schema.py b/src/omniperf_analyze/utils/schema.py index bcfc0bff5..6e147fcae 100644 --- a/src/omniperf_analyze/utils/schema.py +++ b/src/omniperf_analyze/utils/schema.py @@ -52,6 +52,8 @@ class ArchConfig: # [Index: Metric name] pairs metric_list: Dict[str, str] = field(default_factory=dict) + # [Metric name: Counters] pairs + metric_counters: Dict[str, list] = field(default_factory=dict) @dataclass class Workload: From 79eecb445e4cc4fdc02bdf20fe638bb9c10f755d Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Fri, 9 Jun 2023 10:04:32 -0500 Subject: [PATCH 03/81] Comply to Python formatting Signed-off-by: colramos-amd --- src/omniperf_analyze/omniperf_analyze.py | 2 +- src/omniperf_analyze/utils/parser.py | 41 +++++++++++++++++++----- src/omniperf_analyze/utils/schema.py | 1 + src/utils/perfagg.py | 2 +- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py index c15181c6f..6415ed285 100644 --- a/src/omniperf_analyze/omniperf_analyze.py +++ b/src/omniperf_analyze/omniperf_analyze.py @@ -73,7 +73,7 @@ def generate_configs(config_dir, list_kernels, filter_metrics): archConfigs[arch] = ac - return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin + return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin ################################################ diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index 5fb03c39a..b6573566b 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -320,26 +320,47 @@ def update_normUnit_string(equation, unit): str(equation), ).capitalize() + def gen_counter_list(formula): - 
function_filter = {"MIN": None, "MAX": None, "AVG": None, "ROUND": None, "TO_INT": None, "GB": None, "STD": None, "GFLOP": None, "GOP": None, "OP": None, "CU": None, "NC": None, "UC": None, "CC": None, "RW": None, "GIOP": None} + function_filter = { + "MIN": None, + "MAX": None, + "AVG": None, + "ROUND": None, + "TO_INT": None, + "GB": None, + "STD": None, + "GFLOP": None, + "GOP": None, + "OP": None, + "CU": None, + "NC": None, + "UC": None, + "CC": None, + "RW": None, + "GIOP": None, + } counters = [] - if not isinstance(formula,str): + if not isinstance(formula, str): return counters try: tree = ast.parse( - formula - .replace("$normUnit", "SQ_WAVES") + formula.replace("$normUnit", "SQ_WAVES") .replace("$denom", "SQ_WAVES") - .replace("$","") + .replace("$", "") ) for node in ast.walk(tree): - if isinstance(node, ast.Name) and node.id.rstrip("_sum").isupper() and node.id not in function_filter: + if ( + isinstance(node, ast.Name) + and node.id.rstrip("_sum").isupper() + and node.id not in function_filter + ): counters.append(node.id.rstrip("_sum")) except: pass return counters - + def build_dfs(archConfigs, filter_metrics): """ @@ -503,7 +524,11 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): # confirm no illogical counter values (only consider non-roofline runs) roof_only_run = sys_info.ip_blocks == "roofline" rocscope_run = sys_info.ip_blocks == "rocscope" - if not rocscope_run and not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): + if ( + not rocscope_run + and not roof_only_run + and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any() + ): print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.") sys.exit(1) diff --git a/src/omniperf_analyze/utils/schema.py b/src/omniperf_analyze/utils/schema.py index 6e147fcae..f9b59868f 100644 --- a/src/omniperf_analyze/utils/schema.py +++ b/src/omniperf_analyze/utils/schema.py @@ -55,6 +55,7 @@ class ArchConfig: # [Metric name: Counters] pairs 
metric_counters: Dict[str, list] = field(default_factory=dict) + @dataclass class Workload: sys_info: pd.DataFrame = None diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 651bcb86d..109fdecda 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -135,7 +135,7 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): # Check for vgpr counter in ROCm >= 5.3 else: duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col] - duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] + duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] for key, cols in duplicate_cols.items(): print("Key is ", key) _df = df[cols] From 049ba12f6994cbf617e69980d1a2f5b897e306a7 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Wed, 21 Jun 2023 11:06:03 -0500 Subject: [PATCH 04/81] Add subsection title to System Speed-of-Light Signed-off-by: colramos-amd --- .../configs/gfx906/0200_system-speed-of-light.yaml | 1 + .../configs/gfx908/0200_system-speed-of-light.yaml | 1 + .../configs/gfx90a/0200_system-speed-of-light.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml index 74de040b2..986b2f0ae 100644 --- a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml +++ b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml @@ -11,6 +11,7 @@ Panel Config: data source: - metric_table: id: 201 + title: Speed-of-Light header: metric: Metric value: Value diff --git a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml index 74de040b2..986b2f0ae 100644 --- a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml +++ b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml @@ -11,6 +11,7 @@ Panel Config: data 
source: - metric_table: id: 201 + title: Speed-of-Light header: metric: Metric value: Value diff --git a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml index f10d7630f..20721ee1f 100644 --- a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml +++ b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml @@ -11,6 +11,7 @@ Panel Config: data source: - metric_table: id: 201 + title: Speed-of-Light header: metric: Metric value: Value From a89cb96b69ca9969bdb182d6d21f214494ee1a98 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 26 Jun 2023 15:30:38 -0500 Subject: [PATCH 05/81] Extend filtering into timestamps.csv (#80) Signed-off-by: coleramos425 --- src/omniperf | 34 ++-------------------------------- src/utils/perfagg.py | 9 +++++++++ 2 files changed, 11 insertions(+), 32 deletions(-) diff --git a/src/omniperf b/src/omniperf index 3b38e419b..e611547d4 100755 --- a/src/omniperf +++ b/src/omniperf @@ -439,23 +439,7 @@ def characterize_app(args, VER): else: run_prof(fname, workload_dir, perfmon_dir, app_cmd, args.target, log, args.verbose) - - - # run again with timestamps - success, output = capture_subprocess_output( - [ - rocprof_cmd, - # "-i", fname, - # "-m", perfmon_dir + "/" + "metrics.xml", - "--timestamp", - "on", - "-o", - workload_dir + "/" + "timestamps.csv", - '"' + app_cmd + '"', - ] - ) - log.write(output) - # Update pmc_perf.csv timestamps + # Update timestamps replace_timestamps(workload_dir, log) # Manually join each pmc_perf*.csv output @@ -676,21 +660,7 @@ def omniperf_profile(args, VER): else: run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose) - # run again with timestamps - success, output = capture_subprocess_output( - [ - rocprof_cmd, - # "-i", fname, - # "-m", perfmon_dir + "/" + "metrics.xml", - "--timestamp", - "on", - "-o", - workload_dir + "/" + "timestamps.csv", - '"' + 
args.remaining + '"', - ] - ) - log.write(output) - # Update pmc_perf.csv timestamps + # Update timestamps replace_timestamps(workload_dir, log) # Manually join each pmc_perf*.csv output diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 109fdecda..1c80a22a9 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -345,6 +345,15 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): # initial counter in this channel pmc_list["TCC2"][str(ch)] = [counter] + + # add a timestamp file + fd = open(workload_perfmon_dir + "/timestamps.txt", "w") + fd.write("pmc:\n\n") + fd.write("gpu:\n") + fd.write("range:\n") + fd.write("kernel:\n") + fd.close() + # sort the per channel counter, so that same counter in all channels can be aligned for ch in range(perfmon_config[soc]["TCC_channels"]): pmc_list["TCC2"][str(ch)].sort() From f91de7d2f7478ac143b77914ad6560c5a5816f23 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 26 Jun 2023 15:38:51 -0500 Subject: [PATCH 06/81] Comply to Python formatting Signed-off-by: coleramos425 --- src/utils/perfagg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 1c80a22a9..59460bc80 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -345,7 +345,6 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): # initial counter in this channel pmc_list["TCC2"][str(ch)] = [counter] - # add a timestamp file fd = open(workload_perfmon_dir + "/timestamps.txt", "w") fd.write("pmc:\n\n") @@ -353,7 +352,7 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): fd.write("range:\n") fd.write("kernel:\n") fd.close() - + # sort the per channel counter, so that same counter in all channels can be aligned for ch in range(perfmon_config[soc]["TCC_channels"]): pmc_list["TCC2"][str(ch)].sort() From 54bc0580850095a91a60c9115934f5e747774426 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Tue, 6 Jun 2023 11:01:37 -0400 Subject: [PATCH 07/81] Incorporate 
review comments Signed-off-by: Nicholas Curtis --- src/docs/analysis.md | 16 ++++++++-------- src/docs/conf.py | 4 ++++ src/docs/getting_started.md | 20 ++++++++++++-------- src/docs/high_level_design.md | 4 ++-- src/docs/installation.md | 14 +++++++++----- src/docs/introduction.md | 9 ++++----- src/docs/profiling.md | 15 +++++++++------ src/parser.py | 2 +- 8 files changed, 49 insertions(+), 35 deletions(-) diff --git a/src/docs/analysis.md b/src/docs/analysis.md index 2321ddabd..9feff1f64 100644 --- a/src/docs/analysis.md +++ b/src/docs/analysis.md @@ -26,7 +26,7 @@ Run `omniperf analyze -h` for more details. ### Recommended workflow 1) Do a comprehensive analysis with Omniperf CLI at the beginning. -```shell +```shell-session $ omniperf analyze -p workloads/vcopy/mi200/ -------- @@ -108,7 +108,7 @@ Analyze .... ``` 2. Use `--list-metrics` to generate a list of availible metrics for inspection - ```shell + ```shell-session $ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a ╒═════════╤═════════════════════════════╕ │ │ Metric │ @@ -172,7 +172,7 @@ $ omniperf analyze -p workloads/vcopy/mi200/ --list-metrics gfx90a ... ``` 2. Choose your own customized subset of metrics with `-b` (a.k.a. `--metric`), or build your own config following [config_template](https://github.com/AMDResearch/omniperf/blob/main/src/omniperf_analyze/configs/panel_config_template.yaml). Below we'll inspect block 2 (a.k.a. System Speed-of-Light). -```shell +```shell-session $ omniperf analyze -p workloads/vcopy/mi200/ -b 2 -------- Analyze @@ -286,7 +286,7 @@ Analyze - Filter kernels First, list the top kernels in your application using `--list-kernels`. - ```shell + ```shell-session $ omniperf analyze -p workloads/vcopy/mi200/ --list-kernels -------- @@ -306,7 +306,7 @@ Analyze Second, select the index of the kernel you'd like to filter (i.e. __vecCopy(double*, double*, double*, int, int) [clone .kd]__ at index __0__). Then, use this index to apply the filter via `-k/--kernels`. 
- ```shell + ```shell-session $ omniperf -p workloads/vcopy/mi200/ -k 0 -------- @@ -372,7 +372,7 @@ See [FAQ](https://amdresearch.github.io/omniperf/faq.html) for more details on S To launch the standalone GUI, include the `--gui` flag with your desired analysis command. For example: -```bash +```shell-session $ omniperf analyze -p workloads/vcopy/mi200/ --gui -------- @@ -499,7 +499,7 @@ e.g., omniperf_asw_vcopy_mi200. Below is the sample command to import the *vcopy* profiling data. -```shell +```shell-session $ omniperf database --help ROC Profiler: /usr/bin/rocprof @@ -544,7 +544,7 @@ Connection Options: ``` **omniperf import for vcopy:** -```shell +```shell-session $ omniperf database --import -H pavii1 -u temp -t asw -w workloads/vcopy/mi200/ ROC Profiler: /usr/bin/rocprof diff --git a/src/docs/conf.py b/src/docs/conf.py index 48d4c5596..b659553f9 100644 --- a/src/docs/conf.py +++ b/src/docs/conf.py @@ -53,6 +53,10 @@ def install(package): ] myst_heading_anchors = 2 +# enable replacement of (tm) & friends +myst_enable_extensions = [ + "replacements" +] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/src/docs/getting_started.md b/src/docs/getting_started.md index c75bda9ab..80ae888f0 100644 --- a/src/docs/getting_started.md +++ b/src/docs/getting_started.md @@ -10,24 +10,28 @@ 1. **Launch & Profile the target application with the command line profiler** - The command line profiler launches the target application, calls the rocProfiler API, and collects profile results for the specified kernels, dispatches, and/or ipblock’s. + The command line profiler launches the target application, calls the rocProfiler API, and collects profile results for the specified kernels, dispatches, and/or IP blocks. If not specified, Omniperf will default to collecting all available counters for all kernels/dispatches launched by the user's executable. 
- To collect the default set of data for all kernels in the target application, launch: + To collect the default set of data for all kernels in the target application, launch, e.g.: ```shell - $ omniperf profile -n vcopy -- ./vcopy 1048576 256 + $ omniperf profile -n vcopy_data -- ./vcopy 1048576 256 ``` - The app runs, each kernel is launched, and profiling results are generated. By default, results are written to ./workloads/\. To collect all requested profile information, it may be required to replay kernels multiple times. + The app runs, each kernel is launched, and profiling results are generated. By default, results are written to (e.g.,) ./workloads/vcopy_data (configurable via the `-n` argument). To collect all requested profile information, it may be required to replay kernels multiple times. 2. **Customize data collection** - Options are available to specify for which kernels data should be collected. - `-k`/`--kernel` enables filtering kernels by name. `-d`/`--dispatch` enables filtering based on dispatch ID. `-b`/`--ipblocks` enables profiling on one or more IP Block(s). + Options are available to specify for which kernels/metrics data should be collected. + Note that filtering can be applied either in the profiling or analysis stage; however, filtering during profiling will often speed up your overall profiling run time. - To view available metrics by IP Block you can always use `--list-metrics` to view a list of all available metrics organized by IP Block. + Some common filters include: + + - `-k`/`--kernel` enables filtering kernels by name. `-d`/`--dispatch` enables filtering based on dispatch ID + - `-b`/`--ipblocks` collects metrics for only the specified (one or more) IP Blocks. + + To view available metrics by IP Block you can use the `--list-metrics` argument to view a list of all available metrics organized by IP Block.
```shell $ omniperf analyze --list-metrics ``` - Note that filtering can also be applied after the fact, at the analysis stage, however filtering at the profiling level will often speed up your overall profiling run time. 3. **Analyze at the command line** diff --git a/src/docs/high_level_design.md b/src/docs/high_level_design.md index 28c09ff12..6168b7ace 100644 --- a/src/docs/high_level_design.md +++ b/src/docs/high_level_design.md @@ -8,10 +8,10 @@ The [Omniperf](https://github.com/AMDResearch/omniperf) Tool is architecturally composed of three major components, as shown in the following figure. -- **Omniperf Profiling**: Acquire raw performance counters via application replay based on the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler). A set of MI200 specific micro benchmarks are also run to acquire the hierarchical roofline data. +- **Omniperf Profiling**: Acquire raw performance counters via application replay based on the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler). The counters are stored in comma-separated value (CSV) files for further analysis. A set of MI200 specific micro benchmarks are also run to acquire the hierarchical roofline data. The roofline model is not available on earlier accelerators. - **Omniperf Grafana Analyzer**: - - *Grafana database import*: All raw performance counters are imported into the backend MongoDB database for Grafana GUI analysis and visualization. + - *Grafana database import*: All raw performance counters are imported into the backend MongoDB database for Grafana GUI analysis and visualization. Compatibility of previously generated data between Omniperf versions is not necessarily guaranteed. - *Grafana GUI Analyzer*: A Grafana dashboard is designed to retrieve the raw counters info from the backend database. It also creates the relevant performance metrics and visualization.
- **Omniperf Standalone GUI Analyzer**: A standalone GUI is provided to enable performance analysis without importing data into the backend database. diff --git a/src/docs/installation.md b/src/docs/installation.md index ee2489710..e550669b7 100644 --- a/src/docs/installation.md +++ b/src/docs/installation.md @@ -109,7 +109,7 @@ ROC Profiler: /opt/rocm-5.1.0/bin/rocprof omniperf (v{__VERSION__}) ``` -```{tip} Sites relying on an Lmod Python module locally may wish to +```{tip} Users relying on an Lmod Python module locally may wish to customize the resulting Omniperf modulefile post-installation to include additional module dependencies. ``` @@ -129,8 +129,9 @@ export PYTHONPATH=$INSTALL_DIR/python-libs Omniperf relies on a rocprof binary during the profiling process. Normally the path to this binary will be detected -automatically, but it can also be overridden via the use of an -optional `ROCPROF` environment variable. +automatically, but it can also be overridden by setting the +optional `ROCPROF` environment variable to the path of the binary the user +wishes to use instead.
@@ -162,9 +163,12 @@ Omniperf uses [mongoimport](https://www.mongodb.com/docs/database-tools/mongoimp $ wget https://fastdl.mongodb.org/tools/db/mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb $ sudo apt install ./mongodb-database-tools-ubuntu2004-x86_64-100.6.1.deb ``` -> Find install for alternative distros [here](https://www.mongodb.com/download-center/database-tools/releases/archive) +> Installation instructions for alternative distributions can be found [here](https://www.mongodb.com/download-center/database-tools/releases/archive) + +### Persistent Storage + +The user may also choose to bind MongoDB to a directory on the host OS to create a local backup in case of a crash or reset: -### Persist Storage ```bash $ sudo mkdir -p /usr/local/persist && cd /usr/local/persist/ $ sudo mkdir -p grafana-storage mongodb diff --git a/src/docs/introduction.md b/src/docs/introduction.md index 436146db8..6b39d4088 100644 --- a/src/docs/introduction.md +++ b/src/docs/introduction.md @@ -10,17 +10,17 @@ ## Scope -MI Performance Profiler ([Omniperf](https://github.com/AMDResearch/omniperf)) is a system performance profiling tool for Machine Learning/HPC workloads running on AMD MI GPUs. It is currently built on top of the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler) to monitor hardware performance counters. The Omniperf tool primarily targets MI100 and MI200 silicon. Development is in progress to support MI300 and NAVI GPUs. +MI Performance Profiler ([Omniperf](https://github.com/AMDResearch/omniperf)) is a system performance profiling tool for Machine Learning/HPC workloads running on AMD Instinct (tm) Accelerators. It is currently built on top of the [ROC Profiler](https://github.com/ROCm-Developer-Tools/rocprofiler) to monitor hardware performance counters. The Omniperf tool primarily targets accelerators in the MI100 and MI200 families. Development is in progress to support MI300 and Radeon (tm) RDNA (tm) GPUs. 
## Features -The Omniperf tool performs system profiling based on all approved hardware counters for MI200. It provides high level performance analysis features including System Speed-of-Light, IP block Speed-of-Light, Memory Chart Analysis, Roofline Analysis, Baseline Comparisons, and more... +The Omniperf tool performs system profiling based on all available hardware counters for the target accelerator. It provides high level performance analysis features including System Speed-of-Light, IP block Speed-of-Light, Memory Chart Analysis, Roofline Analysis, Baseline Comparisons, and more... Both command line analysis and GUI analysis are supported. Detailed Feature List: -- MI200 support - MI100 support +- MI200 support - Standalone GUI Analyzer - Grafana/MongoDB GUI Analyzer - Dispatch Filtering @@ -50,8 +50,7 @@ Detailed Feature List: | Platform | Status | | :------- | :------------- | -| Vega 20 | No | -| MI50 | No | +| Vega 20 (MI-50/60) | No | | MI100 | Supported | | MI200 | Supported | | MI300 | In development | diff --git a/src/docs/profiling.md b/src/docs/profiling.md index 6776097c9..1a9547775 100644 --- a/src/docs/profiling.md +++ b/src/docs/profiling.md @@ -19,7 +19,7 @@ the MI200 platform. ## Workload Compilation **vcopy compilation:** -```shell +```shell-session $ hipcc vcopy.cpp -o vcopy $ ls vcopy vcopy.cpp @@ -40,7 +40,7 @@ Releasing CPU memory The *omniperf* script, availible through the [Omniperf](https://github.com/AMDResearch/omniperf) repository, is used to aquire all necessary perfmon data through analysis of compute workloads. 
**omniperf help:** -```shell +```shell-session $ omniperf profile --help ROC Profiler: /usr/bin/rocprof @@ -56,7 +56,7 @@ Examples: omniperf profile -n vcopy_all -- ./vcopy 1048576 256 - omniperf profile -n vcopy_SPI_TD -b SQ TCC -- ./vcopy 1048576 256 + omniperf profile -n vcopy_SPI_TCC -b SQ TCC -- ./vcopy 1048576 256 omniperf profile -n vcopy_kernel -k vecCopy -- ./vcopy 1048576 256 @@ -111,7 +111,7 @@ Standalone Roofline Options: The following sample command profiles the *vcopy* workload. **vcopy profiling:** -```shell +```shell-session $ omniperf profile --name vcopy -- ./vcopy 1048576 256 Resolving rocprof ROC Profiler: /usr/bin/rocprof @@ -206,7 +206,10 @@ Peak MFMA IOPs (I8), GPU ID: 1, workgroupSize:256, workgroups:16384, experiments ``` You'll notice two stages in *default* Omniperf profiling. The first stage collects all the counters needed for Omniperf analysis (omitting any filters you've provided). The second stage collects data for the roofline analysis (this stage can be disabled using `--no-roof`) -At the end of the profiling, all resulting csv files should be located in the SOC specific target directory, e.g., mi200. +At the end of the profiling, all resulting csv files should be located in a SOC specific target directory, e.g.: + - "mi200" for the AMD Instinct (tm) MI-200 family of accelerators + - "mi100" for the AMD Instinct (tm) MI-100 family of accelerators +etc. The SOC names are generated as a part of Omniperf, and do not necessarily distinguish between different accelerators in the same family (e.g., an AMD Instinct (tm) MI-210 vs an MI-250) > Note: Additionally, you'll notice a few extra files. An SoC parameters file, *sysinfo.csv*, is created to reflect the target device settings. All profiling output is stored in *log.txt*. Roofline specific benchmark results are stored in *roofline.csv*. 
@@ -316,7 +319,7 @@ ROCProfiler: input from "/tmp/rpl_data_230411_170300_29696/input0.xml" #### Dispatch Filtering The following example demonstrates profiling on selected dispatches: -```shell +```shell-session $ omniperf profile --name vcopy -d 0 -- ./vcopy 1048576 256 Resolving rocprof ROC Profiler: /usr/bin/rocprof diff --git a/src/parser.py b/src/parser.py index da018ba94..9d6dd8f6f 100644 --- a/src/parser.py +++ b/src/parser.py @@ -66,7 +66,7 @@ def parse(my_parser): \n\n------------------------------------------------------------------------------- \nExamples: \n\tomniperf profile -n vcopy_all -- ./vcopy 1048576 256 - \n\tomniperf profile -n vcopy_SPI_TD -b SQ TCC -- ./vcopy 1048576 256 + \n\tomniperf profile -n vcopy_SPI_TCC -b SQ TCC -- ./vcopy 1048576 256 \n\tomniperf profile -n vcopy_kernel -k vecCopy -- ./vcopy 1048576 256 \n\tomniperf profile -n vcopy_disp -d 0 -- ./vcopy 1048576 256 \n\tomniperf profile -n vcopy_roof --roof-only -- ./vcopy 1048576 256 From aaed37d00417014e35bb2dc97cc95a2aa89b6bf9 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Tue, 6 Jun 2023 12:03:55 -0400 Subject: [PATCH 08/81] fix formatting Signed-off-by: Nicholas Curtis --- src/docs/conf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/docs/conf.py b/src/docs/conf.py index b659553f9..014ae7752 100644 --- a/src/docs/conf.py +++ b/src/docs/conf.py @@ -54,9 +54,7 @@ def install(package): myst_heading_anchors = 2 # enable replacement of (tm) & friends -myst_enable_extensions = [ - "replacements" -] +myst_enable_extensions = ["replacements"] # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] From 8857393571a97f2550ce012098893c3be4751de5 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Tue, 6 Jun 2023 12:25:40 -0400 Subject: [PATCH 09/81] fix missing Signed-off-by: Nicholas Curtis --- src/docs/profiling.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/docs/profiling.md b/src/docs/profiling.md index 1a9547775..b0b56dc91 100644 --- a/src/docs/profiling.md +++ b/src/docs/profiling.md @@ -370,7 +370,7 @@ Standalone Roofline Options: #### Roofline Only The following example demonstrates profiling roofline data only: -```shell +```shell-session $ omniperf profile --name vcopy --roof-only -- ./vcopy 1048576 256 Resolving rocprof ROC Profiler: /usr/bin/rocprof @@ -394,7 +394,8 @@ Checking for pmc_perf.csv in /home/colramos/GitHub/omniperf-pub/workloads/mix/m Empirical Roofline PDFs saved! ``` An inspection of our workload output folder shows .pdf plots were generated successfully -```shell +```shell-session +$ ls workloads/vcopy/mi200/ total 176 drwxrwxr-x 3 colramos colramos 4096 Apr 11 17:18 . drwxrwxr-x 3 colramos colramos 4096 Apr 11 17:15 .. @@ -409,4 +410,4 @@ drwxrwxr-x 2 colramos colramos 4096 Apr 11 17:16 perfmon ``` A sample *empirRoof_gpu-ALL_fp32.pdf* looks something like this: -![Sample Standalone Roof Plot](images/sample-roof-plot.png) +![Sample Standalone Roof Plot](images/sample-roof-plot.png) \ No newline at end of file From 60d4a425366ddf96fb14554564e6d8412d5e8e3c Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Wed, 7 Jun 2023 10:23:49 -0400 Subject: [PATCH 10/81] Add options to enable latexpdf builds Signed-off-by: Nicholas Curtis --- src/docs/conf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/docs/conf.py b/src/docs/conf.py index 014ae7752..d97f79bb7 100644 --- a/src/docs/conf.py +++ b/src/docs/conf.py @@ -89,6 +89,10 @@ def install(package): # The name of the Pygments (syntax highlighting) style to use. 
pygments_style = None +# options for latex output +latex_engine = 'lualatex' +latex_show_urls = 'footnote' + # -- Options for HTML output ------------------------------------------------- From be1eeee370cbbccb4c5667d2f359f9fd125431b6 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Wed, 7 Jun 2023 15:19:56 -0400 Subject: [PATCH 11/81] apply formatting Signed-off-by: Nicholas Curtis --- src/docs/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/docs/conf.py b/src/docs/conf.py index d97f79bb7..af0003fb7 100644 --- a/src/docs/conf.py +++ b/src/docs/conf.py @@ -90,8 +90,8 @@ def install(package): pygments_style = None # options for latex output -latex_engine = 'lualatex' -latex_show_urls = 'footnote' +latex_engine = "lualatex" +latex_show_urls = "footnote" # -- Options for HTML output ------------------------------------------------- From 8edba713fbbf1294b412d2eb603f1af082839ba6 Mon Sep 17 00:00:00 2001 From: "Karl W. Schulz" Date: Fri, 30 Jun 2023 15:01:57 -0500 Subject: [PATCH 12/81] updating path for rocm repo to supported rhel8 release (8.8) Signed-off-by: Karl W. 
Schulz --- docker/rhel8/rocm.repo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/rhel8/rocm.repo b/docker/rhel8/rocm.repo index 17171d755..8b2048978 100644 --- a/docker/rhel8/rocm.repo +++ b/docker/rhel8/rocm.repo @@ -7,7 +7,7 @@ gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key [amdgpu] name=amdgpu -baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.5/main/x86_64 +baseurl=https://repo.radeon.com/amdgpu/latest/rhel/8.8/main/x86_64 enabled=1 gpgcheck=1 gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key \ No newline at end of file From 5d84d0bb63c78c386761a91547611281f0e29138 Mon Sep 17 00:00:00 2001 From: JoseSantosAMD Date: Mon, 10 Jul 2023 16:26:56 -0500 Subject: [PATCH 13/81] Fixed Units inconsistencies - Table 10: Units were output as "$normUnit" now they are instr + normUnit - Table 16: Changed to Req per $normUnit Signed-off-by: JoseSantosAMD --- .../gfx906/1000_compute-unit-instruction-mix.yaml | 4 ++-- .../configs/gfx906/1600_L1_cache.yaml | 8 ++++---- .../configs/gfx906/1800_L2_cache_per_channel.yaml | 14 +++++++------- .../configs/gfx908/1600_L1_cache.yaml | 8 ++++---- .../configs/gfx908/1800_L2_cache_per_channel.yaml | 14 +++++++------- .../configs/gfx90a/1600_L1_cache.yaml | 8 ++++---- .../configs/gfx90a/1800_L2_cache_per_channel.yaml | 14 +++++++------- 7 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml index b72344f3b..fd4653c23 100644 --- a/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml +++ b/src/omniperf_analyze/configs/gfx906/1000_compute-unit-instruction-mix.yaml @@ -27,7 +27,7 @@ Panel Config: tips: LDS: count: AVG((SQ_INSTS_LDS / $denom)) - unit: $normUnit + unit: (instr + $normUnit) tips: VALU - MFMA: count: None # No HW module @@ -61,7 +61,7 @@ Panel Config: metric: INT-32: count: None # No perf counter - unit: $normUnit + unit: 
(instr + $normUnit) tips: INT-64: count: None # No perf counter diff --git a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml index 1713068d2..1e05b3e4c 100644 --- a/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml +++ b/src/omniperf_analyze/configs/gfx906/1600_L1_cache.yaml @@ -361,7 +361,7 @@ Panel Config: mean: AVG((TCP_UTCL1_REQUEST_sum / $denom)) min: MIN((TCP_UTCL1_REQUEST_sum / $denom)) max: MAX((TCP_UTCL1_REQUEST_sum / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: Hit Ratio: mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if @@ -376,17 +376,17 @@ Panel Config: mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) - units: ( + $normUnit) + units: (Hits + $normUnit) tips: Misses (Translation): mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: Misses (Permission): mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: diff --git a/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml index 95bba22e8..08a9a9f76 100644 --- a/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml +++ b/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml @@ -204,7 +204,7 @@ Panel Config: + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - units: ( + 
$normUnit) + units: (Req + $normUnit) tips: L1 - L2 Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) @@ -247,7 +247,7 @@ Panel Config: + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) + TO_INT(TCC_READ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) @@ -294,7 +294,7 @@ Panel Config: + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) @@ -345,7 +345,7 @@ Panel Config: + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) @@ -396,7 +396,7 @@ Panel Config: + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) @@ -447,7 +447,7 @@ Panel Config: + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + 
$normUnit) tips: L2 - EA Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) @@ -498,7 +498,7 @@ Panel Config: + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Lat: mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) diff --git a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml index 4ff3fd4d4..f65309a31 100644 --- a/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml +++ b/src/omniperf_analyze/configs/gfx908/1600_L1_cache.yaml @@ -361,7 +361,7 @@ Panel Config: mean: AVG((TCP_UTCL1_REQUEST_sum / $denom)) min: MIN((TCP_UTCL1_REQUEST_sum / $denom)) max: MAX((TCP_UTCL1_REQUEST_sum / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: Hit Ratio: mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if @@ -376,17 +376,17 @@ Panel Config: mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) - units: ( + $normUnit) + units: (Hits + $normUnit) tips: Misses (Translation): mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: Misses (Permission): mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: diff --git a/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml index 
e68511e9e..3acee5740 100644 --- a/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml +++ b/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml @@ -204,7 +204,7 @@ Panel Config: + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) @@ -247,7 +247,7 @@ Panel Config: + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) + TO_INT(TCC_READ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) @@ -294,7 +294,7 @@ Panel Config: + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) @@ -345,7 +345,7 @@ Panel Config: + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) @@ -396,7 +396,7 @@ Panel Config: + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + 
TO_INT(TCC_EA_RDREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) @@ -447,7 +447,7 @@ Panel Config: + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) @@ -498,7 +498,7 @@ Panel Config: + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Lat: mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) diff --git a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml index 985be3803..917cb3aa0 100644 --- a/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml +++ b/src/omniperf_analyze/configs/gfx90a/1600_L1_cache.yaml @@ -361,7 +361,7 @@ Panel Config: mean: AVG((TCP_UTCL1_REQUEST_sum / $denom)) min: MIN((TCP_UTCL1_REQUEST_sum / $denom)) max: MAX((TCP_UTCL1_REQUEST_sum / $denom)) - units: ( + $normUnit) + units: (Req + $normUnit) tips: Hit Ratio: mean: AVG((((100 * TCP_UTCL1_TRANSLATION_HIT_sum) / TCP_UTCL1_REQUEST_sum) if @@ -376,17 +376,17 @@ Panel Config: mean: AVG((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) max: MAX((TCP_UTCL1_TRANSLATION_HIT_sum / $denom)) - units: ( + $normUnit) + units: (Hits + $normUnit) tips: Misses (Translation): mean: AVG((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) max: 
MAX((TCP_UTCL1_TRANSLATION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: Misses (Permission): mean: AVG((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) min: MIN((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) max: MAX((TCP_UTCL1_PERMISSION_MISS_sum / $denom)) - units: ( + $normUnit) + units: (Misses + $normUnit) tips: diff --git a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml index a5bf6fa25..094df5b19 100644 --- a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml +++ b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml @@ -204,7 +204,7 @@ Panel Config: + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L1 - L2 Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) @@ -247,7 +247,7 @@ Panel Config: + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) + TO_INT(TCC_READ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L1 - L2 Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) @@ -294,7 +294,7 @@ Panel Config: + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L1 - L2 Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) @@ -345,7 +345,7 @@ Panel Config: + TO_INT(TCC_ATOMIC[26])) + 
TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L2 - EA Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) @@ -396,7 +396,7 @@ Panel Config: + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L2 - EA Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) @@ -447,7 +447,7 @@ Panel Config: + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L2 - EA Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) @@ -498,7 +498,7 @@ Panel Config: + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) / 32) / $denom)) - units: ( + $normUnit) + units: (req + $normUnit) tips: L2 - EA Read Lat: mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) From 2469716d13b227b2f9435f2e86160a2b8851c9c2 Mon Sep 17 00:00:00 2001 From: Cole Ramos Date: Tue, 11 Jul 2023 13:27:46 -0500 Subject: [PATCH 14/81] Update 1800_L2_cache_per_channel.yaml Capitalizing for consistency Signed-off-by: Cole Ramos --- .../configs/gfx90a/1800_L2_cache_per_channel.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml index 
094df5b19..93fc2b412 100644 --- a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml +++ b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml @@ -204,7 +204,7 @@ Panel Config: + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) @@ -294,7 +294,7 @@ Panel Config: + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) @@ -396,7 +396,7 @@ Panel Config: + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) @@ -447,7 +447,7 @@ Panel Config: + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) @@ -498,7 +498,7 @@ Panel Config: + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) + TO_INT(TCC_EA_ATOMIC[29])) + 
TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Lat: mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) From 6042cfb16a908ae89a7091a927cd124126b04643 Mon Sep 17 00:00:00 2001 From: Cole Ramos Date: Tue, 11 Jul 2023 13:29:07 -0500 Subject: [PATCH 15/81] Update 1800_L2_cache_per_channel.yaml Capitalizing for consistency Signed-off-by: Cole Ramos --- .../configs/gfx90a/1800_L2_cache_per_channel.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml index 93fc2b412..f13647847 100644 --- a/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml +++ b/src/omniperf_analyze/configs/gfx90a/1800_L2_cache_per_channel.yaml @@ -247,7 +247,7 @@ Panel Config: + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) + TO_INT(TCC_READ[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L1 - L2 Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) @@ -345,7 +345,7 @@ Panel Config: + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) / 32) / $denom)) - units: (req + $normUnit) + units: (Req + $normUnit) tips: L2 - EA Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) From 267750c085b222748971ea96f369f1a886aa4fef Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Tue, 11 Jul 2023 14:11:38 -0500 Subject: [PATCH 16/81] Rearranging build_df func to optimize ArchConfig for rocomni plugin Signed-off-by: colramos-amd --- src/omniperf_analyze/utils/parser.py 
| 57 +++++++++++++++++++--------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index b6573566b..025745b17 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -339,11 +339,25 @@ def gen_counter_list(formula): "CC": None, "RW": None, "GIOP": None, + "GFLOPs": None, } + built_in_counter=[ + "lds", + "grd", + "wgr", + "arch_vgpr", + "accum_vgpr", + "sgpr", + "scr", + "BeginNs", + "EndNs" + ] + + visited = False counters = [] if not isinstance(formula, str): - return counters + return visited, counters try: tree = ast.parse( formula.replace("$normUnit", "SQ_WAVES") @@ -351,15 +365,17 @@ def gen_counter_list(formula): .replace("$", "") ) for node in ast.walk(tree): - if ( - isinstance(node, ast.Name) - and node.id.rstrip("_sum").isupper() - and node.id not in function_filter - ): - counters.append(node.id.rstrip("_sum")) + if isinstance(node, ast.Name): + val = str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id) + if (val.isupper() and val not in function_filter): + counters.append(val) + visited = True + if val in built_in_counter: + visited = True except: pass - return counters + + return visited, counters def build_dfs(archConfigs, filter_metrics): @@ -381,9 +397,14 @@ def build_dfs(archConfigs, filter_metrics): dfs_type = {} metric_counters = {} for panel_id, panel in archConfigs.panel_configs.items(): + panel_idx = str(panel_id // 100) for data_source in panel["data source"]: for type, data_cofig in data_source.items(): if type == "metric_table": + metric_list[panel_idx] = panel["title"] + table_idx = panel_idx + "." 
+ str(data_cofig["id"] % 100) + metric_list[table_idx] = data_cofig["title"] + headers = ["Index"] for key, tile in data_cofig["header"].items(): if key != "tips": @@ -397,12 +418,7 @@ def build_dfs(archConfigs, filter_metrics): i = 0 for key, entries in data_cofig["metric"].items(): - data_source_idx = ( - str(data_cofig["id"] // 100) - + "." - + str(data_cofig["id"] % 100) - ) - metric_idx = data_source_idx + "." + str(i) + metric_idx = table_idx + "." + str(i) values = [] eqn_content = [] @@ -411,7 +427,7 @@ def build_dfs(archConfigs, filter_metrics): or (metric_idx in filter_metrics) # no filter or # metric in filter # the whole table in filter - (data_source_idx in filter_metrics) + (table_idx in filter_metrics) or # the whole IP block in filter (str(panel_id // 100) in filter_metrics) @@ -439,14 +455,19 @@ def build_dfs(archConfigs, filter_metrics): df = pd.concat([df, df_new_row]) # collect metric_list - metric_list[metric_idx] = key.replace(" ", "_") + metric_list[metric_idx] = key # generate mapping of counters and metrics filter = {} + _visited = False for formula in eqn_content: if formula is not None and formula != "None": - for k in gen_counter_list(formula): + visited, counters = gen_counter_list(formula) + if visited: + _visited = True + for k in counters: filter[k] = None - if len(filter) > 0: + + if len(filter) > 0 or _visited: metric_counters[key] = list(filter) i += 1 From 80c04feb77961d17b3e062d8d8f1fa78897d318d Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Tue, 11 Jul 2023 14:13:09 -0500 Subject: [PATCH 17/81] Abstract perfmon coalesing for useage in rocomni plugin Signed-off-by: colramos-amd --- src/utils/perfagg.py | 173 ++++++++++++++++++++++++++++--------------- 1 file changed, 115 insertions(+), 58 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 59460bc80..8e95482c5 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -256,6 +256,96 @@ def pmc_perf_split(workload_dir): 
os.remove(workload_perfmon_dir + "/pmc_perf.txt") +def update_pmc_bucket( + counters, + save_file, + soc, + pmc_list=None, + stext=None, + workload_perfmon_dir=None + ): + # Verify inputs. + # If save_file is True, we're being called internally, from perfmon_coalesce + # Else we're being called externally, from rocomni + detected_extermal_call = False + if save_file and (stext is None or workload_perfmon_dir is None): + raise ValueError("stext and workload_perfmon_dir must be specified if save_file is True") + if pmc_list is None: + detected_extermal_call = True + pmc_list = dict( + [ + ("SQ", []), + ("GRBM", []), + ("TCP", []), + ("TA", []), + ("TD", []), + ("TCC", []), + ("SPI", []), + ("CPC", []), + ("CPF", []), + ("GDS", []), + ("TCC2", {}), # per-channel TCC perfmon + ] + ) + for ch in range(perfmon_config[soc]["TCC_channels"]): + pmc_list["TCC2"][str(ch)] = [] + + if "SQ_ACCUM_PREV_HIRES" in counters: + # save all level counters separately + nindex = counters.index("SQ_ACCUM_PREV_HIRES") + level_counter = counters[nindex - 1] + + if save_file: + # Save to level counter file, file name = level counter name + fd = open(workload_perfmon_dir + "/" + level_counter + ".txt", "w") + fd.write(stext + "\n\n") + fd.write("gpu:\n") + fd.write("range:\n") + fd.write("kernel:\n") + fd.close() + + return pmc_list + + # save normal pmc counters in matching buckets + for counter in counters: + IP_block = counter.split(sep="_")[0].upper() + # SQC and SQ belong to the IP block, coalesce them + if IP_block == "SQC": + IP_block = "SQ" + + if IP_block != "TCC": + # Insert unique pmc counters into its bucket + if counter not in pmc_list[IP_block]: + pmc_list[IP_block].append(counter) + + else: + # TCC counters processing + m = re.match(r"[\s\S]+\[(\d+)\]", counter) + if m is None: + # Aggregated TCC counters + if counter not in pmc_list[IP_block]: + pmc_list[IP_block].append(counter) + + else: + # TCC channel ID + ch = m.group(1) + + # fake IP block for per channel TCC + if 
str(ch) in pmc_list["TCC2"]: + # append unique counter into the channel + if counter not in pmc_list["TCC2"][str(ch)]: + pmc_list["TCC2"][str(ch)].append(counter) + else: + # initial counter in this channel + pmc_list["TCC2"][str(ch)] = [counter] + + if detected_extermal_call: + # sort the per channel counter, so that same counter in all channels can be aligned + for ch in range(perfmon_config[soc]["TCC_channels"]): + pmc_list["TCC2"][str(ch)].sort() + return pmc_list + + def perfmon_coalesce(pmc_files_list, workload_dir, soc): workload_perfmon_dir = workload_dir + "/perfmon" @@ -296,55 +386,11 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): # we have found all the counters, store them in buckets counters = m.group(1).split() - if "SQ_ACCUM_PREV_HIRES" in counters: - # save all level counters separately - - nindex = counters.index("SQ_ACCUM_PREV_HIRES") - level_counter = counters[nindex - 1] - - # Save to level counter file, file name = level counter name - fd = open(workload_perfmon_dir + "/" + level_counter + ".txt", "w") - fd.write(stext + "\n\n") - fd.write("gpu:\n") - fd.write("range:\n") - fd.write("kernel:\n") - fd.close() - - continue - - # save normal pmc counters in matching buckets - for counter in counters: - IP_block = counter.split(sep="_")[0].upper() - # SQC and SQ belong to the IP block, coalesce them - if IP_block == "SQC": - IP_block = "SQ" - - if IP_block != "TCC": - # Insert unique pmc counters into its bucket - if counter not in pmc_list[IP_block]: - pmc_list[IP_block].append(counter) - - else: - # TCC counters processing - m = re.match(r"[\s\S]+\[(\d+)\]", counter) - if m is None: - # Aggregated TCC counters - if counter not in pmc_list[IP_block]: - pmc_list[IP_block].append(counter) - - else: - # TCC channel ID - ch = m.group(1) - - # fake IP block for per channel TCC - if str(ch) in pmc_list["TCC2"]: - # append unique counter into the channel - if counter not in pmc_list["TCC2"][str(ch)]: - 
pmc_list["TCC2"][str(ch)].append(counter) - else: - # initial counter in this channel - pmc_list["TCC2"][str(ch)] = [counter] - + + # Utilitze helper function once a list of counters has be extracted + save_file = True + pmc_list = update_pmc_bucket(counters, save_file, soc, pmc_list, stext, workload_perfmon_dir) + # add a timestamp file fd = open(workload_perfmon_dir + "/timestamps.txt", "w") fd.write("pmc:\n\n") @@ -360,9 +406,7 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): return pmc_list -def perfmon_emit(pmc_list, workload_dir, soc): - workload_perfmon_dir = workload_dir + "/perfmon" - +def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None): # Calculate the minimum number of iteration to save the pmc counters # non-TCC counters pmc_cnt = [ @@ -384,7 +428,12 @@ def perfmon_emit(pmc_list, workload_dir, soc): niter = max(math.ceil(max(pmc_cnt)), math.ceil(tcc_cnt) + math.ceil(max(tcc2_cnt))) # Emit PMC counters into pmc config file - fd = open(workload_perfmon_dir + "/pmc_perf.txt", "w") + if save_file: + workload_perfmon_dir = workload_dir + "/perfmon" + fd = open(workload_perfmon_dir + "/pmc_perf.txt", "w") + else: + batches = [] + tcc2_index = 0 for iter in range(niter): @@ -414,12 +463,20 @@ def perfmon_emit(pmc_list, workload_dir, soc): # TCC aggregated counters line = line + " " + " ".join(tcc_counters) - fd.write(line + "\n") + if save_file: + fd.write(line + "\n") + else: + b = line.split() + b.remove("pmc:") + batches.append(b) - fd.write("\ngpu:\n") - fd.write("range:\n") - fd.write("kernel:\n") - fd.close() + if save_file: + fd.write("\ngpu:\n") + fd.write("range:\n") + fd.write("kernel:\n") + fd.close() + else: + return batches def perfmon_filter(workload_dir, perfmon_dir, args): From 4d8383b4390cf8cacf225954e971f31891dea39d Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Tue, 11 Jul 2023 14:14:10 -0500 Subject: [PATCH 18/81] Comply to Python formatting Signed-off-by: colramos-amd --- src/omniperf_analyze/utils/parser.py | 
8 ++++---- src/utils/perfagg.py | 26 ++++++++++++-------------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index 025745b17..0328d7aa8 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -342,7 +342,7 @@ def gen_counter_list(formula): "GFLOPs": None, } - built_in_counter=[ + built_in_counter = [ "lds", "grd", "wgr", @@ -351,7 +351,7 @@ def gen_counter_list(formula): "sgpr", "scr", "BeginNs", - "EndNs" + "EndNs", ] visited = False @@ -367,7 +367,7 @@ def gen_counter_list(formula): for node in ast.walk(tree): if isinstance(node, ast.Name): val = str(node.id)[:-4] if str(node.id).endswith("_sum") else str(node.id) - if (val.isupper() and val not in function_filter): + if val.isupper() and val not in function_filter: counters.append(val) visited = True if val in built_in_counter: @@ -404,7 +404,7 @@ def build_dfs(archConfigs, filter_metrics): metric_list[panel_idx] = panel["title"] table_idx = panel_idx + "." + str(data_cofig["id"] % 100) metric_list[table_idx] = data_cofig["title"] - + headers = ["Index"] for key, tile in data_cofig["header"].items(): if key != "tips": diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 8e95482c5..04658795c 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -257,19 +257,16 @@ def pmc_perf_split(workload_dir): def update_pmc_bucket( - counters, - save_file, - soc, - pmc_list=None, - stext=None, - workload_perfmon_dir=None - ): + counters, save_file, soc, pmc_list=None, stext=None, workload_perfmon_dir=None +): # Verify inputs. 
# If save_file is True, we're being called internally, from perfmon_coalesce # Else we're being called externally, from rocomni detected_extermal_call = False if save_file and (stext is None or workload_perfmon_dir is None): - raise ValueError("stext and workload_perfmon_dir must be specified if save_file is True") + raise ValueError( + "stext and workload_perfmon_dir must be specified if save_file is True" + ) if pmc_list is None: detected_extermal_call = True pmc_list = dict( @@ -289,7 +286,7 @@ def update_pmc_bucket( ) for ch in range(perfmon_config[soc]["TCC_channels"]): pmc_list["TCC2"][str(ch)] = [] - + if "SQ_ACCUM_PREV_HIRES" in counters: # save all level counters separately nindex = counters.index("SQ_ACCUM_PREV_HIRES") @@ -305,7 +302,7 @@ def update_pmc_bucket( fd.close() return pmc_list - + # save normal pmc counters in matching buckets for counter in counters: IP_block = counter.split(sep="_")[0].upper() @@ -386,11 +383,13 @@ def perfmon_coalesce(pmc_files_list, workload_dir, soc): # we have found all the counters, store them in buckets counters = m.group(1).split() - + # Utilitze helper function once a list of counters has be extracted save_file = True - pmc_list = update_pmc_bucket(counters, save_file, soc, pmc_list, stext, workload_perfmon_dir) - + pmc_list = update_pmc_bucket( + counters, save_file, soc, pmc_list, stext, workload_perfmon_dir + ) + # add a timestamp file fd = open(workload_perfmon_dir + "/timestamps.txt", "w") fd.write("pmc:\n\n") @@ -434,7 +433,6 @@ def perfmon_emit(pmc_list, soc, save_file=True, workload_dir=None): else: batches = [] - tcc2_index = 0 for iter in range(niter): # Prefix From 3137076a72fa5cf35e723d6211f7b6cec17bc3f9 Mon Sep 17 00:00:00 2001 From: JoseSantosAMD Date: Mon, 17 Jul 2023 12:46:05 -0500 Subject: [PATCH 19/81] Migrate to @grafana/create-plugin Signed-off-by: JoseSantosAMD --- grafana_plugins/svg_plugin/.prettierrc.js | 5 +- grafana_plugins/svg_plugin/package.json | 65 +++++++++++++++++++---- 
grafana_plugins/svg_plugin/tsconfig.json | 12 +---- 3 files changed, 59 insertions(+), 23 deletions(-) diff --git a/grafana_plugins/svg_plugin/.prettierrc.js b/grafana_plugins/svg_plugin/.prettierrc.js index f60eb1d25..aaa5045c6 100644 --- a/grafana_plugins/svg_plugin/.prettierrc.js +++ b/grafana_plugins/svg_plugin/.prettierrc.js @@ -1,3 +1,4 @@ module.exports = { - ...require("./node_modules/@grafana/toolkit/src/config/prettier.plugin.config.json"), - }; \ No newline at end of file + // Prettier configuration provided by Grafana scaffolding + ...require("./.config/.prettierrc.js") +}; \ No newline at end of file diff --git a/grafana_plugins/svg_plugin/package.json b/grafana_plugins/svg_plugin/package.json index fb88025c1..aa445bd25 100644 --- a/grafana_plugins/svg_plugin/package.json +++ b/grafana_plugins/svg_plugin/package.json @@ -3,29 +3,72 @@ "version": "1.0.0", "description": "", "scripts": { - "build": "grafana-toolkit plugin:build", - "test": "grafana-toolkit plugin:test", - "dev": "grafana-toolkit plugin:dev", - "watch": "grafana-toolkit plugin:dev --watch", - "sign": "grafana-toolkit plugin:sign", - "start": "yarn watch" + "build": "webpack -c ./.config/webpack/webpack.config.ts --env production", + "dev": "webpack -w -c ./.config/webpack/webpack.config.ts --env development", + "e2e": "yarn exec cypress install && yarn exec grafana-e2e run", + "e2e:update": "yarn exec cypress install && yarn exec grafana-e2e run --update-screenshots", + "lint": "eslint --cache --ignore-path ./.gitignore --ext .js,.jsx,.ts,.tsx .", + "lint:fix": "yarn run lint --fix", + "server": "docker-compose up --build", + "sign": "npx --yes @grafana/sign-plugin@latest", + "start": "yarn watch", + "test": "jest --watch --onlyChanged", + "test:ci": "jest --passWithNoTests --maxWorkers 4", + "typecheck": "tsc --noEmit" }, "author": "Audacious Software Group", "license": "MIT", "devDependencies": { - "@grafana/toolkit": "latest", + "@babel/core": "^7.21.4", + "@grafana/e2e": "9.5.3", + 
"@grafana/e2e-selectors": "9.5.3", + "@grafana/eslint-config": "^6.0.0", + "@grafana/tsconfig": "^1.2.0-rc1", + "@swc/core": "^1.3.62", + "@swc/helpers": "^0.5.0", + "@swc/jest": "^0.2.26", + "@testing-library/jest-dom": "^5.16.5", + "@testing-library/react": "^12.1.4", + "@types/jest": "^29.5.0", + "@types/lodash": "^4.14.194", + "@types/node": "^18.15.11", + "copy-webpack-plugin": "^11.0.0", + "css-loader": "^6.7.3", "emotion": "10.0.27", + "eslint-webpack-plugin": "^4.0.1", + "fork-ts-checker-webpack-plugin": "^8.0.0", + "glob": "^10.2.7", + "identity-obj-proxy": "3.0.0", + "jest": "^29.5.0", + "jest-environment-jsdom": "^29.5.0", + "prettier": "^2.8.7", "react-monaco-editor": "^0.44.0", - "tslib": "^2.3.1" + "replace-in-file-webpack-plugin": "^1.0.6", + "sass": "1.63.2", + "sass-loader": "13.3.1", + "style-loader": "3.3.3", + "swc-loader": "^0.2.3", + "ts-node": "^10.9.1", + "tsconfig-paths": "^4.2.0", + "tslib": "^2.3.1", + "typescript": "4.8.4", + "webpack": "^5.86.0", + "webpack-cli": "^5.1.4", + "webpack-livereload-plugin": "^3.0.2" }, "engines": { "node": ">=14" }, "dependencies": { - "@grafana/runtime": "9.1.2", + "@emotion/css": "^11.1.3", "@grafana/data": "9.1.2", + "@grafana/runtime": "9.1.2", "@grafana/ui": "9.1.2", - "@svgdotjs/svg.js": "^3.1.1" + "@svgdotjs/svg.js": "^3.1.1", + "react": "17.0.2", + "react-dom": "17.0.2", + "tslib": "2.5.3" }, - "_comments": "Dependencies are not included as part of Omniperf. It's the user's responsibility to accept any licensing implications before building the project." + "_comments": "Dependencies are not included as part of Omniperf. 
It's the user's responsibility to accept any licensing implications before building the project.", + "packageManager": "yarn@1.22.19" } diff --git a/grafana_plugins/svg_plugin/tsconfig.json b/grafana_plugins/svg_plugin/tsconfig.json index 7e6657d2f..d294745aa 100644 --- a/grafana_plugins/svg_plugin/tsconfig.json +++ b/grafana_plugins/svg_plugin/tsconfig.json @@ -1,11 +1,3 @@ { - "extends": "./node_modules/@grafana/toolkit/src/config/tsconfig.plugin.json", - "include": ["src", "types"], - "compilerOptions": { - "types": ["@emotion/core"], - "rootDir": "./src", - "baseUrl": "./src", - "typeRoots": ["./node_modules/@types"], - "jsx": "react" - } -} + "extends": "./.config/tsconfig.json" +} \ No newline at end of file From 43d492dce2bb0bdbc2f26de9b569fa3b4010dbd0 Mon Sep 17 00:00:00 2001 From: JoseSantosAMD Date: Mon, 17 Jul 2023 13:02:00 -0500 Subject: [PATCH 20/81] Adding config files Signed-off-by: JoseSantosAMD --- .../svg_plugin/.config/tsconfig.json | 26 +++ .../.config/webpack/webpack.config.ts | 201 ++++++++++++++++++ 2 files changed, 227 insertions(+) create mode 100644 grafana_plugins/svg_plugin/.config/tsconfig.json create mode 100644 grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts diff --git a/grafana_plugins/svg_plugin/.config/tsconfig.json b/grafana_plugins/svg_plugin/.config/tsconfig.json new file mode 100644 index 000000000..64b376907 --- /dev/null +++ b/grafana_plugins/svg_plugin/.config/tsconfig.json @@ -0,0 +1,26 @@ +/* + * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. 
⚠️⚠️⚠️ + * + * In order to extend the configuration follow the steps in + * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-typescript-config + */ + { + "compilerOptions": { + "alwaysStrict": true, + "declaration": false, + "rootDir": "../src", + "baseUrl": "../src", + "typeRoots": ["../node_modules/@types"], + "resolveJsonModule": true + }, + "ts-node": { + "compilerOptions": { + "module": "commonjs", + "target": "es5", + "esModuleInterop": true + }, + "transpileOnly": true + }, + "include": ["../src", "./types"], + "extends": "@grafana/tsconfig" +} diff --git a/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts b/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts new file mode 100644 index 000000000..22cb86ca4 --- /dev/null +++ b/grafana_plugins/svg_plugin/.config/webpack/webpack.config.ts @@ -0,0 +1,201 @@ +/* + * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️ + * + * In order to extend the configuration follow the steps in + * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-webpack-config + */ + +import CopyWebpackPlugin from 'copy-webpack-plugin'; +import ESLintPlugin from 'eslint-webpack-plugin'; +import ForkTsCheckerWebpackPlugin from 'fork-ts-checker-webpack-plugin'; +import LiveReloadPlugin from 'webpack-livereload-plugin'; +import path from 'path'; +import ReplaceInFileWebpackPlugin from 'replace-in-file-webpack-plugin'; +import { Configuration } from 'webpack'; + +import { getPackageJson, getPluginJson, hasReadme, getEntries } from './utils'; +import { SOURCE_DIR, DIST_DIR } from './constants'; + +const pluginJson = getPluginJson(); + +const config = async (env): Promise => ({ + cache: { + type: 'filesystem', + buildDependencies: { + config: [__filename], + }, + }, + + context: path.join(process.cwd(), SOURCE_DIR), + + devtool: env.production ? 
'source-map' : 'eval-source-map', + + entry: await getEntries(), + + externals: [ + 'lodash', + 'jquery', + 'moment', + 'slate', + 'emotion', + '@emotion/react', + '@emotion/css', + 'prismjs', + 'slate-plain-serializer', + '@grafana/slate-react', + 'react', + 'react-dom', + 'react-redux', + 'redux', + 'rxjs', + 'react-router', + 'react-router-dom', + 'd3', + 'angular', + '@grafana/ui', + '@grafana/runtime', + '@grafana/data', + + // Mark legacy SDK imports as external if their name starts with the "grafana/" prefix + ({ request }, callback) => { + const prefix = 'grafana/'; + const hasPrefix = (request) => request.indexOf(prefix) === 0; + const stripPrefix = (request) => request.substr(prefix.length); + + if (hasPrefix(request)) { + return callback(undefined, stripPrefix(request)); + } + + callback(); + }, + ], + + mode: env.production ? 'production' : 'development', + + module: { + rules: [ + { + exclude: /(node_modules)/, + test: /\.[tj]sx?$/, + use: { + loader: 'swc-loader', + options: { + jsc: { + baseUrl: './src', + target: 'es2015', + loose: false, + parser: { + syntax: 'typescript', + tsx: true, + decorators: false, + dynamicImport: true, + }, + }, + }, + }, + }, + { + test: /\.css$/, + use: ["style-loader", "css-loader"] + }, + { + test: /\.s[ac]ss$/, + use: ['style-loader', 'css-loader', 'sass-loader'], + }, + { + test: /\.(png|jpe?g|gif|svg)$/, + type: 'asset/resource', + generator: { + // Keep publicPath relative for host.com/grafana/ deployments + publicPath: `public/plugins/${pluginJson.id}/img/`, + outputPath: 'img/', + filename: Boolean(env.production) ? '[hash][ext]' : '[name][ext]', + }, + }, + { + test: /\.(woff|woff2|eot|ttf|otf)(\?v=\d+\.\d+\.\d+)?$/, + type: 'asset/resource', + generator: { + // Keep publicPath relative for host.com/grafana/ deployments + publicPath: `public/plugins/${pluginJson.id}/fonts/`, + outputPath: 'fonts/', + filename: Boolean(env.production) ? 
'[hash][ext]' : '[name][ext]', + }, + }, + ], + }, + + output: { + clean: { + keep: new RegExp(`.*?_(amd64|arm(64)?)(.exe)?`), + }, + filename: '[name].js', + library: { + type: 'amd', + }, + path: path.resolve(process.cwd(), DIST_DIR), + publicPath: '/', + }, + + plugins: [ + new CopyWebpackPlugin({ + patterns: [ + // If src/README.md exists use it; otherwise the root README + // To `compiler.options.output` + { from: hasReadme() ? 'README.md' : '../README.md', to: '.', force: true }, + { from: 'plugin.json', to: '.' }, + { from: '../LICENSE', to: '.' }, + { from: '../CHANGELOG.md', to: '.', force: true }, + { from: '**/*.json', to: '.' }, // TODO + { from: '**/*.svg', to: '.', noErrorOnMissing: true }, // Optional + { from: '**/*.png', to: '.', noErrorOnMissing: true }, // Optional + { from: '**/*.html', to: '.', noErrorOnMissing: true }, // Optional + { from: 'img/**/*', to: '.', noErrorOnMissing: true }, // Optional + { from: 'libs/**/*', to: '.', noErrorOnMissing: true }, // Optional + { from: 'static/**/*', to: '.', noErrorOnMissing: true }, // Optional + ], + }), + // Replace certain template-variables in the README and plugin.json + new ReplaceInFileWebpackPlugin([ + { + dir: DIST_DIR, + files: ['plugin.json', 'README.md'], + rules: [ + { + search: /\%VERSION\%/g, + replace: getPackageJson().version, + }, + { + search: /\%TODAY\%/g, + replace: new Date().toISOString().substring(0, 10), + }, + { + search: /\%PLUGIN_ID\%/g, + replace: pluginJson.id, + }, + ], + }, + ]), + new ForkTsCheckerWebpackPlugin({ + async: Boolean(env.development), + issue: { + include: [{ file: '**/*.{ts,tsx}' }], + }, + typescript: { configFile: path.join(process.cwd(), 'tsconfig.json') }, + }), + new ESLintPlugin({ + extensions: ['.ts', '.tsx'], + lintDirtyModulesOnly: Boolean(env.development), // don't lint on start, only lint changed files + }), + ...(env.development ? 
[new LiveReloadPlugin()] : []), + ], + + resolve: { + extensions: ['.js', '.jsx', '.ts', '.tsx'], + // handle resolving "rootDir" paths + modules: [path.resolve(process.cwd(), 'src'), 'node_modules'], + unsafeCache: true, + }, +}); + +export default config; From fd55a698057929b2cc9bceb47ec5dac9ea941e18 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Mon, 17 Jul 2023 13:12:22 -0500 Subject: [PATCH 21/81] Filter additional ops in gen_counter_list fucn Signed-off-by: colramos-amd --- src/omniperf_analyze/utils/parser.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index 0328d7aa8..bff3314b2 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -340,6 +340,8 @@ def gen_counter_list(formula): "RW": None, "GIOP": None, "GFLOPs": None, + "CONCAT": None, + "MOD": None, } built_in_counter = [ @@ -362,6 +364,12 @@ def gen_counter_list(formula): tree = ast.parse( formula.replace("$normUnit", "SQ_WAVES") .replace("$denom", "SQ_WAVES") + .replace( + "$numActiveCUs", + "TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / GRBM_GUI_ACTIVE)), \ + 0) / $maxWavesPerCU) * 8) + MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) \ + / GRBM_GUI_ACTIVE)), 0), $maxWavesPerCU), 8)), $numCU))", + ) .replace("$", "") ) for node in ast.walk(tree): From 2b0ac9b5d8c9edb94ce62a9e0480790121e3a15d Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Mon, 17 Jul 2023 13:12:56 -0500 Subject: [PATCH 22/81] Enable join_prof() merge util to be called from outside Omniperf Signed-off-by: colramos-amd --- src/utils/perfagg.py | 57 +++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 04658795c..1c21b1736 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -95,13 +95,19 @@ def test_df_column_equality(df): # joins disparate runs less dumbly than rocprof def 
join_prof(workload_dir, join_type, log_file, verbose, out=None): # Set default output directory if not specified - if out == None: - out = workload_dir + "/pmc_perf.csv" - files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv") - df = None + if type(workload_dir) == str: + if out is None: + out = workload_dir + "/pmc_perf.csv" + files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv") + elif type(workload_dir) == list: + files = workload_dir + else: + print("ERROR: Invalid workload_dir") + sys.exit(1) + df = None for i, file in enumerate(files): - _df = pd.read_csv(file) + _df = pd.read_csv(file) if type(workload_dir) == str else file if join_type == "kernel": key = _df.groupby("KernelName").cumcount() _df["key"] = _df.KernelName + " - " + key.astype(str) @@ -137,7 +143,6 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col] duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] for key, cols in duplicate_cols.items(): - print("Key is ", key) _df = df[cols] if not test_df_column_equality(_df): msg = ( @@ -146,10 +151,12 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): ) ) warnings.warn(msg) - log_file.write(msg + "\n") + if log_file: + log_file.write(msg + "\n") else: msg = "Successfully joined {} in pmc_perf.csv".format(key) - log_file.write(msg + "\n") + if log_file: + log_file.write(msg + "\n") if test_df_column_equality(_df) and verbose: print(msg) @@ -179,6 +186,8 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): "fbar", "sig", "obj", + # rocscope specific merged counters, keep original + "dispatch_", ] ) ] @@ -189,7 +198,15 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): [ k for k in df.keys() - if not any(check in k for check in ["DispatchNs", "CompleteNs"]) + if not any( + check in k + for check in [ + "DispatchNs", + "CompleteNs", + # rocscope specific timestamp + 
"HostDuration", + ] + ) ] ] #   C) sanity check the name and key @@ -216,12 +233,14 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): df["EndNs"] = endNs # finally, join the drop key df = df.drop(columns=["key"]) - # and save to file - df.to_csv(out, index=False) - # and delete old file(s) - if not verbose: - for file in files: - os.remove(file) + # save to file and delete old file(s), skip if we're being called outside of Omniperf + if type(workload_dir) == str: + df.to_csv(out, index=False) + if not verbose: + for file in files: + os.remove(file) + else: + return df def pmc_perf_split(workload_dir): @@ -262,13 +281,13 @@ def update_pmc_bucket( # Verify inputs. # If save_file is True, we're being called internally, from perfmon_coalesce # Else we're being called externally, from rocomni - detected_extermal_call = False + detected_external_call = False if save_file and (stext is None or workload_perfmon_dir is None): raise ValueError( "stext and workload_perfmon_dir must be specified if save_file is True" ) if pmc_list is None: - detected_extermal_call = True + detected_external_call = True pmc_list = dict( [ ("SQ", []), @@ -287,7 +306,7 @@ def update_pmc_bucket( for ch in range(perfmon_config[soc]["TCC_channels"]): pmc_list["TCC2"][str(ch)] = [] - if "SQ_ACCUM_PREV_HIRES" in counters: + if "SQ_ACCUM_PREV_HIRES" in counters and not detected_external_call: # save all level counters separately nindex = counters.index("SQ_ACCUM_PREV_HIRES") level_counter = counters[nindex - 1] @@ -336,7 +355,7 @@ def update_pmc_bucket( # initial counter in this channel pmc_list["TCC2"][str(ch)] = [counter] - if detected_extermal_call: + if detected_external_call: # sort the per channel counter, so that same counter in all channels can be aligned for ch in range(perfmon_config[soc]["TCC_channels"]): pmc_list["TCC2"][str(ch)].sort() From d7ba2acec93a2fec5438593f6fe5c67e462a617f Mon Sep 17 00:00:00 2001 From: JoseSantosAMD Date: Mon, 17 Jul 2023 13:19:03 -0500 
Subject: [PATCH 23/81] Adding config files Signed-off-by: JoseSantosAMD --- grafana_plugins/svg_plugin/.config/.eslintrc | 13 ++ .../svg_plugin/.config/.prettierrc.js | 16 ++ grafana_plugins/svg_plugin/.config/Dockerfile | 16 ++ grafana_plugins/svg_plugin/.config/README.md | 164 ++++++++++++++++++ .../svg_plugin/.config/jest-setup.js | 25 +++ .../svg_plugin/.config/jest.config.js | 43 +++++ .../.config/jest/mocks/react-inlinesvg.tsx | 25 +++ .../svg_plugin/.config/jest/utils.js | 31 ++++ .../svg_plugin/.config/types/custom.d.ts | 37 ++++ .../svg_plugin/.config/webpack/constants.ts | 2 + .../svg_plugin/.config/webpack/utils.ts | 40 +++++ grafana_plugins/svg_plugin/.eslintrc | 3 + grafana_plugins/svg_plugin/.nvmrc | 1 + .../svg_plugin/docker-compose.yaml | 15 ++ grafana_plugins/svg_plugin/jest-setup.js | 2 + grafana_plugins/svg_plugin/jest.config.js | 8 + 16 files changed, 441 insertions(+) create mode 100644 grafana_plugins/svg_plugin/.config/.eslintrc create mode 100644 grafana_plugins/svg_plugin/.config/.prettierrc.js create mode 100644 grafana_plugins/svg_plugin/.config/Dockerfile create mode 100644 grafana_plugins/svg_plugin/.config/README.md create mode 100644 grafana_plugins/svg_plugin/.config/jest-setup.js create mode 100644 grafana_plugins/svg_plugin/.config/jest.config.js create mode 100644 grafana_plugins/svg_plugin/.config/jest/mocks/react-inlinesvg.tsx create mode 100644 grafana_plugins/svg_plugin/.config/jest/utils.js create mode 100644 grafana_plugins/svg_plugin/.config/types/custom.d.ts create mode 100644 grafana_plugins/svg_plugin/.config/webpack/constants.ts create mode 100644 grafana_plugins/svg_plugin/.config/webpack/utils.ts create mode 100644 grafana_plugins/svg_plugin/.eslintrc create mode 100644 grafana_plugins/svg_plugin/.nvmrc create mode 100644 grafana_plugins/svg_plugin/docker-compose.yaml create mode 100644 grafana_plugins/svg_plugin/jest-setup.js create mode 100644 grafana_plugins/svg_plugin/jest.config.js diff --git 
a/grafana_plugins/svg_plugin/.config/.eslintrc b/grafana_plugins/svg_plugin/.config/.eslintrc new file mode 100644 index 000000000..3f8c381a4 --- /dev/null +++ b/grafana_plugins/svg_plugin/.config/.eslintrc @@ -0,0 +1,13 @@ +/* + * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️ + * + * In order to extend the configuration follow the steps in + * https://grafana.github.io/plugin-tools/docs/advanced-configuration#extending-the-eslint-config + */ + { + "extends": ["@grafana/eslint-config"], + "root": true, + "rules": { + "react/prop-types": "off" + } +} diff --git a/grafana_plugins/svg_plugin/.config/.prettierrc.js b/grafana_plugins/svg_plugin/.config/.prettierrc.js new file mode 100644 index 000000000..66a76ec5b --- /dev/null +++ b/grafana_plugins/svg_plugin/.config/.prettierrc.js @@ -0,0 +1,16 @@ +/* + * ⚠️⚠️⚠️ THIS FILE WAS SCAFFOLDED BY `@grafana/create-plugin`. DO NOT EDIT THIS FILE DIRECTLY. ⚠️⚠️⚠️ + * + * In order to extend the configuration follow the steps in .config/README.md + */ + +module.exports = { + "endOfLine": "auto", + "printWidth": 120, + "trailingComma": "es5", + "semi": true, + "jsxSingleQuote": false, + "singleQuote": true, + "useTabs": false, + "tabWidth": 2 +}; \ No newline at end of file diff --git a/grafana_plugins/svg_plugin/.config/Dockerfile b/grafana_plugins/svg_plugin/.config/Dockerfile new file mode 100644 index 000000000..35d89bd1c --- /dev/null +++ b/grafana_plugins/svg_plugin/.config/Dockerfile @@ -0,0 +1,16 @@ +ARG grafana_version=latest +ARG grafana_image=grafana-enterprise + +FROM grafana/${grafana_image}:${grafana_version} + +# Make it as simple as possible to access the grafana instance for development purposes +# Do NOT enable these settings in a public facing / production grafana instance +ENV GF_AUTH_ANONYMOUS_ORG_ROLE "Admin" +ENV GF_AUTH_ANONYMOUS_ENABLED "true" +ENV GF_AUTH_BASIC_ENABLED "false" +# Set development mode so plugins can be loaded without the need to sign 
+ENV GF_DEFAULT_APP_MODE "development" + +# Inject livereload script into grafana index.html +USER root +RUN sed -i 's/<\/body><\/html>/