From 4b926cac21504ea2136e6fe18947f36e59f3b806 Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Tue, 3 Oct 2023 10:51:59 -0700 Subject: [PATCH] bug fixes and lack of ref-cycle support --- Makefile | 1 + events/bdx.txt | 2 +- events/clx_skx.txt | 10 ++++----- events/icx.txt | 10 ++++----- events/metric_bdx.json | 7 +++++++ events/metric_skx_clx.json | 7 +++++++ events/spr.txt | 10 ++++----- perf-collect.py | 28 +++++++++++++++++++++---- perf-collect.spec | 2 +- perf-postprocess.py | 20 +++++++++--------- src/perf_helpers.py | 42 ++++++++------------------------------ src/prepare_perf_events.py | 29 ++++++++++++++++++-------- 12 files changed, 95 insertions(+), 73 deletions(-) diff --git a/Makefile b/Makefile index 2837cba..d5d86da 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,7 @@ build-public/postprocess: --add-data "./src/base.html:." \ --runtime-tmpdir . \ --exclude-module readline + --bootloader-ignore-signals cp $(TMPDIR)/dist/perf-postprocess build/ rm -rf $(TMPDIR) diff --git a/events/bdx.txt b/events/bdx.txt index 2e1ffd8..692b9e5 100644 --- a/events/bdx.txt +++ b/events/bdx.txt @@ -171,4 +171,4 @@ imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/; #power related power/energy-pkg/, -power/energy-ram/; +power/energy-ram/; \ No newline at end of file diff --git a/events/clx_skx.txt b/events/clx_skx.txt index aa716f9..7ce3a76 100644 --- a/events/clx_skx.txt +++ b/events/clx_skx.txt @@ -196,10 +196,6 @@ instructions; cstate_core/c6-residency/; cstate_pkg/c6-residency/; -#power related -power/energy-pkg/, -power/energy-ram/; - #memory read/writes imc/event=0x04,umask=0x03,name='UNC_M_CAS_COUNT.RD'/, imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/; @@ -236,4 +232,8 @@ iio/event=0x83,umask=0x01,ch_mask=0x08,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CP upi/event=0x2,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/, upi/event=0x2,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/, upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/, -upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/; \ No newline at end of file +upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/; + +#power related +power/energy-pkg/, +power/energy-ram/; \ No newline at end of file diff --git a/events/icx.txt b/events/icx.txt index f7bfa8a..7e0e5ae 100644 --- a/events/icx.txt +++ b/events/icx.txt @@ -169,10 +169,6 @@ instructions; cstate_core/c6-residency/; cstate_pkg/c6-residency/; -#power related -power/energy-pkg/, -power/energy-ram/; - # UPI related upi/event=0x2,umask=0xf,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; @@ -196,4 +192,8 @@ cha/event=0x36,umask=0xC817FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; #memory read/writes imc/event=0x04,umask=0x0f,name='UNC_M_CAS_COUNT.RD'/, -imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/; \ No newline at end of file +imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/; + +#power related +power/energy-pkg/, +power/energy-ram/; \ No newline at end of file diff --git a/events/metric_bdx.json b/events/metric_bdx.json index 921d8d9..7e46419 100644 --- a/events/metric_bdx.json +++ b/events/metric_bdx.json @@ -25,6 +25,13 @@ "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycle", + "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", + "origin": "perfspect" + }, { "name": "metric_locks retired per instr", "name-txn": "metric_locks retired per txn", diff --git a/events/metric_skx_clx.json b/events/metric_skx_clx.json index 0b4b49d..9974e96 100644 --- a/events/metric_skx_clx.json +++ b/events/metric_skx_clx.json @@ -25,6 +25,13 @@ "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycle", + "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", + "origin": "perfspect" + }, { "name": "metric_locks retired per instr", "name-txn": "metric_locks retired per txn", diff --git a/events/spr.txt b/events/spr.txt index 5702d1d..92efcdb 100644 --- a/events/spr.txt +++ b/events/spr.txt @@ -153,10 +153,6 @@ instructions:k; cstate_core/c6-residency/; cstate_pkg/c6-residency/; -#power -power/energy-pkg/, -power/energy-ram/; - #UPI upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; @@ -183,4 +179,8 @@ cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; #IMC (memory read/writes) imc/event=0x05,umask=0xcf,name='UNC_M_CAS_COUNT.RD'/, -imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/; \ No newline at end of file +imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/; + +#power +power/energy-pkg/, +power/energy-ram/; \ No newline at end of file diff --git a/perf-collect.py b/perf-collect.py index 4aee24c..07664d8 100644 --- a/perf-collect.py +++ b/perf-collect.py @@ -190,6 +190,26 @@ def tma_supported(): return True +def ref_cycles_supported(): + perf_out = "" + try: + perf = subprocess.Popen( + shlex.split("perf stat -a -e ref-cycles sleep .1"), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) + perf_out = perf.communicate()[0].decode() + except subprocess.CalledProcessError: + return False + + if "" in perf_out: + logging.warning( + "ref-cycles not enabled in VM driver. Contact system owner to enable. Collecting reduced metrics" + ) + return False + return True + + def resource_path(relative_path): """Get absolute path to resource, works for dev and for PyInstaller""" base_path = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))) @@ -372,12 +392,12 @@ def validate_file(fname): (args.pid is not None or args.cid is not None or not have_uncore), include_tma, not have_uncore, + ref_cycles_supported(), ) - if not perf_helpers.validate_outfile(args.outcsv): - crash( - "Output filename not accepted. Filename should be a .csv without special characters" - ) + # check output file is writable + if not perf_helpers.check_file_writeable(args.outcsv): + crash("Output file %s not writeable " % args.outcsv) mux_intervals = perf_helpers.get_perf_event_mux_interval() if args.muxinterval > 0: diff --git a/perf-collect.spec b/perf-collect.spec index da16dde..b481c68 100644 --- a/perf-collect.spec +++ b/perf-collect.spec @@ -34,7 +34,7 @@ exe = EXE( [], name='perf-collect', debug=False, - bootloader_ignore_signals=False, + bootloader_ignore_signals=True, strip=False, upx=True, upx_exclude=[], diff --git a/perf-postprocess.py b/perf-postprocess.py index a6d3cf8..490005b 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -121,14 +121,6 @@ def get_args(script_path): if args.rawfile and not os.path.isfile(args.rawfile): crash("perf raw data file not found, please provide valid raw file") - # check output file is valid - if not perf_helpers.validate_outfile(args.outfile, True): - crash( - "Output filename: " - + args.outfile - + " not accepted. Filename should be a .csv without special characters" - ) - # check output file is writable if not perf_helpers.check_file_writeable(args.outfile): crash("Output file %s not writeable " % args.outfile) @@ -725,8 +717,16 @@ def get_groups_to_dataframes( group_start_end_index_dict[group_name] = (start_index, end_index) start_index = end_index current_group_indx += 1 - group_name = "group_" + str(current_group_indx) - event_list = group_to_event[group_name] + try: + group_name = "group_" + str(current_group_indx) + event_list = group_to_event[group_name] + except KeyError: + crash( + "could not find " + + str(row) + + " in event grouping: " + + str(group_to_event) + ) end_index += 1 group_to_df[group_name] = get_group_df_from_full_frame( time_slice_df, start_index, time_slice_df.shape[0], perf_mode diff --git a/src/perf_helpers.py b/src/perf_helpers.py index 111b690..cf83f3f 100644 --- a/src/perf_helpers.py +++ b/src/perf_helpers.py @@ -150,18 +150,15 @@ def disable_nmi_watchdog(): proc_output.decode().strip().replace("kernel.nmi_watchdog = ", "") ) if new_watchdog_status != 0: - crash("Failed to disable nmi watchdog!") + crash("Failed to disable nmi watchdog.") logging.info( - "nmi_watchdog is temporary disabled. Will enable after collection." + "nmi_watchdog temporarily disabled. Will re-enable after collection." ) else: - logging.info("nmi_watchdog disabled!") + logging.info("nmi_watchdog already disabled. No change needed.") return nmi_watchdog_status - except subprocess.CalledProcessError as e: - logging.warning(e) - logging.warning("Failed to disable nmi_watchdog.") - except ValueError as e: - crash(f"Failed to disable watchdog: {e}") + except (ValueError, FileNotFoundError, subprocess.CalledProcessError) as e: + crash(f"Failed to disable nmi_watchdog: {e}") # enable nmi watchdog @@ -172,13 +169,10 @@ def enable_nmi_watchdog(): proc_output.decode().strip().replace("kernel.nmi_watchdog = ", "") ) if new_watchdog_status != 1: - logging.warning("Failed to re-enable nmi_watchdog!") + logging.warning("Failed to re-enable nmi_watchdog.") else: - logging.info("nmi_watchdog enabled!") - except subprocess.CalledProcessError as e: - logging.warning(e.output) - logging.warning("Failed to re-enable nmi_watchdog!") - except ValueError as e: + logging.info("nmi_watchdog re-enabled.") + except (ValueError, FileNotFoundError, subprocess.CalledProcessError) as e: logging.warning(f"Failed to re-enable nmi_watchdog: {e}") @@ -308,25 +302,7 @@ def get_cpuid_info(procinfo): return socketinfo -# check for special characters in output filename -def validate_outfile(filename, xlsx=False): - valid = False - resdir = os.path.dirname(filename) - outfile = os.path.basename(filename) - if resdir and not os.path.exists(resdir): - return False - regx = r"[@!#$%^&*()<>?\|}{~:]" - # regex = re.compile("[@!#$%^&*()<>?/\|}{~:]") - regex = re.compile(regx) - if regex.search(outfile) is None: - if filename.endswith(".csv"): - return True - if xlsx and filename.endswith(".xlsx"): - return True - return valid - - -# check write permissions +# check write permissions on file, or directory if file doesn't exist def check_file_writeable(outfile): if os.path.exists(outfile): if os.path.isfile(outfile): diff --git a/src/prepare_perf_events.py b/src/prepare_perf_events.py index e3c4737..74a9564 100644 --- a/src/prepare_perf_events.py +++ b/src/prepare_perf_events.py @@ -115,7 +115,7 @@ def get_cgroup_events_format(cgroups, events, num_events): return perf_format -def filter_events(event_file, cpu_only, TMA_supported, in_vm): +def filter_events(event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles): if not os.path.isfile(event_file): crash("event file not found") collection_events = [] @@ -124,12 +124,19 @@ def filter_events(event_file, cpu_only, TMA_supported, in_vm): seperate_cycles = [] if in_vm: # since most CSP's hide cycles fixed PMU inside their VM's we put it in its own group - seperate_cycles = [ - "cpu-cycles,", - "cpu-cycles:k,", - "ref-cycles,", - "instructions;", - ] + if supports_ref_cycles: + seperate_cycles = [ + "cpu-cycles,", + "cpu-cycles:k,", + "ref-cycles,", + "instructions;", + ] + else: + seperate_cycles = [ + "cpu-cycles,", + "cpu-cycles:k,", + "instructions;", + ] def process(line): line = line.strip() @@ -153,6 +160,8 @@ def process(line): for line in fin: if in_vm and "cpu-cycles" in line: continue + if not supports_ref_cycles and "ref-cycles" in line: + continue process(line) for line in seperate_cycles: process(line) @@ -163,7 +172,9 @@ def process(line): return collection_events, unsupported_events -def prepare_perf_events(event_file, cpu_only, TMA_supported, in_vm): +def prepare_perf_events( + event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles +): start_group = "'{" end_group = "}'" group = "" @@ -171,7 +182,7 @@ def prepare_perf_events(event_file, cpu_only, TMA_supported, in_vm): new_group = True collection_events, unsupported_events = filter_events( - event_file, cpu_only, TMA_supported, in_vm + event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles ) core_event = [] uncore_event = []