Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug fixes and handle ref-cycles missing #58

Merged
merged 7 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ build-public/postprocess:
--add-data "./src/base.html:." \
--runtime-tmpdir . \
--exclude-module readline
--bootloader-ignore-signals
cp $(TMPDIR)/dist/perf-postprocess build/
rm -rf $(TMPDIR)

Expand Down
2 changes: 1 addition & 1 deletion events/bdx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,4 @@ imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/;

#power related
power/energy-pkg/,
power/energy-ram/;
power/energy-ram/;
10 changes: 5 additions & 5 deletions events/clx_skx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,6 @@ instructions;
cstate_core/c6-residency/;
cstate_pkg/c6-residency/;

#power related
power/energy-pkg/,
power/energy-ram/;

#memory read/writes
imc/event=0x04,umask=0x03,name='UNC_M_CAS_COUNT.RD'/,
imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/;
Expand Down Expand Up @@ -236,4 +232,8 @@ iio/event=0x83,umask=0x01,ch_mask=0x08,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CP
upi/event=0x2,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/,
upi/event=0x2,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/,
upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/,
upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/;
upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/;

#power related
power/energy-pkg/,
power/energy-ram/;
10 changes: 5 additions & 5 deletions events/icx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,6 @@ instructions;
cstate_core/c6-residency/;
cstate_pkg/c6-residency/;

#power related
power/energy-pkg/,
power/energy-ram/;

# UPI related
upi/event=0x2,umask=0xf,name='UNC_UPI_TxL_FLITS.ALL_DATA'/;

Expand All @@ -196,4 +192,8 @@ cha/event=0x36,umask=0xC817FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/;

#memory read/writes
imc/event=0x04,umask=0x0f,name='UNC_M_CAS_COUNT.RD'/,
imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/;
imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/;

#power related
power/energy-pkg/,
power/energy-ram/;
7 changes: 7 additions & 0 deletions events/metric_bdx.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
"expression-txn": "[cpu-cycles:k] / [TXN]",
"origin": "perfspect"
},
{
"name": "metric_IPC",
"name-txn": "metric_txn per cycle",
"expression": "[instructions] / [cpu-cycles]",
"expression-txn": "[TXN] / [cpu-cycles]",
"origin": "perfspect"
},
{
"name": "metric_locks retired per instr",
"name-txn": "metric_locks retired per txn",
Expand Down
7 changes: 7 additions & 0 deletions events/metric_skx_clx.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
"expression-txn": "[cpu-cycles:k] / [TXN]",
"origin": "perfspect"
},
{
"name": "metric_IPC",
"name-txn": "metric_txn per cycle",
"expression": "[instructions] / [cpu-cycles]",
"expression-txn": "[TXN] / [cpu-cycles]",
"origin": "perfspect"
},
{
"name": "metric_locks retired per instr",
"name-txn": "metric_locks retired per txn",
Expand Down
10 changes: 5 additions & 5 deletions events/spr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,6 @@ instructions:k;
cstate_core/c6-residency/;
cstate_pkg/c6-residency/;

#power
power/energy-pkg/,
power/energy-ram/;

#UPI
upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/;

Expand All @@ -183,4 +179,8 @@ cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/;

#IMC (memory read/writes)
imc/event=0x05,umask=0xcf,name='UNC_M_CAS_COUNT.RD'/,
imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/;
imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/;

#power
power/energy-pkg/,
power/energy-ram/;
28 changes: 24 additions & 4 deletions perf-collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,26 @@ def tma_supported():
return True


def ref_cycles_supported():
perf_out = ""
try:
perf = subprocess.Popen(
shlex.split("perf stat -a -e ref-cycles sleep .1"),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
perf_out = perf.communicate()[0].decode()
except subprocess.CalledProcessError:
return False

if "<not supported>" in perf_out:
logging.warning(
"ref-cycles not enabled in VM driver. Contact system owner to enable. Collecting reduced metrics"
)
return False
return True


def resource_path(relative_path):
"""Get absolute path to resource, works for dev and for PyInstaller"""
base_path = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)))
Expand Down Expand Up @@ -372,12 +392,12 @@ def validate_file(fname):
(args.pid is not None or args.cid is not None or not have_uncore),
include_tma,
not have_uncore,
ref_cycles_supported(),
)

if not perf_helpers.validate_outfile(args.outcsv):
crash(
"Output filename not accepted. Filename should be a .csv without special characters"
)
# check output file is writable
if not perf_helpers.check_file_writeable(args.outcsv):
crash("Output file %s not writeable " % args.outcsv)

mux_intervals = perf_helpers.get_perf_event_mux_interval()
if args.muxinterval > 0:
Expand Down
2 changes: 1 addition & 1 deletion perf-collect.spec
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ exe = EXE(
[],
name='perf-collect',
debug=False,
bootloader_ignore_signals=False,
bootloader_ignore_signals=True,
strip=False,
upx=True,
upx_exclude=[],
Expand Down
20 changes: 10 additions & 10 deletions perf-postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,6 @@ def get_args(script_path):
if args.rawfile and not os.path.isfile(args.rawfile):
crash("perf raw data file not found, please provide valid raw file")

# check output file is valid
if not perf_helpers.validate_outfile(args.outfile, True):
crash(
"Output filename: "
+ args.outfile
+ " not accepted. Filename should be a .csv without special characters"
)

# check output file is writable
if not perf_helpers.check_file_writeable(args.outfile):
crash("Output file %s not writeable " % args.outfile)
Expand Down Expand Up @@ -725,8 +717,16 @@ def get_groups_to_dataframes(
group_start_end_index_dict[group_name] = (start_index, end_index)
start_index = end_index
current_group_indx += 1
group_name = "group_" + str(current_group_indx)
event_list = group_to_event[group_name]
try:
group_name = "group_" + str(current_group_indx)
event_list = group_to_event[group_name]
except KeyError:
crash(
"could not find "
+ str(row)
+ " in event grouping: "
+ str(group_to_event)
)
end_index += 1
group_to_df[group_name] = get_group_df_from_full_frame(
time_slice_df, start_index, time_slice_df.shape[0], perf_mode
Expand Down
42 changes: 9 additions & 33 deletions src/perf_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,18 +150,15 @@ def disable_nmi_watchdog():
proc_output.decode().strip().replace("kernel.nmi_watchdog = ", "")
)
if new_watchdog_status != 0:
crash("Failed to disable nmi watchdog!")
crash("Failed to disable nmi watchdog.")
logging.info(
"nmi_watchdog is temporary disabled. Will enable after collection."
"nmi_watchdog temporarily disabled. Will re-enable after collection."
)
else:
logging.info("nmi_watchdog disabled!")
logging.info("nmi_watchdog already disabled. No change needed.")
return nmi_watchdog_status
except subprocess.CalledProcessError as e:
logging.warning(e)
logging.warning("Failed to disable nmi_watchdog.")
except ValueError as e:
crash(f"Failed to disable watchdog: {e}")
except (ValueError, FileNotFoundError, subprocess.CalledProcessError) as e:
crash(f"Failed to disable nmi_watchdog: {e}")


# enable nmi watchdog
Expand All @@ -172,13 +169,10 @@ def enable_nmi_watchdog():
proc_output.decode().strip().replace("kernel.nmi_watchdog = ", "")
)
if new_watchdog_status != 1:
logging.warning("Failed to re-enable nmi_watchdog!")
logging.warning("Failed to re-enable nmi_watchdog.")
else:
logging.info("nmi_watchdog enabled!")
except subprocess.CalledProcessError as e:
logging.warning(e.output)
logging.warning("Failed to re-enable nmi_watchdog!")
except ValueError as e:
logging.info("nmi_watchdog re-enabled.")
except (ValueError, FileNotFoundError, subprocess.CalledProcessError) as e:
logging.warning(f"Failed to re-enable nmi_watchdog: {e}")


Expand Down Expand Up @@ -308,25 +302,7 @@ def get_cpuid_info(procinfo):
return socketinfo


# check for special characters in output filename
def validate_outfile(filename, xlsx=False):
valid = False
resdir = os.path.dirname(filename)
outfile = os.path.basename(filename)
if resdir and not os.path.exists(resdir):
return False
regx = r"[@!#$%^&*()<>?\|}{~:]"
# regex = re.compile("[@!#$%^&*()<>?/\|}{~:]")
regex = re.compile(regx)
if regex.search(outfile) is None:
if filename.endswith(".csv"):
return True
if xlsx and filename.endswith(".xlsx"):
return True
return valid


# check write permissions
# check write permissions on file, or directory if file doesn't exist
def check_file_writeable(outfile):
if os.path.exists(outfile):
if os.path.isfile(outfile):
Expand Down
29 changes: 20 additions & 9 deletions src/prepare_perf_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def get_cgroup_events_format(cgroups, events, num_events):
return perf_format


def filter_events(event_file, cpu_only, TMA_supported, in_vm):
def filter_events(event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles):
if not os.path.isfile(event_file):
crash("event file not found")
collection_events = []
Expand All @@ -124,12 +124,19 @@ def filter_events(event_file, cpu_only, TMA_supported, in_vm):
seperate_cycles = []
if in_vm:
# since most CSP's hide cycles fixed PMU inside their VM's we put it in its own group
seperate_cycles = [
"cpu-cycles,",
"cpu-cycles:k,",
"ref-cycles,",
"instructions;",
]
if supports_ref_cycles:
seperate_cycles = [
"cpu-cycles,",
"cpu-cycles:k,",
"ref-cycles,",
"instructions;",
]
else:
seperate_cycles = [
"cpu-cycles,",
"cpu-cycles:k,",
"instructions;",
]

def process(line):
line = line.strip()
Expand All @@ -153,6 +160,8 @@ def process(line):
for line in fin:
if in_vm and "cpu-cycles" in line:
continue
if not supports_ref_cycles and "ref-cycles" in line:
continue
process(line)
for line in seperate_cycles:
process(line)
Expand All @@ -163,15 +172,17 @@ def process(line):
return collection_events, unsupported_events


def prepare_perf_events(event_file, cpu_only, TMA_supported, in_vm):
def prepare_perf_events(
event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles
):
start_group = "'{"
end_group = "}'"
group = ""
prev_group = ""
new_group = True

collection_events, unsupported_events = filter_events(
event_file, cpu_only, TMA_supported, in_vm
event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles
)
core_event = []
uncore_event = []
Expand Down