Skip to content

Commit

Permalink
add avx256 and 512 metrics. fix non-contiguous device enumeration. (#61)
Browse files Browse the repository at this point in the history
  • Loading branch information
hilldani authored Nov 17, 2023
1 parent 1e289fc commit 676d3e4
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 52 deletions.
2 changes: 1 addition & 1 deletion _version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.3.9
1.3.10
19 changes: 12 additions & 7 deletions events/icx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ cpu-cycles,
ref-cycles,
instructions;

# OCR group 1 (ICX PMU supports a maximum of two OCR counters per group)
cpu/event=0xb7,umask=0x01,offcore_rsp=0x104000477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/,
cpu/event=0xb7,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/,
cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/,
Expand All @@ -39,7 +38,6 @@ cpu-cycles,
ref-cycles,
instructions;

# OCR group 2 (ICX PMU supports a maximum of two OCR counters per group)
cpu/event=0xb7,umask=0x01,offcore_rsp=0x730000477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/,
cpu/event=0xb7,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/,
cpu/event=0x08,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/,
Expand Down Expand Up @@ -69,6 +67,17 @@ cpu-cycles,
ref-cycles,
instructions;

# TMA FP vector related (128b/256b/512b packed, incl. AVX-512)
cpu/event=0xc7,umask=0x80,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE'/,
cpu/event=0xc7,umask=0x40,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE'/,
cpu/event=0xc7,umask=0x20,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE'/,
cpu/event=0xc7,umask=0x10,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE'/,
cpu/event=0xc7,umask=0x08,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE'/,
cpu/event=0xc7,umask=0x04,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE'/,
cpu-cycles,
ref-cycles,
instructions;

cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/,
cpu/event=0x79,umask=0x04,cmask=0x05,period=2000003,name='IDQ.MITE_CYCLES_OK'/,
cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/,
Expand Down Expand Up @@ -129,10 +138,6 @@ cpu/event=0xd3,umask=0x02,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRE
cpu/event=0xd3,umask=0x01,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM'/,
cpu/event=0xd3,umask=0x08,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD'/,
cpu/event=0xd3,umask=0x04,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM'/,
cpu/event=0xb1,umask=0x10,cmask=0x00,period=2000003,name='UOPS_EXECUTED.X87'/,
cpu/event=0xb1,umask=0x01,cmask=0x00,period=2000003,name='UOPS_EXECUTED.THREAD'/,
cpu/event=0xc7,umask=0x03,period=100003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/,
cpu/event=0xc7,umask=0xfc,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc'/,
cpu-cycles,
ref-cycles,
instructions;
Expand Down Expand Up @@ -196,4 +201,4 @@ imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/;

#power related
power/energy-pkg/,
power/energy-ram/;
power/energy-ram/;
27 changes: 6 additions & 21 deletions events/metric_icx.json
Original file line number Diff line number Diff line change
Expand Up @@ -118,21 +118,6 @@
"name": "metric_% Uops delivered from legacy decode pipeline (MITE)",
"expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )"
},
{
"name": "metric_core % cycles in non AVX license",
"expression": "(100 * [CORE_POWER.LVL0_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])",
"origin": "perfspect"
},
{
"name": "metric_core % cycles in AVX2 license",
"expression": "(100 * [CORE_POWER.LVL1_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])",
"origin": "perfspect"
},
{
"name": "metric_core % cycles in AVX-512 license",
"expression": "(100 * [CORE_POWER.LVL2_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])",
"origin": "perfspect"
},
{
"name": "metric_core initiated local dram read bandwidth (MB/sec)",
"expression": "(([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000) / 1"
Expand Down Expand Up @@ -366,16 +351,16 @@
"expression": "100 * ( max( 0 , ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) - ( ( ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) )"
},
{
"name": "metric_TMA_....FP_Arith(%)",
"expression": "100 * ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [UOPS_EXECUTED.X87] / [UOPS_EXECUTED.THREAD] ) + ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) + ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) ) )"
"name": "metric_TMA_........FP_Vector_128b(%)",
"expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE] + [FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )"
},
{
"name": "metric_TMA_......FP_Scalar(%)",
"expression": "100 * ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) )"
"name": "metric_TMA_........FP_Vector_256b(%)",
"expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE] + [FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )"
},
{
"name": "metric_TMA_......FP_Vector(%)",
"expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )"
"name": "metric_TMA_........FP_Vector_512b(%)",
"expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE] + [FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )"
},
{
"name": "metric_TMA_..Heavy_Operations(%)",
Expand Down
14 changes: 11 additions & 3 deletions events/metric_spr_emr.json
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,16 @@
"expression": "100 * ( max( 0 , ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) - ( [PERF_METRICS.HEAVY_OPERATIONS] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) )"
},
{
"name": "metric_TMA_....FP_Arith(%)",
"expression": "100 * ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [UOPS_EXECUTED.X87] / [UOPS_EXECUTED.THREAD] ) + ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] + [FP_ARITH_INST_RETIRED2.SCALAR] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) + ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0x3c] + [FP_ARITH_INST_RETIRED2.VECTOR] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) ) + ( [AMX_OPS_RETIRED.BF16:c1] / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) )"
"name": "metric_TMA_........FP_Vector_256b(%)",
"expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE] + [FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE] + [FP_ARITH_INST_RETIRED2.256B_PACKED_HALF] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )"
},
{
"name": "metric_TMA_........FP_Vector_512b(%)",
"expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE] + [FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE] + [FP_ARITH_INST_RETIRED2.512B_PACKED_HALF] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )"
},
{
"name": "metric_TMA_......Int_Vector_256b(%)",
"expression": "100 * ( ( [INT_VEC_RETIRED.ADD_256] + [INT_VEC_RETIRED.MUL_256] + [INT_VEC_RETIRED.VNNI_256] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) )"
},
{
"name": "metric_TMA_..Heavy_Operations(%)",
Expand All @@ -388,4 +396,4 @@
"expression": "(1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED]) if [SOCKET_COUNT] > 1 else 0",
"origin": "perfspect"
}
]
]
18 changes: 14 additions & 4 deletions events/spr_emr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,20 @@ cpu-cycles,
ref-cycles,
instructions;

cpu/event=0xcf,umask=0x10,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.512B_PACKED_HALF'/,
cpu/event=0xcf,umask=0x08,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.256B_PACKED_HALF'/,
cpu/event=0xc7,umask=0x80,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE'/,
cpu/event=0xc7,umask=0x40,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE'/,
cpu/event=0xc7,umask=0x20,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE'/,
cpu/event=0xc7,umask=0x10,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE'/,
cpu-cycles,
ref-cycles,
instructions;

cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/,
cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/,
cpu/event=0x12,umask=0x20,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.STLB_HIT:c1'/,
cpu/event=0x12,umask=0x10,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.WALK_ACTIVE'/,
cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/,
cpu/event=0xa3,umask=0x10,cmask=0x10,period=1000003,name='CYCLE_ACTIVITY.CYCLES_MEM_ANY'/,
cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/,
cpu/event=0xad,umask=0x80,period=500009,name='INT_MISC.CLEAR_RESTEER_CYCLES'/,
Expand All @@ -69,7 +79,6 @@ cpu-cycles,
ref-cycles,
instructions;

cpu/event=0xce,umask=0x02,cmask=0x01,period=100003,name='AMX_OPS_RETIRED.BF16:c1'/,
cpu/event=0xd3,umask=0x10,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM'/,
cpu/event=0xd1,umask=0x08,cmask=0x00,period=200003,name='MEM_LOAD_RETIRED.L1_MISS'/,
cpu/event=0xd1,umask=0x80,cmask=0x00,period=1000003,name='MEM_LOAD_RETIRED.LOCAL_PMM'/,
Expand All @@ -85,9 +94,7 @@ cpu/event=0xd0,umask=0x21,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.LOCK_
cpu/event=0xd0,umask=0x82,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.ALL_STORES'/,
cpu/event=0x24,umask=0xe2,cmask=0x00,period=2000003,name='L2_RQSTS.ALL_RFO'/,
cpu/event=0x24,umask=0xc2,cmask=0x00,period=2000003,name='L2_RQSTS.RFO_HIT'/,
cpu/event=0xcf,umask=0x03,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.SCALAR'/,
cpu/event=0xcf,umask=0x1c,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.VECTOR'/,
cpu/event=0xc7,umask=0x03,period=100003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/,
cpu/event=0xc7,umask=0x3c,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0x3c'/,
cpu-cycles,
ref-cycles,
Expand Down Expand Up @@ -135,6 +142,9 @@ cpu-cycles,
ref-cycles,
instructions;

cpu/event=0xe7,umask=0x0c,cmask=0x00,period=100003,name='INT_VEC_RETIRED.ADD_256'/,
cpu/event=0xe7,umask=0x20,cmask=0x00,period=100003,name='INT_VEC_RETIRED.VNNI_256'/,
cpu/event=0xe7,umask=0x80,cmask=0x00,period=100003,name='INT_VEC_RETIRED.MUL_256'/,
cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/,
cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/,
cpu/event=0x79,umask=0x20,period=100003,name='IDQ.MS_UOPS'/,
Expand Down
8 changes: 4 additions & 4 deletions src/perf_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,13 @@ def get_imc_cha_upi_count():
return imc_count, cha_count, upi_count


# get imc channel ids, channel ids are not consecutive in some cases (observed on bdw)
def get_channel_ids():
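# collect the sorted numeric ids of event-source devices whose names match pattern followed by a digit; device ids are not consecutive in some cases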
def get_channel_ids(pattern):
sysdevices = os.listdir("/sys/bus/event_source/devices")
imc = "uncore_imc_[0-9]*"
devices = pattern + "[0-9]*"
ids = []
for entry in sysdevices:
if fnmatch.fnmatch(entry, imc):
if fnmatch.fnmatch(entry, devices):
words = entry.split("_")
ids.append(int(words[-1]))
ids = sorted(ids)
Expand Down
17 changes: 5 additions & 12 deletions src/prepare_perf_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,11 @@ def is_cpu_event(line):
return False


# save the last group names in a list when it is cha or imc
# test for cha or imc event. append with count value
# once reaches new group, start looping through all imc/cha counts to finish up
def enumerate_uncore(group, pattern, n, default_range=True):
# enumerate uncore events across all devices
def enumerate_uncore(group, pattern, count):
uncore_group = ""
ids = []
if default_range:
ids = range(n)
else:
ids = helper.get_channel_ids()
for i in range(n - 1):
ids = helper.get_channel_ids(pattern)
for i in range(count - 1):
old = pattern + str(ids[i])
new = pattern + str(ids[i + 1])
group = group.replace(old, new)
Expand Down Expand Up @@ -216,8 +210,7 @@ def prepare_perf_events(
# enumerate all uncore units
if new_group and (unc_count > 1):
name = helper.get_dev_name(line.split("/")[0].strip())
default_range = name != "uncore_imc"
group += enumerate_uncore(prev_group, name + "_", unc_count, default_range)
group += enumerate_uncore(prev_group, name + "_", unc_count)

group = group[:-1]
if len(event_names) == 0:
Expand Down

0 comments on commit 676d3e4

Please sign in to comment.