diff --git a/_version.txt b/_version.txt index d4c4950..0c00f61 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.3.9 +1.3.10 diff --git a/events/icx.txt b/events/icx.txt index 7e0e5ae..4d3457b 100644 --- a/events/icx.txt +++ b/events/icx.txt @@ -30,7 +30,6 @@ cpu-cycles, ref-cycles, instructions; -# OCR group 1 (ICX PMU supports a maximum of two OCR counters per group) cpu/event=0xb7,umask=0x01,offcore_rsp=0x104000477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, cpu/event=0xb7,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, @@ -39,7 +38,6 @@ cpu-cycles, ref-cycles, instructions; -# OCR group 2 (ICX PMU supports a maximum of two OCR counters per group) cpu/event=0xb7,umask=0x01,offcore_rsp=0x730000477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, cpu/event=0xb7,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, cpu/event=0x08,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, @@ -69,6 +67,17 @@ cpu-cycles, ref-cycles, instructions; +#TMA AVX512 related +cpu/event=0xc7,umask=0x80,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE'/, +cpu/event=0xc7,umask=0x40,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE'/, +cpu/event=0xc7,umask=0x20,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE'/, +cpu/event=0xc7,umask=0x10,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE'/, +cpu/event=0xc7,umask=0x08,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE'/, +cpu/event=0xc7,umask=0x04,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE'/, +cpu-cycles, +ref-cycles, +instructions; + cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/, cpu/event=0x79,umask=0x04,cmask=0x05,period=2000003,name='IDQ.MITE_CYCLES_OK'/, cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/, @@ -129,10 +138,6 @@ cpu/event=0xd3,umask=0x02,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRE cpu/event=0xd3,umask=0x01,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM'/, cpu/event=0xd3,umask=0x08,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD'/, cpu/event=0xd3,umask=0x04,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM'/, -cpu/event=0xb1,umask=0x10,cmask=0x00,period=2000003,name='UOPS_EXECUTED.X87'/, -cpu/event=0xb1,umask=0x01,cmask=0x00,period=2000003,name='UOPS_EXECUTED.THREAD'/, -cpu/event=0xc7,umask=0x03,period=100003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/, -cpu/event=0xc7,umask=0xfc,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc'/, cpu-cycles, ref-cycles, instructions; @@ -196,4 +201,4 @@ imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/; #power related power/energy-pkg/, -power/energy-ram/; \ No newline at end of file +power/energy-ram/; diff --git a/events/metric_icx.json b/events/metric_icx.json index 13ac99c..535ed94 100644 --- a/events/metric_icx.json +++ b/events/metric_icx.json @@ -118,21 +118,6 @@ "name": "metric_% Uops delivered from legacy decode pipeline (MITE)", "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" }, - { - "name": "metric_core % cycles in non AVX license", - "expression": "(100 * [CORE_POWER.LVL0_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])", - "origin": "perfspect" - }, - { - "name": "metric_core % cycles in AVX2 license", - "expression": "(100 * [CORE_POWER.LVL1_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])", - "origin": "perfspect" - }, - { - "name": "metric_core % cycles in AVX-512 license", - "expression": "(100 * [CORE_POWER.LVL2_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])", - "origin": "perfspect" - }, { "name": "metric_core initiated local dram read bandwidth (MB/sec)", "expression": "(([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000) / 1" @@ -366,16 +351,16 @@ "expression": "100 * ( max( 0 , ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) - ( ( ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) )" }, { - "name": "metric_TMA_....FP_Arith(%)", - "expression": "100 * ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [UOPS_EXECUTED.X87] / [UOPS_EXECUTED.THREAD] ) + ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) + ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) ) )" + "name": "metric_TMA_........FP_Vector_128b(%)", + "expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE] + [FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )" }, { - "name": "metric_TMA_......FP_Scalar(%)", - "expression": "100 * ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) )" + "name": "metric_TMA_........FP_Vector_256b(%)", + "expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE] + [FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )" }, { - "name": "metric_TMA_......FP_Vector(%)", - "expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )" + "name": "metric_TMA_........FP_Vector_512b(%)", + "expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE] + [FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_..Heavy_Operations(%)", diff --git a/events/metric_spr_emr.json b/events/metric_spr_emr.json index 73c7a53..55eab80 100644 --- a/events/metric_spr_emr.json +++ b/events/metric_spr_emr.json @@ -367,8 +367,16 @@ "expression": "100 * ( max( 0 , ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) - ( [PERF_METRICS.HEAVY_OPERATIONS] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) )" }, { - "name": "metric_TMA_....FP_Arith(%)", - "expression": "100 * ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [UOPS_EXECUTED.X87] / [UOPS_EXECUTED.THREAD] ) + ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] + [FP_ARITH_INST_RETIRED2.SCALAR] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) + ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0x3c] + [FP_ARITH_INST_RETIRED2.VECTOR] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) ) + ( [AMX_OPS_RETIRED.BF16:c1] / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) )" + "name": "metric_TMA_........FP_Vector_256b(%)", + "expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE] + [FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE] + [FP_ARITH_INST_RETIRED2.256B_PACKED_HALF] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )" + }, + { + "name": "metric_TMA_........FP_Vector_512b(%)", + "expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE] + [FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE] + [FP_ARITH_INST_RETIRED2.512B_PACKED_HALF] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) ) , ( 1 ) ) )" + }, + { + "name": "metric_TMA_......Int_Vector_256b(%)", + "expression": "100 * ( ( [INT_VEC_RETIRED.ADD_256] + [INT_VEC_RETIRED.MUL_256] + [INT_VEC_RETIRED.VNNI_256] ) / ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) )" }, { "name": "metric_TMA_..Heavy_Operations(%)", @@ -388,4 +396,4 @@ "expression": "(1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED]) if [SOCKET_COUNT] > 1 else 0", "origin": "perfspect" } -] +] \ No newline at end of file diff --git a/events/spr_emr.txt b/events/spr_emr.txt index bef73ea..5027aa0 100644 --- a/events/spr_emr.txt +++ b/events/spr_emr.txt @@ -57,10 +57,20 @@ cpu-cycles, ref-cycles, instructions; +cpu/event=0xcf,umask=0x10,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.512B_PACKED_HALF'/, +cpu/event=0xcf,umask=0x08,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.256B_PACKED_HALF'/, +cpu/event=0xc7,umask=0x80,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE'/, +cpu/event=0xc7,umask=0x40,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE'/, +cpu/event=0xc7,umask=0x20,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE'/, +cpu/event=0xc7,umask=0x10,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE'/, +cpu-cycles, +ref-cycles, +instructions; + cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/, cpu/event=0x12,umask=0x20,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.STLB_HIT:c1'/, cpu/event=0x12,umask=0x10,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.WALK_ACTIVE'/, -cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/, cpu/event=0xa3,umask=0x10,cmask=0x10,period=1000003,name='CYCLE_ACTIVITY.CYCLES_MEM_ANY'/, cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/, cpu/event=0xad,umask=0x80,period=500009,name='INT_MISC.CLEAR_RESTEER_CYCLES'/, @@ -69,7 +79,6 @@ cpu-cycles, ref-cycles, instructions; -cpu/event=0xce,umask=0x02,cmask=0x01,period=100003,name='AMX_OPS_RETIRED.BF16:c1'/, cpu/event=0xd3,umask=0x10,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM'/, cpu/event=0xd1,umask=0x08,cmask=0x00,period=200003,name='MEM_LOAD_RETIRED.L1_MISS'/, cpu/event=0xd1,umask=0x80,cmask=0x00,period=1000003,name='MEM_LOAD_RETIRED.LOCAL_PMM'/, @@ -85,9 +94,7 @@ cpu/event=0xd0,umask=0x21,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.LOCK_ cpu/event=0xd0,umask=0x82,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.ALL_STORES'/, cpu/event=0x24,umask=0xe2,cmask=0x00,period=2000003,name='L2_RQSTS.ALL_RFO'/, cpu/event=0x24,umask=0xc2,cmask=0x00,period=2000003,name='L2_RQSTS.RFO_HIT'/, -cpu/event=0xcf,umask=0x03,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.SCALAR'/, cpu/event=0xcf,umask=0x1c,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.VECTOR'/, -cpu/event=0xc7,umask=0x03,period=100003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/, cpu/event=0xc7,umask=0x3c,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0x3c'/, cpu-cycles, ref-cycles, @@ -135,6 +142,9 @@ cpu-cycles, ref-cycles, instructions; +cpu/event=0xe7,umask=0x0c,cmask=0x00,period=100003,name='INT_VEC_RETIRED.ADD_256'/, +cpu/event=0xe7,umask=0x20,cmask=0x00,period=100003,name='INT_VEC_RETIRED.VNNI_256'/, +cpu/event=0xe7,umask=0x80,cmask=0x00,period=100003,name='INT_VEC_RETIRED.MUL_256'/, cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/, cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, cpu/event=0x79,umask=0x20,period=100003,name='IDQ.MS_UOPS'/, diff --git a/src/perf_helpers.py b/src/perf_helpers.py index cf83f3f..c6d4456 100644 --- a/src/perf_helpers.py +++ b/src/perf_helpers.py @@ -112,13 +112,13 @@ def get_imc_cha_upi_count(): return imc_count, cha_count, upi_count -# get imc channel ids, channel ids are not consecutive in some cases (observed on bdw) -def get_channel_ids(): +# device ids are not consecutive in some cases +def get_channel_ids(pattern): sysdevices = os.listdir("/sys/bus/event_source/devices") - imc = "uncore_imc_[0-9]*" + devices = pattern + "[0-9]*" ids = [] for entry in sysdevices: - if fnmatch.fnmatch(entry, imc): + if fnmatch.fnmatch(entry, devices): words = entry.split("_") ids.append(int(words[-1])) ids = sorted(ids) diff --git a/src/prepare_perf_events.py b/src/prepare_perf_events.py index 74a9564..c236645 100644 --- a/src/prepare_perf_events.py +++ b/src/prepare_perf_events.py @@ -71,17 +71,11 @@ def is_cpu_event(line): return False -# save the last group names in a list when it is cha or imc -# test for cha or imc event. append with count value -# once reaches new group, start looping through all imc/cha counts to finish up -def enumerate_uncore(group, pattern, n, default_range=True): +# enumerate uncore events across all devices +def enumerate_uncore(group, pattern, count): uncore_group = "" - ids = [] - if default_range: - ids = range(n) - else: - ids = helper.get_channel_ids() - for i in range(n - 1): + ids = helper.get_channel_ids(pattern) + for i in range(count - 1): old = pattern + str(ids[i]) new = pattern + str(ids[i + 1]) group = group.replace(old, new) @@ -216,8 +210,7 @@ def prepare_perf_events( # enumerate all uncore units if new_group and (unc_count > 1): name = helper.get_dev_name(line.split("/")[0].strip()) - default_range = name != "uncore_imc" - group += enumerate_uncore(prev_group, name + "_", unc_count, default_range) + group += enumerate_uncore(prev_group, name + "_", unc_count) group = group[:-1] if len(event_names) == 0: