From 17b442426b98fe9ac60a27d4a1bace0e1fff0084 Mon Sep 17 00:00:00 2001
From: jharper5 <jason.m.harper@intel.com>
Date: Tue, 25 Jun 2024 16:39:48 -0700
Subject: [PATCH] changes for version 1.4.0

---
 README.md                             |  10 +
 _version.txt                          |   2 +-
 events/icx_nofixedtma.txt             | 148 +++++++++++
 events/metric_icx_nofixedtma.json     | 329 ++++++++++++++++++++++++
 events/metric_spr_emr_nofixedtma.json | 349 ++++++++++++++++++++++++++
 events/metric_srf.json                | 267 +++++++++++++++++++-
 events/spr_emr_nofixedtma.txt         | 138 ++++++++++
 events/srf.txt                        | 102 +++++++-
 perf-collect.py                       | 272 +++++++++++++++-----
 perf-collect.spec                     |   2 +-
 perf-postprocess.py                   |  41 ++-
 src/perf_helpers.py                   |  75 ++++--
 src/prepare_perf_events.py            |  78 +++---
 13 files changed, 1684 insertions(+), 129 deletions(-)
 create mode 100644 events/icx_nofixedtma.txt
 create mode 100644 events/metric_icx_nofixedtma.json
 create mode 100644 events/metric_spr_emr_nofixedtma.json
 create mode 100644 events/spr_emr_nofixedtma.txt

diff --git a/README.md b/README.md
index da17d57..f7d7739 100644
--- a/README.md
+++ b/README.md
@@ -41,6 +41,16 @@ sudo ./perf-collect --timeout 10
 ./perf-postprocess
 ```
 
+## Running perf-collect as a non-root user
+As seen in the examples above, `sudo` is the standard approach to running perf-collect with elevated privileges. If neither `sudo` nor running as the root user is possible, a user may ask an administrator to make the following changes to the system:
+- sysctl -w kernel.perf_event_paranoid=0
+- sysctl -w kernel.nmi_watchdog=0
+- write '125' to all perf_event_mux_interval_ms files found under /sys/devices/*.
+
+`for i in $(find /sys/devices -name perf_event_mux_interval_ms); do echo 125 > $i; done`
+
+We recommend restoring these settings to their prior values when analysis with PerfSpect is complete.
+
 ## Output
 
 perf-collect outputs:
diff --git a/_version.txt b/_version.txt
index 17e63e7..88c5fb8 100644
--- a/_version.txt
+++ b/_version.txt
@@ -1 +1 @@
-1.3.11
+1.4.0
diff --git a/events/icx_nofixedtma.txt b/events/icx_nofixedtma.txt
new file mode 100644
index 0000000..00c16cc
--- /dev/null
+++ b/events/icx_nofixedtma.txt
@@ -0,0 +1,148 @@
+###########################################################################################################
+# Copyright (C) 2021-2023 Intel Corporation
+# SPDX-License-Identifier: BSD-3-Clause
+###########################################################################################################
+
+# Icelake event list for platforms that don't have support for the fixed counter TMA events, e.g., some AWS
+# VMs.
+# Note that there are no more than 10 events per group. On these same platforms, the cpu-cycles fixed
+# counter is not supported so a general purpose counter will be used.
+
+cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/,
+cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/,
+cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/,
+cpu/event=0xc3,umask=0x01,cmask=0x01,edge=0x01,period=100003,name='MACHINE_CLEARS.COUNT'/,
+cpu/event=0xc5,umask=0x00,period=50021,name='BR_MISP_RETIRED.ALL_BRANCHES'/,
+cpu/event=0xf1,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/,
+cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/,
+cpu/event=0xa8,umask=0x01,cmask=0x00,period=2000003,name='LSD.UOPS'/,
+cpu/event=0x48,umask=0x02,period=1000003,name='L1D_PEND_MISS.FB_FULL_PERIODS'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+# events for TMA metrics without fixed counter support (group 1)
+cpu/event=0x9c,umask=0x01,name='IDQ_UOPS_NOT_DELIVERED.CORE'/,
+cpu/event=0xa4,umask=0x01,name='TOPDOWN.SLOTS_P'/,
+cpu/event=0x80,umask=0x04,name='ICACHE_DATA.STALLS'/,
+cpu/event=0x83,umask=0x04,name='ICACHE_TAG.STALLS'/,
+cpu/event=0x79,umask=0x30,name='IDQ.MS_SWITCHES'/,
+cpu/event=0x87,umask=0x01,name='DECODE.LCP'/,
+cpu/event=0x0d,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+# events for TMA metrics without fixed counter support (group 2)
+cpu/event=0xab,umask=0x02,name='DSB2MITE_SWITCHES.PENALTY_CYCLES'/,
+cpu/event=0xa4,umask=0x02,name='TOPDOWN.BACKEND_BOUND_SLOTS'/,
+cpu/event=0x0D,umask=0x01,name='INT_MISC.CLEARS_COUNT'/,
+cpu/event=0xc2,umask=0x02,name='UOPS_RETIRED.SLOTS'/,
+cpu/event=0xd0,umask=0x83,name='MEM_INST_RETIRED.ANY'/,
+cpu/event=0xc4,umask=0x00,name='BR_INST_RETIRED.ALL_BRANCHES'/,
+cpu/event=0x9c,umask=0x01,cmask=0x05,period=1000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/,
+cpu/event=0xa3,umask=0x0C,cmask=0x0C,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/,
+cpu/event=0xa3,umask=0x14,cmask=0x14,period=2000003,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/,
+cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/,
+cpu/event=0xa3,umask=0x04,cmask=0x04,period=1000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/,
+cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/,
+cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0xd0,umask=0x21,cmask=0x00,period=100007,name='MEM_INST_RETIRED.LOCK_LOADS'/,
+cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/,
+cpu/event=0xd1,umask=0x40,period=100007,name='MEM_LOAD_RETIRED.FB_HIT'/,
+cpu/event=0xd1,umask=0x08,period=200003,name='MEM_LOAD_RETIRED.L1_MISS'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0xa3,umask=0x05,cmask=0x05,period=1000003,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/,
+cpu/event=0xa3,umask=0x06,cmask=0x06,period=1000003,name='CYCLE_ACTIVITY.STALLS_L3_MISS'/,
+cpu/event=0xa3,umask=0x0c,cmask=0x0c,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/,
+cpu/event=0x79,umask=0x04,cmask=0x05,period=2000003,name='IDQ.MITE_CYCLES_OK'/,
+cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/,
+cpu/event=0x79,umask=0x08,cmask=0x05,period=2000003,name='IDQ.DSB_CYCLES_OK'/,
+cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/,
+cpu/event=0x14,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIVIDER_ACTIVE'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/,
+cpu/event=0x79,umask=0x30,period=100003,name='IDQ.MS_UOPS'/,
+cpu/event=0x56,umask=0x01,period=100003,name='UOPS_DECODED.DEC0'/,
+cpu/event=0x56,umask=0x01,cmask=0x01,period=100003,name='UOPS_DECODED.DEC0:c1'/,
+cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/,
+cpu-cycles:k,
+ref-cycles:k,
+instructions:k;
+
+# OCR
+cpu/event=0xb7,umask=0x01,offcore_rsp=0x104000477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/,
+cpu/event=0xb7,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/,
+cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/,
+cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0xb7,umask=0x01,offcore_rsp=0x1030000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/,
+cpu/event=0xb7,umask=0x01,offcore_rsp=0x830000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/,
+cpu/event=0xb7,umask=0x01,offcore_rsp=0x730000477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/,
+cpu/event=0xb7,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/,
+cpu/event=0x08,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/,
+cpu/event=0x49,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+# C6
+cstate_core/c6-residency/;
+cstate_pkg/c6-residency/;
+
+# UPI
+upi/event=0x2,umask=0xf,name='UNC_UPI_TxL_FLITS.ALL_DATA'/;
+
+# CHA
+cha/event=0x00,umask=0x00,name='UNC_CHA_CLOCKTICKS'/;
+
+cha/event=0x35,umask=0xC8177E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/,
+cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/,
+cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/,
+cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/;
+
+cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/;
+cha/event=0x35,umask=0xc88ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/,
+cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/,
+cha/event=0x36,umask=0xC816FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/;
+
+cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD'/,
+cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/,
+cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/,
+cha/event=0x36,umask=0xC817FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/;
+
+# memory read/writes
+imc/event=0x04,umask=0x0f,name='UNC_M_CAS_COUNT.RD'/,
+imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/;
+
+# power
+power/energy-pkg/,
+power/energy-ram/;
diff --git a/events/metric_icx_nofixedtma.json b/events/metric_icx_nofixedtma.json
new file mode 100644
index 0000000..5325629
--- /dev/null
+++ b/events/metric_icx_nofixedtma.json
@@ -0,0 +1,329 @@
+[
+    {
+        "name": "metric_CPU operating frequency (in GHz)",
+        "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)"
+    },
+    {
+        "name": "metric_CPU utilization %",
+        "expression": "100 * [ref-cycles] / [TSC]"
+    },
+    {
+        "name": "metric_CPU utilization% in kernel mode",
+        "expression": "100 * [ref-cycles:k] / [TSC]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_CPI",
+        "name-txn": "metric_cycles per txn",
+        "expression": "[cpu-cycles] / [instructions]",
+        "expression-txn": "[cpu-cycles] / [TXN]"
+    },
+    {
+        "name": "metric_kernel_CPI",
+        "name-txn": "metric_kernel_cycles per txn",
+        "expression": "[cpu-cycles:k] / [instructions:k]",
+        "expression-txn": "[cpu-cycles:k] / [TXN]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_IPC",
+        "name-txn": "metric_txn per cycles",
+        "expression": "[instructions] / [cpu-cycles]",
+        "expression-txn": "[TXN] / [cpu-cycles]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_giga_instructions_per_sec",
+        "expression": "[instructions] / 1000000000",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_locks retired per instr",
+        "name-txn": "metric_locks retired per txn",
+        "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]",
+        "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]",
+        "origin": "perfmon website"
+    },
+    {
+        "name": "metric_L1D MPI (includes data+rfo w/ prefetches)",
+        "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)",
+        "expression": "[L1D.REPLACEMENT] / [instructions]",
+        "expression-txn": "[L1D.REPLACEMENT] / [TXN]"
+    },
+    {
+        "name": "metric_L1D demand data read hits per instr",
+        "name-txn": "metric_L1D demand data read hits per txn",
+        "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]",
+        "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]"
+    },
+    {
+        "name": "metric_L1-I code read misses (w/ prefetches) per instr",
+        "name-txn": "metric_L1I code read misses (includes prefetches) per txn",
+        "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]",
+        "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]"
+    },
+    {
+        "name": "metric_L2 demand data read hits per instr",
+        "name-txn": "metric_L2 demand data read hits per txn",
+        "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]",
+        "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]"
+    },
+    {
+        "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)",
+        "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)",
+        "expression": "[L2_LINES_IN.ALL] / [instructions]",
+        "expression-txn": "[L2_LINES_IN.ALL] / [TXN]"
+    },
+    {
+        "name": "metric_L2 demand data read MPI",
+        "name-txn": "metric_L2 demand data read misses per txn",
+        "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]",
+        "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]"
+    },
+    {
+        "name": "metric_L2 demand code MPI",
+        "name-txn": "metric_L2 demand code misses per txn",
+        "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]",
+        "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]"
+    },
+    {
+        "name": "metric_LLC code read MPI (demand+prefetch)",
+        "name-txn": "metric_LLC code read (demand+prefetch) misses per txn",
+        "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]",
+        "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]"
+    },
+    {
+        "name": "metric_LLC data read MPI (demand+prefetch)",
+        "name-txn": "metric_LLC data read (demand+prefetch) misses per txn",
+        "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]",
+        "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]"
+    },
+    {
+        "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)",
+        "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)",
+        "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]",
+        "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]"
+    },
+    {
+        "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)",
+        "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)",
+        "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]",
+        "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]"
+    },
+    {
+        "name": "metric_Average LLC demand data read miss latency (in ns)",
+        "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
+    },
+    {
+        "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)",
+        "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
+    },
+    {
+        "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)",
+        "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
+    },
+    {
+        "name": "metric_UPI Data transmit BW (MB/sec) (only data)",
+        "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1"
+    },
+    {
+        "name": "metric_package power (watts)",
+        "expression": "[power/energy-pkg/]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_DRAM power (watts)",
+        "expression": "[power/energy-ram/]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_core c6 residency %",
+        "expression": "100 * [cstate_core/c6-residency/] / [TSC]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_package c6 residency %",
+        "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_% Uops delivered from decoded Icache (DSB)",
+        "expression": "100 * ([IDQ.DSB_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )"
+    },
+    {
+        "name": "metric_% Uops delivered from legacy decode pipeline (MITE)",
+        "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )"
+    },
+    {
+        "name": "metric_core initiated local dram read bandwidth (MB/sec)",
+        "expression": "(([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_core initiated remote dram read bandwidth (MB/sec)",
+        "expression": "(([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_memory bandwidth read (MB/sec)",
+        "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_memory bandwidth write (MB/sec)",
+        "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_memory bandwidth total (MB/sec)",
+        "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_ITLB (2nd level) MPI",
+        "name-txn": "metric_ITLB (2nd level) misses per txn",
+        "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]",
+        "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]"
+    },
+    {
+        "name": "metric_DTLB (2nd level) load MPI",
+        "name-txn": "metric_DTLB (2nd level) load misses per txn",
+        "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]",
+        "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]"
+    },
+    {
+        "name": "metric_DTLB (2nd level) 2MB large page load MPI",
+        "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn",
+        "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]",
+        "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]"
+    },
+    {
+        "name": "metric_DTLB (2nd level) store MPI",
+        "name-txn": "metric_DTLB (2nd level) store misses per txn",
+        "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]",
+        "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]"
+    },
+    {
+        "name": "metric_NUMA %_Reads addressed to local DRAM",
+        "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])"
+    },
+    {
+        "name": "metric_NUMA %_Reads addressed to remote DRAM",
+        "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])"
+    },
+    {
+        "name": "metric_uncore frequency GHz",
+        "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1"
+    },
+    {
+        "name": "metric_TMA_Frontend_Bound(%)",
+        "expression": "100 * ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )"
+    },
+    {
+        "name": "metric_TMA_..Fetch_Latency(%)",
+        "expression": "100 * ( ( ( 5 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )"
+    },
+    {
+        "name": "metric_TMA_....ICache_Misses(%)",
+        "expression": "100 * ( [ICACHE_DATA.STALLS] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....ITLB_Misses(%)",
+        "expression": "100 * ( [ICACHE_TAG.STALLS] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....MS_Switches(%)",
+        "expression": "100 * ( ( 3 ) * [IDQ.MS_SWITCHES] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....LCP(%)",
+        "expression": "100 * ( [DECODE.LCP] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....DSB_Switches(%)",
+        "expression": "100 * ( [DSB2MITE_SWITCHES.PENALTY_CYCLES] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_..Fetch_Bandwidth(%)",
+        "expression": "100 * ( max( 0 , ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( 5 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) ) )"
+    },
+    {
+        "name": "metric_TMA_....MITE(%)",
+        "expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )"
+    },
+    {
+        "name": "metric_TMA_....DSB(%)",
+        "expression": "100 * ( ( [IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )"
+    },
+    {
+        "name": "metric_TMA_Bad_Speculation(%)",
+        "expression": "100 * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) )"
+    },
+    {
+        "name": "metric_TMA_..Branch_Mispredicts(%)",
+        "expression": "100 * ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) )"
+    },
+    {
+        "name": "metric_TMA_..Machine_Clears(%)",
+        "expression": "100 * ( max( 0 , ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) - ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) ) ) )"
+    },
+    {
+        "name": "metric_TMA_Backend_Bound(%)",
+        "expression": "100 * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) )"
+    },
+    {
+        "name": "metric_TMA_..Memory_Bound(%)",
+        "expression": "100 * ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) )"
+    },
+    {
+        "name": "metric_TMA_....L1_Bound(%)",
+        "expression": "100 * ( max( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] - [CYCLE_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )"
+    },
+    {
+        "name": "metric_TMA_....L2_Bound(%)",
+        "expression": "100 * ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL_PERIODS] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) )"
+    },
+    {
+        "name": "metric_TMA_....L3_Bound(%)",
+        "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L2_MISS] - [CYCLE_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....DRAM_Bound(%)",
+        "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) + ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) - ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL_PERIODS] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) ) ) )"
+    },
+    {
+        "name": "metric_TMA_....Store_Bound(%)",
+        "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_..Core_Bound(%)",
+        "expression": "100 * ( max( 0 , ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) ) ) )"
+    },
+    {
+        "name": "metric_TMA_....Divider(%)",
+        "expression": "100 * ( [ARITH.DIVIDER_ACTIVE] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_Retiring(%)",
+        "expression": "100 * ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) )"
+    },
+    {
+        "name": "metric_TMA_..Light_Operations(%)",
+        "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) )"
+    },
+    {
+        "name": "metric_TMA_....Memory_Operations(%)",
+        "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) ) * [MEM_INST_RETIRED.ANY] / [instructions] )"
+    },
+    {
+        "name": "metric_TMA_....Branch_Instructions(%)",
+        "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) ) * [BR_INST_RETIRED.ALL_BRANCHES] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )"
+    },
+    {
+        "name": "metric_TMA_..Heavy_Operations(%)",
+        "expression": "100 * ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] )"
+    },
+    {
+        "name": "metric_TMA_....Few_Uops_Instructions(%)",
+        "expression": "100 * ( ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) - ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) )"
+    },
+    {
+        "name": "metric_TMA_....Microcode_Sequencer(%)",
+        "expression": "100 * ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) )"
+    }
+]
\ No newline at end of file
diff --git a/events/metric_spr_emr_nofixedtma.json b/events/metric_spr_emr_nofixedtma.json
new file mode 100644
index 0000000..587d6b1
--- /dev/null
+++ b/events/metric_spr_emr_nofixedtma.json
@@ -0,0 +1,349 @@
+[
+    {
+        "name": "metric_CPU operating frequency (in GHz)",
+        "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)"
+    },
+    {
+        "name": "metric_CPU utilization %",
+        "expression": "100 * [ref-cycles] / [TSC]"
+    },
+    {
+        "name": "metric_CPU utilization% in kernel mode",
+        "expression": "100 * [ref-cycles:k] / [TSC]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_CPI",
+        "name-txn": "metric_cycles per txn",
+        "expression": "[cpu-cycles] / [instructions]",
+        "expression-txn": "[cpu-cycles] / [TXN]"
+    },
+    {
+        "name": "metric_kernel_CPI",
+        "name-txn": "metric_kernel_cycles per txn",
+        "expression": "[cpu-cycles:k] / [instructions:k]",
+        "expression-txn": "[cpu-cycles:k] / [TXN]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_IPC",
+        "name-txn": "metric_txn per cycle",
+        "expression": "[instructions] / [cpu-cycles]",
+        "expression-txn": "[TXN] / [cpu-cycles]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_giga_instructions_per_sec",
+        "expression": "[instructions] / 1000000000",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_locks retired per instr",
+        "name-txn": "metric_locks retired per txn",
+        "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]",
+        "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]",
+        "origin": "perfmon website"
+    },
+    {
+        "name": "metric_L1D MPI (includes data+rfo w/ prefetches)",
+        "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)",
+        "expression": "[L1D.REPLACEMENT] / [instructions]",
+        "expression-txn": "[L1D.REPLACEMENT] / [TXN]"
+    },
+    {
+        "name": "metric_L1D demand data read hits per instr",
+        "name-txn": "metric_L1D demand data read hits per txn",
+        "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]",
+        "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]"
+    },
+    {
+        "name": "metric_L1-I code read misses (w/ prefetches) per instr",
+        "name-txn": "metric_L1I code read misses (includes prefetches) per txn",
+        "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]",
+        "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]"
+    },
+    {
+        "name": "metric_L2 demand data read hits per instr",
+        "name-txn": "metric_L2 demand data read hits per txn",
+        "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]",
+        "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]"
+    },
+    {
+        "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)",
+        "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)",
+        "expression": "[L2_LINES_IN.ALL] / [instructions]",
+        "expression-txn": "[L2_LINES_IN.ALL] / [TXN]"
+    },
+    {
+        "name": "metric_L2 demand data read MPI",
+        "name-txn": "metric_L2 demand data read misses per txn",
+        "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]",
+        "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]"
+    },
+    {
+        "name": "metric_L2 demand code MPI",
+        "name-txn": "metric_L2 demand code misses per txn",
+        "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]",
+        "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]"
+    },
+    {
+        "name": "metric_LLC code read MPI (demand+prefetch)",
+        "name-txn": "metric_LLC code read (demand+prefetch) misses per txn",
+        "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]",
+        "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]"
+    },
+    {
+        "name": "metric_LLC data read MPI (demand+prefetch)",
+        "name-txn": "metric_LLC data read (demand+prefetch) misses per txn",
+        "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]",
+        "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]"
+    },
+    {
+        "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)",
+        "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)",
+        "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]",
+        "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)",
+        "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)",
+        "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]",
+        "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_Average LLC demand data read miss latency (in ns)",
+        "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
+    },
+    {
+        "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)",
+        "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
+    },
+    {
+        "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)",
+        "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
+    },
+    {
+        "name": "metric_UPI Data transmit BW (MB/sec) (only data)",
+        "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1"
+    },
+    {
+        "name": "metric_package power (watts)",
+        "expression": "[power/energy-pkg/]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_DRAM power (watts)",
+        "expression": "[power/energy-ram/]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_core c6 residency %",
+        "expression": "100 * [cstate_core/c6-residency/] / [TSC]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_package c6 residency %",
+        "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_% Uops delivered from decoded Icache (DSB)",
+        "expression": "100 * ([IDQ.DSB_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )"
+    },
+    {
+        "name": "metric_% Uops delivered from legacy decode pipeline (MITE)",
+        "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )"
+    },
+    {
+        "name": "metric_core initiated local dram read bandwidth (MB/sec)",
+        "expression": "([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_core initiated remote dram read bandwidth (MB/sec)",
+        "expression": "([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_memory bandwidth read (MB/sec)",
+        "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_memory bandwidth write (MB/sec)",
+        "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_memory bandwidth total (MB/sec)",
+        "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_ITLB (2nd level) MPI",
+        "name-txn": "metric_ITLB (2nd level) misses per txn",
+        "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]",
+        "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]"
+    },
+    {
+        "name": "metric_DTLB (2nd level) load MPI",
+        "name-txn": "metric_DTLB (2nd level) load misses per txn",
+        "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]",
+        "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]"
+    },
+    {
+        "name": "metric_DTLB (2nd level) 2MB large page load MPI",
+        "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn",
+        "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]",
+        "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]"
+    },
+    {
+        "name": "metric_DTLB (2nd level) store MPI",
+        "name-txn": "metric_DTLB (2nd level) store misses per txn",
+        "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]",
+        "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]"
+    },
+    {
+        "name": "metric_NUMA %_Reads addressed to local DRAM",
+        "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])"
+    },
+    {
+        "name": "metric_NUMA %_Reads addressed to remote DRAM",
+        "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])"
+    },
+    {
+        "name": "metric_uncore frequency GHz",
+        "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1"
+    },
+    {
+        "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)",
+        "expression": "([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)",
+        "expression": "(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_TMA_Frontend_Bound(%)",
+        "expression": "100 * ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )"
+    },
+    {
+        "name": "metric_TMA_..Fetch_Latency(%)",
+        "expression": "100 * ( ( [IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )"
+    },
+    {
+        "name": "metric_TMA_....ICache_Misses(%)",
+        "expression": "100 * ( [ICACHE_DATA.STALLS] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....ITLB_Misses(%)",
+        "expression": "100 * ( [ICACHE_TAG.STALLS] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....MS_Switches(%)",
+        "expression": "100 * ( ( 3 ) * [UOPS_RETIRED.MS:c1:e1] / ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....LCP(%)",
+        "expression": "100 * ( [DECODE.LCP] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....DSB_Switches(%)",
+        "expression": "100 * ( [DSB2MITE_SWITCHES.PENALTY_CYCLES] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_..Fetch_Bandwidth(%)",
+        "expression": "100 * ( max( 0 , ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( [IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) ) )"
+    },
+    {
+        "name": "metric_TMA_....MITE(%)",
+        "expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )"
+    },
+    {
+        "name": "metric_TMA_....DSB(%)",
+        "expression": "100 * ( ( [IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )"
+    },
+    {
+        "name": "metric_TMA_Bad_Speculation(%)",
+        "expression": "100 * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) )"
+    },
+    {
+        "name": "metric_TMA_..Branch_Mispredicts(%)",
+        "expression": "100 * ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )"
+    },
+    {
+        "name": "metric_TMA_..Machine_Clears(%)",
+        "expression": "100 * ( max( 0 , ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) - ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )"
+    },
+    {
+        "name": "metric_TMA_Backend_Bound(%)",
+        "expression": "100 * ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )"
+    },
+    {
+        "name": "metric_TMA_..Memory_Bound(%)",
+        "expression": "100 * ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )"
+    },
+    {
+        "name": "metric_TMA_....L1_Bound(%)",
+        "expression": "100 * ( max( ( [EXE_ACTIVITY.BOUND_ON_LOADS] - [MEMORY_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )"
+    },
+    {
+        "name": "metric_TMA_....L2_Bound(%)",
+        "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L1D_MISS] - [MEMORY_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....L3_Bound(%)",
+        "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L2_MISS] - [MEMORY_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....DRAM_Bound(%)",
+        "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) ) )"
+    },
+    {
+        "name": "metric_TMA_....Store_Bound(%)",
+        "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_..Core_Bound(%)",
+        "expression": "100 * ( max( 0 , ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )"
+    },
+    {
+        "name": "metric_TMA_....Divider(%)",
+        "expression": "100 * ( [ARITH.DIV_ACTIVE] / ( [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....AMX_Busy(%)",
+        "expression": "100 * ( [EXE.AMX_BUSY] / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) )"
+    },
+    {
+        "name": "metric_TMA_Retiring(%)",
+        "expression": "100 * ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) )"
+    },
+    {
+        "name": "metric_TMA_..Light_Operations(%)",
+        "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) )"
+    },
+    {
+        "name": "metric_TMA_....Memory_Operations(%)",
+        "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [MEM_UOP_RETIRED.ANY] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )"
+    },
+    {
+        "name": "metric_TMA_....Fused_Instructions(%)",
+        "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [INST_RETIRED.MACRO_FUSED] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )"
+    },
+    {
+        "name": "metric_TMA_....Non_Fused_Branches(%)",
+        "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * ( [BR_INST_RETIRED.ALL_BRANCHES] - [INST_RETIRED.MACRO_FUSED] ) / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )"
+    },
+    {
+        "name": "metric_TMA_..Heavy_Operations(%)",
+        "expression": "100 * ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) )"
+    },
+    {
+        "name": "metric_TMA_....Few_Uops_Instructions(%)",
+        "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) ) ) )"
+    },
+    {
+        "name": "metric_TMA_....Microcode_Sequencer(%)",
+        "expression": "100 * ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) )"
+    }
+]
\ No newline at end of file
diff --git a/events/metric_srf.json b/events/metric_srf.json
index 1f38f6e..615d432 100644
--- a/events/metric_srf.json
+++ b/events/metric_srf.json
@@ -36,5 +36,270 @@
         "name": "metric_giga_instructions_per_sec",
         "expression": "[instructions] / 1000000000",
         "origin": "perfspect"
+    },
+    {
+        "name": "metric_locks retired per instr",
+        "name-txn": "metric_locks retired per txn",
+        "expression": "[MEM_UOPS_RETIRED.LOCK_LOADS] / [instructions]",
+        "expression-txn": "[MEM_UOPS_RETIRED.LOCK_LOADS] / [TXN]"
+    },
+    {
+        "name": "metric_L1D demand data read MPI",
+        "name-txn": "metric_L1D demand data read misses per txn",
+        "expression": "[MEM_LOAD_UOPS_RETIRED.L1_MISS] / [instructions]",
+        "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L1_MISS] / [TXN]"
+    },
+    {
+        "name": "metric_L1D demand data read hits per instr",
+        "name-txn": "metric_L1D demand data read hits per txn",
+        "expression": "[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [instructions]",
+        "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [TXN]"
+    },
+    {
+        "name": "metric_L1-I code read misses (w/ prefetches) per instr",
+        "name-txn": "metric_L1-I code read misses (w/ prefetches) per txn",
+        "expression": "[ICACHE.MISSES] / [instructions]",
+        "expression-txn": "[ICACHE.MISSES] / [TXN]"
+    },
+    {
+        "name": "metric_L2 demand data read hits per instr",
+        "name-txn": "metric_L2 demand data read hits per txn",
+        "expression": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [instructions]",
+        "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [TXN]"
+    },
+    {
+        "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)",
+        "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)",
+        "expression": "[LONGEST_LAT_CACHE.REFERENCE] / [instructions]",
+        "expression-txn": "[LONGEST_LAT_CACHE.REFERENCE] / [TXN]"
+    },
+    {
+        "name": "metric_L2 code MPI",
+        "name-txn": "metric_L2 code misses per txn",
+        "expression": "[OCR.L2_CODE_MISS] / [instructions]",
+        "expression-txn": "[OCR.L2_CODE_MISS] / [TXN]"
+    },
+    {
+        "name": "metric_L2 Any local request that HITM in another module (per instr)",
+        "name-txn": "metric_L2 Any local request that HITM in another module per txn",
+        "expression": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM] / [instructions]",
+        "expression-txn": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM] / [TXN]"
+    },
+    {
+        "name": "metric_L2 Any local request that HIT in another module and forwarded (per instr)",
+        "name-txn": "metric_L2 Any local request that HIT in another module and forwarded per txn",
+        "expression": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD] / [instructions]",
+        "expression-txn": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD] / [TXN]"
+    },
+    {
+        "name": "metric_L2 all L2 prefetches (per instr)",
+        "name-txn": "metric_L2 all L2 prefetches per txn",
+        "expression": "[OCR.HWPF_L2.ANY_RESPONSE] / [instructions]",
+        "expression-txn": "[OCR.HWPF_L2.ANY_RESPONSE] / [TXN]"
+    },
+    {
+        "name": "metric_data_read_L2_Miss_Latency_using_ORO_events(ns)",
+        "expression": "( 1000000000 * ([OCR.READS_TO_CORE.OUTSTANDING] / [OCR.READS_TO_CORE.ANY_RESPONSE]) / ([cpu-cycles] / [TSC] * [SYSTEM_TSC_FREQ]) )"
+    },
+    {
+        "name": "metric_L3 MPI (includes code+data+rfo w/ prefetches)",
+        "name-txn": "metric_L3 misses per txn (includes code+data+rfo w/ prefetches)",
+        "expression": "[LONGEST_LAT_CACHE.MISS] / [instructions]",
+        "expression-txn": "[LONGEST_LAT_CACHE.MISS] / [TXN]"
+    },
+    {
+        "name": "metric_LLC MPI (includes code+data+rfo w/ prefetches)",
+        "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO]) / [instructions]",
+        "name-txn": "metric_LLC misses per txn (includes code+data+rfo w/ prefetches)",
+        "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO]) / [TXN]"
+    },
+    {
+        "name": "metric_LLC total HITM (per instr)",
+        "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)",
+        "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]",
+        "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]"
+    },
+    {
+        "name": "metric_LLC total HIT clean line forwards (per instr)",
+        "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)",
+        "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]",
+        "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]"
+    },
+    {
+        "name": "metric_LLC data read MPI (demand+prefetch)",
+        "name-txn": "metric_LLC data read (demand+prefetch) misses per txn",
+        "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA]) / [instructions]",
+        "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA]) / [TXN]"
+    },
+    {
+        "name": "metric_LLC code read MPI (demand+prefetch)",
+        "name-txn": "metric_LLC code read (demand+prefetch) misses per txn",
+        "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]",
+        "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]"
+    },
+    {
+        "name": "metric_Average LLC demand data read miss latency (in ns)",
+        "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
+    },
+    {
+        "name": "metric_Average LLC demand RFO miss latency (in ns)",
+        "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO] / [UNC_CHA_TOR_INSERTS.IA_MISS_RFO]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1"
+    },
+    {
+        "name": "metric_core initiated local dram read bandwidth (MB/sec)",
+        "expression": "([LONGEST_LAT_CACHE.MISS]) * 64 / 1000000",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_memory bandwidth read (MB/sec)",
+        "expression": "(([UNC_M_CAS_COUNT_SCH0.RD] + [UNC_M_CAS_COUNT_SCH1.RD]) * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_memory bandwidth write (MB/sec)",
+        "expression": "(([UNC_M_CAS_COUNT_SCH0.WR] + [UNC_M_CAS_COUNT_SCH1.WR]) * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_memory bandwidth total (MB/sec)",
+        "expression": "(([UNC_M_CAS_COUNT_SCH0.RD] + [UNC_M_CAS_COUNT_SCH1.RD] + [UNC_M_CAS_COUNT_SCH0.WR] + [UNC_M_CAS_COUNT_SCH1.WR]) * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)",
+        "expression": "([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)",
+        "expression": "(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1"
+    },
+    {
+        "name": "metric_package power (watts)",
+        "expression": "[power/energy-pkg/]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_DRAM power (watts)",
+        "expression": "[power/energy-ram/]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_core c6 residency %",
+        "expression": "100 * [cstate_core/c6-residency/] / [TSC]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_package c6 residency %",
+        "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]",
+        "origin": "perfspect"
+    },
+    {
+        "name": "metric_uncore frequency GHz",
+        "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1"
+    },
+    {
+        "name": "metric_ITLB (2nd level) MPI",
+        "name-txn": "metric_ITLB (2nd level) misses per txn",
+        "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]",
+        "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]"
+    },
+    {
+        "name": "metric_DTLB (2nd level) load MPI",
+        "name-txn": "metric_DTLB (2nd level) load misses per txn",
+        "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]",
+        "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]"
+    },
+    {
+        "name": "metric_DTLB (2nd level) 4KB page load MPI",
+        "name-txn": "metric_DTLB (2nd level) 4KB page load misses per txn",
+        "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [instructions]",
+        "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [TXN]"
+    },
+    {
+        "name": "metric_DTLB (2nd level) 2MB large page load MPI",
+        "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn",
+        "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]",
+        "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]"
+    },
+    {
+        "name": "metric_DTLB (2nd level) 1GB large page load MPI",
+        "name-txn": "metric_DTLB (2nd level) 1GB large page load misses per txn",
+        "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [instructions]",
+        "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [TXN]"
+    },
+    {
+        "name": "metric_DTLB (2nd level) store MPI",
+        "name-txn": "metric_DTLB (2nd level) store misses per txn",
+        "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]",
+        "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]"
+    },
+    {
+        "name": "metric_TMA_Frontend_Bound(%)",
+        "expression": "100 * ( [TOPDOWN_FE_BOUND.ALL] / ( 6 * [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_..Fetch_Latency(%)",
+        "expression": "100*([TOPDOWN_FE_BOUND.FRONTEND_LATENCY] / (6.0 * [cpu-cycles]))"
+    },
+    {
+        "name": "metric_TMA_....ICache_Misses(%)",
+        "expression": "100 * ( [TOPDOWN_FE_BOUND.ICACHE] / ( 6 * [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....ITLB_Misses(%)",
+        "expression": "100 * ( [TOPDOWN_FE_BOUND.ITLB_MISS] / ( 6 * [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_....Branch_Resteer(%)",
+        "expression": "100*([TOPDOWN_FE_BOUND.BRANCH_RESTEER] / (6.0 * [cpu-cycles]))"
+    },
+    {
+        "name": "metric_TMA_..Fetch_Bandwidth(%)",
+        "expression": "100*([TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH] / (6.0 * [cpu-cycles]))"
+    },
+    {
+        "name": "metric_TMA_Bad_Speculation(%)",
+        "expression": "100 * ( [TOPDOWN_BAD_SPECULATION.ALL] / ( 6 * [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_..Branch_Mispredicts(%)",
+        "expression": "100*([TOPDOWN_BAD_SPECULATION.MISPREDICT] / (6.0 * [cpu-cycles]))"
+    },
+    {
+        "name": "metric_TMA_..Machine_Clears(%)",
+        "expression": "100*([TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS] / (6.0 * [cpu-cycles]))"
+    },
+    {
+        "name": "metric_TMA_Backend_Bound(%)",
+        "expression": "100 * ( [TOPDOWN_BE_BOUND.ALL] / ( 6 * [cpu-cycles] ) )"
+    },
+    {
+        "name": "metric_TMA_..Memory_Bound(%)",
+        "expression": "100*min(1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles])), 1*([LD_HEAD.ANY_AT_RET] / [cpu-cycles] + ([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / [MEM_SCHEDULER_BLOCK.ALL]))"
+    },
+    {
+        "name": "metric_TMA_....L1_Bound(%)",
+        "expression": "100*([LD_HEAD.L1_BOUND_AT_RET] / [cpu-cycles])"
+    },
+    {
+        "name": "metric_TMA_....L2_Bound(%)",
+        "expression": "100*([MEM_BOUND_STALLS_LOAD.L2_HIT] / [cpu-cycles] - (max(1*(([MEM_BOUND_STALLS_LOAD.ALL] - [LD_HEAD.L1_MISS_AT_RET]) / [cpu-cycles]), 0) * [MEM_BOUND_STALLS_LOAD.L2_HIT] / [MEM_BOUND_STALLS_LOAD.ALL]))"
+    },
+    {
+        "name": "metric_TMA_....L3_Bound(%)",
+        "expression": "100*([MEM_BOUND_STALLS_LOAD.LLC_HIT] / [cpu-cycles] - (max(1*(([MEM_BOUND_STALLS_LOAD.ALL] - [LD_HEAD.L1_MISS_AT_RET]) / [cpu-cycles]), 0) * [MEM_BOUND_STALLS_LOAD.LLC_HIT] / [MEM_BOUND_STALLS_LOAD.ALL]))"
+    },
+    {
+        "name": "metric_TMA_....Store_Bound(%)",
+        "expression": "100*(([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / [MEM_SCHEDULER_BLOCK.ALL])"
+    },
+    {
+        "name": "metric_TMA_..Core_Bound(%)",
+        "expression": "100*max(0, 1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles]) - min(1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles])), 1*([LD_HEAD.ANY_AT_RET] / [cpu-cycles] + ([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / [MEM_SCHEDULER_BLOCK.ALL]))))"
+    },
+    {
+        "name": "metric_TMA_....Serialization(%)",
+        "expression": "100*([TOPDOWN_BE_BOUND.SERIALIZATION] / (6.0 * [cpu-cycles]))"
+    },
+    {
+        "name": "metric_TMA_Retiring(%)",
+        "expression": "100 * ( [TOPDOWN_RETIRING.ALL] / ( 6 * [cpu-cycles] ) )"
     }
-]
\ No newline at end of file
+]
diff --git a/events/spr_emr_nofixedtma.txt b/events/spr_emr_nofixedtma.txt
new file mode 100644
index 0000000..d767656
--- /dev/null
+++ b/events/spr_emr_nofixedtma.txt
@@ -0,0 +1,138 @@
+###########################################################################################################
+# Copyright (C) 2021-2023 Intel Corporation
+# SPDX-License-Identifier: BSD-3-Clause
+###########################################################################################################
+
+# Sapphire Rapids and Emerald Rapids event list for platforms that don't have support for the fixed counter
+# TMA events, e.g., some AWS VMs.
+# Note that there are no more than 10 events per group. On these same platforms, the cpu-cycles fixed
+# counter is not supported so a general purpose counter will be used.
+
+cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/,
+cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/,
+cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/,
+cpu/event=0x25,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/,
+cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/,
+cpu/event=0x11,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/,
+cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/,
+cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/,
+cpu/event=0xa6,umask=0x21,cmask=0x05,period=2000003,name='EXE_ACTIVITY.BOUND_ON_LOADS'/,
+cpu/event=0xad,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x12,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/,
+cpu/event=0x12,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/,
+cpu/event=0x13,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/,
+cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x47,umask=0x09,cmask=0x09,period=1000003,name='MEMORY_ACTIVITY.STALLS_L3_MISS'/,
+cpu/event=0x80,umask=0x04,period=500009,name='ICACHE_DATA.STALLS'/,
+cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_TAG.STALLS'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+# events for TMA metrics without fixed counter support (group 1)
+cpu/event=0x9c,umask=0x01,name='IDQ_UOPS_NOT_DELIVERED.CORE'/,
+cpu/event=0xa4,umask=0x01,name='TOPDOWN.SLOTS_P'/,
+cpu/event=0x9c,umask=0x01,cmask=0x06,name='IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE'/,
+cpu/event=0xc2,umask=0x02,name='UOPS_RETIRED.SLOTS'/,
+cpu/event=0xae,umask=0x01,name='UOPS_ISSUED.ANY'/,
+cpu/event=0x87,umask=0x01,name='DECODE.LCP'/,
+cpu/event=0x61,umask=0x02,name='DSB2MITE_SWITCHES.PENALTY_CYCLES'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+# events for TMA metrics without fixed counter support (group 2)
+cpu/event=0xa4,umask=0x02,name='TOPDOWN.BACKEND_BOUND_SLOTS'/,
+cpu/event=0xa4,umask=0x08,name='TOPDOWN.BR_MISPREDICT_SLOTS'/,
+cpu/event=0xa4,umask=0x10,name='TOPDOWN.MEMORY_BOUND_SLOTS'/,
+cpu/event=0xc2,umask=0x01,name='UOPS_RETIRED.HEAVY'/,
+cpu/event=0xe5,umask=0x03,name='MEM_UOP_RETIRED.ANY'/,
+cpu/event=0xc0,umask=0x10,name='INST_RETIRED.MACRO_FUSED'/,
+cpu/event=0xc4,umask=0x00,name='BR_INST_RETIRED.ALL_BRANCHES'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/,
+cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/,
+cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/,
+cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/,
+cpu/event=0xd0,umask=0x21,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.LOCK_LOADS'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/,
+cpu/event=0x79,umask=0x04,cmask=0x06,period=2000003,name='IDQ.MITE_CYCLES_OK'/,
+cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/,
+cpu/event=0x79,umask=0x08,cmask=0x06,period=2000003,name='IDQ.DSB_CYCLES_OK'/,
+cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/,
+cpu/event=0xb7,umask=0x02,period=2000003,name='EXE.AMX_BUSY'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/,
+cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/,
+cpu/event=0x79,umask=0x20,period=100003,name='IDQ.MS_UOPS'/,
+cpu/event=0xa8,umask=0x01,cmask=0x00,period=2000003,name='LSD.UOPS'/,
+cpu-cycles:k,
+ref-cycles:k,
+instructions:k;
+
+#OCR
+cpu/event=0x2a,umask=0x01,offcore_rsp=0x104004477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/,
+cpu/event=0x2a,umask=0x01,offcore_rsp=0x730004477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/,
+cpu/event=0x2a,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/,
+cpu/event=0x2a,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/,
+cpu/event=0x2a,umask=0x01,offcore_rsp=0x1030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/,
+cpu/event=0x2a,umask=0x01,offcore_rsp=0x830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/;
+
+#C6
+cstate_core/c6-residency/;
+cstate_pkg/c6-residency/;
+
+#UPI
+upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/;
+
+#CHA (Cache)
+cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/,
+cha/event=0x35,umask=0xc8177e01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/,
+cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/;
+
+cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/,
+cha/event=0x36,umask=0xc816fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/,
+cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/,
+cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/;
+
+cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/,
+cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/,
+cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/,
+cha/event=0x36,umask=0xC817fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/;
+
+#CHA (IO Bandwidth)
+cha/event=0x35,umask=0xc8f3ff04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/,
+cha/event=0x35,umask=0xCC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/,
+cha/event=0x35,umask=0xCD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/,
+cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/;
+
+#IMC (memory read/writes)
+imc/event=0x05,umask=0xcf,name='UNC_M_CAS_COUNT.RD'/,
+imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/;
+
+#power
+power/energy-pkg/,
+power/energy-ram/;
diff --git a/events/srf.txt b/events/srf.txt
index b57637d..49b3fe1 100644
--- a/events/srf.txt
+++ b/events/srf.txt
@@ -5,13 +5,107 @@
 
 # SierraForest event list
 
+cpu-cycles:k,
+ref-cycles:k,
+instructions:k;
+
+cpu/event=0x08,umask=0x08,name='DTLB_LOAD_MISSES.WALK_COMPLETED_1G'/,
+cpu/event=0x08,umask=0xe,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/,
+cpu/event=0x49,umask=0xe,name='DTLB_STORE_MISSES.WALK_COMPLETED'/,
+cpu/event=0x12,umask=0x02,name='DTLB_LOAD_MISSES.WALK_COMPLETED_4K'/,
+cpu/event=0x12,umask=0x04,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/,
 cpu-cycles,
 ref-cycles,
 instructions;
 
-cpu-cycles:k,
-ref-cycles:k,
-instructions:k;
+cpu/event=0x2e,umask=0x41,name='LONGEST_LAT_CACHE.MISS'/,
+cpu/event=0x2e,umask=0x4f,name='LONGEST_LAT_CACHE.REFERENCE'/,
+cpu/event=0x85,umask=0xe,name='ITLB_MISSES.WALK_COMPLETED'/,
+cpu/event=0xd0,umask=0x21,name='MEM_UOPS_RETIRED.LOCK_LOADS'/,
+cpu/event=0xd1,umask=0x02,name='MEM_LOAD_UOPS_RETIRED.L2_HIT'/,
+cpu/event=0xd1,umask=0x40,name='MEM_LOAD_UOPS_RETIRED.L1_MISS'/,
+cpu/event=0xd1,umask=0x1,name='MEM_LOAD_UOPS_RETIRED.L1_HIT'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x71,umask=0x00,name='TOPDOWN_FE_BOUND.ALL'/,
+cpu/event=0x71,umask=0x20,name='TOPDOWN_FE_BOUND.ICACHE'/,
+cpu/event=0x71,umask=0x10,name='TOPDOWN_FE_BOUND.ITLB_MISS'/,
+cpu/event=0x71,umask=0x72,name='TOPDOWN_FE_BOUND.FRONTEND_LATENCY'/,
+cpu/event=0x71,umask=0x40,name='TOPDOWN_FE_BOUND.BRANCH_RESTEER'/,
+cpu/event=0x71,umask=0x8d,name='TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x80,umask=0x02,name='ICACHE.MISSES'/,
+cpu/event=0x05,umask=0xf4,name='LD_HEAD.L1_BOUND_AT_RET'/,
+cpu/event=0x72,umask=0x00,name='TOPDOWN_RETIRING.ALL'/,
+cpu/event=0x73,umask=0x03,name='TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS'/,
+cpu/event=0x73,umask=0x04,name='TOPDOWN_BAD_SPECULATION.MISPREDICT'/,
+cpu/event=0x73,umask=0x00,name='TOPDOWN_BAD_SPECULATION.ALL'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x05,umask=0xff,name='LD_HEAD.ANY_AT_RET'/,
+cpu/event=0x04,umask=0x07,name='MEM_SCHEDULER_BLOCK.ALL'/,
+cpu/event=0x04,umask=0x01,name='MEM_SCHEDULER_BLOCK.ST_BUF'/,
+cpu/event=0x74,umask=0x02,name='TOPDOWN_BE_BOUND.MEM_SCHEDULER'/,
+cpu/event=0x74,umask=0x10,name='TOPDOWN_BE_BOUND.SERIALIZATION'/,
+cpu/event=0x74,umask=0x00,name='TOPDOWN_BE_BOUND.ALL'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0x05,umask=0x81,name='LD_HEAD.L1_MISS_AT_RET'/,
+cpu/event=0x34,umask=0x6f,name='MEM_BOUND_STALLS_LOAD.ALL'/,
+cpu/event=0x34,umask=0x01,name='MEM_BOUND_STALLS_LOAD.L2_HIT'/,
+cpu/event=0x34,umask=0x06,name='MEM_BOUND_STALLS_LOAD.LLC_HIT'/,
+cpu-cycles,
+ref-cycles,
+instructions;
+
+cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x8000100000004477,name='OCR.READS_TO_CORE.OUTSTANDING'/,
+cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x100000014477,name='OCR.READS_TO_CORE.ANY_RESPONSE'/;
+
+cpu/event=0xB7,umask=0x01,offcore_rsp=0x101030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/,
+cpu/event=0xB7,umask=0x01,offcore_rsp=0x100830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/;
+
+cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x10244,name='OCR.L2_CODE_MISS'/,
+cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x10070,name='OCR.HWPF_L2.ANY_RESPONSE'/;
+
+cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x1010003C4477,name='OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM'/,
+cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x1008003C4477,name='OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD'/;
+
+#CHA (Cache)
+cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/;
+
+cha/event=0x35,umask=0x00C827FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT'/,
+cha/event=0x35,umask=0x00C8A7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF'/,
+cha/event=0x35,umask=0x00C80FFE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/,
+cha/event=0x35,umask=0x00C88FFE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/;
+
+cha/event=0x35,umask=0x00CCD7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/,
+cha/event=0x35,umask=0x00C807FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_RFO'/,
+cha/event=0x35,umask=0x00C887FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF'/,
+cha/event=0x35,umask=0x00CCC7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO'/;
+
+#CHA (IO Bandwidth)
+cha/event=0x35,umask=0x00C8F3FF04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/,
+cha/event=0x35,umask=0x00CC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/,
+cha/event=0x35,umask=0x00CD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/;
+
+cha/event=0x36,umask=0x00C827FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT'/;
+
+cha/event=0x36,umask=0x00C807FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO'/;
+
+#IMC (memory read/writes)
+imc/event=0x05,umask=0xCF,name='UNC_M_CAS_COUNT_SCH0.RD'/,
+imc/event=0x05,umask=0xF0,name='UNC_M_CAS_COUNT_SCH0.WR'/,
+imc/event=0x06,umask=0xCF,name='UNC_M_CAS_COUNT_SCH1.RD'/,
+imc/event=0x06,umask=0xF0,name='UNC_M_CAS_COUNT_SCH1.WR'/;
 
 #C6
 cstate_core/c6-residency/;
@@ -19,4 +113,4 @@ cstate_pkg/c6-residency/;
 
 #power
 power/energy-pkg/,
-power/energy-ram/;
\ No newline at end of file
+power/energy-ram/;
diff --git a/perf-collect.py b/perf-collect.py
index e3d95cb..1c28a24 100644
--- a/perf-collect.py
+++ b/perf-collect.py
@@ -38,6 +38,8 @@ def write_metadata(
     arch,
     cpuname,
     cpuid_info,
+    pmu_driver_version,
+    fixed_tma_supported,
     muxinterval,
     cpu,
     socket,
@@ -68,6 +70,8 @@ def write_metadata(
             for c in _cpus:
                 modified.write(str(c) + ";")
             modified.write("\n")
+        modified.write("PMUDriverVersion," + str(pmu_driver_version) + ",\n")
+        modified.write("FixedTMASupported," + str(fixed_tma_supported) + ",\n")
         modified.write("Perf event mux Interval ms," + str(muxinterval) + ",\n")
         cpumode = "enabled" if cpu else "disabled"
         socketmode = "enabled" if socket else "disabled"
@@ -158,7 +162,8 @@ def supports_psi():
         return False
 
 
-def tma_supported():
+# fixed_tma_supported returns true if the fixed-purpose PMU counters for TMA events are supported on the target platform
+def fixed_tma_supported():
     perf_out = ""
     try:
         perf = subprocess.Popen(
@@ -180,16 +185,68 @@ def tma_supported():
                 perf_out.split("\n"),
             )
         }
-    except Exception:
+    except (IndexError, ValueError):
+        logging.debug("Failed to parse perf output in fixed_tma_supported()")
+        return False
+    try:
+        if events["TOPDOWN.SLOTS"] == events["PERF_METRICS.BAD_SPECULATION"]:
+            return False
+    except KeyError:
+        logging.debug("Failed to find required events in fixed_tma_supported()")
         return False
 
-    # This is a perf artifact of no vPMU support
-    if events["TOPDOWN.SLOTS"] == events["PERF_METRICS.BAD_SPECULATION"]:
+    if events["TOPDOWN.SLOTS"] == 0 or events["PERF_METRICS.BAD_SPECULATION"] == 0:
         return False
 
     return True
 
 
+# fixed_event_supported returns true if the fixed-purpose PMU counter for the given event (cpu-cycles or instructions) is supported on the target platform
+# it makes this determination by filling all the general purpose counters with the given event, then adding one more
+def fixed_event_supported(arch, event):
+    """Probe perf with one more copy of `event` than there are general purpose counters.
+    Returns False if perf cannot be launched, reports "<not counted>" or "<not supported>",
+    or prints a count of 0; otherwise True. An unrecognized arch is passed to crash().
+    """
+    num_gp_counters = 0
+    if arch in ("broadwell", "skylake", "cascadelake"):
+        num_gp_counters = 4
+    elif arch in ("icelake", "sapphirerapids", "emeraldrapids", "sierraforest"):
+        num_gp_counters = 8
+    else:
+        crash(f"Unsupported architecture: {arch}")
+
+    events = ",".join([event] * (num_gp_counters + 1))
+    try:
+        perf = subprocess.Popen(
+            shlex.split("perf stat -a -e '{" + events + "}' sleep .1"),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+        )
+        perf_out = perf.communicate()[0].decode()
+    except (OSError, subprocess.SubprocessError) as e:
+        # Popen raises OSError (e.g. perf not installed), never CalledProcessError
+        logging.debug(f"Failed to run perf in fixed_event_supported(): {e}")
+        return False
+    # on some VMs we see "<not counted>" or "<not supported>" in the perf output
+    if "<not counted>" in perf_out or "<not supported>" in perf_out:
+        return False
+    # on some VMs we get a count of 0
+    for line in perf_out.split("\n"):
+        tokens = line.split()
+        if len(tokens) == 2 and tokens[0] == "0":
+            return False
+    return True
+
+
+def fixed_cycles_supported(arch):  # probes the "cpu-cycles" event via fixed_event_supported
+    return fixed_event_supported(arch, "cpu-cycles")
+
+
+def fixed_instructions_supported(arch):  # probes the "instructions" event via fixed_event_supported
+    return fixed_event_supported(arch, "instructions")
+
+
 def ref_cycles_supported():
     perf_out = ""
     try:
@@ -228,6 +285,38 @@ def validate_file(fname):
         crash(str(fname) + " not accessible")
 
 
+def get_eventfile_path(arch, script_path, supports_tma_fixed_events):
+    """Return the path of the default event file for arch, or None if arch is not recognized.
+
+    icelake, sapphirerapids and emeraldrapids switch to a "_nofixedtma" event file when
+    supports_tma_fixed_events is false; every other architecture has a single event file.
+    """
+    # architecture -> (event file with fixed TMA counters, event file without them)
+    eventfiles = {
+        "broadwell": ("bdx.txt", "bdx.txt"),
+        "skylake": ("clx_skx.txt", "clx_skx.txt"),
+        "cascadelake": ("clx_skx.txt", "clx_skx.txt"),
+        "icelake": ("icx.txt", "icx_nofixedtma.txt"),
+        "sapphirerapids": ("spr_emr.txt", "spr_emr_nofixedtma.txt"),
+        "emeraldrapids": ("spr_emr.txt", "spr_emr_nofixedtma.txt"),
+        "sierraforest": ("srf.txt", "srf.txt"),
+    }
+
+    if arch not in eventfiles:
+        return None
+    with_fixed_tma, without_fixed_tma = eventfiles[arch]
+    eventfile = with_fixed_tma if supports_tma_fixed_events else without_fixed_tma
+
+    # when frozen (pyInstaller), event files are looked up directly under sys._MEIPASS
+    if getattr(sys, "frozen", False):
+        basepath = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)))
+        return os.path.join(basepath, eventfile)
+    if __file__:
+        return script_path + "/events/" + eventfile
+    # neither a frozen binary nor a script with a known file location
+    crash("Unknown application type")
+
+
 if __name__ == "__main__":
     common.configure_logging(".")
     if platform.system() != "Linux":
@@ -288,17 +377,46 @@ def validate_file(fname):
     parser.add_argument(
         "-V", "--version", help="display version info", action="store_true"
     )
+    parser.add_argument(
+        "-e", "--eventfile", default=None, help="Relative path to eventfile"
+    )
     args = parser.parse_args()
 
     if args.version:
         print(perf_helpers.get_tool_version())
         sys.exit()
 
-    if os.geteuid() != 0:
-        crash("Must run PerfSpect as root, please re-run")
+    is_root = os.geteuid() == 0
+    if not is_root:
+        logging.warning(
+            "User is not root. See README.md for requirements and instructions on how to run as non-root user."
+        )
+        try:
+            input("Press Enter to continue as non-root user or Ctrl-c to exit...")
+        except KeyboardInterrupt:
+            print("\nExiting...")
+            sys.exit()
+
+    if not is_root:
+        # check kernel.perf_event_paranoid. It needs to be zero for non-root users.
+        paranoid = perf_helpers.check_perf_event_paranoid()
+        if paranoid is None:
+            crash("kernel.perf_event_paranoid could not be determined")
+        if paranoid != 0:
+            crash(
+                "kernel.perf_event_paranoid is set to "
+                + str(paranoid)
+                + ". Run as root or set it to 0"
+            )
 
     # disable nmi watchdog before collecting perf
-    nmi_watchdog = perf_helpers.disable_nmi_watchdog()
+    nmi_watchdog_status = perf_helpers.nmi_watchdog_enabled()
+    if nmi_watchdog_status is None:
+        crash("NMI watchdog status could not be determined")
+
+    if is_root and nmi_watchdog_status:
+        perf_helpers.disable_nmi_watchdog()
+
     interval = 5000
     collect_psi = False
 
@@ -319,48 +437,47 @@ def validate_file(fname):
     if args.muxinterval > 1000:
         crash("Input argument muxinterval is too large, max is [1s or 1000ms]")
 
-    # select architecture default event file if not supplied
-    have_uncore = True
+    # check if pmu available
+    if "cpu-cycles" not in perf_helpers.get_perf_list():
+        crash(
+            "PMU's not available. Run baremetal or in a VM which exposes PMUs (sometimes full socket)"
+        )
+
     procinfo = perf_helpers.get_cpuinfo()
     arch, cpuname = perf_helpers.get_arch_and_name(procinfo)
     if not arch:
         crash(
             f"Unrecognized CPU architecture. Supported architectures: {', '.join(SUPPORTED_ARCHITECTURES)}"
         )
-    eventfile = None
-    if arch == "broadwell":
-        eventfile = "bdx.txt"
-    elif arch == "skylake" or arch == "cascadelake":
-        eventfile = "clx_skx.txt"
-    elif arch == "icelake":
-        eventfile = "icx.txt"
-    elif arch == "sapphirerapids" or arch == "emeraldrapids":
-        eventfile = "spr_emr.txt"
-    elif arch == "sierraforest":
-        eventfile = "srf.txt"
 
-    if eventfile is None:
-        crash(f"failed to match architecture ({arch}) to event file name.")
+    # Can we use the fixed purpose PMU counters for TMA events?
+    # The fixed-purpose PMU counters for TMA events are not supported on architectures older than Icelake
+    # They are also not supported on some VMs, e.g., AWS ICX and SPR VMs
+    supports_tma_fixed_events = False
+    if arch == "icelake" or arch == "sapphirerapids" or arch == "emeraldrapids":
+        supports_tma_fixed_events = fixed_tma_supported()
+        if not supports_tma_fixed_events:
+            logging.warning(
+                "Due to lack of vPMU support, some TMA events will not be collected"
+            )
 
-    # Convert path of event file to relative path if being packaged by pyInstaller into a binary
-    if getattr(sys, "frozen", False):
-        basepath = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)))
-        eventfilename = eventfile
-        eventfile = os.path.join(basepath, eventfile)
-    elif __file__:
-        eventfile = script_path + "/events/" + eventfile
-        eventfilename = eventfile
+    # Can we use the fixed-purpose PMU counter for the cpu-cycles event?
+    supports_cycles_fixed_event = fixed_cycles_supported(arch)
+
+    # Can we use the fixed-purpose PMU counter for the instructions event?
+    supports_instructions_fixed_event = fixed_instructions_supported(arch)
+
+    # select architecture default event file if not supplied
+    if args.eventfile is not None:
+        eventfile = args.eventfile
     else:
-        crash("Unknown application type")
+        eventfile = get_eventfile_path(arch, script_path, supports_tma_fixed_events)
+    if eventfile is None:
+        crash(f"failed to match architecture ({arch}) to event file name.")
 
-    # check if pmu available
-    if "cpu-cycles" not in perf_helpers.get_perf_list():
-        crash(
-            "PMU's not available. Run baremetal or in a VM which exposes PMUs (sometimes full socket)"
-        )
+    logging.info("Event file: " + eventfile)
 
-    # get perf events to collect
-    include_tma = True
+    supports_uncore_events = True
     sys_devs = perf_helpers.get_sys_devices()
     if (
         "uncore_cha" not in sys_devs
@@ -369,39 +486,50 @@ def validate_file(fname):
         and "uncore_qpi" not in sys_devs
         and "uncore_imc" not in sys_devs
     ):
-        logging.info("disabling uncore (possibly in a vm?)")
-        have_uncore = False
+        logging.info("uncore devices not found (possibly in a vm?)")
+        supports_uncore_events = False
+
+    supports_ref_cycles_event = ref_cycles_supported()
 
-    if arch == "icelake":
-        include_tma = tma_supported()
-        if not include_tma:
-            logging.warning(
-                "Due to lack of vPMU support, TMA L1 events will not be collected"
-            )
-    if arch == "sapphirerapids" or arch == "emeraldrapids":
-        include_tma = tma_supported()
-        if not include_tma:
-            logging.warning(
-                "Due to lack of vPMU support, TMA L1 & L2 events will not be collected"
-            )
     events, collection_events = prep_events.prepare_perf_events(
         eventfile,
-        (args.pid is not None or args.cid is not None or not have_uncore),
-        include_tma,
-        not have_uncore,
-        ref_cycles_supported(),
+        (args.pid is not None or args.cid is not None or not supports_uncore_events),
+        supports_tma_fixed_events,
+        supports_uncore_events,
+        supports_ref_cycles_event,
     )
 
     # check output file is writable
     if not perf_helpers.check_file_writeable(args.outcsv):
         crash("Output file %s not writeable " % args.outcsv)
 
+    # adjust mux interval
     mux_intervals = perf_helpers.get_perf_event_mux_interval()
     if args.muxinterval > 0:
-        logging.info(
-            "changing default perf mux interval to " + str(args.muxinterval) + "ms"
-        )
-        perf_helpers.set_perf_event_mux_interval(False, args.muxinterval, mux_intervals)
+        if is_root:
+            logging.info(
+                "changing perf mux interval to " + str(args.muxinterval) + "ms"
+            )
+            perf_helpers.set_perf_event_mux_interval(
+                False, args.muxinterval, mux_intervals
+            )
+        else:
+            for device, mux in mux_intervals.items():
+                mux_int = -1
+                try:
+                    mux_int = int(mux)
+                except ValueError:
+                    crash("Failed to parse mux interval on " + device)
+                if mux_int != args.muxinterval:
+                    crash(
+                        "mux interval on "
+                        + device
+                        + " is set to "
+                        + str(mux_int)
+                        + ". Run as root or set it to "
+                        + str(args.muxinterval)
+                        + "."
+                    )
 
     # parse cgroups
     cgroups = []
@@ -411,10 +539,25 @@ def validate_file(fname):
     if args.pid is not None or args.cid is not None:
         logging.info("Not collecting uncore events in this run mode")
 
+    pmu_driver_version = perf_helpers.get_pmu_driver_version()
+
     # log some metadata
     logging.info("Architecture: " + arch)
     logging.info("Model: " + cpuname)
     logging.info("Kernel version: " + perf_helpers.get_version())
+    logging.info("PMU driver version: " + pmu_driver_version)
+    logging.info("Uncore events supported: " + str(supports_uncore_events))
+    logging.info(
+        "Fixed counter TMA events supported: " + str(supports_tma_fixed_events)
+    )
+    logging.info(
+        "Fixed counter cpu-cycles event supported: " + str(supports_cycles_fixed_event)
+    )
+    logging.info(
+        "Fixed counter instructions event supported: "
+        + str(supports_instructions_fixed_event)
+    )
+    logging.info("ref-cycles event supported: " + str(supports_ref_cycles_event))
     logging.info("Cores per socket: " + str(perf_helpers.get_cpu_count()))
     logging.info("Socket: " + str(perf_helpers.get_socket_count()))
     logging.info("Hyperthreading on: " + str(perf_helpers.get_ht_status()))
@@ -482,6 +625,8 @@ def validate_file(fname):
         arch,
         cpuname,
         cpuid_info,
+        pmu_driver_version,
+        supports_tma_fixed_events,
         args.muxinterval,
         args.cpu,
         args.socket,
@@ -491,10 +636,11 @@ def validate_file(fname):
     os.chmod(args.outcsv, 0o666)  # nosec
 
     # reset nmi_watchdog to what it was before running perfspect
-    if nmi_watchdog != 0:
+    if is_root and nmi_watchdog_status is True:
         perf_helpers.enable_nmi_watchdog()
 
-    logging.info("changing perf mux interval back to default")
-    perf_helpers.set_perf_event_mux_interval(True, 1, mux_intervals)
+    if is_root:
+        logging.info("changing perf mux interval back to default")
+        perf_helpers.set_perf_event_mux_interval(True, 1, mux_intervals)
 
     logging.info("perf stat dumped to %s" % args.outcsv)
diff --git a/perf-collect.spec b/perf-collect.spec
index 5f86876..5a53fc8 100644
--- a/perf-collect.spec
+++ b/perf-collect.spec
@@ -7,7 +7,7 @@ block_cipher = None
 a = Analysis(
     ['perf-collect.py'],
     pathex=[],
-    datas=[('./src/libtsc.so', '.'), ('./events/bdx.txt', '.'), ('./events/clx_skx.txt', '.'), ('./events/icx.txt', '.'), ('./events/spr_emr.txt', '.'), ('./events/srf.txt', '.')],
+    datas=[('./src/libtsc.so', '.'), ('./events/bdx.txt', '.'), ('./events/clx_skx.txt', '.'), ('./events/icx.txt', '.'), ('./events/icx_nofixedtma.txt', '.'), ('./events/spr_emr.txt', '.'), ('./events/spr_emr_nofixedtma.txt', '.'), ('./events/srf.txt', '.')],
     hiddenimports=[],
     hookspath=[],
     hooksconfig={},
diff --git a/perf-postprocess.py b/perf-postprocess.py
index a2d32a0..08d4dac 100644
--- a/perf-postprocess.py
+++ b/perf-postprocess.py
@@ -114,6 +114,13 @@ def get_args(script_path):
         type=int,
         help="Generate per-transaction metrics using the provided transactions/sec.",
     )
+    parser.add_argument(
+        "-m",
+        "--metricfile",
+        default=None,
+        help="Relative path to metrics file in json format",
+        dest="metric_file",
+    )
 
     args = parser.parse_args()
 
@@ -349,6 +356,8 @@ def get_metadata_as_dict(meta_data_lines, txns=None):
             "Model",
             "kernel version",
             "PerfSpect version",
+            "PMUDriverVersion",
+            "FixedTMASupported",
         ]:
             if info in line:
                 meta_data["metadata"][info] = line.split(",", 1)[1]
@@ -414,16 +423,22 @@ def get_event_groups(event_lines):
     return groups
 
 
-def get_metric_file_name(microarchitecture):
+def get_metric_file_name(microarchitecture, fixed_tma_supported):
     metric_file = ""
     if microarchitecture == "broadwell":
         metric_file = "metric_bdx.json"
     elif microarchitecture == "skylake" or microarchitecture == "cascadelake":
         metric_file = "metric_skx_clx.json"
     elif microarchitecture == "icelake":
-        metric_file = "metric_icx.json"
+        if fixed_tma_supported:
+            metric_file = "metric_icx.json"
+        else:
+            metric_file = "metric_icx_nofixedtma.json"
     elif microarchitecture == "sapphirerapids" or microarchitecture == "emeraldrapids":
-        metric_file = "metric_spr_emr.json"
+        if fixed_tma_supported:
+            metric_file = "metric_spr_emr.json"
+        else:
+            metric_file = "metric_spr_emr_nofixedtma.json"
     elif microarchitecture == "sierraforest":
         metric_file = "metric_srf.json"
     else:
@@ -445,9 +460,11 @@ def validate_file(fname):
         crash(str(fname) + " not accessible")
 
 
-def get_metrics_formula(architecture, txns=None):
+def get_metrics_formula(architecture, fixed_tma_supported, metric_file=None, txns=None):
     # get the metric file name based on architecture
-    metric_file = get_metric_file_name(architecture)
+    if metric_file is None:
+        metric_file = get_metric_file_name(architecture, fixed_tma_supported)
+    logging.info("Metric file: " + metric_file)
     validate_file(metric_file)
 
     with open(metric_file, "r") as f_metric:
@@ -475,7 +492,7 @@ def get_socket_number(sockets_dict, CPU):
 
 
 def extract_dataframe(perf_data_lines, meta_data, perf_mode):
-    logging.info("Formatting event data")
+    logging.info("Parsing event data")
     # parse event data into dataframe and set header names
     perf_data_df = pd.DataFrame(perf_data_lines)
     if "CGROUPS" in meta_data and meta_data["CGROUPS"] == "enabled":
@@ -732,7 +749,7 @@ def log_skip_metric(metric, instance, msg):
 
 
 # group_start_end_index_dict is both an input and output argument
-# if empty, the start and end indexes for each geroup will be added
+# if empty, the start and end indexes for each group will be added
 # if not, the start and end indexes for each group will be read from it
 def get_groups_to_dataframes(
     time_slice_df, group_to_event, group_start_end_index_dict, perf_mode
@@ -1027,7 +1044,6 @@ def generate_metrics(
         group_to_df = get_groups_to_dataframes(
             time_slice_df, group_to_event, group_start_end_index_dict, perf_mode
         )
-
         time_metrics_result[time_slice] = evaluate_metrics(
             verbose, filtered_metrics, metadata, group_to_event, group_to_df, errors
         )
@@ -1174,7 +1190,7 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode):
     args = get_args(script_path)
     input_file_path = args.rawfile
     out_file_path = args.outfile
-    # read all metadata, perf evernts, and perf data lines
+    # read all metadata, perf events, and perf data lines
     # Note: this might not be feasible for very large files
     meta_data_lines, perf_event_lines, perf_data_lines = get_all_data_lines(
         input_file_path
@@ -1200,7 +1216,12 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode):
     perf_data_df = extract_dataframe(perf_data_lines, meta_data, perf_mode)
 
     # parse metrics expressions
-    metrics = get_metrics_formula(meta_data["constants"]["CONST_ARCH"], args.pertxn)
+    metrics = get_metrics_formula(
+        meta_data["constants"]["CONST_ARCH"],
+        meta_data["metadata"]["FixedTMASupported"] == "True",
+        args.metric_file,
+        args.pertxn,
+    )
 
     if args.rawevents:  # generate raw events for system, socket and CPU
         generate_raw_events(perf_data_df, out_file_path, perf_mode)
diff --git a/src/perf_helpers.py b/src/perf_helpers.py
index c6d4456..69cc2a0 100644
--- a/src/perf_helpers.py
+++ b/src/perf_helpers.py
@@ -112,8 +112,9 @@ def get_imc_cha_upi_count():
     return imc_count, cha_count, upi_count
 
 
-# device ids are not consecutive in some cases
-def get_channel_ids(pattern):
+# return a sorted list of device ids for a given device type pattern, e.g., uncore_cha_, uncore_imc_, etc.
+# note: this is necessary because device ids are not always consecutive
+def get_device_ids(pattern):
     sysdevices = os.listdir("/sys/bus/event_source/devices")
     devices = pattern + "[0-9]*"
     ids = []
@@ -138,14 +139,33 @@ def get_perf_event_mux_interval():
     return mux_interval
 
 
+# Returns True if the NMI watchdog is enabled, False if it is not, or None on error.
+def nmi_watchdog_enabled():
+    cmd = ["cat", "/proc/sys/kernel/nmi_watchdog"]
+    try:
+        raw_status = subprocess.check_output(cmd)
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        logging.warning(f"Failed to get nmi_watchdog status: {e}")
+        return None
+    try:
+        return int(raw_status.decode().strip()) == 1
+    except ValueError as e:
+        logging.warning(f"Failed to interpret nmi_watchdog status: {e}")
+        return None
+
+
 # disable nmi watchdog and return its initial status
 # to restore it after collection
 def disable_nmi_watchdog():
+    nmi_watchdog_status = nmi_watchdog_enabled()
+    if nmi_watchdog_status is None:
+        logging.error("Failed to get nmi_watchdog status.")
+        return None
     try:
-        proc_output = subprocess.check_output(["cat", "/proc/sys/kernel/nmi_watchdog"])
-        nmi_watchdog_status = int(proc_output.decode().strip())
-        if nmi_watchdog_status == 1:
-            proc_output = subprocess.check_output(["sysctl", "kernel.nmi_watchdog=0"])
+        if nmi_watchdog_status:
+            proc_output = subprocess.check_output(
+                ["sysctl", "kernel.nmi_watchdog=0"], stderr=subprocess.STDOUT
+            )
             new_watchdog_status = int(
                 proc_output.decode().strip().replace("kernel.nmi_watchdog = ", "")
             )
@@ -158,7 +178,16 @@ def disable_nmi_watchdog():
             logging.info("nmi_watchdog already disabled. No change needed.")
         return nmi_watchdog_status
     except (ValueError, FileNotFoundError, subprocess.CalledProcessError) as e:
-        crash(f"Failed to disable nmi_watchdog: {e}")
+        logging.warning(f"Failed to disable nmi_watchdog: {e}")
+
+
+# return the kernel.perf_event_paranoid setting as an int, or None on error
+def check_perf_event_paranoid():
+    cmd = ["cat", "/proc/sys/kernel/perf_event_paranoid"]
+    try:
+        return int(subprocess.check_output(cmd))
+    except (ValueError, FileNotFoundError, subprocess.CalledProcessError) as e:
+        logging.warning(f"Failed to check perf_event_paranoid: {e}")
 
 
 # enable nmi watchdog
@@ -183,15 +212,19 @@ def set_perf_event_mux_interval(reset, interval_ms, mux_interval):
         if os.path.isdir(dirpath):
             muxfile = os.path.join(dirpath, "perf_event_mux_interval_ms")
             if os.path.isfile(muxfile):
-                with open(muxfile, "w") as f_mux:
-                    val = 0
-                    if reset:
-                        val = int(mux_interval[f])
-                    else:
-                        if int(mux_interval[f]):
-                            val = int(interval_ms)
-                    if val:
-                        f_mux.write(str(val))
+                try:
+                    with open(muxfile, "w") as f_mux:
+                        val = 0
+                        if reset:
+                            val = int(mux_interval[f])
+                        else:
+                            if int(mux_interval[f]):
+                                val = int(interval_ms)
+                        if val:
+                            f_mux.write(str(val))
+                except OSError as e:
+                    logging.warning(f"Failed to write mux interval: {e}")
+                    break
 
 
 # get linux kernel version
@@ -399,3 +432,13 @@ def get_cgroups(cid):
     for c in cgroups:
         logging.info("attaching to cgroup: " + c)
     return cgroups
+
+
+# return the last field of the dmesg line after 'Intel PMU driver', or None if the command fails
+def get_pmu_driver_version():
+    command = "dmesg | grep -A 1 'Intel PMU driver' | tail -1 | awk '{print $NF}'"
+    try:
+        return subprocess.check_output(command, shell=True).decode().strip()
+    except subprocess.CalledProcessError as e:
+        logging.warning(f"Failed to get PMU driver version: {e}")
+        return None
diff --git a/src/prepare_perf_events.py b/src/prepare_perf_events.py
index c236645..f839186 100644
--- a/src/prepare_perf_events.py
+++ b/src/prepare_perf_events.py
@@ -43,15 +43,13 @@ def expand_unc(line):
     line = line.strip()
     name = line.split("/")[0]
     unc_name = "uncore_" + name
-    unc_count = 0
-    sys_devs = helper.get_sys_devices()
-    if unc_name in sys_devs:
-        unc_count = int(sys_devs[unc_name])
+    ids = helper.get_device_ids(unc_name + "_")
+    unc_count = len(ids)
     if unc_count > 1:
-        line = line.replace(name, unc_name + "_0")
+        line = line.replace(name, unc_name + "_" + str(ids[0]))
         if "name=" in line:
             prettyname = (line.split("'"))[1].strip()
-            line = line.replace(prettyname, prettyname + ".0")
+            line = line.replace(prettyname, prettyname + "." + str(ids[0]))
     return line, unc_count
 
 
@@ -63,8 +61,8 @@ def is_cpu_event(line):
     if (
         (len(tmp_list) == 1 or tmp_list[0] == "cpu" or tmp_list[0].startswith("cstate"))
         and "OCR." not in line
-        and "uops_retired.ms" not in line
-        and "int_misc.unknown_branch_cycles" not in line
+        and "uops_retired.ms" not in line.lower()
+        and "int_misc.unknown_branch_cycles" not in line.lower()
         and "power/" not in line
     ):
         return True
@@ -74,7 +72,7 @@ def is_cpu_event(line):
 # enumerate uncore events across all devices
 def enumerate_uncore(group, pattern, count):
     uncore_group = ""
-    ids = helper.get_channel_ids(pattern)
+    ids = helper.get_device_ids(pattern)
     for i in range(count - 1):
         old = pattern + str(ids[i])
         new = pattern + str(ids[i + 1])
@@ -109,34 +107,40 @@ def get_cgroup_events_format(cgroups, events, num_events):
     return perf_format
 
 
-def filter_events(event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles):
+def filter_events(
+    event_file,
+    cpu_only,
+    supports_tma_fixed_events,
+    supports_uncore_events,
+    supports_ref_cycles,
+):
     if not os.path.isfile(event_file):
         crash("event file not found")
     collection_events = []
     unsupported_events = []
     perf_list = helper.get_perf_list()
-    seperate_cycles = []
-    if in_vm:
-        # since most CSP's hide cycles fixed PMU inside their VM's we put it in its own group
-        if supports_ref_cycles:
-            seperate_cycles = [
-                "cpu-cycles,",
-                "cpu-cycles:k,",
-                "ref-cycles,",
-                "instructions;",
-            ]
-        else:
-            seperate_cycles = [
-                "cpu-cycles,",
-                "cpu-cycles:k,",
-                "instructions;",
-            ]
+    # seperate_cycles = []
+    # if not supports_uncore_events:
+    #     # since most CSP's hide cycles fixed PMU inside their VM's we put it in its own group
+    #     if supports_ref_cycles:
+    #         seperate_cycles = [
+    #             "cpu-cycles,",
+    #             "cpu-cycles:k,",
+    #             "ref-cycles,",
+    #             "instructions;",
+    #         ]
+    #     else:
+    #         seperate_cycles = [
+    #             "cpu-cycles,",
+    #             "cpu-cycles:k,",
+    #             "instructions;",
+    #         ]
 
     def process(line):
         line = line.strip()
         if line == "" or line.startswith("#") or (cpu_only and not is_cpu_event(line)):
             return
-        if not TMA_supported and (
+        if not supports_tma_fixed_events and (
             "name='TOPDOWN.SLOTS'" in line or "name='PERF_METRICS." in line
         ):
             return
@@ -152,13 +156,13 @@ def process(line):
 
     with open(event_file, "r") as fin:
         for line in fin:
-            if in_vm and "cpu-cycles" in line:
-                continue
+            # if in_vm and "cpu-cycles" in line:
+            #     continue
             if not supports_ref_cycles and "ref-cycles" in line:
                 continue
             process(line)
-        for line in seperate_cycles:
-            process(line)
+        # for line in seperate_cycles:
+        #     process(line)
         if len(unsupported_events) > 0:
             logging.warning(
                 f"Perf unsupported events not counted: {unsupported_events}"
@@ -167,7 +171,11 @@ def process(line):
 
 
 def prepare_perf_events(
-    event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles
+    event_file,
+    cpu_only,
+    supports_tma_fixed_events,
+    supports_uncore_events,
+    supports_ref_cycles,
 ):
     start_group = "'{"
     end_group = "}'"
@@ -176,7 +184,11 @@ def prepare_perf_events(
     new_group = True
 
     collection_events, unsupported_events = filter_events(
-        event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles
+        event_file,
+        cpu_only,
+        supports_tma_fixed_events,
+        supports_uncore_events,
+        supports_ref_cycles,
     )
     core_event = []
     uncore_event = []