From c83ac3909ea239f9f0f746a422ab92bf7b81f72a Mon Sep 17 00:00:00 2001
From: Jonathan Perry
Date: Mon, 27 Jan 2025 17:12:47 -0600
Subject: [PATCH 1/8] add action to list resctrl and perf support

---
# Review notes. Text between the "---" line and the first "diff --git" line is
# commentary for the reader, not part of the change itself.
# - Adds a manually triggered (workflow_dispatch) workflow with three jobs:
#   start-runner starts an EC2 spot instance through machulav/ec2-github-runner
#   and exposes its label and instance id as outputs; do-job runs perf and
#   intel_cqm probes on that label; stop-runner (if: always()) calls the same
#   action with mode: stop.
# - NOTE(review): the ec2-image-id comment says "Ubuntu Server 24.04 LTS", but
#   pre-runner-script calls yum. Ubuntu images normally provide apt-get rather
#   than yum, so confirm which distribution this AMI really is and make the
#   comment and the package commands agree.
# - NOTE(review): do-job runs perf, but pre-runner-script only installs docker,
#   git and libicu. Presumably the image already provides perf; verify.
# - "perf list --log-desc" here is corrected to "--long-desc" by patch 5/8.
 .../workflows/get-resctrl-and-perf-info.yaml  | 95 +++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 .github/workflows/get-resctrl-and-perf-info.yaml

diff --git a/.github/workflows/get-resctrl-and-perf-info.yaml b/.github/workflows/get-resctrl-and-perf-info.yaml
new file mode 100644
index 0000000..361fad6
--- /dev/null
+++ b/.github/workflows/get-resctrl-and-perf-info.yaml
@@ -0,0 +1,95 @@
+name: Get Resctrl and Perf info
+on: workflow_dispatch  # Manual trigger for testing
+
+# Add permissions needed for OIDC authentication
+permissions:
+  id-token: write  # Required for requesting the JWT
+
+jobs:
+  start-runner:
+    name: Start EC2 runner
+    runs-on: ubuntu-latest
+    outputs:
+      label: ${{ steps.start-ec2-runner.outputs.label }}
+      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          role-session-name: github-runner-session
+
+      - name: Start EC2 runner
+        id: start-ec2-runner
+        uses: machulav/ec2-github-runner@v2.3.8
+        with:
+          mode: start
+          github-token: ${{ secrets.REPO_ADMIN_TOKEN }}
+          ec2-image-id: ami-0cb91c7de36eed2cb  # Ubuntu Server 24.04 LTS (HVM), SSD Volume Type
+          ec2-instance-type: c7i.metal-24xl
+          market-type: spot
+          subnet-id: ${{ secrets.AWS_SUBNET_ID }}
+          security-group-id: ${{ secrets.AWS_SECURITY_GROUP_ID }}
+          pre-runner-script: |
+            sudo yum update -y && \
+            sudo yum install docker git libicu -y
+            sudo systemctl enable docker
+          aws-resource-tags: >
+            [
+              {"Key": "Name", "Value": "github-runner"},
+              {"Key": "Repository", "Value": "${{ github.repository }}"},
+              {"Key": "Workflow", "Value": "${{ github.workflow }}"},
+              {"Key": "RunId", "Value": "${{ github.run_id }}"},
+              {"Key": "RunNumber", "Value": "${{ github.run_number }}"},
+              {"Key": "SHA", "Value": "${{ github.sha }}"},
+              {"Key": "Branch", "Value": "${{ github.ref_name }}"},
+              {"Key": "Actor", "Value": "${{ github.actor }}"}
+            ]
+
+  do-job:
+    needs: start-runner
+    runs-on: ${{ needs.start-runner.outputs.label }}
+    steps:
+      - name: List perf counters
+        run: |
+          perf list
+      - name: List perf - detailed
+        run: |
+          perf list --log-desc --details
+      - name: Check perf paranoid
+        run: |
+          ls /proc/sys/kernel/perf_event_paranoid
+          cat /proc/sys/kernel/perf_event_paranoid
+      - name: Check intel_cqm
+        run: |
+          echo "*** Listing /sys/devices/intel_cqm"
+          ls -la /sys/devices/intel_cqm || true
+          echo "*** Traversing /sys/devices/intel_cqm/events"
+          find /sys/devices/intel_cqm/events || true
+          echo "checking type"
+          cat /sys/devices/intel_cqm/type || true
+          echo "reading llc_occupancy"
+          cat /sys/devices/intel_cqm/events/llc_occupancy || true
+          cat /sys/devices/intel_cqm/events/llc_occupancy.scale || true
+
+  stop-runner:
+    name: Stop EC2 runner
+    needs: [start-runner, do-job]
+    runs-on: ubuntu-latest
+    if: always()  # Run even if previous jobs fail
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          role-session-name: github-runner-session
+
+      - name: Stop EC2 runner
+        uses: machulav/ec2-github-runner@v2.3.8
+        with:
+          mode: stop
+          github-token: ${{ secrets.REPO_ADMIN_TOKEN }}
+          label: ${{ needs.start-runner.outputs.label }}
+          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}

From 42c356ca2603a8f1f05f8e78f95b2bcc3f08b349 Mon Sep 17 00:00:00 2001
From: Jonathan Perry
Date: Mon, 27 Jan 2025 17:21:03 -0600
Subject: [PATCH 2/8] no capacity - try not specifying subnet ID (probably won't work)

---
# Review notes:
# - Comments out the subnet-id input of the "Start EC2 runner" step; nothing
#   else changes. The subject describes it as an experiment after a capacity
#   error that was not expected to succeed.
# - Reverted in full by patch 3/8.
 .github/workflows/get-resctrl-and-perf-info.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/get-resctrl-and-perf-info.yaml b/.github/workflows/get-resctrl-and-perf-info.yaml
index 361fad6..28ad0ac 100644
--- a/.github/workflows/get-resctrl-and-perf-info.yaml
+++ b/.github/workflows/get-resctrl-and-perf-info.yaml
@@ -29,7 +29,7 @@ jobs:
           ec2-image-id: ami-0cb91c7de36eed2cb  # Ubuntu Server 24.04 LTS (HVM), SSD Volume Type
           ec2-instance-type: c7i.metal-24xl
           market-type: spot
-          subnet-id: ${{ secrets.AWS_SUBNET_ID }}
+          # subnet-id: ${{ secrets.AWS_SUBNET_ID }}
           security-group-id: ${{ secrets.AWS_SECURITY_GROUP_ID }}
           pre-runner-script: |
             sudo yum update -y && \

From 1156424a32b78cda6a55a516f1cc415f0f8cd6af Mon Sep 17 00:00:00 2001
From: Jonathan Perry
Date: Mon, 27 Jan 2025 17:44:57 -0600
Subject: [PATCH 3/8] re-add subnet ID

---
# Review notes:
# - Restores the subnet-id input that patch 2/8 commented out. The index line
#   below ends at blob 361fad6, the same blob patch 1/8 created, so patches
#   2/8 and 3/8 together leave the file unchanged.
 .github/workflows/get-resctrl-and-perf-info.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/get-resctrl-and-perf-info.yaml b/.github/workflows/get-resctrl-and-perf-info.yaml
index 28ad0ac..361fad6 100644
--- a/.github/workflows/get-resctrl-and-perf-info.yaml
+++ b/.github/workflows/get-resctrl-and-perf-info.yaml
@@ -29,7 +29,7 @@ jobs:
           ec2-image-id: ami-0cb91c7de36eed2cb  # Ubuntu Server 24.04 LTS (HVM), SSD Volume Type
           ec2-instance-type: c7i.metal-24xl
           market-type: spot
-          # subnet-id: ${{ secrets.AWS_SUBNET_ID }}
+          subnet-id: ${{ secrets.AWS_SUBNET_ID }}
           security-group-id: ${{ secrets.AWS_SECURITY_GROUP_ID }}
           pre-runner-script: |
             sudo yum update -y && \

From d406886f8ff1be6202746052e11c683e3672724d Mon Sep 17 00:00:00 2001
From: Jonathan Perry
Date: Mon, 27 Jan 2025 18:02:03 -0600
Subject: [PATCH 4/8] switch to m7i

---
# Review notes:
# - Changes ec2-instance-type from c7i.metal-24xl to m7i.metal-24xl; no other
#   line is touched. The subject gives no reason for the switch.
 .github/workflows/get-resctrl-and-perf-info.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/get-resctrl-and-perf-info.yaml b/.github/workflows/get-resctrl-and-perf-info.yaml
index 361fad6..c87a345 100644
--- a/.github/workflows/get-resctrl-and-perf-info.yaml
+++ b/.github/workflows/get-resctrl-and-perf-info.yaml
@@ -27,7 +27,7 @@ jobs:
           mode: start
           github-token: ${{ secrets.REPO_ADMIN_TOKEN }}
           ec2-image-id: ami-0cb91c7de36eed2cb  # Ubuntu Server 24.04 LTS (HVM), SSD Volume Type
-          ec2-instance-type: c7i.metal-24xl
+          ec2-instance-type: m7i.metal-24xl
           market-type: spot
           subnet-id: ${{ secrets.AWS_SUBNET_ID }}
           security-group-id: ${{ secrets.AWS_SECURITY_GROUP_ID }}

From ee5f8247fc1695ad995420516bb650f4d685d3f8 Mon Sep 17 00:00:00 2001
From: Jonathan Perry
Date: Mon, 27 Jan 2025 18:04:25 -0600
Subject: [PATCH 5/8] fix detailed perf list

---
# Review notes:
# - Corrects the option spelling from --log-desc to --long-desc and appends
#   "|| true", so a non-zero exit from the detailed listing no longer fails
#   the step.
# - NOTE(review): "|| true" also hides any genuine perf failure in this step;
#   the plain "perf list" step before it is still allowed to fail.
 .github/workflows/get-resctrl-and-perf-info.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/get-resctrl-and-perf-info.yaml b/.github/workflows/get-resctrl-and-perf-info.yaml
index c87a345..0967d60 100644
--- a/.github/workflows/get-resctrl-and-perf-info.yaml
+++ b/.github/workflows/get-resctrl-and-perf-info.yaml
@@ -56,7 +56,7 @@ jobs:
           perf list
       - name: List perf - detailed
         run: |
-          perf list --log-desc --details
+          perf list --long-desc --details || true
       - name: Check perf paranoid
         run: |
           ls /proc/sys/kernel/perf_event_paranoid

From 016cde2482f66f5f2c636278746761b78902e087 Mon Sep 17 00:00:00 2001
From: Jonathan Perry
Date: Mon, 27 Jan 2025 18:14:47 -0600
Subject: [PATCH 6/8] add RDT check

---
# Review notes:
# - Inserts a "Check RDT Capabilities" step between "Check perf paranoid" and
#   "Check intel_cqm". It creates /sys/fs/resctrl, mounts resctrl there, then
#   prints entries under /sys/fs/resctrl/info and matching flags from
#   /proc/cpuinfo.
# - Every mount/ls/cat/grep command ends in "|| true", so a missing file does
#   not fail the step; "sudo mkdir -p" is the only command that can.
# - The mount is left in place on purpose; see the comment at the end of the
#   step.
# - NOTE(review): the "Mounting resctrl filesystem" message is printed after
#   the mount command has already run; it labels the "mount | grep" output.
 .../workflows/get-resctrl-and-perf-info.yaml  | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/.github/workflows/get-resctrl-and-perf-info.yaml b/.github/workflows/get-resctrl-and-perf-info.yaml
index 0967d60..818496d 100644
--- a/.github/workflows/get-resctrl-and-perf-info.yaml
+++ b/.github/workflows/get-resctrl-and-perf-info.yaml
@@ -61,6 +61,28 @@ jobs:
         run: |
           ls /proc/sys/kernel/perf_event_paranoid
           cat /proc/sys/kernel/perf_event_paranoid
+      - name: Check RDT Capabilities
+        run: |
+          sudo mkdir -p /sys/fs/resctrl
+          sudo mount -t resctrl resctrl /sys/fs/resctrl || true
+
+          echo "Mounting resctrl filesystem"
+          mount | grep resctrl || true
+
+          echo "Checking RDT capabilities"
+          ls /sys/fs/resctrl/info || true
+
+          echo "Monitoring features:"
+          cat /sys/fs/resctrl/info/L3_MON/mon_features || true
+
+          echo "Number of CAT classes:"
+          cat /sys/fs/resctrl/info/L3/num_closids || true
+
+          echo "CPU RDT features:"
+          grep -E "cat_l3|cdp_l3|cqm_occup_llc|cqm_mbm_total|cqm_mbm_local" /proc/cpuinfo || true
+
+          # we do not unmount, maybe mounting affects the intel_cqm checks below
+          #sudo umount /sys/fs/resctrl || true
       - name: Check intel_cqm
         run: |
           echo "*** Listing /sys/devices/intel_cqm"

From 16a73316fd0ae14d023f540c430ebd700edb4398 Mon Sep 17 00:00:00 2001
From: Jonathan Perry
Date: Mon, 27 Jan 2025 18:40:56 -0600
Subject: [PATCH 7/8] add shutdown --poweroff

---
# Review notes:
# - Appends a "Power off" step as the last step of do-job; it runs
#   "shutdown --poweroff now" on the runner.
# - NOTE(review): unlike the mkdir/mount commands added in patch 6/8, this
#   command is not run through sudo. Confirm the runner user is allowed to
#   shut the host down, otherwise this step fails the job.
# - NOTE(review): powering the host off from inside the job may cut the runner
#   off before it reports the job result. stop-runner still runs because of
#   "if: always()", but verify the final status of do-job is what you expect.
 .github/workflows/get-resctrl-and-perf-info.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/get-resctrl-and-perf-info.yaml b/.github/workflows/get-resctrl-and-perf-info.yaml
index 818496d..f407f42 100644
--- a/.github/workflows/get-resctrl-and-perf-info.yaml
+++ b/.github/workflows/get-resctrl-and-perf-info.yaml
@@ -94,6 +94,9 @@ jobs:
           echo "reading llc_occupancy"
           cat /sys/devices/intel_cqm/events/llc_occupancy || true
           cat /sys/devices/intel_cqm/events/llc_occupancy.scale || true
+      - name: Power off
+        run: |
+          shutdown --poweroff now
 
   stop-runner:
     name: Stop EC2 runner

From 029fa156caa85ae65644fe624ae4670d33597030 Mon Sep 17 00:00:00 2001
From: Jonathan Perry
Date: Mon, 27 Jan 2025 18:52:15 -0600
Subject: [PATCH 8/8] add query for number of RMIDs

---
# Review notes:
# - Extends the "Check RDT Capabilities" step from patch 6/8: prints
#   /sys/fs/resctrl/info/L3_MON/num_rmids between the monitoring-features and
#   CAT-classes probes, with the same "|| true" guard as its neighbours.
 .github/workflows/get-resctrl-and-perf-info.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/get-resctrl-and-perf-info.yaml b/.github/workflows/get-resctrl-and-perf-info.yaml
index f407f42..5525e87 100644
--- a/.github/workflows/get-resctrl-and-perf-info.yaml
+++ b/.github/workflows/get-resctrl-and-perf-info.yaml
@@ -75,6 +75,9 @@ jobs:
           echo "Monitoring features:"
           cat /sys/fs/resctrl/info/L3_MON/mon_features || true
 
+          echo "Number of available RMIDs:"
+          cat /sys/fs/resctrl/info/L3_MON/num_rmids || true
+
           echo "Number of CAT classes:"
           cat /sys/fs/resctrl/info/L3/num_closids || true
 