diff --git a/.github/workflows/get-resctrl-and-perf-info.yaml b/.github/workflows/get-resctrl-and-perf-info.yaml new file mode 100644 index 0000000..5525e87 --- /dev/null +++ b/.github/workflows/get-resctrl-and-perf-info.yaml @@ -0,0 +1,123 @@ +name: Get Resctrl and Perf info +on: workflow_dispatch # Manual trigger for testing + +# Add permissions needed for OIDC authentication +permissions: + id-token: write # Required for requesting the JWT + +jobs: + start-runner: + name: Start EC2 runner + runs-on: ubuntu-latest + outputs: + label: ${{ steps.start-ec2-runner.outputs.label }} + ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} + steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + aws-region: ${{ secrets.AWS_REGION }} + role-session-name: github-runner-session + + - name: Start EC2 runner + id: start-ec2-runner + uses: machulav/ec2-github-runner@v2.3.8 + with: + mode: start + github-token: ${{ secrets.REPO_ADMIN_TOKEN }} + ec2-image-id: ami-0cb91c7de36eed2cb # Ubuntu Server 24.04 LTS (HVM), SSD Volume Type + ec2-instance-type: m7i.metal-24xl + market-type: spot + subnet-id: ${{ secrets.AWS_SUBNET_ID }} + security-group-id: ${{ secrets.AWS_SECURITY_GROUP_ID }} + pre-runner-script: | + sudo yum update -y && \ + sudo yum install docker git libicu -y + sudo systemctl enable docker + aws-resource-tags: > + [ + {"Key": "Name", "Value": "github-runner"}, + {"Key": "Repository", "Value": "${{ github.repository }}"}, + {"Key": "Workflow", "Value": "${{ github.workflow }}"}, + {"Key": "RunId", "Value": "${{ github.run_id }}"}, + {"Key": "RunNumber", "Value": "${{ github.run_number }}"}, + {"Key": "SHA", "Value": "${{ github.sha }}"}, + {"Key": "Branch", "Value": "${{ github.ref_name }}"}, + {"Key": "Actor", "Value": "${{ github.actor }}"} + ] + + do-job: + needs: start-runner + runs-on: ${{ needs.start-runner.outputs.label }} + steps: + - name: List perf counters + run: | + perf list + - name: List perf - detailed + run: | + perf list --long-desc --details || true + - name: Check perf paranoid + run: | + ls /proc/sys/kernel/perf_event_paranoid + cat /proc/sys/kernel/perf_event_paranoid + - name: Check RDT Capabilities + run: | + sudo mkdir -p /sys/fs/resctrl + sudo mount -t resctrl resctrl /sys/fs/resctrl || true + + echo "Mounting resctrl filesystem" + mount | grep resctrl || true + + echo "Checking RDT capabilities" + ls /sys/fs/resctrl/info || true + + echo "Monitoring features:" + cat /sys/fs/resctrl/info/L3_MON/mon_features || true + + echo "Number of available RMIDs:" + cat /sys/fs/resctrl/info/L3_MON/num_rmids || true + + echo "Number of CAT classes:" + cat /sys/fs/resctrl/info/L3/num_closids || true + + echo "CPU RDT features:" + grep -E "cat_l3|cdp_l3|cqm_occup_llc|cqm_mbm_total|cqm_mbm_local" /proc/cpuinfo || true + + # we do not unmount, maybe mounting affects the intel_cqm checks below + #sudo umount /sys/fs/resctrl || true + - name: Check intel_cqm + run: | + echo "*** Listing /sys/devices/intel_cqm" + ls -la /sys/devices/intel_cqm || true + echo "*** Traversing /sys/devices/intel_cqm/events" + find /sys/devices/intel_cqm/events || true + echo "checking type" + cat /sys/devices/intel_cqm/type || true + echo "reading llc_occupancy" + cat /sys/devices/intel_cqm/events/llc_occupancy || true + cat /sys/devices/intel_cqm/events/llc_occupancy.scale || true + - name: Power off + run: | + shutdown --poweroff now + + stop-runner: + name: Stop EC2 runner + needs: [start-runner, do-job] + runs-on: ubuntu-latest + if: always() # Run even if previous jobs fail + steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + aws-region: ${{ secrets.AWS_REGION }} + role-session-name: github-runner-session + + - name: Stop EC2 runner + uses: machulav/ec2-github-runner@v2.3.8 + with: + mode: stop + github-token: ${{ secrets.REPO_ADMIN_TOKEN }} + label: ${{ needs.start-runner.outputs.label }} + ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}