forked from unvariance/collector
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'unvariance:main' into feature/unvariance#9-resctrl_support_checker
- Loading branch information
Showing
17 changed files
with
1,279 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{
  "name": "Kernel Module Development",
  "dockerFile": "Dockerfile.devcontainer",
  "workspaceFolder": "/workspace",
  "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
  "customizations": {
    "vscode": {
      "extensions": [
        "ms-vscode.cpptools",
        "ms-vscode.makefile-tools",
        "ms-azuretools.vscode-docker",
        "ms-vscode.cmake-tools"
      ]
    }
  },
  "remoteUser": "root"
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,9 @@ on: | |
push: | ||
branches: | ||
- main | ||
paths: | ||
- docs/** | ||
- mkdocs.yml | ||
permissions: | ||
contents: write | ||
jobs: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
# Workflow identity, trigger and token permissions.
name: Get Resctrl and Perf info

# Started by hand only (manual trigger for testing).
on: [workflow_dispatch]

# Permissions needed for OIDC authentication.
permissions:
  # Required for requesting the JWT.
  id-token: write
|
||
jobs: | ||
start-runner:
  # Assumes an AWS role, then starts a spot EC2 instance through the
  # ec2-github-runner action. The `label` and `ec2-instance-id` outputs are
  # what the later jobs use to target and to stop that instance.
  name: Start EC2 runner
  runs-on: ubuntu-latest
  outputs:
    label: ${{ steps.start-ec2-runner.outputs.label }}
    ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
        aws-region: ${{ secrets.AWS_REGION }}
        role-session-name: github-runner-session

    - name: Start EC2 runner
      id: start-ec2-runner
      # NOTE(review): this reference was garbled to `machulav/[email protected]` by
      # e-mail obfuscation; the `@v2.3.8` tag is reconstructed - confirm it.
      uses: machulav/ec2-github-runner@v2.3.8
      with:
        mode: start
        github-token: ${{ secrets.REPO_ADMIN_TOKEN }}
        ec2-image-id: ami-0cb91c7de36eed2cb  # Ubuntu Server 24.04 LTS (HVM), SSD Volume Type
        ec2-instance-type: m7i.metal-24xl
        market-type: spot
        subnet-id: ${{ secrets.AWS_SUBNET_ID }}
        security-group-id: ${{ secrets.AWS_SECURITY_GROUP_ID }}
        # The image is Ubuntu, so packages come from apt rather than yum.
        # Ubuntu has no unversioned `libicu` package; `libicu-dev` pulls in
        # the release's runtime ICU library.
        pre-runner-script: |
          sudo apt-get update -y && \
          sudo apt-get install -y docker.io git libicu-dev
          sudo systemctl enable docker
        aws-resource-tags: >
          [
            {"Key": "Name", "Value": "github-runner"},
            {"Key": "Repository", "Value": "${{ github.repository }}"},
            {"Key": "Workflow", "Value": "${{ github.workflow }}"},
            {"Key": "RunId", "Value": "${{ github.run_id }}"},
            {"Key": "RunNumber", "Value": "${{ github.run_number }}"},
            {"Key": "SHA", "Value": "${{ github.sha }}"},
            {"Key": "Branch", "Value": "${{ github.ref_name }}"},
            {"Key": "Actor", "Value": "${{ github.actor }}"}
          ]
do-job:
  # Runs on the runner labelled by start-runner and prints perf, resctrl and
  # intel_cqm information to the job log. Most probes end in `|| true` so a
  # missing file or feature does not abort the step.
  needs: start-runner
  runs-on: ${{ needs.start-runner.outputs.label }}
  steps:
    - name: List perf counters
      run: |
        perf list
    - name: List perf - detailed
      run: |
        perf list --long-desc --details || true
    - name: Check perf paranoid
      run: |
        ls /proc/sys/kernel/perf_event_paranoid
        cat /proc/sys/kernel/perf_event_paranoid
    - name: Check RDT Capabilities
      run: |
        # Announce the mount before attempting it so the log reads in order.
        echo "Mounting resctrl filesystem"
        sudo mkdir -p /sys/fs/resctrl
        sudo mount -t resctrl resctrl /sys/fs/resctrl || true
        mount | grep resctrl || true
        echo "Checking RDT capabilities"
        ls /sys/fs/resctrl/info || true
        echo "Monitoring features:"
        cat /sys/fs/resctrl/info/L3_MON/mon_features || true
        echo "Number of available RMIDs:"
        cat /sys/fs/resctrl/info/L3_MON/num_rmids || true
        echo "Number of CAT classes:"
        cat /sys/fs/resctrl/info/L3/num_closids || true
        echo "CPU RDT features:"
        grep -E "cat_l3|cdp_l3|cqm_occup_llc|cqm_mbm_total|cqm_mbm_local" /proc/cpuinfo || true
        # we do not unmount, maybe mounting affects the intel_cqm checks below
        #sudo umount /sys/fs/resctrl || true
    - name: Check intel_cqm
      run: |
        echo "*** Listing /sys/devices/intel_cqm"
        ls -la /sys/devices/intel_cqm || true
        echo "*** Traversing /sys/devices/intel_cqm/events"
        find /sys/devices/intel_cqm/events || true
        echo "checking type"
        cat /sys/devices/intel_cqm/type || true
        echo "reading llc_occupancy"
        cat /sys/devices/intel_cqm/events/llc_occupancy || true
        cat /sys/devices/intel_cqm/events/llc_occupancy.scale || true
    - name: Power off
      run: |
        # sudo for consistency with the other privileged commands in this job.
        sudo shutdown --poweroff now
stop-runner:
  # Stops the EC2 runner identified by start-runner's outputs.
  name: Stop EC2 runner
  needs: [start-runner, do-job]
  runs-on: ubuntu-latest
  if: always()  # Run even if previous jobs fail
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
        aws-region: ${{ secrets.AWS_REGION }}
        role-session-name: github-runner-session

    - name: Stop EC2 runner
      # NOTE(review): this reference was garbled to `machulav/[email protected]` by
      # e-mail obfuscation; the `@v2.3.8` tag is reconstructed - confirm it.
      uses: machulav/ec2-github-runner@v2.3.8
      with:
        mode: stop
        github-token: ${{ secrets.REPO_ADMIN_TOKEN }}
        label: ${{ needs.start-runner.outputs.label }}
        ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,229 @@ | ||
# Workflow identity, triggers and token permissions.
name: Test Kernel Module
on:
  workflow_dispatch:  # Manual trigger for testing
    inputs:
      instance-type:
        description: 'EC2 instance type to use'
        required: false
        default: 'm7i.metal-24xl'
        type: string
  push:
    branches:
      - main
    paths:
      - 'module/**'

permissions:
  id-token: write  # Required for requesting the JWT
  # Declaring `permissions` sets every unlisted scope to none, so the read
  # access that the actions/checkout step relies on is granted explicitly.
  contents: read
|
||
jobs: | ||
start-runner:
  # Assumes an AWS role, then starts a spot EC2 instance through the
  # ec2-github-runner action. The `label` and `ec2-instance-id` outputs are
  # what the later jobs use to target and to stop that instance.
  name: Start EC2 runner
  runs-on: ubuntu-latest
  outputs:
    label: ${{ steps.start-ec2-runner.outputs.label }}
    ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
        aws-region: ${{ secrets.AWS_REGION }}
        role-session-name: github-runner-session

    - name: Start EC2 runner
      id: start-ec2-runner
      # NOTE(review): this reference was garbled to `machulav/[email protected]` by
      # e-mail obfuscation; the `@v2.3.8` tag is reconstructed - confirm it.
      uses: machulav/ec2-github-runner@v2.3.8
      with:
        mode: start
        github-token: ${{ secrets.REPO_ADMIN_TOKEN }}
        ec2-image-id: ami-0884d2865dbe9de4b  # Ubuntu 22.04 LTS in us-east-2
        # `inputs` is empty on push events, so fall back to the default type.
        ec2-instance-type: ${{ inputs.instance-type || 'm7i.metal-24xl' }}
        market-type: spot
        subnet-id: ${{ secrets.AWS_SUBNET_ID }}
        security-group-id: ${{ secrets.AWS_SECURITY_GROUP_ID }}
        aws-resource-tags: >
          [
            {"Key": "Name", "Value": "github-runner"},
            {"Key": "Repository", "Value": "${{ github.repository }}"},
            {"Key": "Workflow", "Value": "${{ github.workflow }}"},
            {"Key": "RunId", "Value": "${{ github.run_id }}"},
            {"Key": "RunNumber", "Value": "${{ github.run_number }}"},
            {"Key": "SHA", "Value": "${{ github.sha }}"},
            {"Key": "Branch", "Value": "${{ github.ref_name }}"},
            {"Key": "Actor", "Value": "${{ github.actor }}"}
          ]
test-module:
  # Runs on the runner labelled by start-runner: builds the kernel module in
  # `module/`, loads and unloads it once while checking lsmod/dmesg, then runs
  # test_module.sh ten times in a row.
  needs: start-runner
  runs-on: ${{ needs.start-runner.outputs.label }}
  # Timeout in case the system hangs.
  # NOTE(review): two minutes has to cover the apt installs, the build and all
  # ten test-script runs - confirm this is enough.
  timeout-minutes: 2
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Disable IPv6
      run: |
        # Disable IPv6 via sysctl
        sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1
        sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1
        sudo sysctl -w net.ipv6.conf.lo.disable_ipv6=1
        # Force apt to use IPv4
        echo 'Acquire::ForceIPv4 "true";' | sudo tee /etc/apt/apt.conf.d/99force-ipv4
    - name: Configure apt to use HTTPS
      run: |
        # Update all archive URLs to use HTTPS
        sudo sed -i 's/http:/https:/g' /etc/apt/sources.list
        # Install apt-transport-https (might fail initially, hence the || true)
        sudo apt-get update || true
        sudo apt-get install -y apt-transport-https ca-certificates
        # Update again with HTTPS now configured
        sudo apt-get update
    - name: Install build dependencies
      run: |
        # Install base dependencies
        sudo apt-get install -y build-essential linux-headers-$(uname -r)
    - name: Build kernel module
      working-directory: module
      run: |
        # Try to compile and capture the warning message
        make 2>&1 | tee compile_output.txt || true
        # Extract gcc version from the warning message
        KERNEL_GCC_VERSION=$(grep "The kernel was built by:" compile_output.txt | grep -oP 'gcc-\K\d+' || echo "")
        echo "Detected kernel compiler version: ${KERNEL_GCC_VERSION}"
        # Install specific gcc version if detected
        if [ -n "$KERNEL_GCC_VERSION" ]; then
          echo "Installing gcc-${KERNEL_GCC_VERSION}"
          sudo apt-get install -y gcc-${KERNEL_GCC_VERSION}
          # Configure as default gcc
          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${KERNEL_GCC_VERSION} 100
          sudo update-alternatives --set gcc /usr/bin/gcc-${KERNEL_GCC_VERSION}
        else
          echo "Warning: Could not detect kernel compiler version"
        fi
        # Verify gcc version
        gcc --version
        # Now try the actual build
        make
        ls -l build/collector.ko
    - name: Check RDT Capabilities
      run: |
        # Announce the mount before attempting it so the log reads in order.
        echo "Mounting resctrl filesystem"
        sudo mkdir -p /sys/fs/resctrl || true
        sudo mount -t resctrl resctrl /sys/fs/resctrl || true
        mount | grep resctrl || true
        echo "Checking RDT capabilities"
        ls /sys/fs/resctrl/info || true
        echo "Monitoring features:"
        cat /sys/fs/resctrl/info/L3_MON/mon_features || true
        echo "Number of available RMIDs:"
        cat /sys/fs/resctrl/info/L3_MON/num_rmids || true
        echo "Number of CAT classes:"
        cat /sys/fs/resctrl/info/L3/num_closids || true
        echo "head -n 35 /proc/cpuinfo:"
        head -n 35 /proc/cpuinfo || true
        echo "CPU RDT features (head):"
        grep -E "cat_l3|cdp_l3|cqm_occup_llc|cqm_mbm_total|cqm_mbm_local" /proc/cpuinfo | head || true
        # we do not unmount, maybe mounting affects the intel_cqm checks below
        #sudo umount /sys/fs/resctrl || true
    - name: Load and test module
      id: load-and-test-module
      # A failure here is picked up by the next step, which prints dmesg.
      continue-on-error: true
      working-directory: module
      run: |
        # Check undefined symbols
        sudo modinfo -F depends build/collector.ko
        sudo objdump -d build/collector.ko | grep undefined || true
        # Load module
        echo "insmod build/collector.ko:"
        sudo insmod build/collector.ko
        # Verify module is loaded
        echo "lsmod | grep collector:"
        lsmod | grep collector
        # Check kernel logs for module initialization.
        # `dmesg -c` clears the ring buffer; run it with sudo (as the failure
        # step does) so a permission error is not hidden by `|| true`.
        echo "dmesg | grep 'Memory Collector':"
        sudo dmesg -c | grep "Memory Collector" || true
        # Unload module
        echo "rmmod collector:"
        sudo rmmod collector
        # Verify module unloaded successfully
        echo "lsmod | grep collector:"
        ! lsmod | grep collector
        if lsmod | grep -q collector; then
          echo "Error: Module still loaded"
          exit 1
        fi
        # Check kernel logs for cleanup message
        echo "dmesg | grep 'Memory Collector':"
        sudo dmesg -c | grep "Memory Collector" || true
    - name: Check dmesg on failure
      if: steps.load-and-test-module.outcome == 'failure'
      run: |
        echo "load and test module failed, showing last kernel messages:"
        sudo dmesg | tail -n 100
        exit 1
    - name: Install trace dependencies
      run: |
        sudo apt-get install -y trace-cmd
    - name: Run module test script
      working-directory: module
      run: |
        # run 10 times in quick succession to stress-test insmod/rmmod and collector
        for i in {1..10}; do
          echo "*** Run $i:"
          ./test_module.sh
        done
stop-runner:
  # Stops the EC2 runner identified by start-runner's outputs.
  name: Stop EC2 runner
  needs: [start-runner, test-module]
  runs-on: ubuntu-latest
  if: always()  # Run even if previous jobs fail
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
        aws-region: ${{ secrets.AWS_REGION }}
        role-session-name: github-runner-session

    - name: Stop EC2 runner
      # NOTE(review): this reference was garbled to `machulav/[email protected]` by
      # e-mail obfuscation; the `@v2.3.8` tag is reconstructed - confirm it.
      uses: machulav/ec2-github-runner@v2.3.8
      with:
        mode: stop
        github-token: ${{ secrets.REPO_ADMIN_TOKEN }}
        label: ${{ needs.start-runner.outputs.label }}
        ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
Oops, something went wrong.