# Workflow run title from the page this copy appears to have been taken from:
#   Merge pull request #92 from yonch/main #12
# GitHub notice: this file contains bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open
# the file in an editor that reveals hidden Unicode characters.
# Workflow identity and triggers.
# Runs on demand (with an optional instance type and benchmark switch) and on
# pushes to main that touch the kernel module or this workflow file.
name: Test Kernel Module

on:
  workflow_dispatch:  # Manual trigger for testing
    inputs:
      instance-type:
        description: 'EC2 instance type to use'
        required: false
        default: 'm7i.metal-24xl'
        type: string
      run-benchmarks:
        description: 'Run sync timer benchmarks'
        required: false
        default: false
        type: boolean
  push:
    branches:
      - main
    paths:
      - module/**
      - .github/workflows/test-kernel-module.yaml
# Token scopes for every job in this workflow. Listing any scope explicitly
# sets all unlisted scopes to "none", so each scope a job needs must appear here.
permissions:
  id-token: write  # Required for requesting the JWT
  contents: read  # Lets actions/checkout fetch the repository
# Three jobs run in sequence:
#   start-runner  - launches an EC2 instance registered as a self-hosted runner
#   test-module   - builds the kernel module on that runner and tests it
#   stop-runner   - terminates the instance, even when earlier jobs failed
jobs:
  start-runner:
    name: Start EC2 runner
    runs-on: ubuntu-latest
    # Consumed by test-module (runs-on label) and stop-runner (label + instance id).
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: ${{ secrets.AWS_REGION }}
          role-session-name: github-runner-session

      - name: Start EC2 runner
        id: start-ec2-runner
        # NOTE(review): the version tag was destroyed by e-mail obfuscation in
        # the copy this was restored from; "v2" is the action's major tag.
        # Confirm and re-pin the exact release that was used originally.
        uses: machulav/ec2-github-runner@v2
        with:
          mode: start
          github-token: ${{ secrets.REPO_ADMIN_TOKEN }}
          ec2-image-id: ami-0884d2865dbe9de4b  # Ubuntu 22.04 LTS in us-east-2
          # Push-triggered runs have no inputs, so they fall back to m7i.xlarge.
          ec2-instance-type: ${{ inputs.instance-type || 'm7i.xlarge' }}  # m7i.metal-24xl for RDT, c5.9xlarge for perf support
          market-type: spot
          subnet-id: ${{ secrets.AWS_SUBNET_ID }}
          security-group-id: ${{ secrets.AWS_SECURITY_GROUP_ID }}
          aws-resource-tags: >
            [
              {"Key": "Name", "Value": "github-runner"},
              {"Key": "Repository", "Value": "${{ github.repository }}"},
              {"Key": "Workflow", "Value": "${{ github.workflow }}"},
              {"Key": "RunId", "Value": "${{ github.run_id }}"},
              {"Key": "RunNumber", "Value": "${{ github.run_number }}"},
              {"Key": "SHA", "Value": "${{ github.sha }}"},
              {"Key": "Branch", "Value": "${{ github.ref_name }}"},
              {"Key": "Actor", "Value": "${{ github.actor }}"}
            ]

  test-module:
    needs: start-runner
    runs-on: ${{ needs.start-runner.outputs.label }}
    # 5 minutes when benchmarks were requested, otherwise 2.
    timeout-minutes: ${{ inputs.run-benchmarks == true && 5 || 2 }}  # Add timeout in case system hangs
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Disable IPv6
        run: |
          # Disable IPv6 via sysctl
          sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1
          sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1
          sudo sysctl -w net.ipv6.conf.lo.disable_ipv6=1
          # Force apt to use IPv4
          echo 'Acquire::ForceIPv4 "true";' | sudo tee /etc/apt/apt.conf.d/99force-ipv4

      - name: Configure apt to use HTTPS
        run: |
          # Update all archive URLs to use HTTPS
          sudo sed -i 's/http:/https:/g' /etc/apt/sources.list
          # Install apt-transport-https (might fail initially, hence the || true)
          sudo apt-get update || true
          sudo apt-get install -y apt-transport-https ca-certificates
          # Update again with HTTPS now configured
          sudo apt-get update

      - name: Install build dependencies
        run: |
          # Install base dependencies
          sudo apt-get install -y build-essential linux-headers-$(uname -r)

      # Two-pass build: the first make is only used to read which gcc version
      # built the running kernel, so the matching compiler can be installed
      # before the real build.
      - name: Build kernel module
        working-directory: module
        run: |
          # Try to compile and capture the warning message
          make 2>&1 | tee compile_output.txt || true
          # Extract gcc version from the warning message
          KERNEL_GCC_VERSION=$(grep "The kernel was built by:" compile_output.txt | grep -oP 'gcc-\K\d+' || echo "")
          echo "Detected kernel compiler version: ${KERNEL_GCC_VERSION}"
          # Install specific gcc version if detected
          if [ -n "$KERNEL_GCC_VERSION" ]; then
            echo "Installing gcc-${KERNEL_GCC_VERSION}"
            sudo apt-get install -y gcc-${KERNEL_GCC_VERSION}
            # Configure as default gcc
            sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${KERNEL_GCC_VERSION} 100
            sudo update-alternatives --set gcc /usr/bin/gcc-${KERNEL_GCC_VERSION}
          else
            echo "Warning: Could not detect kernel compiler version"
          fi
          # Verify gcc version
          gcc --version
          # Now try the actual build
          make
          ls -l build/collector.ko

      # The three unit-test steps below use continue-on-error so that every
      # suite runs; the matching "Check dmesg on ... failure" steps further
      # down turn a recorded failure into a job failure.
      # dmesg is read through sudo so the pass/fail grep cannot silently see an
      # empty log when unprivileged access to the kernel log is restricted.
      # NOTE(review): each step greps the whole kernel log for ":fail", so a
      # failure line left by an earlier suite would also fail later suites
      # unless the test scripts clear the log - confirm.
      - name: Run RMID allocator tests
        id: rmid-allocator-test
        continue-on-error: true
        working-directory: module
        run: |
          echo "Running RMID allocator unit tests..."
          chmod +x test_rmid_allocator.sh
          ./test_rmid_allocator.sh
          # Check test results from dmesg
          echo "Test output from dmesg:"
          sudo dmesg | grep "rmid_allocator_test" || true
          sudo dmesg | grep "test_result:" || true
          # Fail if any test failed
          if sudo dmesg | grep -q "test_result:.*:fail"; then
            echo "RMID allocator tests failed"
            exit 1
          fi

      - name: Run procfs tests
        id: procfs-test
        continue-on-error: true
        working-directory: module
        run: |
          echo "Running procfs unit tests..."
          chmod +x test_procfs.sh
          ./test_procfs.sh
          # Check test results from dmesg
          echo "Test output from dmesg:"
          sudo dmesg | grep "procfs_test" || true
          sudo dmesg | grep "test_result:" || true
          # Fail if any test failed
          if sudo dmesg | grep -q "test_result:.*:fail"; then
            echo "procfs tests failed"
            exit 1
          fi

      - name: Run sync timer tests
        id: sync-timer-test
        continue-on-error: true
        working-directory: module
        run: |
          echo "Running sync timer unit tests..."
          chmod +x test_sync_timer.sh
          ./test_sync_timer.sh
          # Check test results from dmesg
          echo "Test output from dmesg:"
          sudo dmesg | grep "sync_timer_test" || true
          sudo dmesg | grep "test_result:" || true
          # Fail if any test failed
          if sudo dmesg | grep -q "test_result:.*:fail"; then
            echo "sync timer tests failed"
            exit 1
          fi

      # Benchmark steps only run when the manual run-benchmarks input is true.
      - name: Install stress tools
        if: ${{ inputs.run-benchmarks }}
        run: |
          sudo apt-get install -y stress-ng jq

      - name: Run benchmarks
        if: ${{ inputs.run-benchmarks }}
        working-directory: module
        run: |
          ./benchmark_sync_timer_stress.sh > benchmark_results.csv
          echo "Benchmark results:"
          cat benchmark_results.csv

      - name: Upload benchmark results
        if: ${{ inputs.run-benchmarks }}
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: module/benchmark_results.csv

      # Failure reporters: dump the tail of the kernel log and fail the job
      # for any unit-test step whose outcome was 'failure'.
      - name: Check dmesg on RMID test failure
        if: steps.rmid-allocator-test.outcome == 'failure'
        run: |
          echo "RMID allocator tests failed, showing last kernel messages:"
          sudo dmesg | tail -n 100
          exit 1

      - name: Check dmesg on procfs test failure
        if: steps.procfs-test.outcome == 'failure'
        run: |
          echo "procfs tests failed, showing last kernel messages:"
          sudo dmesg | tail -n 100
          exit 1

      - name: Check dmesg on sync timer test failure
        if: steps.sync-timer-test.outcome == 'failure'
        run: |
          echo "sync timer tests failed, showing last kernel messages:"
          sudo dmesg | tail -n 100
          exit 1

      # Purely informational: every command is allowed to fail so the step
      # passes on instance types without resctrl/RDT support.
      - name: Check RDT Capabilities
        run: |
          sudo mkdir -p /sys/fs/resctrl || true
          sudo mount -t resctrl resctrl /sys/fs/resctrl || true
          echo "Mounting resctrl filesystem"
          mount | grep resctrl || true
          echo "Checking RDT capabilities"
          ls /sys/fs/resctrl/info || true
          echo "Monitoring features:"
          cat /sys/fs/resctrl/info/L3_MON/mon_features || true
          echo "Number of available RMIDs:"
          cat /sys/fs/resctrl/info/L3_MON/num_rmids || true
          echo "Number of CAT classes:"
          cat /sys/fs/resctrl/info/L3/num_closids || true
          echo "head -n 35 /proc/cpuinfo:"
          head -n 35 /proc/cpuinfo || true
          echo "CPU RDT features (head):"
          grep -E "cat_l3|cdp_l3|cqm_occup_llc|cqm_mbm_total|cqm_mbm_local" /proc/cpuinfo | head || true

      # Single insmod/rmmod cycle; the failure reporter below prints the kernel
      # log and fails the job if this step's outcome was 'failure'.
      - name: Load and test module
        id: load-and-test-module
        continue-on-error: true
        working-directory: module
        run: |
          # Check undefined symbols
          sudo modinfo -F depends build/collector.ko
          sudo objdump -d build/collector.ko | grep undefined || true
          # Load module
          echo "insmod build/collector.ko:"
          sudo insmod build/collector.ko
          # Verify module is loaded
          echo "lsmod | grep collector:"
          lsmod | grep collector
          # Check kernel logs for module initialization
          echo "dmesg | grep 'Memory Collector':"
          sudo dmesg -c | grep "Memory Collector" || true
          # Unload module
          echo "rmmod collector:"
          sudo rmmod collector
          # Verify module unloaded successfully
          echo "lsmod | grep collector:"
          # Display only: a '!'-negated pipeline never trips errexit, so the
          # 'if' below is what actually enforces the check.
          ! lsmod | grep collector
          if lsmod | grep -q collector; then
            echo "Error: Module still loaded"
            exit 1
          fi
          # Check kernel logs for cleanup message
          echo "dmesg | grep 'Memory Collector':"
          sudo dmesg -c | grep "Memory Collector" || true

      - name: Check dmesg on failure
        if: steps.load-and-test-module.outcome == 'failure'
        run: |
          echo "load and test module failed, showing last kernel messages:"
          sudo dmesg | tail -n 100
          exit 1

      - name: Install trace dependencies
        run: |
          sudo apt-get install -y trace-cmd

      - name: Run module test script
        working-directory: module
        run: |
          # run 10 times in quick succession to stress-test insmod/rmmod and collector
          for i in {1..10}; do
            echo "*** Run $i:"
            ./test_module.sh
          done

  stop-runner:
    name: Stop EC2 runner
    needs: [start-runner, test-module]
    runs-on: ubuntu-latest
    if: always()  # Run even if previous jobs fail
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: ${{ secrets.AWS_REGION }}
          role-session-name: github-runner-session

      - name: Stop EC2 runner
        # NOTE(review): version tag restored as the major tag "v2" after it was
        # lost to e-mail obfuscation; keep it identical to the start step and
        # confirm the exact release that was pinned originally.
        uses: machulav/ec2-github-runner@v2
        with:
          mode: stop
          github-token: ${{ secrets.REPO_ADMIN_TOKEN }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}