-
Notifications
You must be signed in to change notification settings - Fork 34
132 lines (126 loc) · 5.78 KB
/
gpu-bench.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Run regression check only when attempting to merge, shown as skipped status check beforehand
name: GPU benchmark regression test
on:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
branches: [dev]
merge_group:
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
# Run comparative benchmark against dev, open issue on regression
gpu-benchmark:
if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
name: Run benchmarks on GPU
runs-on: [self-hosted, gpu-bench]
steps:
- uses: actions/checkout@v4
with:
repository: lurk-lab/ci-workflows
- uses: ./.github/actions/gpu-setup
with:
gpu-framework: 'cuda'
- uses: ./.github/actions/ci-env
- uses: actions/checkout@v4
# Install dependencies
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@v2
with:
tool: [email protected]
- name: Install criterion
run: |
cargo install cargo-criterion
cargo install criterion-table
- name: Set bench output format and base SHA
run: |
echo "BENCH_OUTPUT=commit-comment" | tee -a $GITHUB_ENV
echo "BENCH_NUM_CONS=16384,1038732" | tee -a $GITHUB_ENV
echo "BASE_COMMIT=${{ github.event.merge_group.base_sha }}" | tee -a $GITHUB_ENV
GPU_NAME=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader,nounits | tail -n1)
echo "GPU_ID=$(echo $GPU_NAME | awk '{ print $NF }')" | tee -a $GITHUB_ENV
echo "GPU_NAME=$GPU_NAME" | tee -a $GITHUB_ENV
# Checkout base branch for comparative bench
- uses: actions/checkout@v4
with:
ref: dev
path: dev
# Copy the script so the base can bench with the same parameters
- name: Run GPU bench on base branch
run: |
# Copy justfile to dev, overwriting existing config with that of PR branch
cp ../benches/justfile .
# Run benchmark
just gpu-bench-ci supernova-ci
# Copy bench output to PR branch
cp supernova-ci-${{ env.BASE_COMMIT }}.json ..
working-directory: ${{ github.workspace }}/dev
- name: Run GPU bench on PR branch
run: |
just gpu-bench-ci supernova-ci
cp supernova-ci-${{ github.sha }}.json ..
working-directory: ${{ github.workspace }}/benches
- name: copy the benchmark template and prepare it with data
run: |
cp .github/tables.toml .
# Get CPU model
CPU_MODEL=$(grep '^model name' /proc/cpuinfo | head -1 | awk -F ': ' '{ print $2 }')
# Get vCPU count
NUM_VCPUS=$(nproc --all)
# Get total RAM in GB
TOTAL_RAM=$(grep MemTotal /proc/meminfo | awk '{$2=$2/(1024^2); print int($2), "GB RAM";}')
WORKFLOW_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# Use conditionals to ensure that only non-empty variables are inserted
[[ ! -z "${{ env.GPU_NAME }}" ]] && sed -i "/^\"\"\"$/i ${{ env.GPU_NAME }}" tables.toml
[[ ! -z "$CPU_MODEL" ]] && sed -i "/^\"\"\"$/i $CPU_MODEL" tables.toml
[[ ! -z "$NUM_VCPUS" ]] && sed -i "/^\"\"\"$/i $NUM_VCPUS vCPUs" tables.toml
[[ ! -z "$TOTAL_RAM" ]] && sed -i "/^\"\"\"$/i $TOTAL_RAM" tables.toml
sed -i "/^\"\"\"$/i Workflow run: $WORKFLOW_URL" tables.toml
echo "WORKFLOW_URL=$WORKFLOW_URL" | tee -a $GITHUB_ENV
working-directory: ${{ github.workspace }}
# Create a `criterion-table` and write in commit comment
- name: Run `criterion-table`
run: |
cat supernova-ci-${{ env.BASE_COMMIT }}.json supernova-ci-${{ github.sha }}.json | criterion-table > BENCHMARKS.md
- name: Write bench on commit comment
uses: peter-evans/commit-comment@v3
with:
body-path: BENCHMARKS.md
# TODO: Set `$BENCH_NOISE_THRESHOLD` via `cardinalby/export-env-action` or hardcode to 1.3
# Check for a slowdown >= `$BENCH_NOISE_THRESHOLD` (fallback is 30%/1.3x). If so, open an issue but don't block merge
# Since we are parsing for slowdowns, we simply add 1 to the noise threshold decimal to get the regression factor
- name: Check for perf regression
id: regression-check
run: |
REGRESSIONS=$(grep -o '[0-9.]*x slower' BENCHMARKS.md | cut -d 'x' -f1)
echo $REGRESSIONS
if [ ! -z "${{ env.BENCH_NOISE_THRESHOLD}}" ]; then
REGRESSION_FACTOR=$(echo "${{ env.BENCH_NOISE_THRESHOLD }}+1" | bc)
else
REGRESSION_FACTOR=1.3
fi
echo "NOISE_THRESHOLD=$(echo "($REGRESSION_FACTOR-1)*100" | bc)" | tee -a $GITHUB_ENV
for r in $REGRESSIONS
do
if (( $(echo "$r >= $REGRESSION_FACTOR" | bc -l) ))
then
echo "regression=true" | tee -a $GITHUB_OUTPUT
fi
done
# Not possible to use ${{ github.event.number }} with the `merge_group` trigger
- name: Get PR number from merge branch
if: steps.regression-check.outputs.regression == 'true'
run: |
echo "PR_NUMBER=$(echo ${{ github.event.merge_group.head_ref }} | sed -e 's/.*pr-\(.*\)-.*/\1/')" | tee -a $GITHUB_ENV
- uses: JasonEtco/create-an-issue@v2
if: steps.regression-check.outputs.regression == 'true'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ env.PR_NUMBER }}
GIT_SHA: ${{ github.sha }}
WORKFLOW_URL: ${{ env.WORKFLOW_URL }}
NOISE_THRESHOLD: ${{ env.NOISE_THRESHOLD }}
with:
update_existing: true
filename: .github/PERF_REGRESSION.md