-
Notifications
You must be signed in to change notification settings - Fork 23
257 lines (252 loc) · 10.9 KB
/
_linux_ut.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
name: Linux UT Test
on:
workflow_call:
inputs:
pytorch:
required: false
type: string
default: 'main'
description: Pytorch branch/commit
keep_torch_xpu_ops:
required: false
type: string
default: 'false'
description: Keep torch-xpu-ops pin. `true` means use pined commit
triton:
required: false
type: string
default: ''
description: Triton commit. Use pytorch pined commit by default
ut:
required: true
type: string
default: ''
description: UT scope. `op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu` Delimiter is comma
abi:
required: false
type: string
default: 1
description: ABI version. Default abi as 1.
python:
required: false
type: string
default: '3.10'
description: Python version
runner:
required: true
type: string
default: 'linux.idc.xpu'
description: Runner label
driver:
required: false
type: string
default: 'lts'
description: Driver lts/rolling
permissions: read-all
jobs:
Torch-XPU-UT-Tests:
runs-on: ${{ inputs.runner }}
timeout-minutes: 900
env:
NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
steps:
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
- name: Prepare Stock Pytorch
run: |
pwd
which conda && conda clean -ay
conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \
rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK}
conda create -n xpu_op_${ZE_AFFINITY_MASK} python=${{ inputs.python }} cmake ninja -y
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ../ && rm -rf pytorch
pip install requests
git clone https://github.com/pytorch/pytorch pytorch
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
cd pytorch && git checkout $(echo ${{ inputs.pytorch }})
# apply PRs for stock pytorch
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
git status && git show -s
git submodule sync && git submodule update --init --recursive
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
echo "Don't replace torch-xpu-ops!"
else
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
# Workaround for torch-xpu-ops ci test
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
fi
fi
- name: Triton Installation
run: |
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ../pytorch
TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
if [ -z ${{ inputs.triton }} ]; then
TRITON_COMMIT_ID="$(<.ci/docker/ci_commit_pins/triton-xpu.txt)"
else
TRITON_COMMIT_ID="${{ inputs.triton }}"
fi
echo ${TRITON_REPO}@${TRITON_COMMIT_ID}
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python"
fi
- name: Build Pytorch XPU
run: |
source activate xpu_op_${ZE_AFFINITY_MASK}
source .github/scripts/env.sh ${{ inputs.pytorch }}
pip install mkl-static==2025.0.1 mkl-include==2025.0.1
if [[ ${{ inputs.abi }} == '0' ]]; then
export _GLIBCXX_USE_CXX11_ABI=0
else
export _GLIBCXX_USE_CXX11_ABI=1
fi
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
cd ../pytorch
export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
pip install -r requirements.txt
WERROR=1 python setup.py bdist_wheel
pip install --force-reinstall dist/*.whl
git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
else
pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
cd ../pytorch
git reset --hard && git checkout ${TORCH_COMMIT_ID}
TORCH_XPU_OPS_COMMIT=$(<third_party/xpu.txt)
rm -rf third_party/torch-xpu-ops
git clone https://github.com/intel/torch-xpu-ops.git third_party/torch-xpu-ops
cd third_party/torch-xpu-ops
git checkout ${TORCH_XPU_OPS_COMMIT}
cd ../..
fi
pip install -r .ci/docker/requirements-ci.txt
- name: Torch Config
run: |
source activate xpu_op_${ZE_AFFINITY_MASK}
source .github/scripts/env.sh ${{ inputs.pytorch }}
python -c "import torch; print(torch.__config__.show())"
python -c "import torch; print(torch.__config__.parallel_info())"
python -c "import torch; print(torch.__config__.torch.xpu.device_count())"
python -c "import triton; print(triton.__version__)"
cd ..
python pytorch/torch/utils/collect_env.py
rm -rf /tmp/torchinductor_*
rm -rf ~/.triton/cache
- name: Run XPU OP Examples
if: contains(inputs.ut, 'op_regression') || github.event_name == 'schedule'
run: |
cd ${{ github.workspace }}
mkdir -p ut_log/op_regression
xpu-smi discovery
source .github/scripts/env.sh ${{ inputs.pytorch }}
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ${{ github.workspace }}
cd test/regressions
pip install pytest
timeout 8000 pytest -v 2>&1 | tee ${{ github.workspace }}/ut_log/op_regression/op_regression_test.log
- name: Run XPU OP Regressions test on device 1
if: contains(inputs.ut, 'op_regression_dev1') || github.event_name == 'schedule'
run: |
cd ${{ github.workspace }}
mkdir -p ut_log/op_regression_dev1
xpu-smi discovery
source .github/scripts/env.sh ${{ inputs.pytorch }}
source activate xpu_op_${ZE_AFFINITY_MASK}
export ZE_AFFINITY_MASK_OLD=${ZE_AFFINITY_MASK}
unset ZE_AFFINITY_MASK
cd ${{ github.workspace }}
cd test/regressions
pip install pytest
timeout 8000 pytest -v test_operation_on_device_1.py 2>&1 | tee ${{ github.workspace }}/ut_log/op_regression_dev1/op_regression_dev1_test.log
export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK_OLD}
- name: Run XPU OP Extended UT
if: contains(inputs.ut, 'op_extended') || github.event_name == 'schedule'
run: |
source .github/scripts/env.sh ${{ inputs.pytorch }}
source activate xpu_op_${ZE_AFFINITY_MASK}
export PYTORCH_TEST_WITH_SLOW=1
cd ${{ github.workspace }}
mkdir -p ut_log/op_extended
cd ../pytorch/third_party/torch-xpu-ops/test/xpu/extended/
timeout 10000 python run_test_with_skip.py 2>&1 | tee ${{ github.workspace }}/ut_log/op_extended/op_extended_test.log
- name: Run XPU OP UT
if: contains(inputs.ut, 'op_ut') || github.event_name == 'schedule'
run: |
source .github/scripts/env.sh ${{ inputs.pytorch }}
source activate xpu_op_${ZE_AFFINITY_MASK}
export PYTORCH_ENABLE_XPU_FALLBACK=1
export PYTORCH_TEST_WITH_SLOW=1
cd ${{ github.workspace }}
mkdir -p ut_log/op_ut
cd ../pytorch/third_party/torch-xpu-ops/test/xpu
timeout 10000 python run_test_with_skip.py 2>&1 | tee ${{ github.workspace }}/ut_log/op_ut/op_ut_with_skip_test.log
# Cases run with a on-demand white list, since some suites are too
# slow to go through all operators on CPU. So add cases on-demand
# when XPU implementatoin is done.
# test_foreach, test_decomp
timeout 10000 python run_test_with_only.py 2>&1 | tee ${{ github.workspace }}/ut_log/op_ut/op_ut_with_only_test.log
- name: Run Torch XPU UT
if: contains(inputs.ut, 'torch_xpu') || github.event_name == 'schedule'
run: |
source .github/scripts/env.sh ${{ inputs.pytorch }}
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ${{ github.workspace }}
mkdir -p ut_log/torch_xpu
cd ../pytorch
TEST_REPORTS_DIR=$(pwd)/test/test-reports
rm -rf "$TEST_REPORTS_DIR" && mkdir -p "$TEST_REPORTS_DIR"
# Run Pytorch XPU binary UT
for xpu_case in build/bin/*{xpu,sycl}*; do
if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then
case_name=$(basename "$xpu_case")
echo "Testing ${case_name} ..."
"$xpu_case" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$case_name".xml 2>&1 | tee ${{ github.workspace }}/ut_log/torch_xpu/binary_ut_torch_xpu_${case_name}_test.log
fi
done
# Run Pytorch XPU python UT
export PYTORCH_TEST_WITH_SLOW=1
export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
test_cmd="python test/run_test.py --include "
# All Inductor UT under test/inductor
for test in $(ls test/inductor | grep test);
do
test_cmd="${test_cmd} inductor/$test";
done
# All xpu ut under test/xpu
for test in $(ls test/xpu | grep test);
do
test_cmd="${test_cmd} xpu/$test";
done
if [ -f "test/test_xpu.py" ]; then
test_cmd="${test_cmd} test_xpu.py"
fi
eval $test_cmd 2>&1 | tee ${{ github.workspace }}/ut_log/torch_xpu/Inductor_ut_torch_xpu_test.log
- name: UT Test Results Check
shell: bash
run: |
function contains() {
contains_status="echo 'Start $2 ...'"
{
[[ $1 =~ (^|,)$2($|,) ]]
} || {
echo "[Warning] $2 is not suppotted type! Skipped!"
contains_status="continue"
}
}
set -xe
for ut_suite in $(echo ${{ inputs.ut }} |sed 's/,/ /g')
do
contains "op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu" $ut_suite
$contains_status
cd ${{ github.workspace }}/ut_log/${ut_suite}
cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./
bash ut_result_check.sh ${ut_suite}
done
- name: Upload Inductor XPU UT Log
if: always()
uses: actions/upload-artifact@v4
with:
name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }}
path: ${{ github.workspace }}/ut_log