-
Notifications
You must be signed in to change notification settings - Fork 29
353 lines (350 loc) · 15.2 KB
/
_linux_transformers.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
name: Linux Transformers Test
on:
pull_request:
branches:
- main
paths:
- '.github/scripts/spec.py'
- '.github/workflows/_linux_transformers.yml'
workflow_dispatch:
inputs:
pytorch:
required: false
type: string
default: 'nightly'
description: Pytorch branch/commit
python:
required: false
type: string
default: '3.10'
description: Python version
runner:
required: true
type: string
default: 'linux.idc.xpu'
description: Runner label
driver:
required: false
type: string
default: 'lts'
description: Driver lts/rolling
nightly_whl:
required: false
type: string
default: ''
description: Pytorch nightly wheel version
transformers:
required: false
type: string
default: 'v4.47.0'
description: Transformers version
permissions: read-all
jobs:
Torch-XPU-Transformers-Tests:
runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }}
env:
NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
python: ${{ inputs.python != '' && inputs.python || '3.10' }}
pytorch: ${{ inputs.pytorch != '' && inputs.pytorch || 'nightly' }}
transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.47.0' }}
PYTORCH_DEBUG_XPU_FALLBACK: '1'
TRANSFORMERS_TEST_DEVICE_SPEC: 'spec.py'
steps:
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
with:
path: torch-xpu-ops
- name: Checkout Transformers
uses: actions/checkout@v4
with:
repository: huggingface/transformers
ref: ${{ env.transformers }}
path: transformers
- name: Prepare OS environment
run: |
sudo apt-get update
sudo apt-get install -y \
espeak-ng \
git-lfs \
pkg-config \
libavcodec-dev \
libavdevice-dev \
libavfilter-dev \
libavformat-dev \
libavutil-dev \
libswresample-dev \
libswscale-dev
git lfs install
- name: Prepare Conda ENV
run: |
which conda && conda clean -ay
conda remove --all -y -n huggingface_transformers_test || rm -rf $(dirname ${CONDA_EXE})/../envs/huggingface_transformers_test
conda create -y -n huggingface_transformers_test python=${{ env.python }}
source activate huggingface_transformers_test
- name: Prepare Stock XPU Pytorch
run: |
pwd
source activate huggingface_transformers_test
if [ -z "${{ inputs.nightly_whl }}" ]; then
pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
else
pip install torch==$(echo ${{ inputs.nightly_whl }}) torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
fi
- name: Prepare Transformers
run: |
pwd
source activate huggingface_transformers_test
cd transformers
pip install -e .
pip install -e ".[dev-torch,testing,video]"
rm -rf tests_log && mkdir -p tests_log
rm -rf reports
cp ${{ github.workspace }}/torch-xpu-ops/.github/scripts/spec.py ./
- name: Report installed versions
run: |
source activate huggingface_transformers_test
echo "pip installed packages:"
pip list | tee ${{ github.workspace }}/transformers/tests_log/pip_list.txt
echo "lspci gpu devices:"
lspci -d ::0380 | tee ${{ github.workspace }}/transformers/tests_log/lspci_0380.txt
echo "GPU render nodes:"
cat /sys/class/drm/render*/device/device | tee ${{ github.workspace }}/transformers/tests_log/device_IDs.txt
- name: Sanitry check installed packages
run: |
source activate huggingface_transformers_test
# These checks are to exit earlier if for any reason Transformers
# reinstalled torch packages back to CUDA versions (not expected).
pip show torch | grep Version | grep xpu
pip show torchaudio | grep Version | grep xpu
pip show torchvision | grep Version | grep xpu
python -c 'import torch; exit(not torch.xpu.is_available())'
- name: Run -k backbone tests
env:
TEST_CASE: 'tests_backbone'
run: |
source activate huggingface_transformers_test
cd transformers
python3 -m pytest -rsf --make-reports=$TEST_CASE -k backbone tests || \
(echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
- name: Run tests/*.py
env:
TEST_CASE: 'tests_py'
run: |
source activate huggingface_transformers_test
cd transformers
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/*.py || true
- name: Run tests/benchmark
env:
TEST_CASE: 'tests_benchmark'
run: |
source activate huggingface_transformers_test
cd transformers
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/benchmark || true
- name: Run tests/generation
env:
TEST_CASE: 'tests_generation'
run: |
source activate huggingface_transformers_test
cd transformers
# Excluding tests due to:
# * torch.distributed.* not yet supported by XPU
pattern="not TestFSDPGeneration"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/generation -k "$pattern" || true
- name: Run tests/models
env:
TEST_CASE: 'tests_models'
run: |
source activate huggingface_transformers_test
cd transformers
# Excluding tests due to:
# * https://github.com/huggingface/transformers/issues/35252 (CUDA specific tests)
# * https://github.com/pytorch/pytorch/issues/140965 (aten::_linalg_eigvals)
pattern=" \
not test_model_parallelization and \
not test_model_parallel_equal_results and \
not test_resize_embeddings_untied and \
not test_resize_tokens_embeddings"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/models -k "$pattern" || true
- name: Run tests/pipelines
env:
TEST_CASE: 'tests_pipelines'
run: |
source activate huggingface_transformers_test
cd transformers
# Some tests are known to fail w/o clear pattern
# TODO: drop ||true after triage and fixes
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/pipelines || true
- name: Run tests/trainer
env:
TEST_CASE: 'tests_trainer'
run: |
source activate huggingface_transformers_test
cd transformers
# Excluding tests due to:
# * Some ray tests hang, reason unknown
# * torch.distributed.* not yet supported by XPU
pattern=" \
not ray and \
not TestTrainerDistributed and \
not TestTrainerDistributedXPU and \
not TestFSDPTrainer"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/trainer -k "$pattern" || \
(echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
- name: Run tests/utils
env:
TEST_CASE: 'tests_utils'
run: |
source activate huggingface_transformers_test
cd transformers
# Excluding tests due to:
# * Network proxy connection issue, reason unknown
pattern="not test_load_img_url_timeout"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/utils -k "$pattern" || \
(echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
- name: Check for errors in tests
run: |
FAILED_CASES=$(echo $FAILED_CASES | sed 's/^,//')
echo "Failed cases: [$(echo $FAILED_CASES | sed 's/,/, /g')]"
test -z "$FAILED_CASES"
- name: Print results table
if: ${{ ! cancelled() }}
run: |
# Helper function to return number preceeding given pattern, i.e:
# === 25 failed, 11 warnings, 0 errors ===
# Call as follows:
# parse_stat $line "failed"
function parse_stat() {
stat=$(cat $1 | grep $2 | sed "s/.* \([0-9]*\) $2.*/\1/")
if [ -n "$stat" ]; then echo $stat; else echo "0"; fi
}
cd transformers
{
echo "### Results"
echo "| Test group | Errors | Failed | Deselected | Passed | Skipped |"
echo "| --- | --- | --- | --- | --- | --- |"
for stat in $(find reports -name stats.txt); do
# Each stat.txt is located in: reports/$test_group/stats.txt
test_group=$(echo $stat | cut -f 2 -d/)
# Get failed, passed, skipped, etc. counters
failed=$(parse_stat $stat failed)
passed=$(parse_stat $stat passed)
deselected=$(parse_stat $stat deselected)
skipped=$(parse_stat $stat skipped)
warnings=$(parse_stat $stat warnings)
errors=$(parse_stat $stat errors)
echo "| $test_group | $errors | $failed | $deselected | $passed | $skipped |"
done
} >> $GITHUB_STEP_SUMMARY
- name: Print failure lines
if: ${{ ! cancelled() }}
run: |
cd transformers
{
echo "### Failure lines"
echo "| Test group |File | Error | Comment |"
echo "| --- | --- | --- | --- |"
rm -rf _failures.txt
for failure in $(find reports -name failures_line.txt); do
# Each failure_line.txt is located in: reports/$test_group/failure_line.txt
test_group=$(echo $failure | cut -f2 -d/)
tail -n +2 $failure | sed "s/^/$test_group /" >> _failures.txt
done
# failures_line.txt file does not have test case information,
# so we can just sort the output and report uniq values
sort _failures.txt | uniq > _failures_uniq.txt
while read line; do
test_group=$(echo $line | cut -f1 -d" ")
file=$(echo $line | cut -f2 -d" " | sed "s/\(.*\):$/\1/")
error=$(echo $line | cut -f3 -d" " | sed "s/\(.*\):$/\1/")
# Failure comments often contain special characters which complicate
# parsing failure lines. But fortunately we know for sure where comments
# start. So we just output all contents starting from this position and
# wrap everything in <pre></pre> to avoid collisions with Markdown formatting.
comment="<pre>$(echo $line | cut -f4- -d' ' | sed 's/\(.*\):$/\1/')</pre>"
echo "| $test_group | $file | $error | $comment |"
done <_failures_uniq.txt
} >> $GITHUB_STEP_SUMMARY
- name: Print not implemented XPU backend ops
run: |
cd transformers
{
echo "### Not implemented ops"
echo "| Test group | Operator | Status |"
echo "| --- | --- | --- |"
rm -rf _ops.txt && touch _ops.txt
for log in $(find reports -name failures_line.txt); do
# Each failure_line.txt is located in: reports/$test_group/failure_line.txt
test_group=$(echo $log | cut -f2 -d/)
ops=$(grep NotImplementedError $log | grep "for the XPU device" | sed "s/.*The operator '\(.*\)' is not.*/\1/")
for op in $ops; do
echo "| $test_group | <pre>$op</pre> | not implemented |" >> _ops.txt
done
done
for log in $(find reports -name warnings.txt); do
# Each warnings.txt is located in: reports/$test_group/warnings.txt
test_group=$(echo $log | cut -f2 -d/)
ops=$(grep UserWarning $log | grep "on the XPU backend" | sed "s/.*The operator '\(.*\) on the XPU.*/\1/")
for op in $ops; do
echo "| $test_group | <pre>$op</pre> | fallback to CPU happens |" >> _ops.txt
done
done
sort _ops.txt | uniq
} >> $GITHUB_STEP_SUMMARY
- name: Print annotations
if: ${{ ! cancelled() }}
run: |
source activate huggingface_transformers_test
{
echo "### Annotations"
echo "| | |"
echo "| --- | --- |"
echo "| jobs.$GITHUB_JOB.versions.os | $(source /etc/os-release && echo $VERSION_ID) |"
echo "| jobs.$GITHUB_JOB.versions.linux-kernel | $(uname -r) |"
echo "| jobs.$GITHUB_JOB.versions.python | $(python --version | cut -f2 -d' ') |"
packages=" \
level-zero \
libigc1 \
libigc2 \
libze1 \
libze-intel-gpu1 \
intel-i915-dkms \
intel-level-zero-gpu \
intel-opencl-icd"
for package in $packages; do
package_version=$(dpkg -l | grep $package | grep ii | head -1 | sed "s/ */ /g" | cut -f3 -d" ")
echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |"
done
packages="accelerate \
numpy \
torch \
torchaudio \
torchvision \
transformers"
for package in $packages; do
package_version=$(python -c "import $package; print($package.__version__)" || true)
echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |"
done
# printing annotations for GPU cards
var="[$(cat /sys/class/drm/render*/device/vendor || true)]"
echo "| jobs.$GITHUB_JOB.drm.render_nodes_vendor_ids | $(echo $var | sed 's/ /,/g') |"
var="[$(cat /sys/class/drm/render*/device/device || true)]"
echo "| jobs.$GITHUB_JOB.drm.render_nodes_device_ids | $(echo $var | sed 's/ /,/g') |"
var=$(python -c "import torch; print(torch.version.xpu)" || true)
echo "| jobs.$GITHUB_JOB.torch.version.xpu | $var |"
var=$(python -c "import torch; print(torch.xpu.device_count())" || true)
echo "| jobs.$GITHUB_JOB.torch.xpu.device_count | $var |"
# printing annotations with key environment variables
echo "| jobs.$GITHUB_JOB.env.ZE_AFFINITY_MASK | $ZE_AFFINITY_MASK |"
echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |"
echo "| jobs.$GITHUB_JOB.env.PYTORCH_ENABLE_XPU_FALLBACK | $PYTORCH_ENABLE_XPU_FALLBACK |"
echo "| jobs.$GITHUB_JOB.env.PYTORCH_DEBUG_XPU_FALLBACK | $PYTORCH_DEBUG_XPU_FALLBACK |"
} >> $GITHUB_STEP_SUMMARY
- name: Upload Test log
if: ${{ ! cancelled() }}
uses: actions/upload-artifact@v4
with:
name: Torch-XPU-Transformers-Log-${{ github.event.pull_request.number || github.sha }}
path: |
${{ github.workspace }}/transformers/reports
${{ github.workspace }}/transformers/tests_log