Skip to content

Commit

Permalink
Merge branch 'main' into penghuic/pytest_by_script
Browse files Browse the repository at this point in the history
  • Loading branch information
PenghuiCheng authored Dec 26, 2024
2 parents 05da31c + 7d66fe1 commit dccd2d2
Show file tree
Hide file tree
Showing 14 changed files with 439 additions and 8 deletions.
3 changes: 1 addition & 2 deletions .github/scripts/apply_torch_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
# Fallback to CPU for XPU FP64
"https://github.com/pytorch/pytorch/pull/126516",
# Modify the tolerance level in TIMM benchmark
# "https://github.com/pytorch/pytorch/pull/129735",
"https://github.com/mengfei25/pytorch/pull/21",
"https://github.com/pytorch/pytorch/pull/143739",
]
)
parser.add_argument('--extra-pr-list', '-e', nargs='+',default=[])
Expand Down
2 changes: 2 additions & 0 deletions .github/scripts/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ if [ "$1" != "nightly_wheel" ];then
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source /opt/intel/oneapi/umf/latest/env/vars.sh
source /opt/intel/oneapi/pti/latest/env/vars.sh
source /opt/intel/oneapi/ccl/latest/env/vars.sh
source /opt/intel/oneapi/mpi/latest/env/vars.sh
else
echo "Don't need to source DL-Essential for nightly wheel"
fi
203 changes: 197 additions & 6 deletions .github/workflows/_linux_transformers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ jobs:
DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
python: ${{ inputs.python != '' && inputs.python || '3.10' }}
pytorch: ${{ inputs.pytorch != '' && inputs.pytorch || 'nightly' }}
transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.47.0' }}
TRANSFORMERS_TEST_DEVICE_SPEC: 'spec.py'
steps:
- name: Checkout torch-xpu-ops
Expand All @@ -60,7 +61,7 @@ jobs:
uses: actions/checkout@v4
with:
repository: huggingface/transformers
ref: ${{ inputs.transformers != '' && inputs.transformers || 'v4.47.0' }}
ref: ${{ env.transformers }}
path: transformers
- name: Prepare OS environment
run: |
Expand Down Expand Up @@ -103,15 +104,16 @@ jobs:
rm -rf reports
cp ${{ github.workspace }}/torch-xpu-ops/.github/scripts/spec.py ./
- name: Report installed versions
id: installed
run: |
source activate huggingface_transformers_test
echo "TORCH_BRANCH_ID=$(python -c 'import torch; print(torch.__version__)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "pip installed packages:"
pip list | tee ${{ github.workspace }}/transformers/tests_log/pip_list.txt
echo "lspci gpu devices:"
lspci -d ::0380 | tee ${{ github.workspace }}/transformers/tests_log/lspci_0380.txt
echo "GPU render nodes:"
cat /sys/class/drm/render*/device/device | tee ${{ github.workspace }}/transformers/tests_log/device_IDs.txt
echo "xpu-smi output:"
xpu-smi discovery -y --json --dump -1
- name: Sanity check installed packages
run: |
source activate huggingface_transformers_test
Expand All @@ -120,11 +122,200 @@ jobs:
pip show torch | grep Version | grep xpu
pip show torchaudio | grep Version | grep xpu
pip show torchvision | grep Version | grep xpu
- name: Run XPU backbone
python -c 'import torch; exit(not torch.xpu.is_available())'
- name: Run -k backbone tests
env:
TEST_CASE: 'tests_backbone'
run: |
source activate huggingface_transformers_test
cd transformers
python3 -m pytest -rsf --make-reports=tests_benchmark -k backbone tests
python3 -m pytest -rsf --make-reports=$TEST_CASE -k backbone tests || \
(echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
- name: Run tests/*.py
env:
TEST_CASE: 'tests_py'
run: |
source activate huggingface_transformers_test
cd transformers
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/*.py || true
- name: Run tests/benchmark
env:
TEST_CASE: 'tests_benchmark'
run: |
source activate huggingface_transformers_test
cd transformers
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/benchmark || true
- name: Run tests/generation
env:
TEST_CASE: 'tests_generation'
run: |
source activate huggingface_transformers_test
cd transformers
# Excluding tests due to:
# * torch.distributed.* not yet supported by XPU
pattern="not TestFSDPGeneration"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/generation -k "$pattern" || true
- name: Run tests/models
env:
TEST_CASE: 'tests_models'
run: |
source activate huggingface_transformers_test
cd transformers
# Excluding tests due to:
# * https://github.com/huggingface/transformers/issues/35252 (CUDA specific tests)
# * https://github.com/pytorch/pytorch/issues/140965 (aten::_linalg_eigvals)
pattern=" \
not test_model_parallelization and \
not test_model_parallel_equal_results and \
not test_resize_embeddings_untied and \
not test_resize_tokens_embeddings"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/models -k "$pattern" || true
- name: Run tests/pipelines
env:
TEST_CASE: 'tests_pipelines'
run: |
source activate huggingface_transformers_test
cd transformers
# Some tests are known to fail w/o clear pattern
# TODO: drop ||true after triage and fixes
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/pipelines || true
- name: Run tests/trainer
env:
TEST_CASE: 'tests_trainer'
run: |
source activate huggingface_transformers_test
cd transformers
# Excluding tests due to:
# * Some ray tests hang, reason unknown
# * torch.distributed.* not yet supported by XPU
pattern=" \
not ray and \
not TestTrainerDistributed and \
not TestTrainerDistributedXPU and \
not TestFSDPTrainer"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/trainer -k "$pattern" || \
(echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
- name: Run tests/utils
env:
TEST_CASE: 'tests_utils'
run: |
source activate huggingface_transformers_test
cd transformers
# Excluding tests due to:
# * Network proxy connection issue, reason unknown
pattern="not test_load_img_url_timeout"
python3 -m pytest -rsf --make-reports=$TEST_CASE tests/utils -k "$pattern" || \
(echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
- name: Check for errors in tests
run: |
FAILED_CASES=$(echo $FAILED_CASES | sed 's/^,//')
echo "Failed cases: [$(echo $FAILED_CASES | sed 's/,/, /g')]"
test -z "$FAILED_CASES"
- name: Print results table
if: ${{ ! cancelled() }}
run: |
# Helper function to return the number preceding a given label in a pytest
# summary line, e.g.:
#   === 25 failed, 11 warnings, 1 error in 3.2s ===
# Call as follows:
#   parse_stat <path-to-stats.txt> "failed"
# Arguments:
#   $1 - file containing the summary line
#   $2 - label to look for (failed, passed, skipped, deselected, warnings, errors)
# Prints the count for the label, or "0" when the label is absent.
function parse_stat() {
    local stats_file="$1"
    # pytest prints singular labels for a count of one ("1 error", "1 warning"),
    # so strip a trailing "s" and match it optionally below.
    local label="${2%s}"
    local count
    # Require at least one digit directly followed by " <label>"; this keeps
    # "2 xfailed"/"3 xpassed" from being reported as failed/passed and never
    # echoes the whole line back when nothing matches.
    count=$(grep -oE "[0-9]+ ${label}s?" "$stats_file" | head -n 1 | cut -d' ' -f1 || true)
    echo "${count:-0}"
}
cd transformers
{
echo "### Results"
echo "| Test group | Errors | Failed | Deselected | Passed | Skipped |"
echo "| --- | --- | --- | --- | --- | --- |"
for stat in $(find reports -name stats.txt); do
# Each stat.txt is located in: reports/$test_group/stats.txt
test_group=$(echo $stat | cut -f 2 -d/)
# Get failed, passed, skipped, etc. counters
failed=$(parse_stat $stat failed)
passed=$(parse_stat $stat passed)
deselected=$(parse_stat $stat deselected)
skipped=$(parse_stat $stat skipped)
warnings=$(parse_stat $stat warnings)
errors=$(parse_stat $stat errors)
echo "| $test_group | $errors | $failed | $deselected | $passed | $skipped |"
done
} >> $GITHUB_STEP_SUMMARY
- name: Print failure lines
if: ${{ ! cancelled() }}
run: |
cd transformers
{
echo "### Failure lines"
echo "| Test group |File | Error | Comment |"
echo "| --- | --- | --- | --- |"
rm -rf _failures.txt
for failure in $(find reports -name failures_line.txt); do
# Each failure_line.txt is located in: reports/$test_group/failure_line.txt
test_group=$(echo $failure | cut -f2 -d/)
tail -n +2 $failure | sed "s/^/$test_group /" >> _failures.txt
done
# failures_line.txt file does not have test case information,
# so we can just sort the output and report uniq values
sort _failures.txt | uniq > _failures_uniq.txt
while read line; do
test_group=$(echo $line | cut -f1 -d" ")
file=$(echo $line | cut -f2 -d" " | sed "s/\(.*\):$/\1/")
error=$(echo $line | cut -f3 -d" " | sed "s/\(.*\):$/\1/")
# Failure comments often contain special characters which complicate
# parsing failure lines. But fortunately we know for sure where comments
# start. So we just output all contents starting from this position and
# wrap everything in <pre></pre> to avoid collisions with Markdown formatting.
comment="<pre>$(echo $line | cut -f4- -d' ' | sed 's/\(.*\):$/\1/')</pre>"
echo "| $test_group | $file | $error | $comment |"
done <_failures_uniq.txt
} >> $GITHUB_STEP_SUMMARY
- name: Print annotations
if: ${{ ! cancelled() }}
run: |
source activate huggingface_transformers_test
{
echo "### Annotations"
echo "| | |"
echo "| --- | --- |"
echo "| jobs.$GITHUB_JOB.versions.os | $(source /etc/os-release && echo $VERSION_ID) |"
echo "| jobs.$GITHUB_JOB.versions.linux-kernel | $(uname -r) |"
echo "| jobs.$GITHUB_JOB.versions.python | $(python --version | cut -f2 -d' ') |"
packages=" \
level-zero \
libigc1 \
libigc2 \
libze1 \
libze-intel-gpu1 \
intel-i915-dkms \
intel-level-zero-gpu \
intel-opencl-icd"
for package in $packages; do
package_version=$(dpkg -l | grep $package | grep ii | head -1 | sed "s/ */ /g" | cut -f3 -d" ")
echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |"
done
packages="accelerate \
numpy \
torch \
torchaudio \
torchvision \
transformers"
for package in $packages; do
package_version=$(python -c "import $package; print($package.__version__)" || true)
echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |"
done
# printing annotations for GPU cards
var="[$(cat /sys/class/drm/render*/device/vendor || true)]"
echo "| jobs.$GITHUB_JOB.drm.render_nodes_vendor_ids | $(echo $var | sed 's/ /,/g') |"
var="[$(cat /sys/class/drm/render*/device/device || true)]"
echo "| jobs.$GITHUB_JOB.drm.render_nodes_device_ids | $(echo $var | sed 's/ /,/g') |"
var=$(python -c "import torch; print(torch.version.xpu)" || true)
echo "| jobs.$GITHUB_JOB.torch.version.xpu | $var |"
var=$(python -c "import torch; print(torch.xpu.device_count())" || true)
echo "| jobs.$GITHUB_JOB.torch.xpu.device_count | $var |"
# printing annotations with key environment variables
echo "| jobs.$GITHUB_JOB.env.ZE_AFFINITY_MASK | $ZE_AFFINITY_MASK |"
echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |"
} >> $GITHUB_STEP_SUMMARY
- name: Upload Test log
if: ${{ ! cancelled() }}
uses: actions/upload-artifact@v4
Expand Down
2 changes: 2 additions & 0 deletions src/ATen/native/transformers/SDPUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

namespace sdp {

using c10::array_of;

bool check_all_tensors_on_device(sdp_params const& params, bool debug) {
// Check that all tensors are on the GPU device
// This should be handled by the stub dispatch, but whe call
Expand Down
22 changes: 22 additions & 0 deletions test/xpu/extended/run_test_with_skip_bmg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os
import pytest
import sys
from skip_list_common import skip_dict
from skip_list_win import skip_dict as skip_dict_win
from skip_list_win_bmg import skip_dict as skip_dict_win_bmg

# Runs test_ops_xpu.py through pytest, deselecting the cases named in the
# common skip list and, on Windows, the Windows-wide and BMG skip lists.
IS_WINDOWS = sys.platform == "win32"
TEST_FILE = "test_ops_xpu.py"

# Copy into a fresh list so extending it never mutates the sequence stored in
# the imported skip_dict, and so list- and tuple-valued skip dictionaries can
# be combined without a TypeError.
skip_list = list(skip_dict[TEST_FILE])
if IS_WINDOWS:
    skip_list.extend(skip_dict_win[TEST_FILE])
    skip_list.extend(skip_dict_win_bmg[TEST_FILE])

# Build the pytest "-k" expression: "not a and not b and ...".
# The result is an empty string when there is nothing to skip.
skip_options = " and ".join("not " + skip_case for skip_case in skip_list)

# Exported before pytest starts (presumably enables slow tests in the PyTorch
# test utilities - confirm there).
os.environ["PYTORCH_TEST_WITH_SLOW"] = "1"

test_command = [TEST_FILE, "-v"]
if skip_options:
    # Only pass "-k" when there is something to deselect.
    test_command = ["-k", skip_options] + test_command
res = pytest.main(test_command)
sys.exit(res)
22 changes: 22 additions & 0 deletions test/xpu/extended/run_test_with_skip_lnl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os
import pytest
import sys
from skip_list_common import skip_dict
from skip_list_win import skip_dict as skip_dict_win
from skip_list_win_lnl import skip_dict as skip_dict_win_lnl

# Runs test_ops_xpu.py through pytest, deselecting the cases named in the
# common skip list and, on Windows, the Windows-wide and LNL skip lists.
IS_WINDOWS = sys.platform == "win32"
TEST_FILE = "test_ops_xpu.py"

# Copy into a fresh list so extending it never mutates the sequence stored in
# the imported skip_dict, and so list- and tuple-valued skip dictionaries can
# be combined without a TypeError.
skip_list = list(skip_dict[TEST_FILE])
if IS_WINDOWS:
    skip_list.extend(skip_dict_win[TEST_FILE])
    skip_list.extend(skip_dict_win_lnl[TEST_FILE])

# Build the pytest "-k" expression: "not a and not b and ...".
# The result is an empty string when there is nothing to skip.
skip_options = " and ".join("not " + skip_case for skip_case in skip_list)

# Exported before pytest starts (presumably enables slow tests in the PyTorch
# test utilities - confirm there).
os.environ["PYTORCH_TEST_WITH_SLOW"] = "1"

test_command = [TEST_FILE, "-v"]
if skip_options:
    # Only pass "-k" when there is something to deselect.
    test_command = ["-k", skip_options] + test_command
res = pytest.main(test_command)
sys.exit(res)
22 changes: 22 additions & 0 deletions test/xpu/extended/run_test_with_skip_mtl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os
import pytest
import sys
from skip_list_common import skip_dict
from skip_list_win import skip_dict as skip_dict_win
from skip_list_win_mtl import skip_dict as skip_dict_win_mtl

# Runs test_ops_xpu.py through pytest, deselecting the cases named in the
# common skip list and, on Windows, the Windows-wide and MTL skip lists.
IS_WINDOWS = sys.platform == "win32"
TEST_FILE = "test_ops_xpu.py"

# Copy into a fresh list so extending it never mutates the sequence stored in
# the imported skip_dict, and so list- and tuple-valued skip dictionaries can
# be combined without a TypeError.
skip_list = list(skip_dict[TEST_FILE])
if IS_WINDOWS:
    skip_list.extend(skip_dict_win[TEST_FILE])
    skip_list.extend(skip_dict_win_mtl[TEST_FILE])

# Build the pytest "-k" expression: "not a and not b and ...".
# The result is an empty string when there is nothing to skip.
skip_options = " and ".join("not " + skip_case for skip_case in skip_list)

# Exported before pytest starts (presumably enables slow tests in the PyTorch
# test utilities - confirm there).
os.environ["PYTORCH_TEST_WITH_SLOW"] = "1"

test_command = [TEST_FILE, "-v"]
if skip_options:
    # Only pass "-k" when there is something to deselect.
    test_command = ["-k", skip_options] + test_command
res = pytest.main(test_command)
sys.exit(res)
13 changes: 13 additions & 0 deletions test/xpu/extended/skip_list_win_bmg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# test_ops_xpu.py cases skipped on Windows BMG.
# Tracked in https://github.com/intel/torch-xpu-ops/issues/1173
# Symptom: "Fatal Python error: Illegal instruction".
_ILLEGAL_INSTRUCTION_CASES = (
    "test_compare_cpu_grid_sampler_2d_xpu_float64",
    "test_compare_cpu_cosh_xpu_complex64",
    "test_compare_cpu_nn_functional_softshrink_xpu_bfloat16",
    "test_compare_cpu_nn_functional_softshrink_xpu_float16",
    "test_compare_cpu_nn_functional_softshrink_xpu_float32",
    "test_compare_cpu_nn_functional_softshrink_xpu_float64",
    "test_compare_cpu_square_xpu_complex128",
)

# Maps a test file name to the tuple of test case names to deselect.
skip_dict = {"test_ops_xpu.py": _ILLEGAL_INSTRUCTION_CASES}
13 changes: 13 additions & 0 deletions test/xpu/extended/skip_list_win_lnl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# test_ops_xpu.py cases skipped on Windows LNL.
# Tracked in https://github.com/intel/torch-xpu-ops/issues/1173
# Symptom: "Fatal Python error: Illegal instruction".
_ILLEGAL_INSTRUCTION_CASES = (
    "test_compare_cpu_grid_sampler_2d_xpu_float64",
    "test_compare_cpu_cosh_xpu_complex64",
    "test_compare_cpu_nn_functional_softshrink_xpu_bfloat16",
    "test_compare_cpu_nn_functional_softshrink_xpu_float16",
    "test_compare_cpu_nn_functional_softshrink_xpu_float32",
    "test_compare_cpu_nn_functional_softshrink_xpu_float64",
    "test_compare_cpu_square_xpu_complex128",
)

# Maps a test file name to the tuple of test case names to deselect.
skip_dict = {"test_ops_xpu.py": _ILLEGAL_INSTRUCTION_CASES}
20 changes: 20 additions & 0 deletions test/xpu/extended/skip_list_win_mtl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# test_ops_xpu.py cases that failed on MTL Windows; skipped for now for
# "Preci" (presumably pre-CI - confirm with the workflow owners).
_MTL_FAILED_FIRST_GROUP = (
    "test_compare_cpu_sqrt_xpu_complex64",
    "test_backward_nn_functional_adaptive_avg_pool2d_xpu_float32",
)

_MTL_FAILED_SECOND_GROUP = (
    "test_compare_cpu_cosh_xpu_complex128",
    "test_compare_cpu_frexp_xpu_bfloat16",
    "test_compare_cpu_frexp_xpu_float16",
    "test_compare_cpu_frexp_xpu_float32",
    "test_compare_cpu_frexp_xpu_float64",
    "test_compare_cpu_max_pool2d_with_indices_backward_xpu_bfloat16",
    "test_compare_cpu_max_pool2d_with_indices_backward_xpu_float16",
    "test_compare_cpu_max_pool2d_with_indices_backward_xpu_float32",
    "test_compare_cpu_max_pool2d_with_indices_backward_xpu_float64",
    "test_compare_cpu_nn_functional_avg_pool2d_xpu_bfloat16",
    "test_compare_cpu_nn_functional_avg_pool2d_xpu_float32",
    "test_compare_cpu_nn_functional_avg_pool3d_xpu_float32",
)

# Maps a test file name to the tuple of test case names to deselect.
skip_dict = {
    "test_ops_xpu.py": _MTL_FAILED_FIRST_GROUP + _MTL_FAILED_SECOND_GROUP,
}
Loading

0 comments on commit dccd2d2

Please sign in to comment.