From e2e802f5ee117b1544eb75928a7c15eccdfe12fa Mon Sep 17 00:00:00 2001 From: Dmitry Rogozhkin Date: Fri, 20 Dec 2024 08:39:23 -0800 Subject: [PATCH 1/7] ci: print annotations for key package versions in transformers test (#1184) Annotations are available on a summary page of executed workflow. --------- Signed-off-by: Dmitry Rogozhkin --- .github/workflows/_linux_transformers.yml | 50 +++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_linux_transformers.yml b/.github/workflows/_linux_transformers.yml index fd099fcb6..95aee8e7e 100644 --- a/.github/workflows/_linux_transformers.yml +++ b/.github/workflows/_linux_transformers.yml @@ -50,6 +50,7 @@ jobs: DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} python: ${{ inputs.python != '' && inputs.python || '3.10' }} pytorch: ${{ inputs.pytorch != '' && inputs.pytorch || 'nightly' }} + transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.47.0' }} TRANSFORMERS_TEST_DEVICE_SPEC: 'spec.py' steps: - name: Checkout torch-xpu-ops @@ -60,7 +61,7 @@ jobs: uses: actions/checkout@v4 with: repository: huggingface/transformers - ref: ${{ inputs.transformers != '' && inputs.transformers || 'v4.47.0' }} + ref: ${{ env.transformers }} path: transformers - name: Prepare OS environment run: | @@ -106,12 +107,54 @@ jobs: id: installed run: | source activate huggingface_transformers_test - echo "TORCH_BRANCH_ID=$(python -c 'import torch; print(torch.__version__)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - echo "TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "pip installed packages:" pip list | tee ${{ github.workspace }}/transformers/tests_log/pip_list.txt + echo "lspci gpu devices:" + lspci -d ::0380 | tee ${{ github.workspace }}/transformers/tests_log/lspci_0380.txt echo "GPU render nodes:" cat /sys/class/drm/render*/device/device | tee ${{ github.workspace 
}}/transformers/tests_log/device_IDs.txt + # printing annotations for the key packages + echo "### Annotations" >> $GITHUB_STEP_SUMMARY + echo "| | |" >> $GITHUB_STEP_SUMMARY + echo "| --- | --- |" >> $GITHUB_STEP_SUMMARY + echo "| jobs.$GITHUB_JOB.versions.os | $(source /etc/os-release && echo $VERSION_ID) |" >> $GITHUB_STEP_SUMMARY + echo "| jobs.$GITHUB_JOB.versions.linux-kernel | $(uname -r) |" >> $GITHUB_STEP_SUMMARY + echo "| jobs.$GITHUB_JOB.versions.python | $(python --version | cut -f2 -d' ') |" >> $GITHUB_STEP_SUMMARY + packages=" \ + level-zero \ + libigc1 \ + libigc2 \ + libze1 \ + libze-intel-gpu1 \ + intel-i915-dkms \ + intel-level-zero-gpu \ + intel-opencl-icd" + for package in $packages; do + package_version=$(dpkg -l | grep $package | grep ii | head -1 | sed "s/ */ /g" | cut -f3 -d" ") + echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |" >> $GITHUB_STEP_SUMMARY + done + packages="accelerate \ + numpy \ + torch \ + torchaudio \ + torchvision \ + transformers" + for package in $packages; do + package_version=$(python -c "import $package; print($package.__version__)") + echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |" >> $GITHUB_STEP_SUMMARY + done + # printing annotations for GPU cards + var="[$(cat /sys/class/drm/render*/device/vendor)]" + echo "| jobs.$GITHUB_JOB.drm.render_nodes_vendor_ids | $(echo $var | sed "s/ /,/g") |" >> $GITHUB_STEP_SUMMARY + var="[$(cat /sys/class/drm/render*/device/device)]" + echo "| jobs.$GITHUB_JOB.drm.render_nodes_device_ids | $(echo $var | sed "s/ /,/g") |" >> $GITHUB_STEP_SUMMARY + var=$(python -c "import torch; print(torch.version.xpu)") + echo "| jobs.$GITHUB_JOB.torch.version.xpu | $var |" >> $GITHUB_STEP_SUMMARY + var=$(python -c "import torch; print(torch.xpu.device_count())") + echo "| jobs.$GITHUB_JOB.torch.xpu.device_count | $var |" >> $GITHUB_STEP_SUMMARY + # printing annotations with key environment variables + echo "| jobs.$GITHUB_JOB.env.ZE_AFFINITY_MASK | $ZE_AFFINITY_MASK 
|" >> $GITHUB_STEP_SUMMARY + echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |" >> $GITHUB_STEP_SUMMARY - name: Sanitry check installed packages run: | source activate huggingface_transformers_test @@ -120,6 +163,7 @@ jobs: pip show torch | grep Version | grep xpu pip show torchaudio | grep Version | grep xpu pip show torchvision | grep Version | grep xpu + python -c 'import torch; exit(not torch.xpu.is_available())' - name: Run XPU backbone run: | source activate huggingface_transformers_test From 7137aeb9fcda0ce3344b9d02a1d47fc8bc35f430 Mon Sep 17 00:00:00 2001 From: "Wang, Chuanqi" Date: Sun, 22 Dec 2024 20:04:31 +0800 Subject: [PATCH 2/7] [CI] Add ccl and mpi env source for XCCL backend related PR test (#1179) --- .github/scripts/env.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/scripts/env.sh b/.github/scripts/env.sh index 4fd192c06..9cfd67477 100644 --- a/.github/scripts/env.sh +++ b/.github/scripts/env.sh @@ -4,6 +4,8 @@ if [ "$1" != "nightly_wheel" ];then source /opt/intel/oneapi/compiler/latest/env/vars.sh source /opt/intel/oneapi/umf/latest/env/vars.sh source /opt/intel/oneapi/pti/latest/env/vars.sh + source /opt/intel/oneapi/ccl/latest/env/vars.sh + source /opt/intel/oneapi/mpi/latest/env/vars.sh else echo "Don't need to source DL-Essential for nightly wheel" fi From 6899263daf50a9848b47c561a6e08d72991f52dc Mon Sep 17 00:00:00 2001 From: Dmitry Rogozhkin Date: Sun, 22 Dec 2024 21:14:13 -0800 Subject: [PATCH 3/7] ci/transformers: add pipeline and trainer tests (#1185) Changes: * Add testing of tests/pipelines * Add testing of tests/trainer * Add printing results summary (to workload summary page) * Add printing failure lines (to workload summary page) Summary page contains sections in this order: * Results summary * Failure lines * Annotations --------- Signed-off-by: Dmitry Rogozhkin --- .github/workflows/_linux_transformers.yml | 168 ++++++++++++++++------ 1 file changed, 123 insertions(+), 45 deletions(-) diff --git 
a/.github/workflows/_linux_transformers.yml b/.github/workflows/_linux_transformers.yml index 95aee8e7e..65dde1b6d 100644 --- a/.github/workflows/_linux_transformers.yml +++ b/.github/workflows/_linux_transformers.yml @@ -104,7 +104,6 @@ jobs: rm -rf reports cp ${{ github.workspace }}/torch-xpu-ops/.github/scripts/spec.py ./ - name: Report installed versions - id: installed run: | source activate huggingface_transformers_test echo "pip installed packages:" @@ -113,48 +112,6 @@ jobs: lspci -d ::0380 | tee ${{ github.workspace }}/transformers/tests_log/lspci_0380.txt echo "GPU render nodes:" cat /sys/class/drm/render*/device/device | tee ${{ github.workspace }}/transformers/tests_log/device_IDs.txt - # printing annotations for the key packages - echo "### Annotations" >> $GITHUB_STEP_SUMMARY - echo "| | |" >> $GITHUB_STEP_SUMMARY - echo "| --- | --- |" >> $GITHUB_STEP_SUMMARY - echo "| jobs.$GITHUB_JOB.versions.os | $(source /etc/os-release && echo $VERSION_ID) |" >> $GITHUB_STEP_SUMMARY - echo "| jobs.$GITHUB_JOB.versions.linux-kernel | $(uname -r) |" >> $GITHUB_STEP_SUMMARY - echo "| jobs.$GITHUB_JOB.versions.python | $(python --version | cut -f2 -d' ') |" >> $GITHUB_STEP_SUMMARY - packages=" \ - level-zero \ - libigc1 \ - libigc2 \ - libze1 \ - libze-intel-gpu1 \ - intel-i915-dkms \ - intel-level-zero-gpu \ - intel-opencl-icd" - for package in $packages; do - package_version=$(dpkg -l | grep $package | grep ii | head -1 | sed "s/ */ /g" | cut -f3 -d" ") - echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |" >> $GITHUB_STEP_SUMMARY - done - packages="accelerate \ - numpy \ - torch \ - torchaudio \ - torchvision \ - transformers" - for package in $packages; do - package_version=$(python -c "import $package; print($package.__version__)") - echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |" >> $GITHUB_STEP_SUMMARY - done - # printing annotations for GPU cards - var="[$(cat /sys/class/drm/render*/device/vendor)]" - echo "| 
jobs.$GITHUB_JOB.drm.render_nodes_vendor_ids | $(echo $var | sed "s/ /,/g") |" >> $GITHUB_STEP_SUMMARY - var="[$(cat /sys/class/drm/render*/device/device)]" - echo "| jobs.$GITHUB_JOB.drm.render_nodes_device_ids | $(echo $var | sed "s/ /,/g") |" >> $GITHUB_STEP_SUMMARY - var=$(python -c "import torch; print(torch.version.xpu)") - echo "| jobs.$GITHUB_JOB.torch.version.xpu | $var |" >> $GITHUB_STEP_SUMMARY - var=$(python -c "import torch; print(torch.xpu.device_count())") - echo "| jobs.$GITHUB_JOB.torch.xpu.device_count | $var |" >> $GITHUB_STEP_SUMMARY - # printing annotations with key environment variables - echo "| jobs.$GITHUB_JOB.env.ZE_AFFINITY_MASK | $ZE_AFFINITY_MASK |" >> $GITHUB_STEP_SUMMARY - echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |" >> $GITHUB_STEP_SUMMARY - name: Sanitry check installed packages run: | source activate huggingface_transformers_test @@ -164,11 +121,132 @@ jobs: pip show torchaudio | grep Version | grep xpu pip show torchvision | grep Version | grep xpu python -c 'import torch; exit(not torch.xpu.is_available())' - - name: Run XPU backbone + - name: Run -k backbone tests run: | source activate huggingface_transformers_test cd transformers - python3 -m pytest -rsf --make-reports=tests_benchmark -k backbone tests + python3 -m pytest -rsf --make-reports=tests_backbone -k backbone tests + - name: Run tests/pipelines + run: | + source activate huggingface_transformers_test + cd transformers + # Some tests are known to fail w/o clear pattern + # TODO: drop ||true after triage and fixes + python3 -m pytest -rsf --make-reports=tests_pipelines tests/pipelines || true + - name: Run tests/trainer + run: | + source activate huggingface_transformers_test + cd transformers + # Excluding tests due to: + # * Some ray tests hang, reason unknown + # * torch.distributed.* not yet supported by XPU + pattern=" \ + not ray and \ + not TestTrainerDistributed and \ + not TestTrainerDistributedXPU and \ + not TestFSDPTrainer" + python3 
-m pytest -rsf --make-reports=tests_trainer tests/trainer -k "$pattern" + - name: Print results table + if: ${{ ! cancelled() }} + run: | + # Helper function to return number preceeding given pattern, i.e: + # === 25 failed, 11 warnings, 0 errors === + # Call as follows: + # parse_stat $line "failed" + function parse_stat() { + stat=$(cat $1 | grep $2 | sed "s/.* \([0-9]*\) $2.*/\1/") + if [ -n "$stat" ]; then echo $stat; else echo "0"; fi + } + cd transformers + { + echo "### Results" + echo "| Test group | Errors | Failed | Passed | Skipped |" + echo "| --- | --- | --- | --- | --- |" + for stat in $(find reports -name stats.txt); do + # Each stat.txt is located in: reports/$test_group/stats.txt + test_group=$(echo $stat | cut -f 2 -d/) + # Get failed, passed, skipped, etc. counters + failed=$(parse_stat $stat failed) + passed=$(parse_stat $stat passed) + skipped=$(parse_stat $stat skipped) + warnings=$(parse_stat $stat warnings) + errors=$(parse_stat $stat errors) + echo "| $test_group | $errors | $failed | $passed | $skipped |" + done + } >> $GITHUB_STEP_SUMMARY + - name: Print failure lines + if: ${{ ! cancelled() }} + run: | + cd transformers + { + echo "### Failure lines" + echo "| File | Error | Comment |" + echo "| --- | --- | --- |" + rm -rf _failures.txt + for failure in $(find reports -name failures_line.txt); do + tail -n +2 $failure >> _failures.txt + done + # failures_line.txt file does not have test case information, + # so we can just sort the output and report uniq values + sort _failures.txt | uniq > _failures_uniq.txt + while read line; do + file=$(echo $line | cut -f1 -d" " | sed "s/\(.*\):$/\1/") + error=$(echo $line | cut -f2 -d" " | sed "s/\(.*\):$/\1/") + # Failure comments often contain special characters which complicate + # parsing failure lines. But fortunately we know for sure where comments + # start. So we just output all contents starting from this position and + # wrap everything in
 to avoid collisions with Markdown formatting.
+              comment="
$(echo $line | cut -f3- -d' ' | sed 's/\(.*\):$/\1/')
" + echo "| $file | $error | $comment |" + done <_failures_uniq.txt + } >> $GITHUB_STEP_SUMMARY + - name: Print annotations + if: ${{ ! cancelled() }} + run: | + source activate huggingface_transformers_test + { + echo "### Annotations" + echo "| | |" + echo "| --- | --- |" + echo "| jobs.$GITHUB_JOB.versions.os | $(source /etc/os-release && echo $VERSION_ID) |" + echo "| jobs.$GITHUB_JOB.versions.linux-kernel | $(uname -r) |" + echo "| jobs.$GITHUB_JOB.versions.python | $(python --version | cut -f2 -d' ') |" + packages=" \ + level-zero \ + libigc1 \ + libigc2 \ + libze1 \ + libze-intel-gpu1 \ + intel-i915-dkms \ + intel-level-zero-gpu \ + intel-opencl-icd" + for package in $packages; do + package_version=$(dpkg -l | grep $package | grep ii | head -1 | sed "s/ */ /g" | cut -f3 -d" ") + echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |" + done + packages="accelerate \ + numpy \ + torch \ + torchaudio \ + torchvision \ + transformers" + for package in $packages; do + package_version=$(python -c "import $package; print($package.__version__)" || true) + echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |" + done + # printing annotations for GPU cards + var="[$(cat /sys/class/drm/render*/device/vendor || true)]" + echo "| jobs.$GITHUB_JOB.drm.render_nodes_vendor_ids | $(echo $var | sed 's/ /,/g') |" + var="[$(cat /sys/class/drm/render*/device/device || true)]" + echo "| jobs.$GITHUB_JOB.drm.render_nodes_device_ids | $(echo $var | sed 's/ /,/g') |" + var=$(python -c "import torch; print(torch.version.xpu)" || true) + echo "| jobs.$GITHUB_JOB.torch.version.xpu | $var |" + var=$(python -c "import torch; print(torch.xpu.device_count())" || true) + echo "| jobs.$GITHUB_JOB.torch.xpu.device_count | $var |" + # printing annotations with key environment variables + echo "| jobs.$GITHUB_JOB.env.ZE_AFFINITY_MASK | $ZE_AFFINITY_MASK |" + echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |" + } >> $GITHUB_STEP_SUMMARY - name: Upload Test 
log if: ${{ ! cancelled() }} uses: actions/upload-artifact@v4 From 212ee906b6d1aa4c0360395803cc2ee43b2d9741 Mon Sep 17 00:00:00 2001 From: Dmitry Rogozhkin Date: Mon, 23 Dec 2024 18:09:35 -0800 Subject: [PATCH 4/7] ci/transformers: run tests in utils, benchmark, generation, models (#1190) Signed-off-by: Dmitry Rogozhkin --- .github/workflows/_linux_transformers.yml | 93 +++++++++++++++++++---- 1 file changed, 80 insertions(+), 13 deletions(-) diff --git a/.github/workflows/_linux_transformers.yml b/.github/workflows/_linux_transformers.yml index 65dde1b6d..f79227658 100644 --- a/.github/workflows/_linux_transformers.yml +++ b/.github/workflows/_linux_transformers.yml @@ -122,18 +122,64 @@ jobs: pip show torchvision | grep Version | grep xpu python -c 'import torch; exit(not torch.xpu.is_available())' - name: Run -k backbone tests + env: + TEST_CASE: 'tests_backbone' run: | source activate huggingface_transformers_test cd transformers - python3 -m pytest -rsf --make-reports=tests_backbone -k backbone tests + python3 -m pytest -rsf --make-reports=$TEST_CASE -k backbone tests || \ + (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV) + - name: Run tests/*.py + env: + TEST_CASE: 'tests_py' + run: | + source activate huggingface_transformers_test + cd transformers + python3 -m pytest -rsf --make-reports=$TEST_CASE tests/*.py || true + - name: Run tests/benchmark + env: + TEST_CASE: 'tests_benchmark' + run: | + source activate huggingface_transformers_test + cd transformers + python3 -m pytest -rsf --make-reports=$TEST_CASE tests/benchmark || true + - name: Run tests/generation + env: + TEST_CASE: 'tests_generation' + run: | + source activate huggingface_transformers_test + cd transformers + # Excluding tests due to: + # * torch.distributed.* not yet supported by XPU + pattern="not TestFSDPGeneration" + python3 -m pytest -rsf --make-reports=$TEST_CASE tests/generation -k "$pattern" || true + - name: Run tests/models + env: + TEST_CASE: 'tests_models' + run: | + 
source activate huggingface_transformers_test + cd transformers + # Excluding tests due to: + # * https://github.com/huggingface/transformers/issues/35252 (CUDA specific tests) + # * https://github.com/pytorch/pytorch/issues/140965 (aten::_linalg_eigvals) + pattern=" \ + not test_model_parallelization and \ + not test_model_parallel_equal_results and \ + not test_resize_embeddings_untied and \ + not test_resize_tokens_embeddings" + python3 -m pytest -rsf --make-reports=$TEST_CASE tests/models -k "$pattern" || true - name: Run tests/pipelines + env: + TEST_CASE: 'tests_pipelines' run: | source activate huggingface_transformers_test cd transformers # Some tests are known to fail w/o clear pattern # TODO: drop ||true after triage and fixes - python3 -m pytest -rsf --make-reports=tests_pipelines tests/pipelines || true + python3 -m pytest -rsf --make-reports=$TEST_CASE tests/pipelines || true - name: Run tests/trainer + env: + TEST_CASE: 'tests_trainer' run: | source activate huggingface_transformers_test cd transformers @@ -145,7 +191,24 @@ jobs: not TestTrainerDistributed and \ not TestTrainerDistributedXPU and \ not TestFSDPTrainer" - python3 -m pytest -rsf --make-reports=tests_trainer tests/trainer -k "$pattern" + python3 -m pytest -rsf --make-reports=$TEST_CASE tests/trainer -k "$pattern" || \ + (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV) + - name: Run tests/utils + env: + TEST_CASE: 'tests_utils' + run: | + source activate huggingface_transformers_test + cd transformers + # Excluding tests due to: + # * Network proxy connection issue, reason unknown + pattern="not test_load_img_url_timeout" + python3 -m pytest -rsf --make-reports=$TEST_CASE tests/utils -k "$pattern" || \ + (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV) + - name: Check for errors in tests + run: | + FAILED_CASES=$(echo $FAILED_CASES | sed 's/^,//') + echo "Failed cases: [$(echo $FAILED_CASES | sed 's/,/, /g')]" + test -z "$FAILED_CASES" - name: Print results table 
if: ${{ ! cancelled() }} run: | @@ -160,18 +223,19 @@ jobs: cd transformers { echo "### Results" - echo "| Test group | Errors | Failed | Passed | Skipped |" - echo "| --- | --- | --- | --- | --- |" + echo "| Test group | Errors | Failed | Deselected | Passed | Skipped |" + echo "| --- | --- | --- | --- | --- | --- |" for stat in $(find reports -name stats.txt); do # Each stat.txt is located in: reports/$test_group/stats.txt test_group=$(echo $stat | cut -f 2 -d/) # Get failed, passed, skipped, etc. counters failed=$(parse_stat $stat failed) passed=$(parse_stat $stat passed) + deselected=$(parse_stat $stat deselected) skipped=$(parse_stat $stat skipped) warnings=$(parse_stat $stat warnings) errors=$(parse_stat $stat errors) - echo "| $test_group | $errors | $failed | $passed | $skipped |" + echo "| $test_group | $errors | $failed | $deselected | $passed | $skipped |" done } >> $GITHUB_STEP_SUMMARY - name: Print failure lines @@ -180,24 +244,27 @@ jobs: cd transformers { echo "### Failure lines" - echo "| File | Error | Comment |" - echo "| --- | --- | --- |" + echo "| Test group |File | Error | Comment |" + echo "| --- | --- | --- | --- |" rm -rf _failures.txt for failure in $(find reports -name failures_line.txt); do - tail -n +2 $failure >> _failures.txt + # Each failure_line.txt is located in: reports/$test_group/failure_line.txt + test_group=$(echo $failure | cut -f2 -d/) + tail -n +2 $failure | sed "s/^/$test_group /" >> _failures.txt done # failures_line.txt file does not have test case information, # so we can just sort the output and report uniq values sort _failures.txt | uniq > _failures_uniq.txt while read line; do - file=$(echo $line | cut -f1 -d" " | sed "s/\(.*\):$/\1/") - error=$(echo $line | cut -f2 -d" " | sed "s/\(.*\):$/\1/") + test_group=$(echo $line | cut -f1 -d" ") + file=$(echo $line | cut -f2 -d" " | sed "s/\(.*\):$/\1/") + error=$(echo $line | cut -f3 -d" " | sed "s/\(.*\):$/\1/") # Failure comments often contain special characters which 
complicate # parsing failure lines. But fortunately we know for sure where comments # start. So we just output all contents starting from this position and # wrap everything in
 to avoid collisions with Markdown formatting.
-              comment="
$(echo $line | cut -f3- -d' ' | sed 's/\(.*\):$/\1/')
" - echo "| $file | $error | $comment |" + comment="
$(echo $line | cut -f4- -d' ' | sed 's/\(.*\):$/\1/')
" + echo "| $test_group | $file | $error | $comment |" done <_failures_uniq.txt } >> $GITHUB_STEP_SUMMARY - name: Print annotations From bc99386b32af44a1122be154e4689f72d4d0fbef Mon Sep 17 00:00:00 2001 From: Yutao Xu Date: Tue, 24 Dec 2024 16:22:50 +0800 Subject: [PATCH 5/7] Apply new tolerance modification patch for E2E (#1203) Fixing building error related to the patch: https://github.com/pytorch/pytorch/pull/129735. Apply https://github.com/pytorch/pytorch/pull/143739 for replacement. --- .github/scripts/apply_torch_pr.py | 3 +-- src/ATen/native/transformers/SDPUtils.cpp | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/scripts/apply_torch_pr.py b/.github/scripts/apply_torch_pr.py index 89fa32fdf..bbe89ed7d 100644 --- a/.github/scripts/apply_torch_pr.py +++ b/.github/scripts/apply_torch_pr.py @@ -12,8 +12,7 @@ # Fallback to CPU for XPU FP64 "https://github.com/pytorch/pytorch/pull/126516", # Modify the tolerance level in TIMM benchmark - # "https://github.com/pytorch/pytorch/pull/129735", - "https://github.com/mengfei25/pytorch/pull/21", + "https://github.com/pytorch/pytorch/pull/143739", ] ) parser.add_argument('--extra-pr-list', '-e', nargs='+',default=[]) diff --git a/src/ATen/native/transformers/SDPUtils.cpp b/src/ATen/native/transformers/SDPUtils.cpp index db4409493..eca5f9829 100644 --- a/src/ATen/native/transformers/SDPUtils.cpp +++ b/src/ATen/native/transformers/SDPUtils.cpp @@ -4,6 +4,8 @@ namespace sdp { +using c10::array_of; + bool check_all_tensors_on_device(sdp_params const& params, bool debug) { // Check that all tensors are on the GPU device // This should be handled by the stub dispatch, but whe call From 0f48ac07e42ce30d2d07447f4b49bb4ab23f8e64 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Wed, 25 Dec 2024 09:10:36 +0800 Subject: [PATCH 6/7] Add skip lists for LNL, BGM and MTL devices (#1187) Add skip lists for LNL, BGM and MTL devices --------- Signed-off-by: Cheng, Penghui Signed-off-by: Cheng --- 
test/xpu/extended/run_test_with_skip_bmg.py | 22 ++++++++++++ test/xpu/extended/run_test_with_skip_lnl.py | 22 ++++++++++++ test/xpu/extended/run_test_with_skip_mtl.py | 22 ++++++++++++ test/xpu/extended/skip_list_win_bmg.py | 13 +++++++ test/xpu/extended/skip_list_win_lnl.py | 13 +++++++ test/xpu/extended/skip_list_win_mtl.py | 20 +++++++++++ test/xpu/run_test_with_skip_bmg.py | 24 +++++++++++++ test/xpu/run_test_with_skip_lnl.py | 24 +++++++++++++ test/xpu/skip_list_win_bmg.py | 39 +++++++++++++++++++++ test/xpu/skip_list_win_lnl.py | 38 ++++++++++++++++++++ 10 files changed, 237 insertions(+) create mode 100644 test/xpu/extended/run_test_with_skip_bmg.py create mode 100644 test/xpu/extended/run_test_with_skip_lnl.py create mode 100644 test/xpu/extended/run_test_with_skip_mtl.py create mode 100644 test/xpu/extended/skip_list_win_bmg.py create mode 100644 test/xpu/extended/skip_list_win_lnl.py create mode 100644 test/xpu/extended/skip_list_win_mtl.py create mode 100644 test/xpu/run_test_with_skip_bmg.py create mode 100644 test/xpu/run_test_with_skip_lnl.py create mode 100644 test/xpu/skip_list_win_bmg.py create mode 100644 test/xpu/skip_list_win_lnl.py diff --git a/test/xpu/extended/run_test_with_skip_bmg.py b/test/xpu/extended/run_test_with_skip_bmg.py new file mode 100644 index 000000000..6499550f5 --- /dev/null +++ b/test/xpu/extended/run_test_with_skip_bmg.py @@ -0,0 +1,22 @@ +import os +import pytest +import sys +from skip_list_common import skip_dict +from skip_list_win import skip_dict as skip_dict_win +from skip_list_win_bmg import skip_dict as skip_dict_win_bmg + +IS_WINDOWS = sys.platform == "win32" + +skip_list = skip_dict["test_ops_xpu.py"] +if IS_WINDOWS: + skip_list += skip_dict_win["test_ops_xpu.py"] + skip_dict_win_bmg["test_ops_xpu.py"] + +skip_options = "not " + skip_list[0] +for skip_case in skip_list[1:]: + skip_option = " and not " + skip_case + skip_options += skip_option + +os.environ["PYTORCH_TEST_WITH_SLOW"]="1" +test_command = ["-k", 
skip_options, "test_ops_xpu.py", "-v"] +res = pytest.main(test_command) +sys.exit(res) diff --git a/test/xpu/extended/run_test_with_skip_lnl.py b/test/xpu/extended/run_test_with_skip_lnl.py new file mode 100644 index 000000000..a795ca07a --- /dev/null +++ b/test/xpu/extended/run_test_with_skip_lnl.py @@ -0,0 +1,22 @@ +import os +import pytest +import sys +from skip_list_common import skip_dict +from skip_list_win import skip_dict as skip_dict_win +from skip_list_win_lnl import skip_dict as skip_dict_win_lnl + +IS_WINDOWS = sys.platform == "win32" + +skip_list = skip_dict["test_ops_xpu.py"] +if IS_WINDOWS: + skip_list += skip_dict_win["test_ops_xpu.py"] + skip_dict_win_lnl["test_ops_xpu.py"] + +skip_options = "not " + skip_list[0] +for skip_case in skip_list[1:]: + skip_option = " and not " + skip_case + skip_options += skip_option + +os.environ["PYTORCH_TEST_WITH_SLOW"]="1" +test_command = ["-k", skip_options, "test_ops_xpu.py", "-v"] +res = pytest.main(test_command) +sys.exit(res) diff --git a/test/xpu/extended/run_test_with_skip_mtl.py b/test/xpu/extended/run_test_with_skip_mtl.py new file mode 100644 index 000000000..6ed39a64e --- /dev/null +++ b/test/xpu/extended/run_test_with_skip_mtl.py @@ -0,0 +1,22 @@ +import os +import pytest +import sys +from skip_list_common import skip_dict +from skip_list_win import skip_dict as skip_dict_win +from skip_list_win_mtl import skip_dict as skip_dict_win_mtl + +IS_WINDOWS = sys.platform == "win32" + +skip_list = skip_dict["test_ops_xpu.py"] +if IS_WINDOWS: + skip_list += skip_dict_win["test_ops_xpu.py"] + skip_dict_win_mtl["test_ops_xpu.py"] + +skip_options = "not " + skip_list[0] +for skip_case in skip_list[1:]: + skip_option = " and not " + skip_case + skip_options += skip_option + +os.environ["PYTORCH_TEST_WITH_SLOW"]="1" +test_command = ["-k", skip_options, "test_ops_xpu.py", "-v"] +res = pytest.main(test_command) +sys.exit(res) \ No newline at end of file diff --git a/test/xpu/extended/skip_list_win_bmg.py 
b/test/xpu/extended/skip_list_win_bmg.py new file mode 100644 index 000000000..2ee1dd31e --- /dev/null +++ b/test/xpu/extended/skip_list_win_bmg.py @@ -0,0 +1,13 @@ +skip_dict = { + "test_ops_xpu.py": ( + # https://github.com/intel/torch-xpu-ops/issues/1173 + # Fatal Python error: Illegal instruction + "test_compare_cpu_grid_sampler_2d_xpu_float64", + "test_compare_cpu_cosh_xpu_complex64", + "test_compare_cpu_nn_functional_softshrink_xpu_bfloat16", + "test_compare_cpu_nn_functional_softshrink_xpu_float16", + "test_compare_cpu_nn_functional_softshrink_xpu_float32", + "test_compare_cpu_nn_functional_softshrink_xpu_float64", + "test_compare_cpu_square_xpu_complex128", + ), +} diff --git a/test/xpu/extended/skip_list_win_lnl.py b/test/xpu/extended/skip_list_win_lnl.py new file mode 100644 index 000000000..2ee1dd31e --- /dev/null +++ b/test/xpu/extended/skip_list_win_lnl.py @@ -0,0 +1,13 @@ +skip_dict = { + "test_ops_xpu.py": ( + # https://github.com/intel/torch-xpu-ops/issues/1173 + # Fatal Python error: Illegal instruction + "test_compare_cpu_grid_sampler_2d_xpu_float64", + "test_compare_cpu_cosh_xpu_complex64", + "test_compare_cpu_nn_functional_softshrink_xpu_bfloat16", + "test_compare_cpu_nn_functional_softshrink_xpu_float16", + "test_compare_cpu_nn_functional_softshrink_xpu_float32", + "test_compare_cpu_nn_functional_softshrink_xpu_float64", + "test_compare_cpu_square_xpu_complex128", + ), +} diff --git a/test/xpu/extended/skip_list_win_mtl.py b/test/xpu/extended/skip_list_win_mtl.py new file mode 100644 index 000000000..b0d971c6e --- /dev/null +++ b/test/xpu/extended/skip_list_win_mtl.py @@ -0,0 +1,20 @@ +skip_dict = { + # failed on MTL windows, skip first for Preci + "test_ops_xpu.py": ( + "test_compare_cpu_sqrt_xpu_complex64", + "test_backward_nn_functional_adaptive_avg_pool2d_xpu_float32", + + "test_compare_cpu_cosh_xpu_complex128", + "test_compare_cpu_frexp_xpu_bfloat16", + "test_compare_cpu_frexp_xpu_float16", + "test_compare_cpu_frexp_xpu_float32", + 
"test_compare_cpu_frexp_xpu_float64", + "test_compare_cpu_max_pool2d_with_indices_backward_xpu_bfloat16", + "test_compare_cpu_max_pool2d_with_indices_backward_xpu_float16", + "test_compare_cpu_max_pool2d_with_indices_backward_xpu_float32", + "test_compare_cpu_max_pool2d_with_indices_backward_xpu_float64", + "test_compare_cpu_nn_functional_avg_pool2d_xpu_bfloat16", + "test_compare_cpu_nn_functional_avg_pool2d_xpu_float32", + "test_compare_cpu_nn_functional_avg_pool3d_xpu_float32", + ), +} diff --git a/test/xpu/run_test_with_skip_bmg.py b/test/xpu/run_test_with_skip_bmg.py new file mode 100644 index 000000000..9bd360296 --- /dev/null +++ b/test/xpu/run_test_with_skip_bmg.py @@ -0,0 +1,24 @@ +import os +import sys +from skip_list_common import skip_dict +from skip_list_win import skip_dict as skip_dict_win +from skip_list_win_bmg import skip_dict as skip_dict_win_bmg +from xpu_test_utils import launch_test + + +res = 0 +IS_WINDOWS = sys.platform == "win32" + +for key in skip_dict: + skip_list = skip_dict[key] + if IS_WINDOWS and key in skip_dict_win: + skip_list += skip_dict_win[key] + if IS_WINDOWS and key in skip_dict_win_bmg: + skip_list += skip_dict_win_bmg[key] + res += launch_test(key, skip_list) + +if os.name == "nt": + sys.exit(res) +else: + exit_code = os.WEXITSTATUS(res) + sys.exit(exit_code) \ No newline at end of file diff --git a/test/xpu/run_test_with_skip_lnl.py b/test/xpu/run_test_with_skip_lnl.py new file mode 100644 index 000000000..4413626ea --- /dev/null +++ b/test/xpu/run_test_with_skip_lnl.py @@ -0,0 +1,24 @@ +import os +import sys +from skip_list_common import skip_dict +from skip_list_win import skip_dict as skip_dict_win +from skip_list_win_lnl import skip_dict as skip_dict_win_lnl +from xpu_test_utils import launch_test + + +res = 0 +IS_WINDOWS = sys.platform == "win32" + +for key in skip_dict: + skip_list = skip_dict[key] + if IS_WINDOWS and key in skip_dict_win: + skip_list += skip_dict_win[key] + if IS_WINDOWS and key in skip_dict_win_lnl: 
+ skip_list += skip_dict_win_lnl[key] + res += launch_test(key, skip_list) + +if os.name == "nt": + sys.exit(res) +else: + exit_code = os.WEXITSTATUS(res) + sys.exit(exit_code) \ No newline at end of file diff --git a/test/xpu/skip_list_win_bmg.py b/test/xpu/skip_list_win_bmg.py new file mode 100644 index 000000000..a91d4f4a5 --- /dev/null +++ b/test/xpu/skip_list_win_bmg.py @@ -0,0 +1,39 @@ +skip_dict = { + # tensor(0.-0.j, device='xpu:0', dtype=torch.complex32) tensor(nan+nanj, device='xpu:0', dtype=torch.complex32) (1.5707964+0j) + "test_unary_ufuncs_xpu.pyy": ( + "test_reference_numerics_small_acos_xpu_complex32", + "test_reference_numerics_small_asin_xpu_complex32", + "test_reference_numerics_small_asinh_xpu_complex32", + "test_reference_numerics_small_atan_xpu_complex32", + "test_reference_numerics_small_atanh_xpu_complex32", + # Need to check compiler std::sin() on inf+infj + "test_reference_numerics_extremal__refs_sin_xpu_complex128", + "test_reference_numerics_extremal__refs_sin_xpu_complex64", + "test_reference_numerics_extremal_nn_functional_tanhshrink_xpu_complex128", + "test_reference_numerics_extremal_nn_functional_tanhshrink_xpu_complex64", + "test_reference_numerics_extremal_sin_xpu_complex128", + "test_reference_numerics_extremal_sin_xpu_complex64", + "test_reference_numerics_extremal_sinh_xpu_complex128", + "test_reference_numerics_extremal_sinh_xpu_complex64", + "test_reference_numerics_large__refs_sin_xpu_complex32", + "test_reference_numerics_large_sin_xpu_complex32", + # Known issue of exp accuracy + # tensor(13437.7000-501.j, device='xpu:0', dtype=torch.complex128) tensor(inf+infj, device='xpu:0', dtype=torch.complex128) (-inf+infj) + "test_reference_numerics_large__refs_exp_xpu_complex128", + "test_reference_numerics_large_exp_xpu_complex128", + "test_reference_numerics_small_exp_xpu_complex32", + ":test_reference_numerics_normal_special_i1_xpu_float32", + "test_reference_numerics_normal_sigmoid_xpu_complex32", + 
"test_reference_numerics_small_sigmoid_xpu_complex32", + ), + # https://github.com/intel/torch-xpu-ops/issues/1171 + # AssertionError: 'Assertion maxind >= 0 && maxind < outputImageSize failed' not found in '\nAssertHandler::printMessage\n' : The expected error was not found + "nn\\test_pooling_xpu.py": ( + "test_MaxUnpool_index_errors_case1_xpu", + "test_MaxUnpool_index_errors_case2_xpu", + "test_MaxUnpool_index_errors_case4_xpu", + "test_MaxUnpool_index_errors_case6_xpu", + "test_MaxUnpool_index_errors_case7_xpu", + "test_MaxUnpool_index_errors_case9_xpu", + ), +} diff --git a/test/xpu/skip_list_win_lnl.py b/test/xpu/skip_list_win_lnl.py new file mode 100644 index 000000000..a9e8bfc3f --- /dev/null +++ b/test/xpu/skip_list_win_lnl.py @@ -0,0 +1,38 @@ +skip_dict = { + # tensor(0.-0.j, device='xpu:0', dtype=torch.complex32) tensor(nan+nanj, device='xpu:0', dtype=torch.complex32) (1.5707964+0j) + "test_unary_ufuncs_xpu.py": ( + "test_reference_numerics_small_acos_xpu_complex32", + "test_reference_numerics_small_asin_xpu_complex32", + "test_reference_numerics_small_asinh_xpu_complex32", + "test_reference_numerics_small_atan_xpu_complex32", + "test_reference_numerics_small_atanh_xpu_complex32", + # Need to check compiler std::sin() on inf+infj + "test_reference_numerics_extremal__refs_sin_xpu_complex128", + "test_reference_numerics_extremal__refs_sin_xpu_complex64", + "test_reference_numerics_extremal_nn_functional_tanhshrink_xpu_complex128", + "test_reference_numerics_extremal_nn_functional_tanhshrink_xpu_complex64", + "test_reference_numerics_extremal_sin_xpu_complex128", + "test_reference_numerics_extremal_sin_xpu_complex64", + "test_reference_numerics_extremal_sinh_xpu_complex128", + "test_reference_numerics_extremal_sinh_xpu_complex64", + "test_reference_numerics_large__refs_sin_xpu_complex32", + "test_reference_numerics_large_sin_xpu_complex32", + # Known issue of exp accuracy + # tensor(13437.7000-501.j, device='xpu:0', dtype=torch.complex128) tensor(inf+infj,
device='xpu:0', dtype=torch.complex128) (-inf+infj) + "test_reference_numerics_large__refs_exp_xpu_complex128", + "test_reference_numerics_large_exp_xpu_complex128", + "test_reference_numerics_small_exp_xpu_complex32", + "test_reference_numerics_normal_special_i1_xpu_float32", + "test_reference_numerics_normal_sigmoid_xpu_complex32", + ), + # https://github.com/intel/torch-xpu-ops/issues/1171 + # AssertionError: 'Assertion maxind >= 0 && maxind < outputImageSize failed' not found in '\nAssertHandler::printMessage\n' : The expected error was not found + "nn\\test_pooling_xpu.py": ( + "test_MaxUnpool_index_errors_case1_xpu", + "test_MaxUnpool_index_errors_case2_xpu", + "test_MaxUnpool_index_errors_case4_xpu", + "test_MaxUnpool_index_errors_case6_xpu", + "test_MaxUnpool_index_errors_case7_xpu", + "test_MaxUnpool_index_errors_case9_xpu", + ), +} From 7d66fe1150223dc77fb65b994579d14f2bd96402 Mon Sep 17 00:00:00 2001 From: Dmitry Rogozhkin Date: Wed, 25 Dec 2024 18:24:15 -0800 Subject: [PATCH 7/7] ci/transformers: dump xpu-smi output in the log (#1206) Signed-off-by: Dmitry Rogozhkin Co-authored-by: Cheng, Penghui --- .github/workflows/_linux_transformers.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/_linux_transformers.yml b/.github/workflows/_linux_transformers.yml index f79227658..a539260a6 100644 --- a/.github/workflows/_linux_transformers.yml +++ b/.github/workflows/_linux_transformers.yml @@ -112,6 +112,8 @@ jobs: lspci -d ::0380 | tee ${{ github.workspace }}/transformers/tests_log/lspci_0380.txt echo "GPU render nodes:" cat /sys/class/drm/render*/device/device | tee ${{ github.workspace }}/transformers/tests_log/device_IDs.txt + echo "xpu-smi output:" + xpu-smi discovery -y --json --dump -1 - name: Sanitry check installed packages run: | source activate huggingface_transformers_test