Commit

Merge branch 'main' into hjhee/asinh
fengyuan14 authored Jul 10, 2024
2 parents 3b20011 + 4db0b0c commit d722631
Showing 20 changed files with 486 additions and 337 deletions.
6 changes: 3 additions & 3 deletions .github/actions/inductor-xpu-e2e-test/action.yml
@@ -41,7 +41,7 @@ runs:
shell: bash
run: |
source activate e2e_ci
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then
cd ../ && rm -rf audio && git clone --single-branch -b main https://github.com/pytorch/audio.git
cd audio && git checkout $TORCHAUDIO_COMMIT_ID
@@ -80,7 +80,7 @@ runs:
source activate e2e_ci
cp .github/scripts/inductor_xpu_test.sh ../pytorch
cd ../pytorch
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
rm -f ${{ github.workspace }}/summary_accuracy.log
# check param
function contains() {
@@ -198,7 +198,7 @@ runs:
source activate e2e_ci
cp .github/scripts/inductor_perf_summary.py ../pytorch
cd ../pytorch
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
pip install styleFrame scipy pandas
set -xe
for suite in $(echo ${{ inputs.suite }} |sed 's/,/ /g')
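All three hunks in this composite action make the same substitution: the standalone compiler environment script (/opt/intel/oneapi/compiler/latest/env/vars.sh) is replaced by the consolidated oneapi-vars.sh that ships with the pytorch-gpu-dev-0.5 bundle. A minimal sketch of what a defensive variant of such a step could look like, assuming the old compiler-only script is still an acceptable fallback (the step name and the fallback logic are illustrative, not part of this commit):

# Illustrative sketch, not part of this commit: prefer the new bundle script,
# fall back to the old compiler-only script if the bundle is absent.
- name: Setup oneAPI environment   # hypothetical step name
  shell: bash
  run: |
    BUNDLE_VARS="/opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh"   # new path used above
    COMPILER_VARS="/opt/intel/oneapi/compiler/latest/env/vars.sh"        # old path removed above
    if [ -f "${BUNDLE_VARS}" ]; then
      source "${BUNDLE_VARS}"
    elif [ -f "${COMPILER_VARS}" ]; then
      source "${COMPILER_VARS}"
    else
      echo "No oneAPI environment script found" >&2
      exit 1
    fi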
93 changes: 56 additions & 37 deletions .github/workflows/_linux_ut.yml
@@ -1,32 +1,37 @@
name: inductor-xpu-ut-test
name: Linux UT Test

on:
workflow_call:
inputs:
torch_xpu_ops_update:
pytorch:
required: false
type: string
default: 'true'
description: True means update xpu_ops when building pytorch, otherwise means not
ut_suite:
default: 'main'
description: Pytorch branch/commit
keep_torch_xpu_ops:
required: false
type: string
default: 'false'
description: Keep torch-xpu-ops pin. `true` means use pined commit
ut:
required: true
type: string
default: 'op_example,op_extended,op_ut,torch_xpu'
description: op_example,op_extended,op_ut,torch_xpu. Delimiter is comma
pytorch_branch:
default: ''
description: UT scope. `op_example,op_extended,op_ut,torch_xpu` Delimiter is comma
python:
required: false
type: string
default: 'main'
description: Set pytorch branch
default: '3.10'
description: Python version
runner:
required: true
type: string
default: 'linux.idc.xpu'
description: Set runner
description: Runner label


jobs:
Inductor-XPU-UT-Tests:
Torch-XPU-UT-Tests:
runs-on: ${{ inputs.runner }}
timeout-minutes: 900
steps:
@@ -36,60 +41,60 @@ jobs:
run: |
pwd
cd ../ && rm -rf pytorch
git clone -b ${{ inputs.pytorch_branch }} https://github.com/pytorch/pytorch
cd pytorch && git log -n 1 && git submodule sync && git submodule update --init --recursive
if [ -z ${{ inputs.torch_xpu_ops_update }} ]; then
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
git clone https://github.com/pytorch/pytorch pytorch
cd pytorch && git checkout ${{ inputs.pytorch }}
# apply PRs for stock pytorch
pip install requests
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
git status && git show -s
git submodule sync && git submodule update --init --recursive
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
echo "Don't replace torch-xpu-ops!"
else
if [[ ${{ inputs.torch_xpu_ops_update }} == 'true' ]]; then
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
else
echo "Not update torch-xpu-ops"
fi
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
# Workaround for torch-xpu-ops ci test
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
fi
# Workaround for torch-xpu-ops ci test
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
- name: Build Pytorch XPU
run: |
which conda && conda clean -ay
conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \
rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK}
conda create -n xpu_op_${ZE_AFFINITY_MASK} python=3.10 cmake ninja -y
conda create -n xpu_op_${ZE_AFFINITY_MASK} python=${{ inputs.python }} cmake ninja -y
source activate xpu_op_${ZE_AFFINITY_MASK}
conda install -c intel mkl-static mkl-include -y
cd ../pytorch
pip install -r requirements.txt
export USE_XPU=1
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
python setup.py bdist_wheel
pip install --force-reinstall dist/*.whl
git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
pip install -r .ci/docker/requirements-ci.txt
- name: Run XPU OP Examples
if: contains(inputs.ut_suite, 'op_example')
if: contains(inputs.ut, 'op_example') || github.event_name == 'schedule'
run: |
cd ${{ github.workspace }}
mkdir -p ut_log
xpu-smi discovery
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ${{ github.workspace }}
cd examples
pip install pytest
timeout 8000 pytest -v
- name: Run XPU OP Extended UT
if: contains(inputs.ut_suite, 'op_extended')
if: contains(inputs.ut, 'op_extended') || github.event_name == 'schedule'
run: |
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
export PYTORCH_TEST_WITH_SLOW=1
cd ../pytorch/third_party/torch-xpu-ops/test/xpu/extended/
timeout 10000 python run_test_with_skip.py
- name: Run XPU OP UT
if: contains(inputs.ut_suite, 'op_ut')
if: contains(inputs.ut, 'op_ut') || github.event_name == 'schedule'
run: |
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
export PYTORCH_ENABLE_XPU_FALLBACK=1
export PYTORCH_TEST_WITH_SLOW=1
@@ -101,9 +106,9 @@ jobs:
# test_foreach, test_decomp
timeout 10000 python run_test_with_only.py
- name: Run Torch XPU UT
if: contains(inputs.ut_suite, 'torch_xpu')
if: contains(inputs.ut, 'torch_xpu') || github.event_name == 'schedule'
run: |
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ../pytorch
TEST_REPORTS_DIR=$(pwd)/test/test-reports
@@ -117,7 +122,21 @@
fi
done
# Run Pytorch XPU python UT
export PYTORCH_ENABLE_XPU_FALLBACK=1
sed -i 's/selected_tests = exclude_tests(XPU_BLOCKLIST.*/selected_tests = XPU_TEST/g' ./test/run_test.py
python test/run_test.py --xpu
export PYTORCH_TEST_WITH_SLOW=1
export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
test_cmd="python test/run_test.py --include "
# All Inductor UT under test/inductor
for test in $(ls test/inductor | grep test);
do
test_cmd="${test_cmd} inductor/$test";
done
# All xpu ut under test/xpu
for test in $(ls test/xpu | grep test);
do
test_cmd="${test_cmd} xpu/$test";
done
if [ -f "test/test_xpu.py" ]; then
test_cmd="${test_cmd} test_xpu.py"
fi
eval $test_cmd
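Taken together, the renamed inputs turn this file into a general-purpose reusable workflow: pytorch selects the branch or commit of stock PyTorch to build, keep_torch_xpu_ops decides whether the pinned torch-xpu-ops commit is kept or replaced by the current checkout, ut is the comma-delimited test scope, and python and runner choose the interpreter version and the runner label. A minimal sketch of how a caller workflow might invoke it under those assumptions (the workflow name, trigger, and chosen values are illustrative, not part of this commit):

# Illustrative caller, not part of this commit.
name: call-linux-ut                      # hypothetical workflow name
on:
  workflow_dispatch:
jobs:
  linux-ut:
    uses: ./.github/workflows/_linux_ut.yml
    with:
      pytorch: main                      # branch/commit of stock PyTorch to build
      keep_torch_xpu_ops: 'false'        # replace third_party/torch-xpu-ops with the current checkout
      ut: 'op_example,op_extended'       # comma-delimited UT scope
      python: '3.10'                     # Python version for the conda env
      runner: linux.idc.xpu              # required runner label

Note that each test step's condition now also accepts github.event_name == 'schedule', so a scheduled run executes all suites regardless of the ut value.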
137 changes: 0 additions & 137 deletions .github/workflows/inductor_xpu_e2e_ci.yml

This file was deleted.
