diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index ccbabf407..2ba270735 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -32,6 +32,11 @@ inputs: hf_token: required: false description: HUGGING_FACE_HUB_TOKEN for torchbench test + pytorch: + required: false + type: string + default: 'main' + description: Pytorch branch/commit runs: using: composite @@ -43,12 +48,14 @@ runs: source activate e2e_ci source .github/scripts/env.sh if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then - cd ../ && rm -rf audio && git clone --single-branch -b main https://github.com/pytorch/audio.git - cd audio && git checkout $TORCHAUDIO_COMMIT_ID - python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install dist/*.whl - cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git - cd vision && git checkout $TORCHVISION_COMMIT_ID - python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl + if [[ "${{ inputs.pytorch }}" != *" wheel"* ]]; then + cd ../ && rm -rf audio && git clone --single-branch -b main https://github.com/pytorch/audio.git + cd audio && git checkout $TORCHAUDIO_COMMIT_ID + python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install dist/*.whl + cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git + cd vision && git checkout $TORCHVISION_COMMIT_ID + python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl + fi cd ../ && python -c "import torch, torchvision, torchaudio" rm -rf benchmark && git clone https://github.com/pytorch/benchmark.git cd benchmark && git checkout $TORCHBENCH_COMMIT_ID && pip install --no-deps -r requirements.txt @@ -63,9 +70,11 @@ runs: pip install --force-reinstall git+https://github.com/huggingface/transformers@${TRANSFORMERS_VERSION} fi if [[ ${{ 
inputs.suite }} == *"timm_models"* ]]; then - cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git - cd vision && git checkout $TORCHVISION_COMMIT_ID - python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl + if [[ "${{ inputs.pytorch }}" != *" wheel"* ]]; then + cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git + cd vision && git checkout $TORCHVISION_COMMIT_ID + python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl + fi # install timm without dependencies pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@$TIMM_COMMIT_ID # install timm dependencies without torch and torchvision diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 0c35dc265..8ee93ebf8 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -53,7 +53,7 @@ jobs: source activate xpu_op_${ZE_AFFINITY_MASK} cd ../ && rm -rf pytorch git clone https://github.com/pytorch/pytorch pytorch - cd pytorch && git checkout ${{ inputs.pytorch }} + cd pytorch && git checkout $(echo ${{ inputs.pytorch }} |awk '{print $1}') # apply PRs for stock pytorch pip install requests python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py @@ -72,16 +72,20 @@ jobs: source .github/scripts/env.sh pip install mkl-static mkl-include cd ../pytorch - pip install -r requirements.txt - export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"} if [[ ${{ inputs.abi }} == '0' ]]; then export _GLIBCXX_USE_CXX11_ABI=0 else export _GLIBCXX_USE_CXX11_ABI=1 fi - WERROR=1 python setup.py bdist_wheel - pip install --force-reinstall dist/*.whl - git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd .. 
+ if [[ "${{ inputs.pytorch }}" != *" wheel"* ]]; then + export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"} + pip install -r requirements.txt + WERROR=1 python setup.py bdist_wheel + pip install --force-reinstall dist/*.whl + git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd .. + else + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu + fi pip install -r .ci/docker/requirements-ci.txt - name: Torch Config run: | diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index f0044aa26..aa921e2ef 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -125,7 +125,7 @@ jobs: cd ../ && rm -rf pytorch source activate e2e_ci git clone https://github.com/pytorch/pytorch pytorch - cd pytorch && git checkout ${{ env.pytorch }} + cd pytorch && git checkout $(echo ${{ env.pytorch }} |awk '{print $1}') # apply PRs for stock pytorch pip install requests python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py @@ -141,6 +141,7 @@ jobs: - name: Identify pinned versions id: pinned run: | + source .github/scripts/env.sh cd ../pytorch if [ -z ${{ inputs.triton }} ]; then echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" @@ -155,7 +156,6 @@ jobs: echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh echo "DRIVER_VERSION=$(dkms status 2>&1 |grep 'intel-i915-dkms' |sed 's/.*\///;s/,.*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "BUNDLE_VERSION=$(dpcpp --version 2>&1 |grep 
'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" . /etc/os-release @@ -257,6 +257,7 @@ jobs: mode: ${{ inputs.mode }} scenario: ${{ inputs.scenario }} hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: Summarize archieve files id: summary if: ${{ ! cancelled() }} @@ -270,9 +271,9 @@ jobs: timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true) if [ ${timeout_models} -ne 0 ];then TIMEOUT_MODELS="$( - grep -E "timeout models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "timeout" -B 1 + grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log )" - echo "TIMEOUT_MODELS=${TIMEOUT_MODELS}" |sed 's/Summary/\\nSummary/g;s/Timeout/\\nTimeout/g' |tee -a "${GITHUB_OUTPUT}" + echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}" fi if [ ${failed_models} -ne 0 ];then grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1 @@ -352,15 +353,12 @@ jobs: echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | $DRIVER_VERSION| $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" - if [ "${{ inputs.triton }}" != "" ];then - test_scope+="; triton=${{ inputs.triton }}" - fi if [ "${{ inputs.model }}" != "" ];then test_scope+="; model=${{ inputs.model }}" fi echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt fi - echo "$TIMEOUT_MODELS" >> ${{ github.workspace }}/report.txt + echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt echo "$cc_comment" >> ${{ github.workspace }}/report.txt # Report report_txt=$(cat ${{ github.workspace 
}}/report.txt) diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml index e87071ee4..201c9c341 100644 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ b/.github/workflows/nightly_ondemand_rolling.yml @@ -125,7 +125,7 @@ jobs: cd ../ && rm -rf pytorch source activate e2e_ci git clone https://github.com/pytorch/pytorch pytorch - cd pytorch && git checkout ${{ env.pytorch }} + cd pytorch && git checkout $(echo ${{ env.pytorch }} |awk '{print $1}') # apply PRs for stock pytorch pip install requests python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py @@ -141,6 +141,7 @@ jobs: - name: Identify pinned versions id: pinned run: | + source .github/scripts/env.sh cd ../pytorch if [ -z ${{ inputs.triton }} ]; then echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" @@ -155,7 +156,6 @@ jobs: echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" - source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh echo "DRIVER_VERSION=$(dkms status 2>&1 |grep 'intel-i915-dkms' |sed 's/.*\///;s/,.*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "BUNDLE_VERSION=$(dpcpp --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" . /etc/os-release @@ -257,6 +257,7 @@ jobs: mode: ${{ inputs.mode }} scenario: ${{ inputs.scenario }} hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: Summarize archieve files id: summary if: ${{ ! 
cancelled() }} @@ -270,9 +271,9 @@ jobs: timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true) if [ ${timeout_models} -ne 0 ];then TIMEOUT_MODELS="$( - grep -E "timeout models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "timeout" -B 1 + grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log )" - echo "TIMEOUT_MODELS=${TIMEOUT_MODELS}" |sed 's/Summary/\\nSummary/g;s/Timeout/\\nTimeout/g' |tee -a "${GITHUB_OUTPUT}" + echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}" fi if [ ${failed_models} -ne 0 ];then grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1 @@ -352,15 +353,12 @@ jobs: echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | rolling-$DRIVER_VERSION| $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" - if [ "${{ inputs.triton }}" != "" ];then - test_scope+="; triton=${{ inputs.triton }}" - fi if [ "${{ inputs.model }}" != "" ];then test_scope+="; model=${{ inputs.model }}" fi echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt fi - echo "$TIMEOUT_MODELS" >> ${{ github.workspace }}/report.txt + echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt echo "$cc_comment" >> ${{ github.workspace }}/report.txt # Report report_txt=$(cat ${{ github.workspace }}/report.txt) diff --git a/.github/workflows/nightly_ondemand_whl.yml b/.github/workflows/nightly_ondemand_whl.yml new file mode 100644 index 000000000..879f4e094 --- /dev/null +++ b/.github/workflows/nightly_ondemand_whl.yml @@ -0,0 +1,329 @@ +name: Torch Nightly WHL Tests + +on: + schedule: 
+ # GMT+8 21:00 every workday + - cron: '0 14 * * 0-4' + # GMT+8 0:00 Saturday + - cron: '0 17 * * 5' + workflow_dispatch: + inputs: + pytorch: + required: false + type: string + default: 'main' + description: Pytorch branch/commit + ut: + required: false + type: string + default: 'torch_xpu' + description: UT scope. `op_example,op_extended,op_ut,torch_xpu`. Delimiter is comma + suite: + required: true + type: string + default: 'huggingface' + description: Dynamo benchmarks test suite. `huggingface,timm_models,torchbench`. Delimiter is comma + dt: + required: true + type: string + default: 'float32' + description: Data precision of the test. `float32,bfloat16,float16,amp_bf16,amp_fp16`. Delimiter is comma + mode: + required: true + type: string + default: 'inference' + description: Test mode. `inference,training`. Delimiter is comma + scenario: + required: true + type: string + default: 'accuracy' + description: Test scenario. `accuracy,performance`. Delimiter is comma + model: + required: false + type: string + default: '' + description: Model. 
Will only run this one model if set + python: + required: false + type: string + default: '3.10' + description: Python version + +permissions: read-all + +concurrency: + group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.ut }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }} + cancel-in-progress: true + +jobs: + Linux-Nightly-Ondemand-UT-WHL-Tests: + if: github.event_name == 'schedule' || inputs.ut != '' + uses: ./.github/workflows/_linux_ut.yml + with: + ut: ${{ github.event_name == 'schedule' && 'op_example,op_extended,op_ut,torch_xpu' || inputs.ut }} + # this workflow always validates the PyTorch nightly wheel build + python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} + pytorch: nightly wheel + runner: linux.idc.xpu + + Linux-Nightly-Ondemand-E2E-WHL-Tests: + runs-on: pvc_e2e + # Don't run on forked repos + if: github.repository_owner == 'intel' + timeout-minutes: 3600 + env: + pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }} + ut: ${{ github.event_name == 'schedule' && 'op_example,op_extended,op_ut,torch_xpu' || inputs.ut }} + python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} + outputs: + TORCH_BRANCH_ID: ${{ steps.pinned.outputs.TORCH_BRANCH_ID }} + TORCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCH_COMMIT_ID }} + DRIVER_VERSION: ${{ steps.pinned.outputs.DRIVER_VERSION }} + BUNDLE_VERSION: ${{ steps.pinned.outputs.BUNDLE_VERSION }} + OS_PRETTY_NAME: ${{ steps.pinned.outputs.OS_PRETTY_NAME }} + GCC_VERSION: ${{ steps.pinned.outputs.GCC_VERSION }} + TORCHBENCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCHBENCH_COMMIT_ID }} + TORCHVISION_COMMIT_ID: ${{ steps.pinned.outputs.TORCHVISION_COMMIT_ID }} + TORCHAUDIO_COMMIT_ID: ${{ steps.pinned.outputs.TORCHAUDIO_COMMIT_ID }} + TRANSFORMERS_VERSION: ${{ steps.pinned.outputs.TRANSFORMERS_VERSION }} + 
TIMM_COMMIT_ID: ${{ steps.pinned.outputs.TIMM_COMMIT_ID }} + TRITON_COMMIT_ID: ${{ steps.pinned.outputs.TRITON_COMMIT_ID }} + TIMEOUT_MODELS: ${{ steps.summary.outputs.TIMEOUT_MODELS }} + steps: + - name: Checkout torch-xpu-ops + uses: actions/checkout@v4 + - name: Prepare Conda ENV + run: | + which conda && conda clean -ay + conda remove --all -y -n e2e_ci || rm -rf $(dirname ${CONDA_EXE})/../envs/e2e_ci + conda create -n e2e_ci python=${{ env.python }} cmake ninja -y + source activate e2e_ci + pip install mkl-static mkl-include + pip install pandas scipy tqdm + - name: Prepare Stock Pytorch + run: | + pwd + source activate e2e_ci + source .github/scripts/env.sh + cd ../ && rm -rf pytorch + git clone https://github.com/pytorch/pytorch pytorch + cd pytorch && git checkout $(echo ${{ env.pytorch }} |awk '{print $1}') + # apply PRs for stock pytorch + pip install requests + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py + git status && git show -s + cd ../ + pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu + - name: Identify pinned versions + id: pinned + run: | + source activate e2e_ci + source .github/scripts/env.sh + cd ../pytorch + echo "TRITON_COMMIT_ID=$(pip list |grep -w pytorch-triton-xpu |awk '{print $2}')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "TORCH_BRANCH_ID=nightly" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "TORCH_COMMIT_ID=$(pip list |grep -w torch |awk '{print $2}')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "TORCHBENCH_COMMIT_ID=$(<../torch-xpu-ops/.github/ci_commit_pins/torchbench.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "TORCHVISION_COMMIT_ID=$(pip list |grep -w torchvision |awk '{print $2}')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "TORCHAUDIO_COMMIT_ID=$(pip list |grep -w torchaudio |awk '{print $2}')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo 
"TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "DRIVER_VERSION=$(dkms status 2>&1 |grep 'intel-i915-dkms' |sed 's/.*\///;s/,.*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "BUNDLE_VERSION=$(dpcpp --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + . /etc/os-release + echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo ${GITHUB_ENV} + - name: Show GITHUB_ENV + run: | + echo "$GITHUB_ENV" + rm -rf ../pytorch/inductor_log + rm -rf /tmp/torchinductor_* + + # Nightly launch + - name: Nightly Huggingface FP32/BF16/FP16 Inference & Training Accuracy Test + if: github.event_name == 'schedule' && github.event.schedule == '0 14 * * 0-4' + uses: ./.github/actions/inductor-xpu-e2e-test + with: + suite: huggingface + env_prepare: true + dt: float32,bfloat16,float16,amp_bf16,amp_fp16 + mode: inference,training + scenario: accuracy + pytorch: nightly wheel + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: Nightly Torchbench BF16 Training Accuracy Test + if: github.event_name == 'schedule' && github.event.schedule == '0 14 * * 0-4' + uses: ./.github/actions/inductor-xpu-e2e-test + with: + suite: torchbench + dt: bfloat16 + mode: training + scenario: accuracy + pytorch: nightly wheel + env_prepare: true + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: Nightly Timm_models FP16 Training Accuracy Test + if: github.event_name == 'schedule' && github.event.schedule == '0 14 * * 0-4' + uses: ./.github/actions/inductor-xpu-e2e-test + with: + suite: timm_models + dt: float16 + mode: training + scenario: accuracy + pytorch: nightly wheel + env_prepare: true + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # Weekly launch + - 
name: Weekly Huggingface Full Test + if: github.event_name == 'schedule' && github.event.schedule == '0 17 * * 5' + uses: ./.github/actions/inductor-xpu-e2e-test + with: + suite: huggingface + env_prepare: true + dt: float32,bfloat16,float16,amp_bf16,amp_fp16 + mode: inference,training + scenario: accuracy,performance + pytorch: nightly wheel + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: Weekly Torchbench Full Test + if: github.event_name == 'schedule' && github.event.schedule == '0 17 * * 5' + uses: ./.github/actions/inductor-xpu-e2e-test + with: + suite: torchbench + env_prepare: true + dt: float32,bfloat16,float16,amp_bf16,amp_fp16 + mode: inference,training + scenario: accuracy,performance + pytorch: nightly wheel + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + - name: Weekly Timm_models Full Test + if: github.event_name == 'schedule' && github.event.schedule == '0 17 * * 5' + uses: ./.github/actions/inductor-xpu-e2e-test + with: + suite: timm_models + env_prepare: true + dt: float32,bfloat16,float16,amp_bf16,amp_fp16 + mode: inference,training + scenario: accuracy,performance + pytorch: nightly wheel + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # On-demand launch + - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }}) + if: github.event_name != 'schedule' + uses: ./.github/actions/inductor-xpu-e2e-test + with: + suite: ${{ inputs.suite }} + env_prepare: true + dt: ${{ inputs.dt }} + mode: ${{ inputs.mode }} + scenario: ${{ inputs.scenario }} + pytorch: nightly wheel + hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + + - name: Summarize archieve files + id: summary + if: ${{ ! cancelled() }} + run: | + rm -rf ${{ github.workspace }}/upload_files + cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files + mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/ + find . 
-type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days + tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs + failed_models=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true) + timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true) + if [ ${timeout_models} -ne 0 ];then + TIMEOUT_MODELS="$( + grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log + )" + echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}" + fi + if [ ${failed_models} -ne 0 ];then + grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1 + exit 1 + fi + - name: Upload Inductor XPU E2E Data + if: ${{ ! cancelled() }} + uses: actions/upload-artifact@v4 + with: + name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }} + path: ${{ github.workspace }}/upload_files + + Tests-Failure-And-Report: + if: ${{ ! 
cancelled() }} + runs-on: [ self-hosted, Linux ] + permissions: + issues: write + env: + GH_TOKEN: ${{ github.token }} + python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} + needs: Linux-Nightly-Ondemand-E2E-WHL-Tests + steps: + - name: Report github issue for XPU OPS nightly + if: github.repository_owner == 'intel' + run: | + set -xe + # Test env + build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + repo="${{ github.repository }}" + TORCH_BRANCH_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TORCH_BRANCH_ID }}" + TORCH_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TORCH_COMMIT_ID }}" + DRIVER_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.DRIVER_VERSION }}" + BUNDLE_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.BUNDLE_VERSION }}" + OS_PRETTY_NAME="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.OS_PRETTY_NAME }}" + GCC_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.GCC_VERSION }}" + TORCHBENCH_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TORCHBENCH_COMMIT_ID }}" + TORCHVISION_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TORCHVISION_COMMIT_ID }}" + TORCHAUDIO_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TORCHAUDIO_COMMIT_ID }}" + TRANSFORMERS_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TRANSFORMERS_VERSION }}" + TIMM_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TIMM_COMMIT_ID }}" + TRITON_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TRITON_COMMIT_ID }}" + TIMEOUT_MODELS="${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.outputs.TIMEOUT_MODELS }}" + # Test status + if [ "${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.result }}" == "success" ];then + test_status=Success + elif [ "${{ needs.Linux-Nightly-Ondemand-E2E-WHL-Tests.result }}" == "failure" ];then + test_status=Failure + 
cc_comment="CC ${{ secrets.NIGHTLY_EMAIL_LIST }}" + else + test_status=None + exit 0 + fi + # Test Type + if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then + test_type="On-demand" + test_issue_id=426 + cc_comment="CC @${GITHUB_TRIGGERING_ACTOR}" + elif [ "${{ github.event.schedule }}" == "0 17 * * 5" ];then + test_type="Weekly" + test_issue_id=432 + else + test_type="Nightly" + test_issue_id=432 + fi + # Test report + echo -e "**${test_status}** $test_type WHL Test on $(date +'%F'), See: $build_url\n" > ${{ github.workspace }}/report.txt + printf "Torch-xpu-ops | PyTorch | Triton\n--- | --- | ---\n${GITHUB_WORKFLOW_SHA:0:7} on ${GITHUB_REF_NAME} | " >> ${{ github.workspace }}/report.txt + printf "[${TORCH_COMMIT_ID:0:7}](https://github.com/pytorch/pytorch/commit/${TORCH_COMMIT_ID:0:7}) on $TORCH_BRANCH_ID | " >> ${{ github.workspace }}/report.txt + echo -e "[${TRITON_COMMIT_ID:0:7}](https://github.com/intel/intel-xpu-backend-for-triton/commit/${TRITON_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt + printf "Transformers | Timm | Torchbench | Torchvision | Torchaudio\n--- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt + printf "[${TRANSFORMERS_VERSION:0:7}](https://github.com/huggingface/transformers/commit/${TRANSFORMERS_VERSION:0:7}) | " >> ${{ github.workspace }}/report.txt + printf "[${TIMM_COMMIT_ID:0:7}](https://github.com/huggingface/pytorch-image-models/commit/${TIMM_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt + printf "[${TORCHBENCH_COMMIT_ID:0:7}](https://github.com/pytorch/benchmark/commit/${TORCHBENCH_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt + printf "[${TORCHVISION_COMMIT_ID:0:7}](https://github.com/pytorch/vision/commit/${TORCHVISION_COMMIT_ID:0:7}) | " >> ${{ github.workspace }}/report.txt + echo -e "[${TORCHAUDIO_COMMIT_ID:0:7}](https://github.com/pytorch/audio/commit/${TORCHAUDIO_COMMIT_ID:0:7}) \n" >> ${{ github.workspace }}/report.txt + printf "Device | OS | GCC | Python | 
Driver(DKMS) | Bundle(DPCPP)\n--- | --- | --- | --- | --- | ---\n" >> ${{ github.workspace }}/report.txt + echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | $DRIVER_VERSION| $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt + if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then + test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}" + if [ "${{ inputs.model }}" != "" ];then + test_scope+="; model=${{ inputs.model }}" + fi + echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt + fi + echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt + echo "$cc_comment" >> ${{ github.workspace }}/report.txt + # Report + report_txt=$(cat ${{ github.workspace }}/report.txt) + gh --repo $repo issue comment $test_issue_id --body "$report_txt"