From c931d66f27c413d03685aea0a221bcadf20aa4bf Mon Sep 17 00:00:00 2001
From: mengfei25
Date: Wed, 9 Oct 2024 10:31:40 +0800
Subject: [PATCH] Update tests on rolling driver (#952)

Enable scratch page
---
Notes (ignored by `git am`; not part of the commit message):

* action.yml and _linux_ut.yml each gain an optional `driver` input
  (default 'lts'). When it equals 'rolling', NEOReadDebugKeys and
  DisableScratchPages are exported as '1', otherwise as '0' -- on the
  "E2E Test" step in the composite action, and at job level in
  _linux_ut.yml.
* nightly_ondemand_rolling.yml passes `driver: rolling` to both UT
  workflow calls and to every inductor-xpu-e2e-test step, and also sets
  both variables to 1 in the E2E job's own `env`.
* In action.yml `shell: bash` only moves below `env:` in two steps, and
  the trailing blank line at end of file is dropped.
* NOTE(review): the message says "Enable scratch page" while the
  variable being set to 1 is named DisableScratchPages -- confirm the
  wording matches the intended driver behaviour.
* NOTE(review): the From: header carries no e-mail address in this copy
  (presumably stripped) -- restore it before feeding this to `git am`.
* NOTE(review): line breaks were restored from a flattened copy. Hunk
  line counts match the @@ headers, but leading indentation of the YAML
  lines was inferred from nesting -- verify against the target tree, or
  apply with `git apply --ignore-whitespace`.

 .github/actions/inductor-xpu-e2e-test/action.yml | 12 +++++++++---
 .github/workflows/_linux_ut.yml                  |  8 ++++++++
 .github/workflows/nightly_ondemand_rolling.yml   | 11 +++++++++++
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml
index 4af472cce..4f81534fd 100644
--- a/.github/actions/inductor-xpu-e2e-test/action.yml
+++ b/.github/actions/inductor-xpu-e2e-test/action.yml
@@ -37,6 +37,11 @@ inputs:
     type: string
     default: 'main'
     description: Pytorch branch/commit
+  driver:
+    required: false
+    type: string
+    default: 'lts'
+    description: Driver lts/rolling
 
 runs:
   using: composite
@@ -82,9 +87,11 @@ runs:
         fi
         pip install numpy==1.26.4
     - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
-      shell: bash
       env:
         HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }}
+        NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
+        DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
+      shell: bash
       run: |
         source activate e2e_ci
         source .github/scripts/env.sh
@@ -147,9 +154,9 @@ runs:
         done
 
     - name: Summary E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
-      shell: bash
       env:
         HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }}
+      shell: bash
       run: |
         cd ../pytorch
         rm -f inductor_log/summary_accuracy.csv
@@ -170,4 +177,3 @@ runs:
         suite=$(echo ${{ inputs.suite }} |sed 's/,/ /g')
         scenario=$(echo ${{ inputs.scenario }} |sed 's/,/ /g')
         python inductor_summary.py -p ${dt} -s ${suite} -m ${mode} -sc ${scenario}
-
diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml
index 895620f58..de3c19dee 100644
--- a/.github/workflows/_linux_ut.yml
+++ b/.github/workflows/_linux_ut.yml
@@ -38,6 +38,11 @@ on:
         type: string
         default: 'linux.idc.xpu'
         description: Runner label
+      driver:
+        required: false
+        type: string
+        default: 'lts'
+        description: Driver lts/rolling
 
 permissions: read-all
 
@@ -45,6 +50,9 @@ jobs:
   Torch-XPU-UT-Tests:
     runs-on: ${{ inputs.runner }}
     timeout-minutes: 900
+    env:
+      NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
+      DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
     steps:
       - name: Checkout torch-xpu-ops
         uses: actions/checkout@v4
diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml
index 5dad5ecc2..72709d88c 100644
--- a/.github/workflows/nightly_ondemand_rolling.yml
+++ b/.github/workflows/nightly_ondemand_rolling.yml
@@ -74,6 +74,7 @@ jobs:
       ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
       pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
       python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
+      driver: rolling
       runner: pvc_rolling
 
   Linux-Weekly-UT-Tests-ABI-0-Rolling:
@@ -82,6 +83,7 @@ jobs:
     with:
       abi: 0
       ut: op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu
+      driver: rolling
       runner: pvc_rolling
 
   Linux-Nightly-Ondemand-E2E-Tests-Rolling:
@@ -94,6 +96,8 @@ jobs:
       keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
       ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
       python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
+      NEOReadDebugKeys: 1
+      DisableScratchPages: 1
     outputs:
       TORCH_BRANCH_ID: ${{ steps.pinned.outputs.TORCH_BRANCH_ID }}
       TORCH_COMMIT_ID: ${{ steps.pinned.outputs.TORCH_COMMIT_ID }}
@@ -197,6 +201,7 @@ jobs:
           mode: inference,training
           scenario: accuracy
           hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          driver: rolling
       - name: Nightly Torchbench BF16 Training Accuracy Test
         if: github.event_name == 'schedule' && github.event.schedule == '30 13 * * 0-4'
         uses: ./.github/actions/inductor-xpu-e2e-test
@@ -207,6 +212,7 @@ jobs:
           scenario: accuracy
           env_prepare: true
           hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          driver: rolling
       - name: Nightly Timm_models FP16 Training Accuracy Test
         if: github.event_name == 'schedule' && github.event.schedule == '30 13 * * 0-4'
         uses: ./.github/actions/inductor-xpu-e2e-test
@@ -217,6 +223,7 @@ jobs:
           scenario: accuracy
           env_prepare: true
           hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          driver: rolling
       # Weekly launch
       - name: Weekly Huggingface Full Test
         if: github.event_name == 'schedule' && github.event.schedule == '30 16 * * 5'
@@ -228,6 +235,7 @@ jobs:
           mode: inference,training
           scenario: accuracy,performance
           hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          driver: rolling
       - name: Weekly Torchbench Full Test
         if: github.event_name == 'schedule' && github.event.schedule == '30 16 * * 5'
         uses: ./.github/actions/inductor-xpu-e2e-test
@@ -238,6 +246,7 @@ jobs:
           mode: inference,training
           scenario: accuracy,performance
           hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          driver: rolling
       - name: Weekly Timm_models Full Test
         if: github.event_name == 'schedule' && github.event.schedule == '30 16 * * 5'
         uses: ./.github/actions/inductor-xpu-e2e-test
@@ -248,6 +257,7 @@ jobs:
           mode: inference,training
           scenario: accuracy,performance
           hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          driver: rolling
       # On-demand launch
       - name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
         if: github.event_name != 'schedule'
@@ -259,6 +269,7 @@ jobs:
           mode: ${{ inputs.mode }}
           scenario: ${{ inputs.scenario }}
           hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          driver: rolling
 
       - name: Summarize archieve files
         id: summary