Skip to content

Commit

Permalink
Add nightly and weekly whl tests (#760)
Browse files: browse the repository at this point in the history
  • Loading branch information
mengfei25 authored Aug 16, 2024
1 parent 4ddb5bd commit 5a47831
Show file tree
Hide file tree
Showing 5 changed files with 369 additions and 31 deletions.
27 changes: 18 additions & 9 deletions .github/actions/inductor-xpu-e2e-test/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ inputs:
hf_token:
required: false
description: HUGGING_FACE_HUB_TOKEN for torchbench test
pytorch:
required: false
type: string
default: 'main'
description: Pytorch branch/commit

runs:
using: composite
Expand All @@ -43,12 +48,14 @@ runs:
source activate e2e_ci
source .github/scripts/env.sh
if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then
cd ../ && rm -rf audio && git clone --single-branch -b main https://github.com/pytorch/audio.git
cd audio && git checkout $TORCHAUDIO_COMMIT_ID
python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install dist/*.whl
cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git
cd vision && git checkout $TORCHVISION_COMMIT_ID
python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl
if [[ ${{ inputs.pytorch }} != *" wheel"* ]]; then
cd ../ && rm -rf audio && git clone --single-branch -b main https://github.com/pytorch/audio.git
cd audio && git checkout $TORCHAUDIO_COMMIT_ID
python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install dist/*.whl
cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git
cd vision && git checkout $TORCHVISION_COMMIT_ID
python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl
fi
cd ../ && python -c "import torch, torchvision, torchaudio"
rm -rf benchmark && git clone https://github.com/pytorch/benchmark.git
cd benchmark && git checkout $TORCHBENCH_COMMIT_ID && pip install --no-deps -r requirements.txt
Expand All @@ -63,9 +70,11 @@ runs:
pip install --force-reinstall git+https://github.com/huggingface/transformers@${TRANSFORMERS_VERSION}
fi
if [[ ${{ inputs.suite }} == *"timm_models"* ]]; then
cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git
cd vision && git checkout $TORCHVISION_COMMIT_ID
python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl
if [[ ${{ inputs.pytorch }} != *" wheel"* ]]; then
cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git
cd vision && git checkout $TORCHVISION_COMMIT_ID
python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl
fi
# install timm without dependencies
pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@$TIMM_COMMIT_ID
# install timm dependencies without torch and torchvision
Expand Down
16 changes: 10 additions & 6 deletions .github/workflows/_linux_ut.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ../ && rm -rf pytorch
git clone https://github.com/pytorch/pytorch pytorch
cd pytorch && git checkout ${{ inputs.pytorch }}
cd pytorch && git checkout $(echo ${{ inputs.pytorch }} |awk '{print $1}')
# apply PRs for stock pytorch
pip install requests
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
Expand All @@ -72,16 +72,20 @@ jobs:
source .github/scripts/env.sh
pip install mkl-static mkl-include
cd ../pytorch
pip install -r requirements.txt
export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
if [[ ${{ inputs.abi }} == '0' ]]; then
export _GLIBCXX_USE_CXX11_ABI=0
else
export _GLIBCXX_USE_CXX11_ABI=1
fi
WERROR=1 python setup.py bdist_wheel
pip install --force-reinstall dist/*.whl
git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
if [[ ${{ inputs.pytorch }} != *" wheel"* ]]; then
export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
pip install -r requirements.txt
WERROR=1 python setup.py bdist_wheel
pip install --force-reinstall dist/*.whl
git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
else
pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
fi
pip install -r .ci/docker/requirements-ci.txt
- name: Torch Config
run: |
Expand Down
14 changes: 6 additions & 8 deletions .github/workflows/nightly_ondemand.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ jobs:
cd ../ && rm -rf pytorch
source activate e2e_ci
git clone https://github.com/pytorch/pytorch pytorch
cd pytorch && git checkout ${{ env.pytorch }}
cd pytorch && git checkout $(echo ${{ env.pytorch }} |awk '{print $1}')
# apply PRs for stock pytorch
pip install requests
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
Expand All @@ -141,6 +141,7 @@ jobs:
- name: Identify pinned versions
id: pinned
run: |
source .github/scripts/env.sh
cd ../pytorch
if [ -z ${{ inputs.triton }} ]; then
echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
Expand All @@ -155,7 +156,6 @@ jobs:
echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
echo "DRIVER_VERSION=$(dkms status 2>&1 |grep 'intel-i915-dkms' |sed 's/.*\///;s/,.*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "BUNDLE_VERSION=$(dpcpp --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
. /etc/os-release
Expand Down Expand Up @@ -257,6 +257,7 @@ jobs:
mode: ${{ inputs.mode }}
scenario: ${{ inputs.scenario }}
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

- name: Summarize archieve files
id: summary
if: ${{ ! cancelled() }}
Expand All @@ -270,9 +271,9 @@ jobs:
timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
if [ ${timeout_models} -ne 0 ];then
TIMEOUT_MODELS="$(
grep -E "timeout models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "timeout" -B 1
grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log
)"
echo "TIMEOUT_MODELS=${TIMEOUT_MODELS}" |sed 's/Summary/\\nSummary/g;s/Timeout/\\nTimeout/g' |tee -a "${GITHUB_OUTPUT}"
echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
fi
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1
Expand Down Expand Up @@ -352,15 +353,12 @@ jobs:
echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | $DRIVER_VERSION| $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt
if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then
test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}"
if [ "${{ inputs.triton }}" != "" ];then
test_scope+="; triton=${{ inputs.triton }}"
fi
if [ "${{ inputs.model }}" != "" ];then
test_scope+="; model=${{ inputs.model }}"
fi
echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt
fi
echo "$TIMEOUT_MODELS" >> ${{ github.workspace }}/report.txt
echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt
echo "$cc_comment" >> ${{ github.workspace }}/report.txt
# Report
report_txt=$(cat ${{ github.workspace }}/report.txt)
Expand Down
14 changes: 6 additions & 8 deletions .github/workflows/nightly_ondemand_rolling.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ jobs:
cd ../ && rm -rf pytorch
source activate e2e_ci
git clone https://github.com/pytorch/pytorch pytorch
cd pytorch && git checkout ${{ env.pytorch }}
cd pytorch && git checkout $(echo ${{ env.pytorch }} |awk '{print $1}')
# apply PRs for stock pytorch
pip install requests
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
Expand All @@ -141,6 +141,7 @@ jobs:
- name: Identify pinned versions
id: pinned
run: |
source .github/scripts/env.sh
cd ../pytorch
if [ -z ${{ inputs.triton }} ]; then
echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
Expand All @@ -155,7 +156,6 @@ jobs:
echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "MODEL_ONLY_NAME=${{ inputs.model }}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
echo "DRIVER_VERSION=$(dkms status 2>&1 |grep 'intel-i915-dkms' |sed 's/.*\///;s/,.*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "BUNDLE_VERSION=$(dpcpp --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
. /etc/os-release
Expand Down Expand Up @@ -257,6 +257,7 @@ jobs:
mode: ${{ inputs.mode }}
scenario: ${{ inputs.scenario }}
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

- name: Summarize archieve files
id: summary
if: ${{ ! cancelled() }}
Expand All @@ -270,9 +271,9 @@ jobs:
timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true)
if [ ${timeout_models} -ne 0 ];then
TIMEOUT_MODELS="$(
grep -E "timeout models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "timeout" -B 1
grep -B 1 "timeout models: [1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log
)"
echo "TIMEOUT_MODELS=${TIMEOUT_MODELS}" |sed 's/Summary/\\nSummary/g;s/Timeout/\\nTimeout/g' |tee -a "${GITHUB_OUTPUT}"
echo "TIMEOUT_MODELS=\"${TIMEOUT_MODELS}\"" |awk '{printf("%s\\n", $0)}' |sed 's/\\n$//' |tee -a "${GITHUB_OUTPUT}"
fi
if [ ${failed_models} -ne 0 ];then
grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1
Expand Down Expand Up @@ -352,15 +353,12 @@ jobs:
echo -e "$RUNNER_NAME | $OS_PRETTY_NAME | $GCC_VERSION | ${{ env.python }} | rolling-$DRIVER_VERSION| $BUNDLE_VERSION \n" >> ${{ github.workspace }}/report.txt
if [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ];then
test_scope="${{ inputs.suite }}/${{ inputs.dt }}/${{ inputs.mode }}/${{ inputs.scenario }}"
if [ "${{ inputs.triton }}" != "" ];then
test_scope+="; triton=${{ inputs.triton }}"
fi
if [ "${{ inputs.model }}" != "" ];then
test_scope+="; model=${{ inputs.model }}"
fi
echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt
fi
echo "$TIMEOUT_MODELS" >> ${{ github.workspace }}/report.txt
echo "$TIMEOUT_MODELS" |awk '{printf("%s\\n", $0)}' >> ${{ github.workspace }}/report.txt
echo "$cc_comment" >> ${{ github.workspace }}/report.txt
# Report
report_txt=$(cat ${{ github.workspace }}/report.txt)
Expand Down
Loading

0 comments on commit 5a47831

Please sign in to comment.