# Workflow file for the run "Remove reduce workaround since using xpu::pair directly #2428".
# Notice carried over from the web view: this file contains bidirectional
# Unicode text that may be interpreted or compiled differently than what
# appears below. To review, open the file in an editor that reveals hidden
# Unicode characters. (Learn more about bidirectional Unicode characters.)
# Display name of this workflow.
name: pull

# Trigger on pull-request activity, including transitions between draft and
# ready-for-review, for pull requests that target `main` or a `release/*`
# branch.
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - converted_to_draft
      - ready_for_review
    branches:
      - main
      - release/*
# One concurrency group per workflow and pull request (falling back to the
# commit SHA when there is no pull-request number); a newer run in the same
# group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
  cancel-in-progress: true

# Grant the workflow token read access only.
permissions: read-all
jobs:
  # Operator unit tests, delegated to the reusable `_linux_ut.yml` workflow.
  preci-ut:
    # Don't run on forked repos and draft PRs
    if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
    uses: ./.github/workflows/_linux_ut.yml
    with:
      ut: op_example,op_extended,op_ut
      runner: linux.idc.xpu

  # Builds PyTorch with this checkout of torch-xpu-ops and runs the Inductor
  # end-to-end accuracy suites through the local `inductor-xpu-e2e-test`
  # action.
  Inductor-XPU-E2E-CI-Tests:
    runs-on: pvc_e2e
    # Don't run on forked repos and draft PRs
    if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
    timeout-minutes: 900
    steps:
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4

      # Recreate the `e2e_ci` conda environment from scratch on every run.
      - name: Prepare Conda ENV
        run: |
          which conda && conda clean -ay
          conda remove --all -y -n e2e_ci || rm -rf $(dirname ${CONDA_EXE})/../envs/e2e_ci
          conda create -n e2e_ci python=3.10 cmake ninja -y
          source activate e2e_ci
          conda install -c intel mkl-static mkl-include -y
          pip install pandas scipy tqdm

      # Clone PyTorch `main` next to the workspace, apply the extra PR, and
      # replace third_party/torch-xpu-ops with the checked-out sources.
      - name: Prepare Stock Pytorch
        run: |
          pwd
          cd ../ && rm -rf pytorch
          source activate e2e_ci
          git clone -b main https://github.com/pytorch/pytorch pytorch
          cd pytorch
          # apply PRs for stock pytorch
          pip install requests
          # https://github.com/mengfei25/pytorch/pull/18 internal use only for subset model list
          python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py -e https://github.com/mengfei25/pytorch/pull/18
          git status && git show -s
          git submodule sync && git submodule update --init --recursive
          rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
          # Workaround for torch-xpu-ops ci test
          sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt

      # Install Triton for XPU at the commit pinned in the PyTorch tree.
      - name: Triton Installation
        run: |
          source activate e2e_ci
          cd ../pytorch
          TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
          TRITON_PINNED_COMMIT=$(cat .ci/docker/ci_commit_pins/triton-xpu.txt)
          echo ${TRITON_REPO}@${TRITON_PINNED_COMMIT}
          pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_PINNED_COMMIT}#subdirectory=python"

      # Build a PyTorch wheel with USE_XPU=1 and install it into `e2e_ci`.
      - name: Build Pytorch XPU
        run: |
          source activate e2e_ci
          cd ../pytorch
          pip install -r requirements.txt
          export USE_XPU=1
          source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
          export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
          python setup.py bdist_wheel
          pip install --force-reinstall dist/*.whl

      # Export the pinned dependency versions to later steps via GITHUB_ENV.
      - name: Identify pinned versions
        run: |
          cd ../pytorch
          echo "TRITON_COMMIT_ID=$(<.ci/docker/ci_commit_pins/triton-xpu.txt)" >> "${GITHUB_ENV}"
          echo "TORCHVISION_COMMIT_ID=$(<.github/ci_commit_pins/vision.txt)" >> "${GITHUB_ENV}"
          echo "TORCHBENCH_COMMIT_ID=$(<third_party/torch-xpu-ops/.github/ci_commit_pins/torchbench.txt)" >> "${GITHUB_ENV}"
          echo "TORCHAUDIO_COMMIT_ID=$(<.github/ci_commit_pins/audio.txt)" >> "${GITHUB_ENV}"
          echo "TRANSFORMERS_VERSION=$(<.ci/docker/ci_commit_pins/huggingface.txt)" >> "${GITHUB_ENV}"
          echo "TIMM_COMMIT_ID=$(<.ci/docker/ci_commit_pins/timm.txt)" >> "${GITHUB_ENV}"

      # Print the GITHUB_ENV file path, then clear logs and Inductor caches
      # left over from previous runs.
      - name: Show GITHUB_ENV
        run: |
          echo "$GITHUB_ENV"
          rm -rf ../pytorch/inductor_log
          rm -rf /tmp/torchinductor_*

      - name: Huggingface BF16 Training Accuracy Test
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: huggingface
          dt: bfloat16
          mode: training
          scenario: accuracy
          env_prepare: true
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

      - name: Huggingface FP16 Training Accuracy Test
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: huggingface
          dt: float16
          mode: training
          scenario: accuracy
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

      - name: Timm_models BF16 Training Accuracy Test
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: timm_models
          dt: bfloat16
          mode: training
          scenario: accuracy
          env_prepare: true
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

      - name: Torchbench BF16 Training Accuracy Test
        uses: ./.github/actions/inductor-xpu-e2e-test
        with:
          suite: torchbench
          dt: bfloat16
          mode: training
          scenario: accuracy
          env_prepare: true
          hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

      # Copy the logs into the upload directory and fail the job when the
      # accuracy summary reports any real failure. Runs even if an earlier
      # step failed, unless the run was cancelled.
      - name: Summarize archieve files
        if: ${{ ! cancelled() }}
        run: |
          rm -rf ${{ github.workspace }}/upload_files
          cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
          # Count and report from the same copied summary file; the report
          # previously read a path outside upload_files that this step never
          # creates, so the failure details were never printed.
          summary_log="${{ github.workspace }}/upload_files/summary_accuracy.log"
          failed_case=$(grep "Real failed: models: *[1-9]" "${summary_log}" | wc -l || true)
          if [ "${failed_case}" -ne 0 ]; then
            grep -E "Real failed: models: *[1-9]|Summary for" "${summary_log}"
            exit 1
          fi

      - name: Upload Inductor XPU E2E Data
        if: ${{ ! cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
          path: ${{ github.workspace }}/upload_files