-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CI] Extract torch build as a standalone job (#1271)
Extract the PyTorch build into a standalone CI/CD job. Add a fallback mechanism that rebuilds PyTorch at the last known good commit tracked in #1280, and post a comment there whenever a new build failure occurs. 1. Issue #1280 was created to track the latest workable PyTorch main commit. 2. The CI test always pulls the latest PyTorch main and builds it together with the torch-xpu-ops PR branch; on success go to 3, on failure go to 4. 3. Update the commit recorded in #1280 and continue with the CI tests. 4. Comment on issue #1280 with the failing PyTorch commit, read the last known good commit from #1280, rebuild PyTorch at that commit, and continue with the CI tests.
- Loading branch information
1 parent
69ff73f
commit 299831d
Showing
5 changed files
with
356 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
# Reusable workflow (invoked through `workflow_call`) that builds a PyTorch XPU wheel and uploads the wheel and its build log as artifacts. | ||
name: Linux PyTorch XPU Build | ||
|
||
# Reusable-workflow interface: callers pass the inputs below and read the outputs back from the `build` job. | ||
on: | ||
  workflow_call: | ||
    inputs: | ||
      pytorch: | ||
        required: false | ||
        type: string | ||
        default: 'main' | ||
        description: Pytorch branch/commit | ||
      keep_torch_xpu_ops: | ||
        required: false | ||
        type: string | ||
        default: 'false' | ||
        description: Keep torch-xpu-ops pin. `true` means use pined commit | ||
      abi: | ||
        required: false | ||
        type: string | ||
        # Quoted so the default is a string, matching `type: string` and the `== '0'` comparison in the build script. | ||
        default: '1' | ||
        description: ABI version. Default abi as 1. | ||
      python: | ||
        required: false | ||
        type: string | ||
        default: '3.10' | ||
        description: Python version | ||
      runner: | ||
        # NOTE(review): `required: true` means callers must always pass a value, so this default is never used. | ||
        required: true | ||
        type: string | ||
        default: 'linux.idc.xpu' | ||
        description: Runner label | ||
      driver: | ||
        required: false | ||
        type: string | ||
        default: 'lts' | ||
        description: Driver lts/rolling | ||
    # Outputs must reference the job by its id (`build`); a non-existent job id silently resolves to an empty string. | ||
    outputs: | ||
      whl_name: | ||
        description: The name of the wheel file | ||
        # NOTE(review): the `build` job does not declare a `whl_name` output, so this is empty until the job exports one. | ||
        value: ${{ jobs.build.outputs.whl_name }} | ||
      torch_commit_id: | ||
        description: The commit id of the torch build | ||
        value: ${{ jobs.build.outputs.TORCH_COMMIT_ID }} | ||
|
||
# The build step uses `gh issue edit` / `gh issue comment` with the workflow token, which requires write access to issues. | ||
permissions: | ||
  issues: write | ||
|
||
jobs: | ||
  # Builds the PyTorch XPU wheel on the caller-selected runner and exports the built commit id. | ||
  build: | ||
    # The whole comparison must sit inside `${{ }}`. With the operator outside, the expression is rendered | ||
    # as a non-empty string such as "main != 'nightly_wheel'", which is always truthy, so the job never skips. | ||
    if: ${{ inputs.pytorch != 'nightly_wheel' }} | ||
    runs-on: ${{ inputs.runner }} | ||
    outputs: | ||
      TORCH_COMMIT_ID: ${{ steps.build_version.outputs.TORCH_COMMIT_ID }} | ||
    timeout-minutes: 900 | ||
    env: | ||
      # Number of the issue whose body records the last PyTorch commit that built successfully. | ||
      commit_issue: 1280 | ||
      # Token consumed by the `gh` CLI calls in the build step. | ||
      GH_TOKEN: ${{ github.token }} | ||
      # Both variables are '1' for the rolling driver and '0' otherwise. | ||
      NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }} | ||
      DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} | ||
    steps: | ||
      # Check out this repository into the workspace; later steps source `.github/scripts/` from it. | ||
      # NOTE(review): later steps address the checkout as `../torch-xpu-ops`, so the workspace directory is presumably named torch-xpu-ops - confirm. | ||
      - name: Checkout torch-xpu-ops | ||
        uses: actions/checkout@v4 | ||
- name: Prepare Stock Pytorch | ||
run: | | ||
pwd | ||
which conda && conda clean -ay | ||
conda remove --all -y -n xpu_build || \ | ||
rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_build | ||
conda create -n xpu_build python=${{ inputs.python }} cmake ninja -y | ||
source activate xpu_build | ||
cd ../ && rm -rf pytorch | ||
pip install requests | ||
git clone https://github.com/pytorch/pytorch pytorch | ||
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then | ||
cd pytorch && git checkout $(echo ${{ inputs.pytorch }}) | ||
# apply PRs for stock pytorch | ||
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py | ||
git status && git show -s | ||
git submodule sync && git submodule update --init --recursive | ||
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then | ||
echo "Don't replace torch-xpu-ops!" | ||
else | ||
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/ | ||
# Workaround for torch-xpu-ops ci test | ||
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt | ||
fi | ||
fi | ||
- name: Build Pytorch XPU | ||
run: | | ||
source activate xpu_build | ||
source .github/scripts/env.sh ${{ inputs.pytorch }} | ||
pip install mkl-static==2025.0.1 mkl-include==2025.0.1 | ||
if [[ ${{ inputs.abi }} == '0' ]]; then | ||
export _GLIBCXX_USE_CXX11_ABI=0 | ||
else | ||
export _GLIBCXX_USE_CXX11_ABI=1 | ||
fi | ||
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then | ||
build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | ||
repo="${{ github.repository }}" | ||
last_commit=$(gh --repo $repo issue view $commit_issue --json body -q .body | grep ${{ inputs.pytorch }} | cut -d'[' -f 2 | cut -d']' -f 1) | ||
cd ../pytorch | ||
current_commit=$(git rev-parse HEAD) | ||
echo ">>>>>>>>>>>>branch: ${{ inputs.pytorch }}, last commit: ${last_commit}, current commit: ${current_commit}" | ||
export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"} | ||
pip install -r requirements.txt | ||
WERROR=1 python setup.py bdist_wheel 2>&1 | tee pytorch_${current_commit}_build.log | ||
if [ -f dist/torch*.whl ]; then | ||
echo "Wheel build successful, update last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280" | ||
gh --repo $repo issue view $commit_issue --json body -q .body | sed "s;${last_commit};${current_commit};" > new_body.txt | ||
gh --repo $repo issue edit $commit_issue --body-file new_body.txt | ||
else | ||
echo "Wheel build failed, use last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280" | ||
gh --repo $repo issue comment $commit_issue -b "Wheel build failed with commit [${current_commit}](https://github.com/pytorch/pytorch/tree/${current_commit}), refer ${build_url}. CC @intel/torch-xpu-ops-maintain @EikanWang @riverliuintel @fengyuan14 @xytintel @etaf @chuanqi129 @mengfei25" | ||
git clean -df . | ||
git checkout $last_commit | ||
# apply PRs for stock pytorch | ||
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py | ||
git status && git show -s | ||
git submodule sync && git submodule update --init --recursive | ||
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then | ||
echo "Don't replace torch-xpu-ops!" | ||
else | ||
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/ | ||
# Workaround for torch-xpu-ops ci test | ||
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt | ||
fi | ||
WERROR=1 python setup.py bdist_wheel | ||
fi | ||
pip install --force-reinstall dist/*.whl | ||
cp dist/*.whl ${{ github.workspace }}/ | ||
cp pytorch_${current_commit}_build.log ${{ github.workspace }}/ | ||
else | ||
pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu | ||
TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') | ||
cd ../pytorch | ||
git reset --hard && git checkout ${TORCH_COMMIT_ID} | ||
TORCH_XPU_OPS_COMMIT=$(<third_party/xpu.txt) | ||
rm -rf third_party/torch-xpu-ops | ||
git clone https://github.com/intel/torch-xpu-ops.git third_party/torch-xpu-ops | ||
cd third_party/torch-xpu-ops | ||
git checkout ${TORCH_XPU_OPS_COMMIT} | ||
cd ../.. | ||
fi | ||
- name: Torch Config | ||
run: | | ||
source activate xpu_build | ||
source .github/scripts/env.sh ${{ inputs.pytorch }} | ||
python -c "import torch; print(torch.__config__.show())" | ||
python -c "import torch; print(torch.__config__.parallel_info())" | ||
python -c "import torch; print(torch.__config__.torch.xpu.device_count())" | ||
cd .. | ||
python pytorch/torch/utils/collect_env.py | ||
- name: Identify Build version | ||
id: build_version | ||
run: | | ||
source .github/scripts/env.sh | ||
cd ../pytorch | ||
echo "TORCH_BRANCH_ID=$(git rev-parse --abbrev-ref HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" | ||
echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" | ||
echo "DRIVER_VERSION=$(dkms status 2>&1 |grep 'intel-i915-dkms' |sed 's/.*\///;s/,.*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" | ||
echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" | ||
echo "BUNDLE_VERSION=$(dpcpp --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" | ||
. /etc/os-release | ||
echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" | ||
echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" | ||
echo ${GITHUB_ENV} | ||
      # Upload any torch wheel found in the workspace root; `always()` runs this step even when an earlier step failed. | ||
      # The artifact name is keyed by PR number (or commit SHA when there is no PR) plus the ABI input. | ||
      - name: Upload Torch XPU Wheel | ||
        if: always() | ||
        uses: actions/upload-artifact@v4 | ||
        with: | ||
          name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }} | ||
          path: ${{ github.workspace }}/torch*.whl | ||
      # Upload the build log(s) found in the workspace root; `always()` runs this step even when an earlier step failed. | ||
      # The artifact name is keyed by PR number (or commit SHA when there is no PR) plus the ABI input. | ||
      - name: Upload Build Log | ||
        if: always() | ||
        uses: actions/upload-artifact@v4 | ||
        with: | ||
          name: Torch-XPU-Build-Log-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }} | ||
          path: ${{ github.workspace }}/pytorch_*.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.