# Inductor E2E Nightly Tests #22
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Nightly end-to-end Inductor test workflow.
#
# Triggers:
#   * workflow_dispatch - manual run; the inputs below select the PyTorch and
#     IPEX sources (repo / branch / commit) and the oneAPI basekit version.
#   * schedule          - daily cron run. Scheduled runs carry no
#     `github.event.inputs` values; the `default:` entries below are only
#     applied by GitHub to manually dispatched runs.
name: Inductor E2E Nightly Tests

on:
  workflow_dispatch:
    inputs:
      torchrepo:
        description: 'torchrepo'
        required: true
        default: 'https://github.com/pytorch/pytorch.git'
      torchbranch:
        description: 'torchbranch'
        required: true
        default: 'v2.0.1'
      torchcommit:
        description: 'torchcommit'
        required: true
        default: 'e9ebda29d87ce0916ab08c06ab26fd3766a870e5'
      ipexrepo:
        description: 'ipexrepo'
        required: true
        default: 'https://github.com/intel/intel-extension-for-pytorch.git'
      ipexbranch:
        description: 'ipexbranch'
        required: true
        default: 'xpu-master'
      ipexcommit:
        description: 'ipexcommit'
        required: true
        default: '4af80f77740ed939be78eba28ae36951823f335c'
      oneapi:
        description: 'oneAPI basekit version'
        required: true
        default: '2023.2.0'
  schedule:
    - cron: "0 14 * * *"  # run at 2 PM UTC
jobs:
  # Job 1: prepare the software stack on the self-hosted PVC_E2E runner.
  # Everything lives under ${HOME}/triton-nightly and in the existing
  # `triton-nightly-test` conda environment, so later jobs (which run on the
  # same runner label) reuse the result from disk.
  Tests-Env-Prepare:
    runs-on: [self-hosted, PVC_E2E]
    env:
      # `github.event.inputs.*` is empty on `schedule` runs, so each value
      # falls back to the default declared under `on.workflow_dispatch.inputs`.
      # Keep both lists in sync. Passing the values through the environment
      # (instead of expanding them inside the script text) also keeps the
      # dispatch inputs from being interpreted as shell syntax.
      E2E_TORCH_REPO: "${{ github.event.inputs.torchrepo || 'https://github.com/pytorch/pytorch.git' }}"
      E2E_TORCH_BRANCH: "${{ github.event.inputs.torchbranch || 'v2.0.1' }}"
      E2E_TORCH_COMMIT: "${{ github.event.inputs.torchcommit || 'e9ebda29d87ce0916ab08c06ab26fd3766a870e5' }}"
      E2E_IPEX_REPO: "${{ github.event.inputs.ipexrepo || 'https://github.com/intel/intel-extension-for-pytorch.git' }}"
      E2E_IPEX_BRANCH: "${{ github.event.inputs.ipexbranch || 'xpu-master' }}"
      E2E_IPEX_COMMIT: "${{ github.event.inputs.ipexcommit || '4af80f77740ed939be78eba28ae36951823f335c' }}"
      E2E_ONEAPI_VERSION: "${{ github.event.inputs.oneapi || '2023.2.0' }}"
    steps:
      - name: Create conda environment
        run: |
          source ${HOME}/miniconda3/bin/activate triton-nightly-test
          conda install -y astunparse numpy ninja pyyaml setuptools cmake cffi typing_extensions future six requests dataclasses mkl mkl-include
          conda install -y -c conda-forge libstdcxx-ng

      - name: Triton source code prepare
        run: |
          source ${HOME}/miniconda3/bin/activate triton-nightly-test
          cd ${HOME}/triton-nightly
          # Always start from a fresh clone of upstream Triton.
          rm -rf triton
          git clone https://github.com/openai/triton triton
          cd triton
          # Record the tested commit; the accuracy job copies this file into
          # the uploaded log directory.
          triton_commit=`git rev-parse HEAD`
          echo "triton_commit: ${triton_commit}" | tee sw_info.log
          git submodule sync
          git submodule update --init --recursive --jobs 0
          # Move the XPU backend submodule to the tip of its main branch.
          cd third_party/intel_xpu_backend
          git checkout main && git pull

      - name: Install Dependency
        run: |
          # Printed before and after activation to show which interpreter the
          # conda environment provides.
          python --version
          source ${HOME}/miniconda3/bin/activate triton-nightly-test
          python --version
          pip install setuptools cython numpy wheel scikit-build scipy
          pip install psutil cpuid
          cd ${HOME}/triton-nightly
          cp triton/third_party/intel_xpu_backend/.github/scripts/env_prepare.sh .
          cp triton/third_party/intel_xpu_backend/.github/scripts/env_triton.sh ${HOME}/
          cp -r triton/third_party/intel_xpu_backend/.github/patches/ .
          bash env_prepare.sh triton-nightly \
            "${E2E_TORCH_REPO}" \
            "${E2E_TORCH_BRANCH}" \
            "${E2E_TORCH_COMMIT}" \
            "${E2E_IPEX_REPO}" \
            "${E2E_IPEX_BRANCH}" \
            "${E2E_IPEX_COMMIT}" \
            "${E2E_ONEAPI_VERSION}"
          source ${HOME}/env_triton.sh "${E2E_ONEAPI_VERSION}"
          # Run steps execute under `bash -e`, so the import must be tested
          # inside the `if` itself; a separate status check afterwards would
          # never be reached on failure.
          if ! python -c "import torch;import intel_extension_for_pytorch"; then
            echo -e "[ERROR] Public-torch or IPEX BUILD FAIL"
            exit 1
          fi

      - name: Build Triton
        shell: bash
        run: |
          source ${HOME}/miniconda3/bin/activate triton-nightly-test
          pip uninstall -y triton
          sudo update-ca-certificates --fresh
          export SSL_CERT_DIR=/etc/ssl/certs
          pip install pybind11
          cd ${HOME}/triton-nightly/triton/python
          python setup.py clean
          TRITON_CODEGEN_INTEL_XPU_BACKEND=1 python setup.py bdist_wheel
          pip install dist/*.whl
          cd ${HOME}/triton-nightly
          source ${HOME}/env_triton.sh "${E2E_ONEAPI_VERSION}"
          # Same reasoning as above: the shell exits on the first failing
          # command, so the check has to wrap the import.
          if ! python -c "import triton"; then
            echo -e "[ERROR] Triton BUILD FAIL"
            exit 1
          fi

      - name: Prepare Benchmark
        run: |
          cp ${HOME}/triton-nightly/triton/third_party/intel_xpu_backend/.github/scripts/inductor_xpu_test.sh ${HOME}/triton-nightly/pytorch
          cp ${HOME}/triton-nightly/triton/third_party/intel_xpu_backend/.github/scripts/inductor_perf_summary.py ${HOME}/triton-nightly/pytorch
          # NOTE(review): no conda environment is activated in this step, so
          # these packages go to whichever `pip` is first on PATH. The perf
          # summary step also runs without activation - confirm this is
          # intended.
          pip install styleFrame scipy pandas

  # Job 2: HuggingFace accuracy runs (amp_bf16 / amp_fp16, inference and
  # training), a pass-rate summary, artifact upload, and a minimum-pass gate.
  Accuracy-Test:
    needs: Tests-Env-Prepare
    runs-on: [self-hosted, PVC_E2E]
    env:
      # Empty on scheduled runs, hence the fallback to the declared default.
      E2E_ONEAPI_VERSION: "${{ github.event.inputs.oneapi || '2023.2.0' }}"
    steps:
      - name: ACC Test for triton on PVC
        run: |
          echo -e "[ INFO ] Run E2E Acc test on Node $(hostname)"
          source ${HOME}/miniconda3/bin/activate triton-nightly-test
          source ${HOME}/env_triton.sh "${E2E_ONEAPI_VERSION}"
          cd ${HOME}/triton-nightly
          bash set_proxy.sh
          cd ${HOME}/triton-nightly/pytorch
          # Drop logs from the previous run so the summaries start empty.
          rm -rf inductor_log
          # The four configurations run concurrently; the last argument
          # differs per run (0-3). A bare `wait` returns 0 even if a
          # background run failed - results are judged from the CSV files in
          # the following steps.
          bash inductor_xpu_test.sh huggingface amp_bf16 inference accuracy xpu 0 &
          bash inductor_xpu_test.sh huggingface amp_bf16 training accuracy xpu 1 &
          bash inductor_xpu_test.sh huggingface amp_fp16 inference accuracy xpu 2 &
          bash inductor_xpu_test.sh huggingface amp_fp16 training accuracy xpu 3 &
          wait
          cp ${HOME}/triton-nightly/triton/sw_info.log inductor_log/

      - name: ACC Test Results Overview
        run: |
          log_root=${HOME}/triton-nightly/pytorch/inductor_log/huggingface

          # summarize <precision>
          # Appends total / passed / failed / pass-rate lines for the
          # inference and training accuracy CSVs of <precision> to
          # <precision>/e2e_summary.log and echoes them to the job log.
          summarize() {
            local precision="$1"
            local summary="${log_root}/${precision}/e2e_summary.log"
            local csv_prefix="${log_root}/${precision}/inductor_huggingface_${precision}"
            local csv_lines total tag mode passed failed rate

            echo -e "============ Acc Check for HF ${precision} ============" | tee -a "${summary}"

            # The total is the inference CSV line count minus one header
            # line; the same total is used for the training pass rate.
            csv_lines=$(wc -l < "${csv_prefix}_inference_xpu_accuracy.csv" || echo 0)
            total=$(( csv_lines - 1 ))
            echo "num_total_${precision}: ${total}" | tee -a "${summary}"

            for tag in inf tra; do
              case "${tag}" in
                inf) mode=inference ;;
                tra) mode=training ;;
              esac
              # `grep -c` exits non-zero on zero matches or a missing file;
              # neither may abort the step, a count of 0 is reported instead.
              passed=$(grep -c "pass" "${csv_prefix}_${mode}_xpu_accuracy.csv" || true)
              passed=${passed:-0}
              # $(( )) instead of `let`: `let` returns status 1 when the
              # result is 0, which would abort the step under `bash -e`
              # exactly when every model passes.
              failed=$(( total - passed ))
              # Guard against a CSV with no data rows (total <= 0).
              rate=$(awk -v p="${passed}" -v t="${total}" \
                'BEGIN { if (t > 0) printf "%.2f%%\n", (p / t) * 100; else print "N/A" }')
              echo "num_passed_${precision}_${tag}: ${passed}" | tee -a "${summary}"
              echo "num_failed_${precision}_${tag}: ${failed}" | tee -a "${summary}"
              echo "${precision}_${tag}_acc_pass_rate: ${rate}" | tee -a "${summary}"
            done
          }

          summarize amp_bf16
          summarize amp_fp16

      - name: Upload Triton Inductor E2E Nightly Data
        uses: actions/upload-artifact@v3
        with:
          name: Triton-Inductor-E2E-Nightly-Data
          path: /home/gta/triton-nightly/pytorch/inductor_log/

      - name: Test Results Check
        run: |
          log_root=${HOME}/triton-nightly/pytorch/inductor_log/huggingface

          # require_min_passed <precision> <tag> <mode> <minimum>
          # Fails the job when the `num_passed_<precision>_<tag>` value in
          # <precision>/e2e_summary.log is below <minimum>. A missing log or
          # missing line counts as 0 passed, so absent results cannot slip
          # through the gate.
          require_min_passed() {
            local precision="$1" tag="$2" mode="$3" minimum="$4" passed
            passed=$(grep "num_passed_${precision}_${tag}:" "${log_root}/${precision}/e2e_summary.log" \
              | tail -n 1 | sed -e 's/.*://;s/[^0-9]//g')
            if [ "${passed:-0}" -lt "${minimum}" ]; then
              echo -e "[ERROR] Inductor E2E Nightly test for HF ${precision} ${mode} passed_num < ${minimum}"
              exit 1
            fi
          }

          require_min_passed amp_bf16 inf inference 45
          require_min_passed amp_bf16 tra training 42
          require_min_passed amp_fp16 inf inference 45
          require_min_passed amp_fp16 tra training 42

  # Job 3: HuggingFace performance runs for the same four configurations,
  # followed by the summary script and an artifact upload. Only starts after
  # the accuracy job succeeded.
  Performance-Test:
    needs: Accuracy-Test
    runs-on: [self-hosted, PVC_E2E]
    env:
      # Empty on scheduled runs, hence the fallback to the declared default.
      E2E_ONEAPI_VERSION: "${{ github.event.inputs.oneapi || '2023.2.0' }}"
    steps:
      - name: Perf Test for triton on PVC
        run: |
          echo -e "[ INFO ] Run E2E Perf test on Node $(hostname)"
          source ${HOME}/miniconda3/bin/activate triton-nightly-test
          # Pass the oneAPI version like the build and accuracy steps do.
          source ${HOME}/env_triton.sh "${E2E_ONEAPI_VERSION}"
          cd ${HOME}/triton-nightly
          bash set_proxy.sh
          cd ${HOME}/triton-nightly/pytorch
          # Four concurrent runs; a bare `wait` does not propagate their
          # exit statuses.
          bash inductor_xpu_test.sh huggingface amp_bf16 inference performance xpu 0 &
          bash inductor_xpu_test.sh huggingface amp_bf16 training performance xpu 1 &
          bash inductor_xpu_test.sh huggingface amp_fp16 inference performance xpu 2 &
          bash inductor_xpu_test.sh huggingface amp_fp16 training performance xpu 3 &
          wait

      - name: Perf Test Results Generate and Overview
        run: |
          cd ${HOME}/triton-nightly/pytorch
          python inductor_perf_summary.py -s huggingface -p amp_bf16 amp_fp16

      - name: Upload Triton Inductor E2E Nightly Data
        uses: actions/upload-artifact@v3
        with:
          name: Triton-Inductor-E2E-Nightly-Data
          path: /home/gta/triton-nightly/pytorch/inductor_log/