Skip to content

Commit

Permalink
debug
Browse files — browse the repository at this point in the history
  • Loading branch information
mengfei25 committed Oct 8, 2024
1 parent 9a42950 commit dc2398d
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 408 deletions.
134 changes: 6 additions & 128 deletions .github/actions/inductor-xpu-e2e-test/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,138 +43,16 @@ inputs:
default: 'lts'
description: Driver lts/rolling

# Environment variables intended for the action's test steps.
# NOTE(review): this mapping appears before `runs:`; confirm that the action
# metadata schema honours an `env` key at this level. If it does not, these
# entries belong under the `env:` of the step(s) that need them.
env:
  # Token taken from the `hf_token` input.
  HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }}
  # Both keys evaluate to '1' only when the `driver` input is 'rolling',
  # otherwise to '0'.
  NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
  DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}

# Composite action: the entries listed under `steps` are executed in order.
runs:
using: composite
steps:
# Optional setup step: installs the Python packages needed by the requested
# benchmark suites. Runs only when the `env_prepare` input is set.
- name: Prepare ENV
  if: ${{ inputs.env_prepare }}
  shell: bash
  env:
    # Inputs are handed to the script as environment variables instead of being
    # pasted into the script text, so their values are never parsed as shell code.
    E2E_INPUT_SUITE: ${{ inputs.suite }}
    E2E_INPUT_PYTORCH: ${{ inputs.pytorch }}
  run: |
    source activate e2e_ci
    # presumably defines the *_COMMIT_ID and TRANSFORMERS_VERSION variables used below -- verify against env.sh
    source .github/scripts/env.sh
    if [[ "${E2E_INPUT_SUITE}" == *"torchbench"* ]]; then
      # Unless the `pytorch` input is "nightly_wheel", build torchaudio and
      # torchvision from source at the pinned commits and reinstall them.
      if [ "${E2E_INPUT_PYTORCH}" != "nightly_wheel" ]; then
        cd ../ && rm -rf audio && git clone --single-branch -b main https://github.com/pytorch/audio.git
        cd audio && git checkout $TORCHAUDIO_COMMIT_ID
        python setup.py bdist_wheel && pip uninstall torchaudio -y && pip install dist/*.whl
        cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git
        cd vision && git checkout $TORCHVISION_COMMIT_ID
        python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl
      fi
      # Check that the three packages import together before installing the benchmark.
      cd ../ && python -c "import torch, torchvision, torchaudio"
      rm -rf benchmark && git clone https://github.com/pytorch/benchmark.git
      cd benchmark && git checkout $TORCHBENCH_COMMIT_ID && pip install --no-deps -r requirements.txt
      python install.py --continue_on_fail
      # deps for torchrec_dlrm
      pip install pyre_extensions
      pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
      pip install torchmetrics==1.0.3
      pip install torchrec --no-deps --index-url https://download.pytorch.org/whl/nightly/cpu
    fi
    if [[ "${E2E_INPUT_SUITE}" == *"huggingface"* ]]; then
      pip install --force-reinstall git+https://github.com/huggingface/transformers@${TRANSFORMERS_VERSION}
    fi
    if [[ "${E2E_INPUT_SUITE}" == *"timm_models"* ]]; then
      if [ "${E2E_INPUT_PYTORCH}" != "nightly_wheel" ]; then
        cd ../ && rm -rf vision && git clone --single-branch -b main https://github.com/pytorch/vision.git
        cd vision && git checkout $TORCHVISION_COMMIT_ID
        python setup.py bdist_wheel && pip uninstall torchvision -y && pip install dist/*.whl
      fi
      # install timm without dependencies
      pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@$TIMM_COMMIT_ID
      # install timm dependencies without torch and torchvision
      pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/$TIMM_COMMIT_ID/requirements.txt | grep -vE torch)
    fi
    # Pin numpy last, after the suite installs above have run.
    pip install numpy==1.26.4
# Runs inductor_xpu_test.sh for every requested suite / dtype / mode / scenario
# combination and, for accuracy runs, checks the results with check_expected.py.
- name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
  shell: bash
  env:
    HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }}
    NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
    DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
    # Inputs are handed to the script as environment variables instead of being
    # pasted into the script text, so their values are never parsed as shell code.
    E2E_INPUT_SUITE: ${{ inputs.suite }}
    E2E_INPUT_DT: ${{ inputs.dt }}
    E2E_INPUT_MODE: ${{ inputs.mode }}
    E2E_INPUT_SCENARIO: ${{ inputs.scenario }}
  run: |
    source activate e2e_ci
    source .github/scripts/env.sh
    cp .github/scripts/inductor_xpu_test.sh ../pytorch
    cd ../pytorch
    # check param
    # contains <comma-separated allowed values> <value>
    # Sets contains_status to a command that the caller executes right after
    # the call: an echo for an accepted value, or `continue` so the enclosing
    # loop skips a rejected one.
    function contains() {
      contains_status="echo 'Start $2 ...'"
      {
        [[ $1 =~ (^|,)$2($|,) ]]
      } || {
        echo "[Warning] $2 is not a supported type! Skipped!"
        contains_status="continue"
      }
    }
    set -xe
    # Each input is a comma-separated list; commas become spaces so the
    # unquoted command substitution splits it into loop items.
    for suite in $(echo "${E2E_INPUT_SUITE}" |sed 's/,/ /g')
    do
      contains "huggingface,timm_models,torchbench" $suite
      $contains_status
      for dt in $(echo "${E2E_INPUT_DT}" |sed 's/,/ /g')
      do
        contains "float32,bfloat16,float16,amp_bf16,amp_fp16" $dt
        $contains_status
        for mode in $(echo "${E2E_INPUT_MODE}" |sed 's/,/ /g')
        do
          contains "inference,training" $mode
          $contains_status
          for scenario in $(echo "${E2E_INPUT_SCENARIO}" |sed 's/,/ /g')
          do
            contains "accuracy,performance" $scenario
            $contains_status
            if [ "${MODEL_ONLY_NAME}" == "" ];then
              # One "<a>:<b>" entry per device line reported by xpu-smi; the
              # part before the colon and the part after it are passed to the
              # test script as separate arguments, one background run per entry.
              xpu_list=($(xpu-smi discovery |grep 'DRM Device: /dev/' |sed 's/.*card//;s/[^0-9].*//' |awk '{print $1 - 1":"NR - 1}'))
              for xpu_id in ${xpu_list[*]}
              do
                bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id/:*} static ${#xpu_list[*]} ${xpu_id/*:} &
              done
            else
              # A single named model: one background run with fixed device arguments.
              bash inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 0 static 1 0 ${MODEL_ONLY_NAME} &
            fi
            # Block until every background run started above has finished.
            wait
            # summarize pass rate
            LOG_DIR="inductor_log/${suite}/${dt}"
            LOG_NAME=inductor_${suite}_${dt}_${mode}_xpu_${scenario}_all.log
            rm -f ${LOG_DIR}/${LOG_NAME}
            # Concatenate the per-card logs into the single "_all" log.
            find ${LOG_DIR}/ -name "inductor_${suite}_${dt}_${mode}_xpu_${scenario}_card*.log" |xargs cat >> ${LOG_DIR}/${LOG_NAME} 2>&1
            if [ "${scenario}" == "accuracy" ];then
              python ../torch-xpu-ops/.github/ci_expected_accuracy/check_expected.py \
                --suite $suite \
                --mode $mode \
                --dtype $dt \
                --csv_file ${LOG_DIR}/inductor_${suite}_${dt}_${mode}_xpu_${scenario}.csv \
                2>&1 |tee -a inductor_log/summary_accuracy.log
            fi
          done
        done
      done
    done
# Merges the per-run accuracy CSVs into one summary CSV and then runs
# inductor_summary.py over the requested dtypes / suites / modes / scenarios.
- name: Summary E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
  shell: bash
  env:
    HUGGING_FACE_HUB_TOKEN: ${{ inputs.hf_token }}
    # Inputs are handed to the script as environment variables instead of being
    # pasted into the script text, so their values are never parsed as shell code.
    E2E_INPUT_SUITE: ${{ inputs.suite }}
    E2E_INPUT_DT: ${{ inputs.dt }}
    E2E_INPUT_MODE: ${{ inputs.mode }}
    E2E_INPUT_SCENARIO: ${{ inputs.scenario }}
  run: |
    cd ../pytorch
    rm -f inductor_log/summary_accuracy.csv
    for var in $(find inductor_log/ -name "inductor_*_xpu_accuracy.csv")
    do
      # Append the CSV's own file name as an extra trailing field on every
      # line, then add the file to the combined summary.
      sed -i "s/$/,$(basename $var)/" $var
      cat $var >> inductor_log/summary_accuracy.csv
    done
    source activate e2e_ci
    cd ${{ github.workspace }}
    cp .github/scripts/inductor_summary.py ../pytorch
    cd ../pytorch
    pip install styleFrame scipy pandas
    set -xe
    # Comma-separated inputs become space-separated lists; they are expanded
    # unquoted below so each item is passed as its own argument.
    dt=$(echo "${E2E_INPUT_DT}" |sed 's/,/ /g')
    mode=$(echo "${E2E_INPUT_MODE}" |sed 's/,/ /g')
    suite=$(echo "${E2E_INPUT_SUITE}" |sed 's/,/ /g')
    scenario=$(echo "${E2E_INPUT_SCENARIO}" |sed 's/,/ /g')
    python inductor_summary.py -p ${dt} -s ${suite} -m ${mode} -sc ${scenario}
    # NOTE(review): printenv writes the whole environment to the job log,
    # including HUGGING_FACE_HUB_TOKEN set above -- confirm this debug output
    # is still wanted, or remove it.
    printenv
Loading

0 comments on commit dc2398d

Please sign in to comment.