diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index f4840f92b..bcbcfbedd 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -81,7 +81,6 @@ runs: cp .github/scripts/inductor_xpu_test.sh ../pytorch cd ../pytorch source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh - rm -f ${{ github.workspace }}/summary_accuracy.log # check param function contains() { contains_status="echo 'Start $2 ...'" @@ -130,7 +129,7 @@ runs: --mode $mode \ --dtype $dt \ --csv_file ${LOG_DIR}/inductor_${suite}_${dt}_${mode}_xpu_${scenario}.csv \ - 2>&1 |tee -a ${{ github.workspace }}/summary_accuracy.log + 2>&1 |tee -a inductor_log/summary_accuracy.log fi done done @@ -150,7 +149,6 @@ runs: sed -i "s/$/,$(basename $var)/" $var cat $var >> inductor_log/summary_accuracy.csv done - cat ${{ github.workspace }}/summary_accuracy.log |tee -a inductor_log/summary_accuracy.log # check param function contains() { diff --git a/.github/ci_expected_accuracy/check_expected.py b/.github/ci_expected_accuracy/check_expected.py index 5339f4ce3..48543c930 100644 --- a/.github/ci_expected_accuracy/check_expected.py +++ b/.github/ci_expected_accuracy/check_expected.py @@ -36,12 +36,15 @@ new_models = [] new_pass_models = [] lost_models = [] +timeout_models = [] for model_name in model_names: # for index, row in refer_data.iterrows(): test_row = next(([i, line] for i, line in test_data.iterrows() if line["name"] == model_name), "N/A") refer_row = next(([i, line] for i, line in refer_data.iterrows() if line["name"] == model_name), "N/A") test_accuracy = test_row[1]["accuracy"] if test_row != "N/A" else "N/A" refer_accuracy = refer_row[1][args.dtype] if refer_row != "N/A" else "N/A" + test_accuracy = str(test_accuracy) + refer_accuracy = str(refer_accuracy) if test_accuracy == "N/A": lost_models.append([model_name, test_accuracy]) elif 'pass' in test_accuracy: @@ -54,6 +57,13 @@ elif 'pass' not in refer_accuracy: new_pass_models.append([model_name, test_accuracy]) refer_data.at[refer_row[0], args.dtype] = test_accuracy + elif 'timeout' in test_accuracy: + timeout_models.append([model_name, test_accuracy]) + if refer_accuracy == "N/A": + new_models.append([model_name, test_accuracy]) + refer_data.loc[len(refer_data),:] = "N/A" + refer_data.at[len(refer_data) - 1, "name"] = model_name + refer_data.at[len(refer_data) - 1, args.dtype] = test_accuracy else: if refer_accuracy == "N/A": new_models.append([model_name, test_accuracy]) @@ -72,8 +82,9 @@ print("============ Summary for {} {} {} accuracy ============".format(args.suite, args.dtype, args.mode)) print("Total models:", len(model_names)) print("Passed models:", len(passed_models)) -print("Real failed: models:", len(real_failed_models), real_failed_models) -print("Expected failed: models:", len(expected_failed_models), expected_failed_models) +print("Real failed models:", len(real_failed_models), real_failed_models) +print("Expected failed models:", len(expected_failed_models), expected_failed_models) +print("Warning timeout models:", len(timeout_models), timeout_models) print("New models:", len(new_models), new_models) print("Failed to passed models:", len(new_pass_models), new_pass_models) print("Not run/in models:", len(lost_models), lost_models) diff --git a/.github/ci_expected_accuracy/inductor_timm_models_training.csv b/.github/ci_expected_accuracy/inductor_timm_models_training.csv index df939e7db..c605c9bdf 100644 --- a/.github/ci_expected_accuracy/inductor_timm_models_training.csv +++ b/.github/ci_expected_accuracy/inductor_timm_models_training.csv @@ -1,62 +1,62 @@ name,float32,bfloat16,float16,amp_bf16,amp_fp16 -adv_inception_v3,pass,pass,fail_accuracy,pass,pass +adv_inception_v3,pass,pass,pass,pass,pass beit_base_patch16_224,pass,pass,pass,pass,pass -botnet26t_256,pass,pass,fail_accuracy,pass,pass +botnet26t_256,pass,pass,pass,pass,pass cait_m36_384,pass,pass,pass,pass,pass coat_lite_mini,pass,pass,pass,pass,pass convit_base,pass,pass,pass,pass,pass convmixer_768_32,pass,pass,pass,pass,pass -convnext_base,pass,pass,pass,pass,pass +convnext_base,pass,pass,fail_accuracy,pass,pass crossvit_9_240,pass,pass,pass,pass,pass -cspdarknet53,pass,pass,fail_accuracy,pass,pass +cspdarknet53,pass,pass,pass,pass,pass deit_base_distilled_patch16_224,pass,pass,pass,pass,pass -dla102,pass,pass,fail_accuracy,pass,pass +dla102,pass,pass,pass,pass,pass dm_nfnet_f0,pass,pass,pass,pass,pass -dpn107,pass,pass,fail_accuracy,pass,pass -eca_botnext26ts_256,pass,pass,fail_accuracy,pass,pass -eca_halonext26ts,pass,pass,fail_accuracy,pass,pass -ese_vovnet19b_dw,pass,pass,fail_accuracy,pass,pass -fbnetc_100,pass,pass,fail_accuracy,pass,pass -fbnetv3_b,pass,pass,fail_accuracy,pass,pass -gernet_l,pass,pass,fail_accuracy,pass,pass -ghostnet_100,pass,pass,fail_accuracy,pass,pass -gluon_inception_v3,pass,pass,fail_accuracy,pass,pass +dpn107,pass,pass,pass,pass,pass +eca_botnext26ts_256,pass,pass,pass,pass,pass +eca_halonext26ts,pass,pass,fail_to_run,pass,pass +ese_vovnet19b_dw,pass,pass,pass,pass,pass +fbnetc_100,pass,pass,pass,pass,pass +fbnetv3_b,pass,pass,pass,pass,pass +gernet_l,pass,pass,pass,pass,pass +ghostnet_100,pass,pass,pass,pass,pass +gluon_inception_v3,pass,pass,pass,pass,pass gmixer_24_224,pass,pass,pass,pass,pass gmlp_s16_224,pass,pass,pass,pass,pass -hrnet_w18,pass,pass,fail_accuracy,pass,pass -inception_v3,pass,pass,fail_accuracy,pass,pass -jx_nest_base,pass,pass,pass,pass,pass +hrnet_w18,pass,pass,pass,pass,pass +inception_v3,pass,pass,pass,pass,pass +jx_nest_base,pass,pass,fail_accuracy,pass,pass lcnet_050,pass,pass,fail_accuracy,pass,pass levit_128,pass,pass,pass,pass,pass mixer_b16_224,pass,pass,pass,pass,pass -mixnet_l,pass,pass,fail_accuracy,pass,pass -mnasnet_100,pass,pass,fail_accuracy,pass,pass -mobilenetv2_100,pass,pass,fail_accuracy,pass,pass -mobilenetv3_large_100,pass,pass,fail_accuracy,pass,pass -mobilevit_s,pass,pass,fail_accuracy,pass,pass +mixnet_l,pass,pass,pass,pass,pass +mnasnet_100,pass,pass,pass,pass,pass +mobilenetv2_100,pass,pass,pass,pass,pass +mobilenetv3_large_100,pass,pass,pass,pass,pass +mobilevit_s,pass,pass,pass,pass,pass nfnet_l0,pass,pass,pass,pass,pass pit_b_224,pass,pass,pass,pass,pass -pnasnet5large,pass,pass,pass,pass,fail_accuracy +pnasnet5large,pass,pass,pass,pass,pass poolformer_m36,pass,pass,pass,pass,pass -regnety_002,pass,pass,fail_accuracy,pass,pass -repvgg_a2,pass,pass,fail_accuracy,pass,pass -res2net101_26w_4s,pass,pass,fail_accuracy,pass,pass -res2net50_14w_8s,pass,pass,fail_accuracy,pass,pass -res2next50,pass,pass,fail_accuracy,pass,pass +regnety_002,pass,pass,pass,pass,pass +repvgg_a2,pass,pass,pass,pass,pass +res2net101_26w_4s,pass,pass,pass,pass,pass +res2net50_14w_8s,pass,pass,pass,pass,pass +res2next50,pass,pass,pass,pass,pass resmlp_12_224,pass,pass,pass,pass,pass -resnest101e,pass,pass,fail_accuracy,pass,pass -rexnet_100,pass,pass,fail_accuracy,pass,pass -sebotnet33ts_256,pass,pass,fail_accuracy,pass,pass -selecsls42b,pass,pass,fail_accuracy,pass,pass -spnasnet_100,pass,pass,fail_accuracy,pass,pass +resnest101e,pass,pass,pass,pass,pass +rexnet_100,pass,pass,pass,pass,pass +sebotnet33ts_256,pass,pass,pass,pass,pass +selecsls42b,pass,pass,pass,pass,pass +spnasnet_100,pass,pass,pass,pass,pass swin_base_patch4_window7_224,pass,pass,pass,pass,pass -swsl_resnext101_32x16d,pass,pass,fail_accuracy,pass,pass -tf_efficientnet_b0,pass,pass,fail_accuracy,pass,pass -tf_mixnet_l,pass,pass,fail_accuracy,pass,pass -tinynet_a,pass,pass,fail_accuracy,pass,pass +swsl_resnext101_32x16d,pass,pass,pass,pass,pass +tf_efficientnet_b0,pass,pass,pass,pass,pass +tf_mixnet_l,pass,pass,pass,pass,pass +tinynet_a,pass,pass,pass,pass,pass tnt_s_patch16_224,pass,pass,pass,pass,pass twins_pcpvt_base,pass,pass,pass,pass,pass visformer_small,pass,pass,pass,pass,pass vit_base_patch16_224,pass,pass,pass,pass,pass -volo_d1_224,pass,pass,fail_accuracy,pass,pass +volo_d1_224,pass,pass,pass,pass,pass xcit_large_24_p8_224,pass_due_to_skip,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run diff --git a/.github/ci_expected_accuracy/inductor_torchbench_inference.csv b/.github/ci_expected_accuracy/inductor_torchbench_inference.csv index 7daebdbd4..4825aa41f 100644 --- a/.github/ci_expected_accuracy/inductor_torchbench_inference.csv +++ b/.github/ci_expected_accuracy/inductor_torchbench_inference.csv @@ -1,11 +1,11 @@ name,float32,bfloat16,float16,amp_bf16,amp_fp16 torchrec_dlrm,pass,eager_fail_to_run,eager_fail_to_run,pass,pass BERT_pytorch,pass,pass,pass,pass,pass -Background_Matting,pass_due_to_skip,pass_due_to_skip,eager_fail_to_run,pass_due_to_skip,eager_fail_to_run +Background_Matting,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip DALLE2_pytorch,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run LearningToPaint,pass,pass,pass,pass,pass Super_SloMo,pass,pass,pass,pass,pass -alexnet,eager_two_runs_differ,pass,pass,pass,eager_two_runs_differ +alexnet,pass,pass,pass,pass,pass basic_gnn_edgecnn,pass,pass,pass,pass,pass basic_gnn_gcn,pass,pass,pass,pass,pass basic_gnn_gin,pass,pass,pass,pass,pass @@ -21,11 +21,11 @@ detectron2_fasterrcnn_r_50_c4,pass,eager_fail_to_run,fail_accuracy,eager_fail_to detectron2_fasterrcnn_r_50_dc5,pass,eager_fail_to_run,fail_accuracy,eager_fail_to_run,fail_accuracy detectron2_fasterrcnn_r_50_fpn,pass,eager_fail_to_run,fail_accuracy,eager_fail_to_run,fail_accuracy detectron2_fcos_r_50_fpn,pass,pass,pass,pass,pass -detectron2_maskrcnn,fail_accuracy,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run -detectron2_maskrcnn_r_101_c4,fail_accuracy,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run -detectron2_maskrcnn_r_101_fpn,fail_accuracy,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run -detectron2_maskrcnn_r_50_c4,pass,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run -detectron2_maskrcnn_r_50_fpn,fail_accuracy,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run +detectron2_maskrcnn,fail_to_run,eager_fail_to_run,fail_to_run,eager_fail_to_run,fail_to_run +detectron2_maskrcnn_r_101_c4,fail_accuracy,eager_fail_to_run,fail_accuracy,eager_fail_to_run,fail_accuracy +detectron2_maskrcnn_r_101_fpn,fail_accuracy,eager_fail_to_run,fail_accuracy,eager_fail_to_run,fail_accuracy +detectron2_maskrcnn_r_50_c4,pass,eager_fail_to_run,fail_accuracy,eager_fail_to_run,fail_accuracy +detectron2_maskrcnn_r_50_fpn,fail_accuracy,eager_fail_to_run,fail_accuracy,eager_fail_to_run,fail_accuracy dlrm,pass,pass,pass,pass,pass doctr_det_predictor,pass,pass,pass,eager_fail_to_run,pass doctr_reco_predictor,pass,pass,pass,pass,pass @@ -61,7 +61,7 @@ mnasnet1_0,pass,pass,pass,pass,pass mobilenet_v2,pass,pass,pass,pass,pass mobilenet_v2_quantized_qat,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load mobilenet_v3_large,pass,pass,pass,pass,pass -moco,model_fail_to_load,model_fail_to_load,model_fail_to_load,eager_fail_to_run,model_fail_to_load +moco,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load moondream,pass,pass,pass,pass,pass nanogpt,pass,pass,pass,pass,pass nvidia_deeprecommender,pass,pass,pass,pass,pass @@ -71,7 +71,7 @@ phlippe_resnet,pass,pass,pass,pass,pass pyhpc_equation_of_state,pass,pass,fail_accuracy,pass,pass pyhpc_isoneutral_mixing,pass,pass,pass,pass,pass pyhpc_turbulent_kinetic_energy,pass,pass,pass,pass,pass -pytorch_CycleGAN_and_pix2pix,pass,pass,eager_fail_to_run,pass,eager_fail_to_run +pytorch_CycleGAN_and_pix2pix,pass,pass,pass,pass,pass pytorch_stargan,pass,pass,pass,pass,pass pytorch_unet,pass,pass,pass,pass,pass resnet152,pass,pass,pass,pass,pass @@ -79,7 +79,7 @@ resnet18,pass,pass,pass,pass,pass resnet50,pass,pass,pass,pass,pass resnet50_quantized_qat,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load resnext50_32x4d,pass,pass,pass,pass,pass -sam,eager_two_runs_differ,pass,pass,pass,pass +sam,pass,pass,pass,pass,pass sam_fast,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run shufflenet_v2_x1_0,pass,pass,pass,pass,pass simple_gpt,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load @@ -89,7 +89,7 @@ speech_transformer,pass,pass,pass,pass,pass squeezenet1_1,pass,fail_accuracy,fail_accuracy,pass,pass stable_diffusion_text_encoder,pass,pass,pass,pass,pass stable_diffusion_unet,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip -tacotron2,pass,pass,pass,model_fail_to_load,fail_to_run +tacotron2,pass,pass,pass,fail_to_run,fail_to_run timm_efficientdet,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load timm_efficientnet,pass,pass,pass,pass,pass timm_nfnet,pass,pass,pass,pass,pass @@ -100,6 +100,7 @@ timm_vision_transformer_large,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip timm_vovnet,pass,pass,pass,pass,pass torch_multimodal_clip,pass,pass,pass,eager_fail_to_run,eager_fail_to_run tts_angular,pass,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run -vgg16,eager_two_runs_differ,pass,pass,pass,pass +vgg16,pass,pass,pass,pass,pass vision_maskrcnn,pass,pass,pass,eager_fail_to_run,eager_fail_to_run yolov3,pass,pass,pass,pass,pass +hf_Roberta_base,pass,pass,pass,pass,pass diff --git a/.github/ci_expected_accuracy/inductor_torchbench_training.csv b/.github/ci_expected_accuracy/inductor_torchbench_training.csv index 94868d276..2984ecbc9 100644 --- a/.github/ci_expected_accuracy/inductor_torchbench_training.csv +++ b/.github/ci_expected_accuracy/inductor_torchbench_training.csv @@ -1,11 +1,11 @@ name,float32,bfloat16,float16,amp_bf16,amp_fp16 -torchrec_dlrm,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,fail_to_run +torchrec_dlrm,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run BERT_pytorch,pass,pass,pass,pass,pass -Background_Matting,pass_due_to_skip,pass_due_to_skip,eager_fail_to_run,pass_due_to_skip,eager_fail_to_run +Background_Matting,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip DALLE2_pytorch,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run LearningToPaint,pass,pass,pass,pass,pass -Super_SloMo,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ -alexnet,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ +Super_SloMo,eager_two_runs_differ,pass,pass,eager_two_runs_differ,pass +alexnet,eager_two_runs_differ,pass,pass,pass,pass basic_gnn_edgecnn,pass,pass,pass,pass,pass basic_gnn_gcn,pass,pass,pass,pass,pass basic_gnn_gin,pass,pass,pass,pass,pass @@ -30,7 +30,7 @@ dlrm,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eag doctr_det_predictor,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run doctr_reco_predictor,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run drq,pass,pass,pass,pass,pass -fastNLP_Bert,fail_accuracy,pass,pass,fail_accuracy,fail_accuracy +fastNLP_Bert,pass,pass,pass,pass,pass functorch_dp_cifar10,fail_accuracy,fail_accuracy,fail_accuracy,pass,pass functorch_maml_omniglot,pass,pass,pass,fail_accuracy,pass hf_Albert,pass,pass,pass,pass,pass @@ -61,7 +61,7 @@ mnasnet1_0,pass,pass,pass,pass,pass mobilenet_v2,pass,pass,pass,pass,pass mobilenet_v2_quantized_qat,fail_accuracy,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run mobilenet_v3_large,pass,pass,pass,pass,pass -moco,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,eager_fail_to_run +moco,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load moondream,pass,pass,pass,pass,pass nanogpt,pass,pass,pass,pass,pass nvidia_deeprecommender,pass,pass,pass,pass,pass @@ -71,7 +71,7 @@ phlippe_resnet,pass,fail_accuracy,pass,fail_accuracy,pass pyhpc_equation_of_state,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run pyhpc_isoneutral_mixing,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run pyhpc_turbulent_kinetic_energy,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load -pytorch_CycleGAN_and_pix2pix,eager_two_runs_differ,eager_two_runs_differ,eager_fail_to_run,eager_two_runs_differ,eager_fail_to_run +pytorch_CycleGAN_and_pix2pix,eager_two_runs_differ,eager_two_runs_differ,pass,eager_two_runs_differ,pass pytorch_stargan,pass,pass,pass,pass,pass pytorch_unet,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip resnet152,pass,pass,pass,pass,pass @@ -100,6 +100,7 @@ timm_vision_transformer_large,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip timm_vovnet,pass,pass,pass,pass,pass torch_multimodal_clip,pass,pass,pass,eager_fail_to_run,eager_fail_to_run tts_angular,pass,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run,eager_fail_to_run -vgg16,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ,eager_two_runs_differ +vgg16,pass,pass,pass,pass,pass vision_maskrcnn,pass,pass,pass,eager_fail_to_run,eager_fail_to_run -yolov3,pass,pass,fail_accuracy,pass,pass +yolov3,pass,pass,pass,pass,pass +hf_Roberta_base,pass,pass,pass,pass,pass diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 0440d202b..63dd2248e 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -40,6 +40,11 @@ jobs: - name: Prepare Stock Pytorch run: | pwd + which conda && conda clean -ay + conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \ + rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK} + conda create -n xpu_op_${ZE_AFFINITY_MASK} python=${{ inputs.python }} cmake ninja -y + source activate xpu_op_${ZE_AFFINITY_MASK} cd ../ && rm -rf pytorch git clone https://github.com/pytorch/pytorch pytorch cd pytorch && git checkout ${{ inputs.pytorch }} @@ -57,10 +62,6 @@ jobs: fi - name: Build Pytorch XPU run: | - which conda && conda clean -ay - conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \ - rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK} - conda create -n xpu_op_${ZE_AFFINITY_MASK} python=${{ inputs.python }} cmake ninja -y source activate xpu_op_${ZE_AFFINITY_MASK} conda install -c intel mkl-static mkl-include -y cd ../pytorch diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index e2f0c2c95..1a663661e 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -97,6 +97,7 @@ jobs: TRANSFORMERS_VERSION: ${{ steps.pinned.outputs.TRANSFORMERS_VERSION }} TIMM_COMMIT_ID: ${{ steps.pinned.outputs.TIMM_COMMIT_ID }} TRITON_COMMIT_ID: ${{ steps.pinned.outputs.TRITON_COMMIT_ID }} + TIMEOUT_MODELS: ${{ steps.summary.outputs.TIMEOUT_MODELS }} steps: - name: Checkout torch-xpu-ops uses: actions/checkout@v4 @@ -214,13 +215,25 @@ jobs: scenario: ${{ inputs.scenario }} hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - name: Summarize archieve files + id: summary if: always() run: | rm -rf ${{ github.workspace }}/upload_files cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files - failed_case=$(grep "Real failed: models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true) - if [ ${failed_case} -ne 0 ];then - grep -E "Real failed: models: [1-9]|Summary for" ${{ github.workspace }}/summary_accuracy.log + mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/ + find . -type f -name "*.tgz" -mtime +3 -delete # delete files older than 3 days + tar zcf xpu-inductor-${GITHUB_RUN_ID}.tgz -C ${{ github.workspace }}/upload_files/ . # backup logs + failed_models=$(grep "Real failed models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true) + timeout_models=$(grep "timeout models: *[1-9]" ${{ github.workspace }}/upload_files/summary_accuracy.log |wc -l || true) + if [ ${timeout_models} -ne 0 ];then + TIMEOUT_MODELS="$( + grep -E "timeout models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "timeout" -B 1 + )" + echo "${TIMEOUT_MODELS}" |sed 's/Summary/\\nSummary/g;s/Timeout/\\nTimeout/g' |tee -a "${GITHUB_OUTPUT}" + grep -E "timeout models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "timeout" -B 1 + fi + if [ ${failed_models} -ne 0 ];then + grep -E "Real failed models: [1-9]|Summary for" ${{ github.workspace }}/upload_files/summary_accuracy.log |grep "failed" -B 1 exit 1 fi - name: Upload Inductor XPU E2E Data @@ -259,6 +272,7 @@ jobs: TRANSFORMERS_VERSION="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TRANSFORMERS_VERSION }}" TIMM_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TIMM_COMMIT_ID }}" TRITON_COMMIT_ID="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TRITON_COMMIT_ID }}" + TIMEOUT_MODELS="${{ needs.Linux-Nightly-Ondemand-E2E-Tests.outputs.TIMEOUT_MODELS }}" # Test status if [ "${{ needs.Linux-Nightly-Ondemand-E2E-Tests.result }}" == "success" ];then test_status=Success @@ -301,6 +315,7 @@ jobs: fi echo -e "Inputs | $test_scope\n--- | --- \n" >> ${{ github.workspace }}/report.txt fi + echo "$TIMEOUT_MODELS" >> ${{ github.workspace }}/report.txt echo "$cc_comment" >> ${{ github.workspace }}/report.txt # Report report_txt=$(cat ${{ github.workspace }}/report.txt)