diff --git a/.github/scripts/check-transformers.py b/.github/scripts/check-transformers.py
index b801a0518..abc14b051 100644
--- a/.github/scripts/check-transformers.py
+++ b/.github/scripts/check-transformers.py
@@ -7,125 +7,147 @@ parser.add_argument('junitxml', nargs='+')
 args = parser.parse_args()
 
+benchmark_failures = {
+    'link': 'https://github.com/huggingface/transformers/pull/35620',
+    'cuda': 'passed',
+}
+
+layernorm_accuracy_failures = {
+    'link': 'https://github.com/pytorch/pytorch/issues/141642',
+    'cuda': 'passed',
+}
+
+# This is a list of known test failures. Each listed test can have
+# associated metadata in the following format:
+# failing_cases = {
+#     'test_class': {
+#         'test_name': {
+#             'flaky': True,
+#             'cuda': "passed", # or failed, or skipped
+#             'link': 'https://github.com/org/project/issues/xxxx',
+#         }
+#     }
+# }
+# Use None if no metadata is needed.
 failing_cases = {
     'tests.benchmark.test_benchmark.BenchmarkTest': {
-        'test_inference_encoder_decoder_with_configs': None,
-        'test_inference_fp16': None,
-        'test_inference_no_configs': None,
-        'test_inference_no_configs_only_pretrain': None,
-        'test_inference_no_model_no_architectures': None,
-        'test_inference_torchscript': None,
-        'test_inference_with_configs': None,
-        'test_save_csv_files': None,
-        'test_trace_memory': None,
-        'test_train_encoder_decoder_with_configs': None,
-        'test_train_no_configs': None,
-        'test_train_no_configs_fp16': None,
-        'test_train_with_configs': None,
+        'test_inference_encoder_decoder_with_configs': benchmark_failures,
+        'test_inference_fp16': benchmark_failures,
+        'test_inference_no_configs': benchmark_failures,
+        'test_inference_no_configs_only_pretrain': benchmark_failures,
+        'test_inference_no_model_no_architectures': benchmark_failures,
+        'test_inference_torchscript': benchmark_failures,
+        'test_inference_with_configs': benchmark_failures,
+        'test_save_csv_files': benchmark_failures,
+        'test_trace_memory': benchmark_failures,
+        'test_train_encoder_decoder_with_configs': benchmark_failures,
+        'test_train_no_configs': benchmark_failures,
+        'test_train_no_configs_fp16': benchmark_failures,
+        'test_train_with_configs': benchmark_failures,
     },
     'tests.generation.test_logits_process.LogitsProcessorTest': {
-        'test_watermarking_processor': None,
+        'test_watermarking_processor': { 'cuda': 'passed', },
     },
     'tests.generation.test_utils.GenerationIntegrationTests': {
-        'test_assisted_decoding_encoder_decoder_shared_encoder': None,
-        'test_assisted_decoding_num_assistant_tokens_heuristic_schedule': None,
-        'test_assisted_generation_early_exit': None,
-        'test_custom_logits_processor': None,
-        'test_default_max_length_warning': None,
-        'test_eos_token_id_int_and_list_beam_search': None,
-        'test_eos_token_id_int_and_list_top_k_top_sampling': None,
-        'test_generate_compile_fullgraph_tiny': None,
-        'test_generated_length_assisted_generation': None,
-        'test_max_new_tokens_encoder_decoder': None,
-        'test_min_length_if_input_embeds': None,
-        'test_model_kwarg_assisted_decoding_decoder_only': None,
-        'test_model_kwarg_assisted_decoding_encoder_decoder': None,
-        'test_model_kwarg_encoder_signature_filtering': None,
-        'test_prepare_inputs_for_generation_decoder_llm': None,
-        'test_stop_sequence_stopping_criteria': None,
+        'test_assisted_decoding_encoder_decoder_shared_encoder': { 'cuda': 'failed', },
+        'test_assisted_decoding_num_assistant_tokens_heuristic_schedule': { 'cuda': 'failed', },
+        'test_assisted_generation_early_exit': { 'cuda': 'failed', },
+        'test_custom_logits_processor': { 'cuda': 'failed', },
+        'test_default_max_length_warning': { 'cuda': 'failed', },
+        'test_eos_token_id_int_and_list_beam_search': { 'cuda': 'failed', },
+        'test_eos_token_id_int_and_list_top_k_top_sampling': { 'cuda': 'failed', },
+        'test_generate_compile_fullgraph_tiny': { 'cuda': 'failed', },
+        'test_generated_length_assisted_generation': { 'cuda': 'failed', },
+        'test_max_new_tokens_encoder_decoder': { 'cuda': 'failed', },
+        'test_min_length_if_input_embeds': { 'cuda': 'passed' },
+        'test_model_kwarg_assisted_decoding_decoder_only': { 'cuda': 'failed' },
+        'test_model_kwarg_assisted_decoding_encoder_decoder': { 'cuda': 'failed' },
+        'test_model_kwarg_encoder_signature_filtering': { 'cuda': 'failed' },
+        'test_prepare_inputs_for_generation_decoder_llm': { 'cuda': 'failed' },
+        'test_stop_sequence_stopping_criteria': { 'cuda': 'failed' },
     },
     'tests.models.detr.test_image_processing_detr.DetrImageProcessingTest': {
         'test_fast_is_faster_than_slow': { 'flaky': True },
     },
     'tests.models.dpt.test_modeling_dpt_auto_backbone.DPTModelTest': {
-        'test_batching_equivalence': { 'flaky': True },
+        'test_batching_equivalence': { 'flaky': True, 'cuda': 'passed' },
     },
     'tests.models.fuyu.test_modeling_fuyu.FuyuModelTest': {
         'test_prompt_lookup_decoding_matches_greedy_search': { 'flaky': True },
     },
     'tests.models.git.test_modeling_git.GitModelTest': {
-        'test_generate_continue_from_past_key_values': { 'flaky': True },
-        'test_inputs_embeds_matches_input_ids': None,
+        'test_generate_continue_from_past_key_values': { 'flaky': True, 'cuda': 'passed' },
+        'test_inputs_embeds_matches_input_ids': { 'cuda': 'passed' },
     },
     'tests.models.hiera.test_modeling_hiera.HieraModelTest': {
-        'test_torch_fx': None,
-        'test_torch_fx_output_loss': None,
+        'test_torch_fx': layernorm_accuracy_failures,
+        'test_torch_fx_output_loss': layernorm_accuracy_failures,
     },
     'tests.models.mamba.test_modeling_mamba.MambaIntegrationTests': {
-        'test_simple_generate_1_cpu': None,
+        'test_simple_generate_1_cpu': { 'cuda': 'passed' },
     },
     'tests.models.pix2struct.test_modeling_pix2struct.Pix2StructModelTest': {
-        'test_new_cache_format_0': None,
-        'test_new_cache_format_1': None,
-        'test_new_cache_format_2': None,
+        'test_new_cache_format_0': { 'cuda': 'passed' },
+        'test_new_cache_format_1': { 'cuda': 'passed' },
+        'test_new_cache_format_2': { 'cuda': 'passed' },
     },
     'tests.models.speecht5.test_modeling_speecht5.SpeechT5ForTextToSpeechIntegrationTests': {
-        'test_batch_generation': None,
+        'test_batch_generation': { 'cuda': 'passed' },
     },
     'tests.pipelines.test_pipelines_automatic_speech_recognition.AutomaticSpeechRecognitionPipelineTests': {
-        'test_small_model_pt_seq2seq': None,
+        'test_small_model_pt_seq2seq': { 'cuda': "failed" },
     },
     'tests.pipelines.test_pipelines_common.CustomPipelineTest': {
-        'test_custom_code_with_string_tokenizer': None,
+        'test_custom_code_with_string_tokenizer': { 'cuda': "failed" },
     },
     'tests.pipelines.test_pipelines_depth_estimation.DepthEstimationPipelineTests': {
-        'test_multiprocess': None,
+        'test_multiprocess': { 'cuda': "failed" },
     },
     'tests.pipelines.test_pipelines_image_to_text.ImageToTextPipelineTests': {
-        'test_small_model_pt': None,
+        'test_small_model_pt': { 'cuda': "failed" },
     },
     'tests.pipelines.test_pipelines_summarization.SummarizationPipelineTests': {
-        'test_small_model_pt': None,
+        'test_small_model_pt': { 'cuda': "failed" },
     },
     'tests.pipelines.test_pipelines_text_generation.TextGenerationPipelineTests': {
-        'test_small_model_pt': None,
-        'test_stop_sequence_stopping_criteria': None,
+        'test_small_model_pt': { 'cuda': "failed" },
+        'test_stop_sequence_stopping_criteria': { 'cuda': "failed" },
     },
     'tests.pipelines.test_pipelines_video_classification.VideoClassificationPipelineTests': {
-        'test_small_model_pt': None,
+        'test_small_model_pt': { 'cuda': "failed" },
     },
     'tests.pipelines.test_pipelines_visual_question_answering.VisualQuestionAnsweringPipelineTests': {
-        'test_small_model_pt_blip2': None,
+        'test_small_model_pt_blip2': { 'cuda': "failed" },
     },
     'tests.pipelines.test_pipelines_zero_shot_image_classification.ZeroShotImageClassificationPipelineTests': {
-        'test_small_model_pt': None,
-        'test_small_model_pt_fp16': None,
+        'test_small_model_pt': { 'cuda': "failed" },
+        'test_small_model_pt_fp16': { 'cuda': "failed" },
     },
     'tests.test_pipeline_mixin.AutomaticSpeechRecognitionPipelineTests': {
-        'test_small_model_pt_seq2seq': None,
+        'test_small_model_pt_seq2seq': { 'cuda': "failed" },
     },
     'tests.test_pipeline_mixin.DepthEstimationPipelineTests': {
-        'test_multiprocess': None,
+        'test_multiprocess': { 'cuda': "failed" },
     },
     'tests.test_pipeline_mixin.ImageToTextPipelineTests': {
-        'test_small_model_pt': None,
+        'test_small_model_pt': { 'cuda': "failed" },
     },
     'tests.test_pipeline_mixin.SummarizationPipelineTests': {
-        'test_small_model_pt': None,
+        'test_small_model_pt': { 'cuda': "failed" },
     },
     'tests.test_pipeline_mixin.TextGenerationPipelineTests': {
-        'test_small_model_pt': None,
-        'test_stop_sequence_stopping_criteria': None,
+        'test_small_model_pt': { 'cuda': "failed" },
+        'test_stop_sequence_stopping_criteria': { 'cuda': "failed" },
     },
     'tests.test_pipeline_mixin.VideoClassificationPipelineTests': {
-        'test_small_model_pt': None,
+        'test_small_model_pt': { 'cuda': "failed" },
     },
     'tests.test_pipeline_mixin.VisualQuestionAnsweringPipelineTests': {
-        'test_small_model_pt_blip2': None,
+        'test_small_model_pt_blip2': { 'cuda': "failed" },
     },
     'tests.test_pipeline_mixin.ZeroShotImageClassificationPipelineTests': {
-        'test_small_model_pt': None,
-        'test_small_model_pt_fp16': None,
+        'test_small_model_pt': { 'cuda': "failed" },
+        'test_small_model_pt_fp16': { 'cuda': "failed" },
     },
 }
 
@@ -170,6 +192,24 @@ def is_flaky(classname, name):
         return True if 'flaky' in _case and _case['flaky'] else False
     return False
 
+def get_cuda_status(classname, name):
+    if classname in failing_cases and name in failing_cases[classname]:
+        _case = failing_cases[classname][name]
+        if _case is None or 'cuda' not in _case:
+            return ""
+        return _case['cuda']
+    return ""
+
+def get_link(classname, name):
+    if classname in failing_cases and name in failing_cases[classname]:
+        _case = failing_cases[classname][name]
+        if _case is None or 'link' not in _case:
+            return ""
+        link = _case['link']
+        link = f"[link]({link})"
+        return link
+    return ""
+
 xmls = [ JUnitXml.fromfile(f) for f in args.junitxml ]
 for idx, xml in enumerate(xmls):
     for suite in xml:
@@ -213,6 +253,8 @@ def print_cases(cases):
             'Class name': classname,
             'Test name': name,
             'Status': result,
+            'CUDA Status': get_cuda_status(classname, name),
+            'Link': get_link(classname, name),
             'Message': message,
         }
         print_md_row(row, print_header)