Skip to content

Commit

Permalink
ci: add cuda status for the known Transformers test failures (#1283)
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Rogozhkin <[email protected]>
  • Loading branch information
dvrogozh authored Jan 14, 2025
1 parent 299831d commit cf82b8e
Showing 1 changed file with 103 additions and 61 deletions.
164 changes: 103 additions & 61 deletions .github/scripts/check-transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,125 +7,147 @@
# Positional argument: one or more JUnit XML report files to scan
# (each is later loaded with JUnitXml.fromfile).
parser.add_argument('junitxml', nargs='+')
args = parser.parse_args()

# Metadata shared by all tests failing due to the benchmark issue tracked
# in the linked upstream PR; these tests pass on CUDA.
benchmark_failures = {
    'link': 'https://github.com/huggingface/transformers/pull/35620',
    'cuda': 'passed',
}

# Metadata shared by tests failing due to a layer_norm accuracy issue
# tracked in the linked pytorch issue; these tests pass on CUDA.
layernorm_accuracy_failures = {
    'link': 'https://github.com/pytorch/pytorch/issues/141642',
    'cuda': 'passed',
}

# That's a list of known test failures. Each listed test can have
# associated metadata in the following format:
# failing_cases = {
#     'test_class': {
#         'test_name': {
#             'flaky': True,
#             'cuda': 'passed',  # or 'failed', or 'skipped'
#             'link': 'https://github.com/org/project/issues/xxxx',
#         }
#     }
# }
# Use None if no metadata is needed.
failing_cases = {
    'tests.benchmark.test_benchmark.BenchmarkTest': {
        'test_inference_encoder_decoder_with_configs': benchmark_failures,
        'test_inference_fp16': benchmark_failures,
        'test_inference_no_configs': benchmark_failures,
        'test_inference_no_configs_only_pretrain': benchmark_failures,
        'test_inference_no_model_no_architectures': benchmark_failures,
        'test_inference_torchscript': benchmark_failures,
        'test_inference_with_configs': benchmark_failures,
        'test_save_csv_files': benchmark_failures,
        'test_trace_memory': benchmark_failures,
        'test_train_encoder_decoder_with_configs': benchmark_failures,
        'test_train_no_configs': benchmark_failures,
        'test_train_no_configs_fp16': benchmark_failures,
        'test_train_with_configs': benchmark_failures,
    },
    'tests.generation.test_logits_process.LogitsProcessorTest': {
        'test_watermarking_processor': {'cuda': 'passed'},
    },
    'tests.generation.test_utils.GenerationIntegrationTests': {
        'test_assisted_decoding_encoder_decoder_shared_encoder': {'cuda': 'failed'},
        'test_assisted_decoding_num_assistant_tokens_heuristic_schedule': {'cuda': 'failed'},
        'test_assisted_generation_early_exit': {'cuda': 'failed'},
        'test_custom_logits_processor': {'cuda': 'failed'},
        'test_default_max_length_warning': {'cuda': 'failed'},
        'test_eos_token_id_int_and_list_beam_search': {'cuda': 'failed'},
        'test_eos_token_id_int_and_list_top_k_top_sampling': {'cuda': 'failed'},
        'test_generate_compile_fullgraph_tiny': {'cuda': 'failed'},
        'test_generated_length_assisted_generation': {'cuda': 'failed'},
        'test_max_new_tokens_encoder_decoder': {'cuda': 'failed'},
        'test_min_length_if_input_embeds': {'cuda': 'passed'},
        'test_model_kwarg_assisted_decoding_decoder_only': {'cuda': 'failed'},
        'test_model_kwarg_assisted_decoding_encoder_decoder': {'cuda': 'failed'},
        'test_model_kwarg_encoder_signature_filtering': {'cuda': 'failed'},
        'test_prepare_inputs_for_generation_decoder_llm': {'cuda': 'failed'},
        'test_stop_sequence_stopping_criteria': {'cuda': 'failed'},
    },
    'tests.models.detr.test_image_processing_detr.DetrImageProcessingTest': {
        'test_fast_is_faster_than_slow': {'flaky': True},
    },
    'tests.models.dpt.test_modeling_dpt_auto_backbone.DPTModelTest': {
        'test_batching_equivalence': {'flaky': True, 'cuda': 'passed'},
    },
    'tests.models.fuyu.test_modeling_fuyu.FuyuModelTest': {
        'test_prompt_lookup_decoding_matches_greedy_search': {'flaky': True},
    },
    'tests.models.git.test_modeling_git.GitModelTest': {
        'test_generate_continue_from_past_key_values': {'flaky': True, 'cuda': 'passed'},
        'test_inputs_embeds_matches_input_ids': {'cuda': 'passed'},
    },
    'tests.models.hiera.test_modeling_hiera.HieraModelTest': {
        'test_torch_fx': layernorm_accuracy_failures,
        'test_torch_fx_output_loss': layernorm_accuracy_failures,
    },
    'tests.models.mamba.test_modeling_mamba.MambaIntegrationTests': {
        'test_simple_generate_1_cpu': {'cuda': 'passed'},
    },
    'tests.models.pix2struct.test_modeling_pix2struct.Pix2StructModelTest': {
        'test_new_cache_format_0': {'cuda': 'passed'},
        'test_new_cache_format_1': {'cuda': 'passed'},
        'test_new_cache_format_2': {'cuda': 'passed'},
    },
    'tests.models.speecht5.test_modeling_speecht5.SpeechT5ForTextToSpeechIntegrationTests': {
        'test_batch_generation': {'cuda': 'passed'},
    },
    'tests.pipelines.test_pipelines_automatic_speech_recognition.AutomaticSpeechRecognitionPipelineTests': {
        'test_small_model_pt_seq2seq': {'cuda': 'failed'},
    },
    'tests.pipelines.test_pipelines_common.CustomPipelineTest': {
        'test_custom_code_with_string_tokenizer': {'cuda': 'failed'},
    },
    'tests.pipelines.test_pipelines_depth_estimation.DepthEstimationPipelineTests': {
        'test_multiprocess': {'cuda': 'failed'},
    },
    'tests.pipelines.test_pipelines_image_to_text.ImageToTextPipelineTests': {
        'test_small_model_pt': {'cuda': 'failed'},
    },
    'tests.pipelines.test_pipelines_summarization.SummarizationPipelineTests': {
        'test_small_model_pt': {'cuda': 'failed'},
    },
    'tests.pipelines.test_pipelines_text_generation.TextGenerationPipelineTests': {
        'test_small_model_pt': {'cuda': 'failed'},
        'test_stop_sequence_stopping_criteria': {'cuda': 'failed'},
    },
    'tests.pipelines.test_pipelines_video_classification.VideoClassificationPipelineTests': {
        'test_small_model_pt': {'cuda': 'failed'},
    },
    'tests.pipelines.test_pipelines_visual_question_answering.VisualQuestionAnsweringPipelineTests': {
        'test_small_model_pt_blip2': {'cuda': 'failed'},
    },
    'tests.pipelines.test_pipelines_zero_shot_image_classification.ZeroShotImageClassificationPipelineTests': {
        'test_small_model_pt': {'cuda': 'failed'},
        'test_small_model_pt_fp16': {'cuda': 'failed'},
    },
    'tests.test_pipeline_mixin.AutomaticSpeechRecognitionPipelineTests': {
        'test_small_model_pt_seq2seq': {'cuda': 'failed'},
    },
    'tests.test_pipeline_mixin.DepthEstimationPipelineTests': {
        'test_multiprocess': {'cuda': 'failed'},
    },
    'tests.test_pipeline_mixin.ImageToTextPipelineTests': {
        'test_small_model_pt': {'cuda': 'failed'},
    },
    'tests.test_pipeline_mixin.SummarizationPipelineTests': {
        'test_small_model_pt': {'cuda': 'failed'},
    },
    'tests.test_pipeline_mixin.TextGenerationPipelineTests': {
        'test_small_model_pt': {'cuda': 'failed'},
        'test_stop_sequence_stopping_criteria': {'cuda': 'failed'},
    },
    'tests.test_pipeline_mixin.VideoClassificationPipelineTests': {
        'test_small_model_pt': {'cuda': 'failed'},
    },
    'tests.test_pipeline_mixin.VisualQuestionAnsweringPipelineTests': {
        'test_small_model_pt_blip2': {'cuda': 'failed'},
    },
    'tests.test_pipeline_mixin.ZeroShotImageClassificationPipelineTests': {
        'test_small_model_pt': {'cuda': 'failed'},
        'test_small_model_pt_fp16': {'cuda': 'failed'},
    },
}

Expand Down Expand Up @@ -170,6 +192,24 @@ def is_flaky(classname, name):
return True if 'flaky' in _case and _case['flaky'] else False
return False

def get_cuda_status(classname, name):
    """Return the recorded CUDA status ('passed'/'failed'/'skipped') for a
    known failing test, or '' when no CUDA status was recorded."""
    meta = failing_cases.get(classname, {}).get(name)
    # meta is None both for unknown tests and for tests listed without metadata.
    if meta and 'cuda' in meta:
        return meta['cuda']
    return ""

def get_link(classname, name):
    """Return a markdown '[link](url)' to the tracking issue of a known
    failing test, or '' when no link was recorded."""
    meta = failing_cases.get(classname, {}).get(name)
    # meta is None both for unknown tests and for tests listed without metadata.
    if not meta or 'link' not in meta:
        return ""
    return f"[link]({meta['link']})"

xmls = [ JUnitXml.fromfile(f) for f in args.junitxml ]
for idx, xml in enumerate(xmls):
for suite in xml:
Expand Down Expand Up @@ -213,6 +253,8 @@ def print_cases(cases):
'Class name': classname,
'Test name': name,
'Status': result,
'CUDA Status': get_cuda_status(classname, name),
'Link': get_link(classname, name),
'Message': message,
}
print_md_row(row, print_header)
Expand Down

0 comments on commit cf82b8e

Please sign in to comment.