From 14d8e78992a0a7f7ace2c94ff6dc607b41fc3a81 Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Mon, 13 May 2024 09:36:22 +0200 Subject: [PATCH 01/10] Tests with model lists added. --- .../python/tests/models/nightly | 145 ++++++++++++++++++ .../python/tests/models/precommit | 6 + .../python/tests/test_sampling.py | 80 +++++++--- 3 files changed, 207 insertions(+), 24 deletions(-) create mode 100644 text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly create mode 100644 text_generation/causal_lm/cpp/continuous_batching/python/tests/models/precommit diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly new file mode 100644 index 0000000000..4bf044c112 --- /dev/null +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly @@ -0,0 +1,145 @@ +facebook/opt-125m +bigcode/starcoder2-7b +mosaicml/mpt-7b-chat +# meta-llama/Llama-2-7b-chat-hf requires authorization +# mistralai/Mistral-7B-Instruct-v0.2 requires authorization +hf-internal-testing/tiny-random-BloomForCausalLM +hf-internal-testing/tiny-random-FalconForCausalLM +hf-internal-testing/tiny-random-Starcoder2ForCausalLM +hf-internal-testing/tiny-random-GPTJForCausalLM +hf-internal-testing/tiny-random-StableLmForCausalLM +hf-internal-testing/tiny-random-LlamaForCausalLM +hf-internal-testing/tiny-random-MistralForCausalLM +hf-internal-testing/tiny-random-OPTForCausalLM +hf-internal-testing/tiny-random-PhiForCausalLM +hf-internal-testing/tiny-random-gpt2 +hf-internal-testing/tiny-random-BertForQuestionAnswering +hf-internal-testing/tiny-random-T5ForConditionalGeneration +hf-internal-testing/tiny-random-BertForSequenceClassification +hf-internal-testing/tiny-random-GPTNeoXForCausalLM +hf-internal-testing/tiny-random-bert +hf-internal-testing/Mixtral-tiny +hf-internal-testing/tiny-random-PersimmonForCausalLM +hf-internal-testing/tiny-random-T5ForQuestionAnswering +hf-internal-testing/tiny-random-SwiftFormerModel +hf-internal-testing/tiny-stable-diffusion-pipe +hf-internal-testing/tiny-stable-diffusion-torch +hf-internal-testing/tiny-random-BartForCausalLM +hf-internal-testing/tiny-random-MegaModel +hf-internal-testing/tiny-clap-htsat-unfused +hf-internal-testing/diffusers-stable-diffusion-tiny-all +hf-internal-testing/tiny-random-VideoMAEForVideoClassification +hf-internal-testing/tiny-random-OPTModel +hf-internal-testing/tiny-random-OPTForQuestionAnswering +hf-internal-testing/tiny-random-GPTNeoModel +hf-internal-testing/tiny-random-wav2vec2 +hf-internal-testing/tiny-random-owlvit-object-detection +hf-internal-testing/unet-pipeline-dummy +hf-internal-testing/tiny-random-layoutlmv3 +hf-internal-testing/tiny-random-t5 +hf-internal-testing/tiny-random-vit +hf-internal-testing/tiny-random-speech-encoder-decoder +hf-internal-testing/tiny-bert-for-token-classification +hf-internal-testing/tiny-random-distilbert +hf-internal-testing/tiny-detr-mobilenetsv3-panoptic +hf-internal-testing/tiny-random-longformer +hf-internal-testing/example-documents +hf-internal-testing/tiny-random-CLIPSegModel +hf-internal-testing/tiny-random-NystromformerForMultipleChoice +hf-internal-testing/tiny-random-Wav2Vec2ForXVector +hf-internal-testing/tiny-random-NystromformerForTokenClassification +hf-internal-testing/tiny-random-unispeech +hf-internal-testing/tiny-random-flaubert +hf-internal-testing/tiny-random-ProphetNetForConditionalGeneration 
+hf-internal-testing/tiny-random-LevitForImageClassification +hf-internal-testing/tiny-adapter +hf-internal-testing/tiny-random-DonutSwinModel +hf-internal-testing/tiny-random-IdeficsForVisionText2Text +hf-internal-testing/tiny-random-LEDModel +hf-internal-testing/tiny-random-NezhaForTokenClassification +hf-internal-testing/tiny-random-GPTBigCodeModel +hf-internal-testing/tiny-random-bert-subfolder +hf-internal-testing/test_dynamic_image_processor +hf-internal-testing/tiny-random-EfficientFormerForImageClassificationWithTeacher +hf-internal-testing/tiny-random-ImageGPTModel +hf-internal-testing/tiny-random-Swinv2ForImageClassification +hf-internal-testing/tiny-random-GPTJModel +hf-internal-testing/tiny-random-BloomForQuestionAnswering +hf-internal-testing/tiny-random-Data2VecAudioForXVector +hf-internal-testing/tiny-random-NatModel +hf-internal-testing/tiny-random-DPTForDepthEstimation +hf-internal-testing/tiny-random-MraForQuestionAnswering +hf-internal-testing/tiny-random-DPTForSemanticSegmentation +hf-internal-testing/tiny-random-SEWDForCTC +hf-internal-testing/tiny-random-RoCBertForCausalLM +hf-internal-testing/tiny-random-vision_perceiver_conv +hf-internal-testing/tiny-random-SEWDModel +hf-internal-testing/tiny-random-ViTMAEForPreTraining +hf-internal-testing/tiny-random-MegatronBertForMultipleChoice +hf-internal-testing/tiny-random-GitModel +hf-internal-testing/tiny-random-BeitForSemanticSegmentation +hf-internal-testing/tiny-random-SamModel +hf-internal-testing/tiny-random-LevitForImageClassificationWithTeacher +hf-internal-testing/tiny-random-PegasusXModel +hf-internal-testing/tiny-random-Starcoder2Model +hf-internal-testing/tiny-random-GPTJForQuestionAnswering +hf-internal-testing/tiny-random-ResNetForImageClassification +hf-internal-testing/tiny-random-ErnieForMaskedLM +hf-internal-testing/tiny-random-ConvBertForMaskedLM +hf-internal-testing/edgLycorisMugler-light +hf-internal-testing/tiny-random-BigBirdPegasusForCausalLM +hf-internal-testing/tiny-random-BigBirdForPreTraining +hf-internal-testing/tiny-random-transfo-xl +hf-internal-testing/tiny-random-IBertForQuestionAnswering +hf-internal-testing/tiny-random-CanineForTokenClassification +hf-internal-testing/tiny-random-SplinterForQuestionAnswering +hf-internal-testing/tiny-random-PegasusXForConditionalGeneration +hf-internal-testing/tiny-random-XmodForCausalLM +hf-internal-testing/tiny-random-BigBirdForQuestionAnswering +hf-internal-testing/tiny-random-MT5ForSequenceClassification +hf-internal-testing/tiny-random-MarianForCausalLM +hf-internal-testing/tiny-random-Data2VecTextForQuestionAnswering +hf-internal-testing/tiny-random-SpeechT5Model +hf-internal-testing/tiny-random-Data2VecAudioForSequenceClassification +hf-internal-testing/tiny-random-MaskFormerModel +hf-internal-testing/tiny-random-M2M100ForConditionalGeneration +hf-internal-testing/diffusers-images +hf-internal-testing/tiny-random-ViTMSNModel +hf-internal-testing/tiny-random-MegatronBertForSequenceClassification +hf-internal-testing/tiny-random-MobileNetV2Model +hf-internal-testing/tiny-random-DistilBertForQuestionAnswering +hf-internal-testing/tiny-random-PerceiverModel +hf-internal-testing/tiny-random-M2M100Model +hf-internal-testing/tiny-random-vision-encoder-decoder +hf-internal-testing/tiny-random-XLNetForTokenClassification +hf-internal-testing/tiny-random-SwitchTransformersForConditionalGeneration +hf-internal-testing/tiny-electra +hf-internal-testing/tiny-random-GLPNModel +hf-internal-testing/tiny-random-NezhaForPreTraining 
+hf-internal-testing/test-two-configs +hf-internal-testing/tiny-random-Dinov2ForImageClassification +hf-internal-testing/tiny-bert-pt-safetensors-msgpack +hf-internal-testing/tiny-random-owlvit +hf-internal-testing/tiny-random-VitsModel +hf-internal-testing/tiny-random-MT5ForTokenClassification +hf-internal-testing/tiny-random-xlnet +hf-internal-testing/tiny-random-MusicgenMelodyForCausalLM +hf-internal-testing/tiny-random-RegNetModel +hf-internal-testing/tiny-random-MarkupLMForTokenClassification +hf-internal-testing/tiny-random-Speech2TextModel +hf-internal-testing/llama-code-tokenizer +hf-internal-testing/tiny-random-T5ForSequenceClassification +hf-internal-testing/tiny-random-BartForConditionalGeneration +hf-internal-testing/tiny-random-PegasusModel +hf-internal-testing/tiny-random-TransfoXLModel +hf-internal-testing/tiny-random-BigBirdForMultipleChoice +hf-internal-testing/tiny-random-PLBartForCausalLM +hf-internal-testing/tiny-random-SwinBackbone +hf-internal-testing/tiny-random-electra +hf-internal-testing/tiny-random-FNetForMaskedLM +hf-internal-testing/tiny-random-PersimmonForSequenceClassification +hf-internal-testing/tiny-random-MobileViTV2ForImageClassification +hf-internal-testing/tiny-random-XGLMForCausalLM +hf-internal-testing/tiny-random-SEWForCTC +hf-internal-testing/tiny-random-UMT5ForSequenceClassification +hf-internal-testing/tiny-random-PersimmonModel diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/precommit b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/precommit new file mode 100644 index 0000000000..7b0e0d02b6 --- /dev/null +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/precommit @@ -0,0 +1,6 @@ +facebook/opt-125m +bigcode/starcoder2-7b +hf-internal-testing/tiny-random-LlamaForCausalLM +hf-internal-testing/tiny-random-MistralForCausalLM +hf-internal-testing/tiny-random-OPTModel +hf-internal-testing/tiny-random-OPTForQuestionAnswering \ No newline at end of file diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py index 1d449f57f5..74af0d8d97 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py @@ -1,12 +1,13 @@ +import os import pytest -from pathlib import Path -from typing import List, Tuple +from optimum.intel import OVModelForCausalLM +from pathlib import Path +from py_continuous_batching import ContinuousBatchingPipeline, GenerationConfig, SchedulerConfig, GenerationResult from transformers import AutoTokenizer, AutoModelForCausalLM from transformers import GenerationConfig as HFGenerationConfig -from optimum.intel import OVModelForCausalLM +from typing import List, Tuple -from py_continuous_batching import ContinuousBatchingPipeline, GenerationConfig, SchedulerConfig, GenerationResult def get_greedy() -> GenerationConfig: generation_config = GenerationConfig() @@ -94,7 +95,7 @@ def run_hugging_face( use_optimum: bool, tmp_path: Path ) -> Tuple[List[GenerationResult], str]: - hf_tokenizer = AutoTokenizer.from_pretrained(model_id) + hf_tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ['MODEL_TOKEN']) model = OVModelForCausalLM.from_pretrained(model_id, export=True) if use_optimum else \ AutoModelForCausalLM.from_pretrained(model_id) generation_results: List[GenerationResult] = [] @@ -133,6 +134,46 @@ def 
run_continuous_batching( pipe = ContinuousBatchingPipeline(model_path.absolute().as_posix(), scheduler_config) return pipe.generate(prompts, generation_configs) +def get_models_list(file_name: str): + models = [] + with open(file_name) as f: + for model_name in f: + model_name = model_name.strip() + # skip comment in model scope file + if model_name.startswith('#'): + continue + models.append(model_name) + return models + +def compare_results(hf_result, ov_result, generation_config): + if generation_config.is_beam_search: + assert len(hf_result.m_scores) == len(ov_result.m_scores) + for hf_score, ov_score in zip(hf_result.m_scores, ov_result.m_scores): + # Note, that for fp32 / fp16 models scores are different less than 0.001 + assert abs(hf_score - ov_score) < 0.02 + + assert len(hf_result.m_generation_ids) == len(ov_result.m_generation_ids) + for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.m_generation_ids): + assert hf_text == ov_text + + +def run_test_pipeline(tmp_path: str, model_id: str, scheduler_params: dict = None): + prompts, generation_configs = get_test_dataset() + scheduler_config = get_scheduler_config(scheduler_params) + + (hf_results, model_path) = run_hugging_face(model_id=model_id, prompts=prompts, + generation_configs=generation_configs, tmp_path=tmp_path, + use_optimum=True) + my_results: List[GenerationResult] = run_continuous_batching(model_path, scheduler_config, prompts, + generation_configs) + + assert len(prompts) == len(hf_results) + assert len(prompts) == len(my_results) + + for prompt, hf_result, ov_result, generation_config in zip(prompts, hf_results, my_results, generation_configs): + print(f"Prompt = {prompt}\nHF result = {hf_result}\nmy result = {ov_result}") + compare_results(hf_result, ov_result, generation_config) + # tested models: # - facebook/opt-125m # - meta-llama/Llama-2-7b-chat-hf @@ -142,26 +183,17 @@ def run_continuous_batching( {"num_kv_blocks": 40, "block_size": 4, "dynamic_split_fuse": True, "max_num_batched_tokens": 256, "max_num_seqs": 256}, # test preemption for dynamic_split_fuse {"num_kv_blocks": 40, "block_size": 4, "dynamic_split_fuse": False, "max_num_batched_tokens": 256, "max_num_seqs": 256}] # test preemption for vllm @pytest.mark.parametrize("scheduler_params", scheduler_params_list) +@pytest.mark.precommit def test_preemption(tmp_path, scheduler_params): - prompts, generation_configs = get_test_dataset() - model_id : str = "facebook/opt-125m" - scheduler_config = get_scheduler_config(scheduler_params) + run_test_pipeline(tmp_path, "facebook/opt-125m", scheduler_params) - (hf_results, model_path) = run_hugging_face(model_id=model_id, prompts=prompts, generation_configs=generation_configs, tmp_path=tmp_path, use_optimum=True) - my_results : List[GenerationResult] = run_continuous_batching(model_path, scheduler_config, prompts, generation_configs) - assert len(prompts) == len(hf_results) - assert len(prompts) == len(my_results) +@pytest.mark.precommit +@pytest.mark.parametrize("model_id", get_models_list("models/precommit")) +def test_hf_models_precommit(tmp_path, model_id): + run_test_pipeline(tmp_path, model_id) - for prompt, hf_result, my_result, generation_config in zip(prompts, hf_results, my_results, generation_configs): - print(f"Prompt = {prompt}\nHF result = {hf_result}\nmy result = {my_result}") - - if generation_config.is_beam_search: - assert len(hf_result.m_scores) == len(my_result.m_scores) - for hf_score, my_score in zip(hf_result.m_scores, my_result.m_scores): - # Note, that for fp32 / fp16 models 
scores are different less than 0.001 - assert abs(hf_score - my_score) < 0.02 - - assert len(hf_result.m_generation_ids) == len(my_result.m_generation_ids) - for hf_text, my_text in zip(hf_result.m_generation_ids, my_result.m_generation_ids): - assert hf_text == my_text +@pytest.mark.nightly +@pytest.mark.parametrize("model_id", get_models_list("models/nightly")) +def test_hf_models_nightly(tmp_path, model_id): + run_test_pipeline(tmp_path, model_id) \ No newline at end of file From b1065424e0f6caca24ca0ee859cf6bb2fed211e6 Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Mon, 13 May 2024 09:44:50 +0200 Subject: [PATCH 02/10] Minor correction. --- .../cpp/continuous_batching/python/tests/test_sampling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py index 74af0d8d97..fce7342b10 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py @@ -95,7 +95,7 @@ def run_hugging_face( use_optimum: bool, tmp_path: Path ) -> Tuple[List[GenerationResult], str]: - hf_tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ['MODEL_TOKEN']) + hf_tokenizer = AutoTokenizer.from_pretrained(model_id) model = OVModelForCausalLM.from_pretrained(model_id, export=True) if use_optimum else \ AutoModelForCausalLM.from_pretrained(model_id) generation_results: List[GenerationResult] = [] @@ -196,4 +196,4 @@ def test_hf_models_precommit(tmp_path, model_id): @pytest.mark.nightly @pytest.mark.parametrize("model_id", get_models_list("models/nightly")) def test_hf_models_nightly(tmp_path, model_id): - run_test_pipeline(tmp_path, model_id) \ No newline at end of file + run_test_pipeline(tmp_path, model_id) From 998d1e1e891f2cf1d44a34c3800718274c6ea7af Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Mon, 13 May 2024 09:46:29 +0200 Subject: [PATCH 03/10] Minor correction. 
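Rename my_results / my_result to ov_results / ov_result so that both
sides of the comparison in run_test_pipeline are named for what they
hold: the Hugging Face reference on one side, the OpenVINO continuous
batching output on the other.

A note on the token lookup reverted in the previous commit: the
unconditional os.environ['MODEL_TOKEN'] raised KeyError for anyone
without the variable set, and every model currently in the lists is
public. If the gated models commented out in the nightly list
(meta-llama/Llama-2-7b-chat-hf, mistralai/Mistral-7B-Instruct-v0.2)
are ever re-enabled, an optional lookup would be enough; a sketch,
with HF_TOKEN as an assumed variable name, not something these tests
use:

    import os
    from transformers import AutoTokenizer

    def load_tokenizer(model_id: str):
        # os.environ.get returns None when the variable is unset, so
        # public models keep working without any token configured.
        token = os.environ.get("HF_TOKEN")
        return AutoTokenizer.from_pretrained(model_id, token=token)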
--- .../cpp/continuous_batching/python/tests/test_sampling.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py index fce7342b10..18368553ba 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py @@ -164,14 +164,14 @@ def run_test_pipeline(tmp_path: str, model_id: str, scheduler_params: dict = Non (hf_results, model_path) = run_hugging_face(model_id=model_id, prompts=prompts, generation_configs=generation_configs, tmp_path=tmp_path, use_optimum=True) - my_results: List[GenerationResult] = run_continuous_batching(model_path, scheduler_config, prompts, + ov_results: List[GenerationResult] = run_continuous_batching(model_path, scheduler_config, prompts, generation_configs) assert len(prompts) == len(hf_results) - assert len(prompts) == len(my_results) + assert len(prompts) == len(ov_results) - for prompt, hf_result, ov_result, generation_config in zip(prompts, hf_results, my_results, generation_configs): - print(f"Prompt = {prompt}\nHF result = {hf_result}\nmy result = {ov_result}") + for prompt, hf_result, ov_result, generation_config in zip(prompts, hf_results, ov_results, generation_configs): + print(f"Prompt = {prompt}\nHF result = {hf_result}\nOV result = {ov_result}") compare_results(hf_result, ov_result, generation_config) # tested models: From 988706527bdc68e5bc226169593ff25ac8fd999f Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Mon, 13 May 2024 13:30:05 +0200 Subject: [PATCH 04/10] Fixed model lists paths. --- .../cpp/continuous_batching/python/tests/test_sampling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py index 18368553ba..f5bd50f7b4 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py @@ -189,11 +189,11 @@ def test_preemption(tmp_path, scheduler_params): @pytest.mark.precommit -@pytest.mark.parametrize("model_id", get_models_list("models/precommit")) +@pytest.mark.parametrize("model_id", get_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "precommit"))) def test_hf_models_precommit(tmp_path, model_id): run_test_pipeline(tmp_path, model_id) @pytest.mark.nightly -@pytest.mark.parametrize("model_id", get_models_list("models/nightly")) +@pytest.mark.parametrize("model_id", get_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "nightly"))) def test_hf_models_nightly(tmp_path, model_id): run_test_pipeline(tmp_path, model_id) From 8870e0eb641160ba636e2f8690c93509fbb9208e Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Mon, 13 May 2024 18:50:42 +0200 Subject: [PATCH 05/10] Fix for non utf-8 text. 
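The tiny random models in the scope files can emit token sequences
whose detokenized bytes are not valid UTF-8, and pybind11's default
std::string-to-str conversion raises UnicodeDecodeError on such
output. Expose the generated strings through PyUnicode_DecodeUTF8 with
the "replace" error handler instead, so malformed bytes surface as
U+FFFD rather than failing the test run. The Python equivalent of what
the handler does:

    # "replace" maps each invalid byte to the U+FFFD replacement
    # character instead of raising UnicodeDecodeError.
    raw = b"valid \xff\xfe bytes"
    print(raw.decode("utf-8", errors="replace"))  # 'valid \ufffd\ufffd bytes'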
--- .../cpp/continuous_batching/python/python.cpp | 12 ++++++++++++ .../python/tests/test_sampling.py | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp b/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp index 3200a9d053..2777cf8aa2 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp +++ b/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp @@ -26,6 +26,18 @@ PYBIND11_MODULE(py_continuous_batching, m) { .def(py::init<>()) .def_readonly("m_request_id", &GenerationResult::m_request_id) .def_readwrite("m_generation_ids", &GenerationResult::m_generation_ids) + + .def("get_generation_ids", + [](GenerationResult &r) -> py::list { + py::list res; + for (auto s: r.m_generation_ids) { + + PyObject* py_s = PyUnicode_DecodeUTF8(s.data(), s.length(), "replace"); + res.append(py_s); + } + return res; + } + ) .def_readwrite("m_scores", &GenerationResult::m_scores) .def("__repr__", [](const GenerationResult &r) { diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py index f5bd50f7b4..04909d52dc 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py @@ -152,8 +152,8 @@ def compare_results(hf_result, ov_result, generation_config): # Note, that for fp32 / fp16 models scores are different less than 0.001 assert abs(hf_score - ov_score) < 0.02 - assert len(hf_result.m_generation_ids) == len(ov_result.m_generation_ids) - for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.m_generation_ids): + assert len(hf_result.m_generation_ids) == len(ov_result.get_generation_ids()) + for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.get_generation_ids()): assert hf_text == ov_text @@ -171,7 +171,7 @@ def run_test_pipeline(tmp_path: str, model_id: str, scheduler_params: dict = Non assert len(prompts) == len(ov_results) for prompt, hf_result, ov_result, generation_config in zip(prompts, hf_results, ov_results, generation_configs): - print(f"Prompt = {prompt}\nHF result = {hf_result}\nOV result = {ov_result}") + #print(f"Prompt = {prompt}\nHF result = {hf_result}\nOV result = {ov_result}") compare_results(hf_result, ov_result, generation_config) # tested models: From c5f74db056afa7b0519bb8feae39afe321df976f Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Tue, 14 May 2024 12:47:38 +0200 Subject: [PATCH 06/10] Fixed __repr__, fixed m_generation_ids property. 
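Replace the extra get_generation_ids() accessor from the previous
commit with a read/write property, so the tests can keep using the
original m_generation_ids attribute: the getter decodes every stored
string with the "replace" handler, the setter accepts a plain list of
strings, and __repr__ gets the same safe decoding. A rough Python
model of the semantics the binding now exposes (the real storage is a
C++ vector of std::string):

    class GenerationResultSketch:
        def __init__(self):
            self._raw = []  # raw bytes of each generated sequence

        @property
        def m_generation_ids(self):
            # getter: mirrors PyUnicode_DecodeUTF8(..., "replace")
            return [b.decode("utf-8", errors="replace") for b in self._raw]

        @m_generation_ids.setter
        def m_generation_ids(self, values):
            self._raw = [v.encode("utf-8") for v in values]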
--- .../cpp/continuous_batching/python/python.cpp | 19 +++++++++++-------- .../python/tests/test_sampling.py | 6 +++--- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp b/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp index 2777cf8aa2..41d55b9622 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp +++ b/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp @@ -4,7 +4,8 @@ #include "pybind11/pybind11.h" #include - +#include +#include #include "continuous_batching_pipeline.hpp" namespace py = pybind11; @@ -25,9 +26,7 @@ PYBIND11_MODULE(py_continuous_batching, m) { py::class_(m, "GenerationResult") .def(py::init<>()) .def_readonly("m_request_id", &GenerationResult::m_request_id) - .def_readwrite("m_generation_ids", &GenerationResult::m_generation_ids) - - .def("get_generation_ids", + .def_property("m_generation_ids", [](GenerationResult &r) -> py::list { py::list res; for (auto s: r.m_generation_ids) { @@ -36,14 +35,18 @@ PYBIND11_MODULE(py_continuous_batching, m) { res.append(py_s); } return res; - } - ) + }, + [](GenerationResult &r, std::vector &generation_ids) { + r.m_generation_ids = generation_ids; + }) .def_readwrite("m_scores", &GenerationResult::m_scores) .def("__repr__", - [](const GenerationResult &r) { + [](const GenerationResult &r) -> py::str{ std::stringstream stream; stream << ""; - return stream.str(); + std::string str = stream.str(); + PyObject* py_s = PyUnicode_DecodeUTF8(str.data(), str.length(), "replace"); + return py::reinterpret_steal(py_s); } ); diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py index 04909d52dc..f5bd50f7b4 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py @@ -152,8 +152,8 @@ def compare_results(hf_result, ov_result, generation_config): # Note, that for fp32 / fp16 models scores are different less than 0.001 assert abs(hf_score - ov_score) < 0.02 - assert len(hf_result.m_generation_ids) == len(ov_result.get_generation_ids()) - for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.get_generation_ids()): + assert len(hf_result.m_generation_ids) == len(ov_result.m_generation_ids) + for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.m_generation_ids): assert hf_text == ov_text @@ -171,7 +171,7 @@ def run_test_pipeline(tmp_path: str, model_id: str, scheduler_params: dict = Non assert len(prompts) == len(ov_results) for prompt, hf_result, ov_result, generation_config in zip(prompts, hf_results, ov_results, generation_configs): - #print(f"Prompt = {prompt}\nHF result = {hf_result}\nOV result = {ov_result}") + print(f"Prompt = {prompt}\nHF result = {hf_result}\nOV result = {ov_result}") compare_results(hf_result, ov_result, generation_config) # tested models: From a163972372b90dc5e06f47df55dc05b330eefba1 Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Tue, 14 May 2024 12:49:12 +0200 Subject: [PATCH 07/10] Removed not needed imports. 
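The two extra includes added while prototyping the UTF-8 handling
ended up unused: PyUnicode_DecodeUTF8 needs nothing beyond the Python
headers pybind11 already pulls in. With both fixes in place, a smoke
check from the Python side looks roughly like this (a sketch only: it
assumes an already converted OpenVINO model directory and a
default-constructible SchedulerConfig, which the test helpers suggest
but this series does not show):

    from py_continuous_batching import (ContinuousBatchingPipeline,
                                        GenerationConfig, SchedulerConfig)

    pipe = ContinuousBatchingPipeline("/tmp/ov-model", SchedulerConfig())
    result = pipe.generate(["Hello"], [GenerationConfig()])[0]
    print(result)  # __repr__ no longer raises on non-UTF-8 output
    assert all(isinstance(t, str) for t in result.m_generation_ids)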
--- .../causal_lm/cpp/continuous_batching/python/python.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp b/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp index 41d55b9622..9ceb9261f7 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp +++ b/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp @@ -4,8 +4,6 @@ #include "pybind11/pybind11.h" #include -#include -#include #include "continuous_batching_pipeline.hpp" namespace py = pybind11; From 34308b01a0890156e7b44b3c5f19ed30a7f428de Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Tue, 14 May 2024 14:37:47 +0200 Subject: [PATCH 08/10] Changed model lists, separated test files. --- .../cpp/continuous_batching/python/python.cpp | 1 + .../tests/{test_sampling.py => common.py} | 29 +-- .../python/tests/models/nightly | 186 ++++-------------- .../python/tests/models/precommit | 9 +- .../python/tests/preemption.py | 12 ++ .../python/tests/sampling.py | 19 ++ 6 files changed, 80 insertions(+), 176 deletions(-) rename text_generation/causal_lm/cpp/continuous_batching/python/tests/{test_sampling.py => common.py} (84%) create mode 100644 text_generation/causal_lm/cpp/continuous_batching/python/tests/preemption.py create mode 100644 text_generation/causal_lm/cpp/continuous_batching/python/tests/sampling.py diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp b/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp index 9ceb9261f7..c209462b37 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp +++ b/text_generation/causal_lm/cpp/continuous_batching/python/python.cpp @@ -4,6 +4,7 @@ #include "pybind11/pybind11.h" #include + #include "continuous_batching_pipeline.hpp" namespace py = pybind11; diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/common.py similarity index 84% rename from text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py rename to text_generation/causal_lm/cpp/continuous_batching/python/tests/common.py index f5bd50f7b4..8b7bebd93d 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/common.py @@ -1,3 +1,6 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import os import pytest @@ -172,28 +175,4 @@ def run_test_pipeline(tmp_path: str, model_id: str, scheduler_params: dict = Non for prompt, hf_result, ov_result, generation_config in zip(prompts, hf_results, ov_results, generation_configs): print(f"Prompt = {prompt}\nHF result = {hf_result}\nOV result = {ov_result}") - compare_results(hf_result, ov_result, generation_config) - -# tested models: -# - facebook/opt-125m -# - meta-llama/Llama-2-7b-chat-hf -# - mistralai/Mistral-7B-Instruct-v0.2 - -scheduler_params_list = [{"num_kv_blocks": 300, "block_size": 16, "dynamic_split_fuse": True, "max_num_batched_tokens": 256, "max_num_seqs": 256}, - {"num_kv_blocks": 40, "block_size": 4, "dynamic_split_fuse": True, "max_num_batched_tokens": 256, "max_num_seqs": 256}, # test preemption for dynamic_split_fuse - {"num_kv_blocks": 40, "block_size": 4, "dynamic_split_fuse": False, "max_num_batched_tokens": 256, "max_num_seqs": 256}] # test preemption for vllm -@pytest.mark.parametrize("scheduler_params", 
scheduler_params_list) -@pytest.mark.precommit -def test_preemption(tmp_path, scheduler_params): - run_test_pipeline(tmp_path, "facebook/opt-125m", scheduler_params) - - -@pytest.mark.precommit -@pytest.mark.parametrize("model_id", get_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "precommit"))) -def test_hf_models_precommit(tmp_path, model_id): - run_test_pipeline(tmp_path, model_id) - -@pytest.mark.nightly -@pytest.mark.parametrize("model_id", get_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "nightly"))) -def test_hf_models_nightly(tmp_path, model_id): - run_test_pipeline(tmp_path, model_id) + compare_results(hf_result, ov_result, generation_config) \ No newline at end of file diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly index 4bf044c112..3a241384d1 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly @@ -1,145 +1,41 @@ -facebook/opt-125m -bigcode/starcoder2-7b -mosaicml/mpt-7b-chat -# meta-llama/Llama-2-7b-chat-hf requires authorization -# mistralai/Mistral-7B-Instruct-v0.2 requires authorization -hf-internal-testing/tiny-random-BloomForCausalLM -hf-internal-testing/tiny-random-FalconForCausalLM -hf-internal-testing/tiny-random-Starcoder2ForCausalLM -hf-internal-testing/tiny-random-GPTJForCausalLM -hf-internal-testing/tiny-random-StableLmForCausalLM -hf-internal-testing/tiny-random-LlamaForCausalLM -hf-internal-testing/tiny-random-MistralForCausalLM -hf-internal-testing/tiny-random-OPTForCausalLM -hf-internal-testing/tiny-random-PhiForCausalLM -hf-internal-testing/tiny-random-gpt2 -hf-internal-testing/tiny-random-BertForQuestionAnswering -hf-internal-testing/tiny-random-T5ForConditionalGeneration -hf-internal-testing/tiny-random-BertForSequenceClassification -hf-internal-testing/tiny-random-GPTNeoXForCausalLM -hf-internal-testing/tiny-random-bert -hf-internal-testing/Mixtral-tiny -hf-internal-testing/tiny-random-PersimmonForCausalLM -hf-internal-testing/tiny-random-T5ForQuestionAnswering -hf-internal-testing/tiny-random-SwiftFormerModel -hf-internal-testing/tiny-stable-diffusion-pipe -hf-internal-testing/tiny-stable-diffusion-torch -hf-internal-testing/tiny-random-BartForCausalLM -hf-internal-testing/tiny-random-MegaModel -hf-internal-testing/tiny-clap-htsat-unfused -hf-internal-testing/diffusers-stable-diffusion-tiny-all -hf-internal-testing/tiny-random-VideoMAEForVideoClassification -hf-internal-testing/tiny-random-OPTModel -hf-internal-testing/tiny-random-OPTForQuestionAnswering -hf-internal-testing/tiny-random-GPTNeoModel -hf-internal-testing/tiny-random-wav2vec2 -hf-internal-testing/tiny-random-owlvit-object-detection -hf-internal-testing/unet-pipeline-dummy -hf-internal-testing/tiny-random-layoutlmv3 -hf-internal-testing/tiny-random-t5 -hf-internal-testing/tiny-random-vit -hf-internal-testing/tiny-random-speech-encoder-decoder -hf-internal-testing/tiny-bert-for-token-classification -hf-internal-testing/tiny-random-distilbert -hf-internal-testing/tiny-detr-mobilenetsv3-panoptic -hf-internal-testing/tiny-random-longformer -hf-internal-testing/example-documents -hf-internal-testing/tiny-random-CLIPSegModel -hf-internal-testing/tiny-random-NystromformerForMultipleChoice -hf-internal-testing/tiny-random-Wav2Vec2ForXVector 
-hf-internal-testing/tiny-random-NystromformerForTokenClassification -hf-internal-testing/tiny-random-unispeech -hf-internal-testing/tiny-random-flaubert -hf-internal-testing/tiny-random-ProphetNetForConditionalGeneration -hf-internal-testing/tiny-random-LevitForImageClassification -hf-internal-testing/tiny-adapter -hf-internal-testing/tiny-random-DonutSwinModel -hf-internal-testing/tiny-random-IdeficsForVisionText2Text -hf-internal-testing/tiny-random-LEDModel -hf-internal-testing/tiny-random-NezhaForTokenClassification -hf-internal-testing/tiny-random-GPTBigCodeModel -hf-internal-testing/tiny-random-bert-subfolder -hf-internal-testing/test_dynamic_image_processor -hf-internal-testing/tiny-random-EfficientFormerForImageClassificationWithTeacher -hf-internal-testing/tiny-random-ImageGPTModel -hf-internal-testing/tiny-random-Swinv2ForImageClassification -hf-internal-testing/tiny-random-GPTJModel -hf-internal-testing/tiny-random-BloomForQuestionAnswering -hf-internal-testing/tiny-random-Data2VecAudioForXVector -hf-internal-testing/tiny-random-NatModel -hf-internal-testing/tiny-random-DPTForDepthEstimation -hf-internal-testing/tiny-random-MraForQuestionAnswering -hf-internal-testing/tiny-random-DPTForSemanticSegmentation -hf-internal-testing/tiny-random-SEWDForCTC -hf-internal-testing/tiny-random-RoCBertForCausalLM -hf-internal-testing/tiny-random-vision_perceiver_conv -hf-internal-testing/tiny-random-SEWDModel -hf-internal-testing/tiny-random-ViTMAEForPreTraining -hf-internal-testing/tiny-random-MegatronBertForMultipleChoice -hf-internal-testing/tiny-random-GitModel -hf-internal-testing/tiny-random-BeitForSemanticSegmentation -hf-internal-testing/tiny-random-SamModel -hf-internal-testing/tiny-random-LevitForImageClassificationWithTeacher -hf-internal-testing/tiny-random-PegasusXModel -hf-internal-testing/tiny-random-Starcoder2Model -hf-internal-testing/tiny-random-GPTJForQuestionAnswering -hf-internal-testing/tiny-random-ResNetForImageClassification -hf-internal-testing/tiny-random-ErnieForMaskedLM -hf-internal-testing/tiny-random-ConvBertForMaskedLM -hf-internal-testing/edgLycorisMugler-light -hf-internal-testing/tiny-random-BigBirdPegasusForCausalLM -hf-internal-testing/tiny-random-BigBirdForPreTraining -hf-internal-testing/tiny-random-transfo-xl -hf-internal-testing/tiny-random-IBertForQuestionAnswering -hf-internal-testing/tiny-random-CanineForTokenClassification -hf-internal-testing/tiny-random-SplinterForQuestionAnswering -hf-internal-testing/tiny-random-PegasusXForConditionalGeneration -hf-internal-testing/tiny-random-XmodForCausalLM -hf-internal-testing/tiny-random-BigBirdForQuestionAnswering -hf-internal-testing/tiny-random-MT5ForSequenceClassification -hf-internal-testing/tiny-random-MarianForCausalLM -hf-internal-testing/tiny-random-Data2VecTextForQuestionAnswering -hf-internal-testing/tiny-random-SpeechT5Model -hf-internal-testing/tiny-random-Data2VecAudioForSequenceClassification -hf-internal-testing/tiny-random-MaskFormerModel -hf-internal-testing/tiny-random-M2M100ForConditionalGeneration -hf-internal-testing/diffusers-images -hf-internal-testing/tiny-random-ViTMSNModel -hf-internal-testing/tiny-random-MegatronBertForSequenceClassification -hf-internal-testing/tiny-random-MobileNetV2Model -hf-internal-testing/tiny-random-DistilBertForQuestionAnswering -hf-internal-testing/tiny-random-PerceiverModel -hf-internal-testing/tiny-random-M2M100Model -hf-internal-testing/tiny-random-vision-encoder-decoder -hf-internal-testing/tiny-random-XLNetForTokenClassification 
-hf-internal-testing/tiny-random-SwitchTransformersForConditionalGeneration -hf-internal-testing/tiny-electra -hf-internal-testing/tiny-random-GLPNModel -hf-internal-testing/tiny-random-NezhaForPreTraining -hf-internal-testing/test-two-configs -hf-internal-testing/tiny-random-Dinov2ForImageClassification -hf-internal-testing/tiny-bert-pt-safetensors-msgpack -hf-internal-testing/tiny-random-owlvit -hf-internal-testing/tiny-random-VitsModel -hf-internal-testing/tiny-random-MT5ForTokenClassification -hf-internal-testing/tiny-random-xlnet -hf-internal-testing/tiny-random-MusicgenMelodyForCausalLM -hf-internal-testing/tiny-random-RegNetModel -hf-internal-testing/tiny-random-MarkupLMForTokenClassification -hf-internal-testing/tiny-random-Speech2TextModel -hf-internal-testing/llama-code-tokenizer -hf-internal-testing/tiny-random-T5ForSequenceClassification -hf-internal-testing/tiny-random-BartForConditionalGeneration -hf-internal-testing/tiny-random-PegasusModel -hf-internal-testing/tiny-random-TransfoXLModel -hf-internal-testing/tiny-random-BigBirdForMultipleChoice -hf-internal-testing/tiny-random-PLBartForCausalLM -hf-internal-testing/tiny-random-SwinBackbone -hf-internal-testing/tiny-random-electra -hf-internal-testing/tiny-random-FNetForMaskedLM -hf-internal-testing/tiny-random-PersimmonForSequenceClassification -hf-internal-testing/tiny-random-MobileViTV2ForImageClassification -hf-internal-testing/tiny-random-XGLMForCausalLM -hf-internal-testing/tiny-random-SEWForCTC -hf-internal-testing/tiny-random-UMT5ForSequenceClassification -hf-internal-testing/tiny-random-PersimmonModel +hf-tiny-model-private/tiny-random-GPTJForCausalLM +hf-tiny-model-private/tiny-random-BartForCausalLM +hf-tiny-model-private/tiny-random-BertLMHeadModel +hf-tiny-model-private/tiny-random-BigBirdForCausalLM +hf-tiny-model-private/tiny-random-BigBirdPegasusForCausalLM +hf-tiny-model-private/tiny-random-BioGptForCausalLM +hf-tiny-model-private/tiny-random-BlenderbotSmallForCausalLM +hf-tiny-model-private/tiny-random-BlenderbotForCausalLM +hf-tiny-model-private/tiny-random-BloomForCausalLM +hf-tiny-model-private/tiny-random-CodeGenForCausalLM +hf-tiny-model-private/tiny-random-CTRLLMHeadModel +hf-tiny-model-private/tiny-random-Data2VecTextForCausalLM +hf-tiny-model-private/tiny-random-ElectraForCausalLM +hf-tiny-model-private/tiny-random-ErnieForCausalLM +hf-tiny-model-private/tiny-random-GitForCausalLM +hf-tiny-model-private/tiny-random-GPT2LMHeadModel +hf-tiny-model-private/tiny-random-GPTNeoForCausalLM +hf-tiny-model-private/tiny-random-GPTNeoXForCausalLM +hf-tiny-model-private/tiny-random-GPTNeoXJapaneseForCausalLM +hf-tiny-model-private/tiny-random-MBartForCausalLM +hf-tiny-model-private/tiny-random-MegaForCausalLM +hf-tiny-model-private/tiny-random-MegatronBertForCausalLM +hf-tiny-model-private/tiny-random-MvpForCausalLM +hf-tiny-model-private/tiny-random-OpenAIGPTLMHeadModel +hf-tiny-model-private/tiny-random-OPTForCausalLM +hf-tiny-model-private/tiny-random-PegasusForCausalLM +hf-tiny-model-private/tiny-random-PLBartForCausalLM +hf-tiny-model-private/tiny-random-ProphetNetForCausalLM +hf-tiny-model-private/tiny-random-RemBertForCausalLM +hf-tiny-model-private/tiny-random-RobertaPreLayerNormForCausalLM +hf-tiny-model-private/tiny-random-RobertaForCausalLM +hf-tiny-model-private/tiny-random-RoCBertForCausalLM +hf-tiny-model-private/tiny-random-RoFormerForCausalLM +hf-tiny-model-private/tiny-random-TransfoXLLMHeadModel +hf-tiny-model-private/tiny-random-XGLMForCausalLM 
+hf-tiny-model-private/tiny-random-XLMRobertaXLForCausalLM +hf-tiny-model-private/tiny-random-XLNetLMHeadModel +hf-tiny-model-private/tiny-random-XmodForCausalLM +PrunaAI/hf-tiny-model-private-tiny-random-BloomForCausalLM-bnb-4bit-smashed +PrunaAI/hf-tiny-model-private-tiny-random-BloomForCausalLM-bnb-8bit-smashed +PrunaAI/hf-tiny-model-private-tiny-random-BloomForCausalLM-HQQ-2bit-smashed diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/precommit b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/precommit index 7b0e0d02b6..0b913d3b01 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/precommit +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/precommit @@ -1,6 +1,3 @@ -facebook/opt-125m -bigcode/starcoder2-7b -hf-internal-testing/tiny-random-LlamaForCausalLM -hf-internal-testing/tiny-random-MistralForCausalLM -hf-internal-testing/tiny-random-OPTModel -hf-internal-testing/tiny-random-OPTForQuestionAnswering \ No newline at end of file +hf-tiny-model-private/tiny-random-CodeGenForCausalLM +hf-tiny-model-private/tiny-random-GPT2LMHeadModel +hf-tiny-model-private/tiny-random-OPTForCausalLM \ No newline at end of file diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/preemption.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/preemption.py new file mode 100644 index 0000000000..7baff5ede0 --- /dev/null +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/preemption.py @@ -0,0 +1,12 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from common import run_test_pipeline + +scheduler_params_list = [{"num_kv_blocks": 300, "block_size": 16, "dynamic_split_fuse": True, "max_num_batched_tokens": 256, "max_num_seqs": 256}, + {"num_kv_blocks": 40, "block_size": 4, "dynamic_split_fuse": True, "max_num_batched_tokens": 256, "max_num_seqs": 256}, # test preemption for dynamic_split_fuse + {"num_kv_blocks": 40, "block_size": 4, "dynamic_split_fuse": False, "max_num_batched_tokens": 256, "max_num_seqs": 256}] # test preemption for vllm +@pytest.mark.parametrize("scheduler_params", scheduler_params_list) +@pytest.mark.precommit +def test_preemption(tmp_path, scheduler_params): + run_test_pipeline(tmp_path, "facebook/opt-125m", scheduler_params) \ No newline at end of file diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/sampling.py new file mode 100644 index 0000000000..c6d97dd5af --- /dev/null +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/sampling.py @@ -0,0 +1,19 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from common import run_test_pipeline + +# tested models: +# - facebook/opt-125m +# - meta-llama/Llama-2-7b-chat-hf +# - mistralai/Mistral-7B-Instruct-v0.2 + +@pytest.mark.precommit +@pytest.mark.parametrize("model_id", get_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "precommit"))) +def test_sampling_precommit(tmp_path, model_id): + run_test_pipeline(tmp_path, model_id) + +@pytest.mark.nightly +@pytest.mark.parametrize("model_id", get_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "nightly"))) +def test_sampling_nightly(tmp_path, model_id): + run_test_pipeline(tmp_path, model_id) From 81ff24d63041979d17a5841cd1565bca4e5be9bb Mon Sep 17 00:00:00 2001 From: 
Anastasiia Pnevskaia Date: Tue, 14 May 2024 14:48:05 +0200 Subject: [PATCH 09/10] Minor corrections. --- .../python/tests/models/nightly | 49 +++++++++++++++++++ .../{preemption.py => test_preemption.py} | 3 +- .../tests/{sampling.py => test_sampling.py} | 5 +- 3 files changed, 55 insertions(+), 2 deletions(-) rename text_generation/causal_lm/cpp/continuous_batching/python/tests/{preemption.py => test_preemption.py} (92%) rename text_generation/causal_lm/cpp/continuous_batching/python/tests/{sampling.py => test_sampling.py} (89%) diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly index 3a241384d1..4f8755bce5 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly @@ -39,3 +39,52 @@ hf-tiny-model-private/tiny-random-XmodForCausalLM PrunaAI/hf-tiny-model-private-tiny-random-BloomForCausalLM-bnb-4bit-smashed PrunaAI/hf-tiny-model-private-tiny-random-BloomForCausalLM-bnb-8bit-smashed PrunaAI/hf-tiny-model-private-tiny-random-BloomForCausalLM-HQQ-2bit-smashed +hf-internal-testing/tiny-random-MistralForCausalLM +hf-internal-testing/tiny-random-GPTNeoXForCausalLM +hf-internal-testing/tiny-random-LlamaForCausalLM +hf-internal-testing/tiny-random-StableLmForCausalLM +hf-internal-testing/tiny-random-PersimmonForCausalLM +hf-internal-testing/tiny-random-BartForCausalLM +hf-internal-testing/tiny-random-PhiForCausalLM +hf-internal-testing/tiny-random-GPTNeoForCausalLM +hf-internal-testing/tiny-random-CodeGenForCausalLM +hf-internal-testing/tiny-random-MptForCausalLM +hf-internal-testing/tiny-random-GPTBigCodeForCausalLM +hf-internal-testing/tiny-random-BloomForCausalLM +hf-internal-testing/tiny-random-GPTJForCausalLM +hf-internal-testing/tiny-random-MusicgenMelodyForCausalLM +hf-internal-testing/tiny-random-CohereForCausalLM +hf-internal-testing/tiny-random-MambaForCausalLM +hf-internal-testing/tiny-random-Starcoder2ForCausalLM +hf-internal-testing/tiny-random-WhisperForCausalLM +hf-internal-testing/tiny-random-FuyuForCausalLM +hf-internal-testing/tiny-random-FalconForCausalLM +hf-internal-testing/tiny-random-MusicgenForCausalLM +hf-internal-testing/tiny-random-RwkvForCausalLM +hf-internal-testing/tiny-random-XmodForCausalLM +hf-internal-testing/tiny-random-XLMRobertaXLForCausalLM +hf-internal-testing/tiny-random-XGLMForCausalLM +hf-internal-testing/tiny-random-RemBertForCausalLM +hf-internal-testing/tiny-random-PegasusForCausalLM +hf-internal-testing/tiny-random-MBartForCausalLM +hf-internal-testing/tiny-random-BigBirdPegasusForCausalLM +hf-internal-testing/tiny-random-BigBirdForCausalLM +hf-internal-testing/tiny-random-MegaForCausalLM +hf-internal-testing/tiny-random-RobertaPreLayerNormForCausalLM +hf-internal-testing/tiny-random-GitForCausalLM +hf-internal-testing/tiny-random-BioGptForCausalLM +hf-internal-testing/tiny-random-RobertaForCausalLM +hf-internal-testing/tiny-random-Data2VecTextForCausalLM +hf-internal-testing/tiny-random-RoFormerForCausalLM +hf-internal-testing/tiny-random-RoCBertForCausalLM +hf-internal-testing/tiny-random-ProphetNetForCausalLM +hf-internal-testing/tiny-random-PLBartForCausalLM +hf-internal-testing/tiny-random-OPTForCausalLM +hf-internal-testing/tiny-random-MvpForCausalLM +hf-internal-testing/tiny-random-MegatronBertForCausalLM +hf-internal-testing/tiny-random-MarianForCausalLM 
+hf-internal-testing/tiny-random-GPTNeoXJapaneseForCausalLM +hf-internal-testing/tiny-random-ErnieForCausalLM +hf-internal-testing/tiny-random-ElectraForCausalLM +hf-internal-testing/tiny-random-BlenderbotForCausalLM +hf-internal-testing/tiny-random-BlenderbotSmallForCausalLM diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/preemption.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_preemption.py similarity index 92% rename from text_generation/causal_lm/cpp/continuous_batching/python/tests/preemption.py rename to text_generation/causal_lm/cpp/continuous_batching/python/tests/test_preemption.py index 7baff5ede0..eb7116341c 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/preemption.py +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_preemption.py @@ -1,7 +1,8 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import pytest -from common import run_test_pipeline +from common import run_test_pipeline, get_models_list scheduler_params_list = [{"num_kv_blocks": 300, "block_size": 16, "dynamic_split_fuse": True, "max_num_batched_tokens": 256, "max_num_seqs": 256}, {"num_kv_blocks": 40, "block_size": 4, "dynamic_split_fuse": True, "max_num_batched_tokens": 256, "max_num_seqs": 256}, # test preemption for dynamic_split_fuse diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py similarity index 89% rename from text_generation/causal_lm/cpp/continuous_batching/python/tests/sampling.py rename to text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py index c6d97dd5af..211932aa72 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/sampling.py +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py @@ -1,7 +1,10 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os +import pytest + +from common import run_test_pipeline, get_models_list -from common import run_test_pipeline # tested models: # - facebook/opt-125m From 822b4c726532cf45856fd52cbf668d94810e93a5 Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Tue, 14 May 2024 15:33:30 +0200 Subject: [PATCH 10/10] Removed wrong models. 
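The six dropped entries are all *LMHeadModel architectures; every name
that remains in the nightly list is a ForCausalLM checkpoint (the
PrunaAI entries wrap one). The precommit list still carries
tiny-random-GPT2LMHeadModel and may deserve the same look. A quick
sanity check for the scope files could look like this (a sketch: the
substring heuristic is inferred from this cleanup, not an enforced
convention):

    def find_suspicious_models(path: str) -> list:
        """List entries that do not look like causal LM checkpoints."""
        suspicious = []
        with open(path) as f:
            for line in f:
                name = line.strip()
                # same comment convention as get_models_list in common.py
                if not name or name.startswith('#'):
                    continue
                if 'ForCausalLM' not in name:
                    suspicious.append(name)
        return suspicious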
--- .../cpp/continuous_batching/python/tests/models/nightly | 6 ------ 1 file changed, 6 deletions(-) diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly index 4f8755bce5..0937bbbf2b 100644 --- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly +++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/models/nightly @@ -1,6 +1,5 @@ hf-tiny-model-private/tiny-random-GPTJForCausalLM hf-tiny-model-private/tiny-random-BartForCausalLM -hf-tiny-model-private/tiny-random-BertLMHeadModel hf-tiny-model-private/tiny-random-BigBirdForCausalLM hf-tiny-model-private/tiny-random-BigBirdPegasusForCausalLM hf-tiny-model-private/tiny-random-BioGptForCausalLM @@ -8,12 +7,10 @@ hf-tiny-model-private/tiny-random-BlenderbotSmallForCausalLM hf-tiny-model-private/tiny-random-BlenderbotForCausalLM hf-tiny-model-private/tiny-random-BloomForCausalLM hf-tiny-model-private/tiny-random-CodeGenForCausalLM -hf-tiny-model-private/tiny-random-CTRLLMHeadModel hf-tiny-model-private/tiny-random-Data2VecTextForCausalLM hf-tiny-model-private/tiny-random-ElectraForCausalLM hf-tiny-model-private/tiny-random-ErnieForCausalLM hf-tiny-model-private/tiny-random-GitForCausalLM -hf-tiny-model-private/tiny-random-GPT2LMHeadModel hf-tiny-model-private/tiny-random-GPTNeoForCausalLM hf-tiny-model-private/tiny-random-GPTNeoXForCausalLM hf-tiny-model-private/tiny-random-GPTNeoXJapaneseForCausalLM @@ -21,7 +18,6 @@ hf-tiny-model-private/tiny-random-MBartForCausalLM hf-tiny-model-private/tiny-random-MegaForCausalLM hf-tiny-model-private/tiny-random-MegatronBertForCausalLM hf-tiny-model-private/tiny-random-MvpForCausalLM -hf-tiny-model-private/tiny-random-OpenAIGPTLMHeadModel hf-tiny-model-private/tiny-random-OPTForCausalLM hf-tiny-model-private/tiny-random-PegasusForCausalLM hf-tiny-model-private/tiny-random-PLBartForCausalLM @@ -31,10 +27,8 @@ hf-tiny-model-private/tiny-random-RobertaPreLayerNormForCausalLM hf-tiny-model-private/tiny-random-RobertaForCausalLM hf-tiny-model-private/tiny-random-RoCBertForCausalLM hf-tiny-model-private/tiny-random-RoFormerForCausalLM -hf-tiny-model-private/tiny-random-TransfoXLLMHeadModel hf-tiny-model-private/tiny-random-XGLMForCausalLM hf-tiny-model-private/tiny-random-XLMRobertaXLForCausalLM -hf-tiny-model-private/tiny-random-XLNetLMHeadModel hf-tiny-model-private/tiny-random-XmodForCausalLM PrunaAI/hf-tiny-model-private-tiny-random-BloomForCausalLM-bnb-4bit-smashed PrunaAI/hf-tiny-model-private-tiny-random-BloomForCausalLM-bnb-8bit-smashed