From c98106999cad0a1f8081d811dd534b188962cc8a Mon Sep 17 00:00:00 2001 From: Ekaterina Shiryaeva Date: Fri, 22 Nov 2024 22:10:14 +0000 Subject: [PATCH 1/5] Add tests for Whisper static pipeline --- .../test_whisper_pipeline_static.py | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 tests/python_tests/test_whisper_pipeline_static.py diff --git a/tests/python_tests/test_whisper_pipeline_static.py b/tests/python_tests/test_whisper_pipeline_static.py new file mode 100644 index 0000000000..32c143838e --- /dev/null +++ b/tests/python_tests/test_whisper_pipeline_static.py @@ -0,0 +1,99 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from ov_genai_test_utils import get_whisper_models_list +from test_whisper_generate_api import get_samples_from_dataset +import openvino_genai as ov_genai +import pytest + +def compare_results_with_assert(expected, actual_out): + if expected.texts[0] != actual_out.texts[0]: + print(f'expected: {expected.texts[0]}\n') + print(f'actual_out: {actual_out.texts[0]}') + assert expected.texts[0] == actual_out.texts[0] + + +@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) +@pytest.mark.parametrize("test_sample", get_samples_from_dataset(length=1)) +@pytest.mark.precommit +def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample): + model_id, model_path = model_descr + + cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") + expected = cpu_pipe.generate(test_sample) + + npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU") + actual_out = npu_pipe.generate(test_sample) + + compare_results_with_assert(expected, actual_out) + + +@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) +@pytest.mark.parametrize("test_sample", + [ +# *get_samples_from_dataset(language="fr", length=2), # 1/2 failed + *get_samples_from_dataset(language="de", length=2), +# *get_samples_from_dataset(language="es", length=2), # 1/2 failed + ],) +@pytest.mark.precommit +def test_static_whisper_autodetect(model_descr, test_sample): + model_id, model_path = model_descr + + cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") + expected = cpu_pipe.generate(test_sample) + + npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU") + actual_out = npu_pipe.generate(test_sample) + + compare_results_with_assert(expected, actual_out) + + +@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) +@pytest.mark.parametrize( + "test_sample", get_samples_from_dataset(language="de", length=3) +) +@pytest.mark.precommit +def test_static_whisper_language_de(model_descr, test_sample): + model_id, model_path = model_descr + + cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") + expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|de|>") + + npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU") + actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|de|>") + + compare_results_with_assert(expected, actual_out) + + +@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) +@pytest.mark.parametrize( + "test_sample", get_samples_from_dataset(language="fr", length=3) +) +@pytest.mark.precommit +def test_static_whisper_language_fr(model_descr, test_sample): + model_id, model_path = model_descr + + cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") + expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>") + + npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU") + actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>") + + compare_results_with_assert(expected, actual_out) + + +@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) +@pytest.mark.parametrize( + "test_sample", get_samples_from_dataset(language="ru", length=3) +) +@pytest.mark.precommit +def test_static_whisper_language_ru(model_descr, test_sample): + model_id, model_path = model_descr + + cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") + expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|ru|>") + + npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU") + actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|ru|>") + + compare_results_with_assert(expected, actual_out) From 5c1ead0ae5db1e0ff89983d11d5da082bf080352 Mon Sep 17 00:00:00 2001 From: Ekaterina Shiryaeva Date: Mon, 2 Dec 2024 09:14:11 +0000 Subject: [PATCH 2/5] StaticWhisperPipeline: fix build target for tests --- .github/labeler.yml | 1 + .github/workflows/linux.yml | 4 ++-- .github/workflows/mac.yml | 2 +- .github/workflows/windows.yml | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index c162f6aff4..f7015233a0 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -57,6 +57,7 @@ - 'src/cpp/src/whisper_pipeline.cpp' - 'src/python/py_whisper_pipeline.cpp' - 'tests/python_tests/test_whisper_generate_api.py' +- 'tests/python_tests/test_whisper_pipeline_static.py' 'category: Python API': - 'src/python/**/*' diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 96848e947c..19f8b5b7e8 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -270,9 +270,9 @@ jobs: matrix: test: - name: 'Whisper' - cmd: 'tests/python_tests/test_whisper_generate_api.py' + cmd: 'tests/python_tests/test_whisper_generate_api.py tests/python_tests/test_whisper_pipeline_static.py' - name: 'LLM & VLM' - cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_generate_api.py -k "not Qwen2-0.5B-Instruct"' # Skip failed tests Qwen2-0.5B-Instruct + cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py -k "not Qwen2-0.5B-Instruct"' # Skip failed tests Qwen2-0.5B-Instruct defaults: run: shell: bash diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 7a4ee31beb..dc14214f3f 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -235,7 +235,7 @@ jobs: python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels python -c "from openvino_genai import LLMPipeline" python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels - python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template" + python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template" genai_python_lib_whisper: name: OpenVINO genai extension whisper tests (cmake + wheel) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 649d678c02..341d44b9aa 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -245,7 +245,7 @@ jobs: . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1" python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels - python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template" + python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template" genai_python_lib_whisper: name: OpenVINO genai extension whisper tests (cmake + wheel) @@ -310,7 +310,7 @@ jobs: . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1" python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels - python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke" + python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke" genai_python_lib_vlm: name: OpenVINO genai VLM tests (cmake + wheel) From a63064319e755aef45d6cb5e9deb10d6484bac22 Mon Sep 17 00:00:00 2001 From: Ekaterina Shiryaeva Date: Tue, 3 Dec 2024 13:04:24 +0000 Subject: [PATCH 3/5] Add config settings to run with NPUW:CPU --- src/cpp/src/whisper_pipeline_static.cpp | 6 +++--- tests/python_tests/test_whisper_pipeline_static.py | 14 +++++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/cpp/src/whisper_pipeline_static.cpp b/src/cpp/src/whisper_pipeline_static.cpp index 9937082a81..a487fe33e4 100644 --- a/src/cpp/src/whisper_pipeline_static.cpp +++ b/src/cpp/src/whisper_pipeline_static.cpp @@ -555,9 +555,9 @@ WhisperPipeline::StaticWhisperPipeline::StaticWhisperPipeline(const std::filesys preprocess_decoder(decoder_model); preprocess_decoder(decoder_with_past_model); - m_models.encoder = core.compile_model(encoder_model, "NPU").create_infer_request(); - m_models.decoder = core.compile_model(decoder_model, "NPU").create_infer_request(); - m_models.decoder_with_past = core.compile_model(decoder_with_past_model, "NPU").create_infer_request(); + m_models.encoder = core.compile_model(encoder_model, "NPU", properties).create_infer_request(); + m_models.decoder = core.compile_model(decoder_model, "NPU", properties).create_infer_request(); + m_models.decoder_with_past = core.compile_model(decoder_with_past_model, "NPU", properties).create_infer_request(); // If eos_token_id was not provided, take value if (m_generation_config.eos_token_id == -1) { diff --git a/tests/python_tests/test_whisper_pipeline_static.py b/tests/python_tests/test_whisper_pipeline_static.py index 32c143838e..2219b9b1cd 100644 --- a/tests/python_tests/test_whisper_pipeline_static.py +++ b/tests/python_tests/test_whisper_pipeline_static.py @@ -6,6 +6,10 @@ import openvino_genai as ov_genai import pytest +config = {"NPU_USE_NPUW" : "YES", + "NPUW_DEVICES" : "CPU", + "NPUW_ONLINE_PIPELINE" : "NONE"} + def compare_results_with_assert(expected, actual_out): if expected.texts[0] != actual_out.texts[0]: print(f'expected: {expected.texts[0]}\n') @@ -22,7 +26,7 @@ def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample): cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") expected = cpu_pipe.generate(test_sample) - npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU") + npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) actual_out = npu_pipe.generate(test_sample) compare_results_with_assert(expected, actual_out) @@ -42,7 +46,7 @@ def test_static_whisper_autodetect(model_descr, test_sample): cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") expected = cpu_pipe.generate(test_sample) - npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU") + npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) actual_out = npu_pipe.generate(test_sample) compare_results_with_assert(expected, actual_out) @@ -59,7 +63,7 @@ def test_static_whisper_language_de(model_descr, test_sample): cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|de|>") - npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU") + npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|de|>") compare_results_with_assert(expected, actual_out) @@ -76,7 +80,7 @@ def test_static_whisper_language_fr(model_descr, test_sample): cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>") - npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU") + npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>") compare_results_with_assert(expected, actual_out) @@ -93,7 +97,7 @@ def test_static_whisper_language_ru(model_descr, test_sample): cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|ru|>") - npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU") + npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|ru|>") compare_results_with_assert(expected, actual_out) From dae413f6b687541122d6f8a64ca122391dcc600a Mon Sep 17 00:00:00 2001 From: Ekaterina Shiryaeva Date: Wed, 4 Dec 2024 11:49:15 +0000 Subject: [PATCH 4/5] Download model if it's not found --- .../test_whisper_pipeline_static.py | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tests/python_tests/test_whisper_pipeline_static.py b/tests/python_tests/test_whisper_pipeline_static.py index 2219b9b1cd..2702b38eca 100644 --- a/tests/python_tests/test_whisper_pipeline_static.py +++ b/tests/python_tests/test_whisper_pipeline_static.py @@ -3,13 +3,50 @@ from ov_genai_test_utils import get_whisper_models_list from test_whisper_generate_api import get_samples_from_dataset +from transformers import WhisperProcessor, pipeline, AutoTokenizer +from optimum.intel.openvino import OVModelForSpeechSeq2Seq import openvino_genai as ov_genai +import openvino_tokenizers +import openvino import pytest config = {"NPU_USE_NPUW" : "YES", "NPUW_DEVICES" : "CPU", "NPUW_ONLINE_PIPELINE" : "NONE"} +def load_and_save_whisper_model(params, **tokenizer_kwargs): + model_id, path = params + + processor = WhisperProcessor.from_pretrained(model_id, trust_remote_code=True) + + if not (path / "openvino_encoder_model.xml").exists(): + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) + ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer( + tokenizer, + with_detokenizer=True, + clean_up_tokenization_spaces=False, + **tokenizer_kwargs, + ) + + openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml") + openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml") + + # to store tokenizer config jsons with special tokens + tokenizer.save_pretrained(path) + + opt_model = OVModelForSpeechSeq2Seq.from_pretrained( + model_id, + export=True, + trust_remote_code=True, + compile=False, + device="CPU", + load_in_8bit=False, + ) + opt_model.generation_config.save_pretrained(path) + opt_model.config.save_pretrained(path) + opt_model.save_pretrained(path) + processor.save_pretrained(path) + def compare_results_with_assert(expected, actual_out): if expected.texts[0] != actual_out.texts[0]: print(f'expected: {expected.texts[0]}\n') @@ -18,13 +55,15 @@ def compare_results_with_assert(expected, actual_out): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) -@pytest.mark.parametrize("test_sample", get_samples_from_dataset(length=1)) +@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1)) @pytest.mark.precommit def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample): model_id, model_path = model_descr + load_and_save_whisper_model(model_descr) cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") expected = cpu_pipe.generate(test_sample) + # expected = None npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) actual_out = npu_pipe.generate(test_sample) @@ -42,6 +81,7 @@ def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample): @pytest.mark.precommit def test_static_whisper_autodetect(model_descr, test_sample): model_id, model_path = model_descr + load_and_save_whisper_model(model_descr) cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") expected = cpu_pipe.generate(test_sample) @@ -59,6 +99,7 @@ def test_static_whisper_autodetect(model_descr, test_sample): @pytest.mark.precommit def test_static_whisper_language_de(model_descr, test_sample): model_id, model_path = model_descr + load_and_save_whisper_model(model_descr) cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|de|>") @@ -76,6 +117,7 @@ def test_static_whisper_language_de(model_descr, test_sample): @pytest.mark.precommit def test_static_whisper_language_fr(model_descr, test_sample): model_id, model_path = model_descr + load_and_save_whisper_model(model_descr) cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>") @@ -93,6 +135,7 @@ def test_static_whisper_language_fr(model_descr, test_sample): @pytest.mark.precommit def test_static_whisper_language_ru(model_descr, test_sample): model_id, model_path = model_descr + load_and_save_whisper_model(model_descr) cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|ru|>") From 0e11d549dcdf8f39fd5ae02946c1855895d86328 Mon Sep 17 00:00:00 2001 From: Ekaterina Shiryaeva Date: Wed, 11 Dec 2024 10:06:54 +0000 Subject: [PATCH 5/5] Address review comments + add test with long input --- .github/labeler.yml | 3 + .../test_whisper_pipeline_static.py | 70 ++++++++++--------- 2 files changed, 41 insertions(+), 32 deletions(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index f7015233a0..bf54ba756f 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -54,6 +54,9 @@ - 'src/cpp/include/openvino/genai/whisper_pipeline.hpp' - 'src/cpp/src/whisper/**/*' - 'src/cpp/src/whisper_generation_config.cpp' +- 'src/cpp/src/whisper_pipeline_base.hpp' +- 'src/cpp/src/whisper_pipeline_static.cpp' +- 'src/cpp/src/whisper_pipeline_static.hpp' - 'src/cpp/src/whisper_pipeline.cpp' - 'src/python/py_whisper_pipeline.cpp' - 'tests/python_tests/test_whisper_generate_api.py' diff --git a/tests/python_tests/test_whisper_pipeline_static.py b/tests/python_tests/test_whisper_pipeline_static.py index 2702b38eca..15470bed35 100644 --- a/tests/python_tests/test_whisper_pipeline_static.py +++ b/tests/python_tests/test_whisper_pipeline_static.py @@ -3,13 +3,15 @@ from ov_genai_test_utils import get_whisper_models_list from test_whisper_generate_api import get_samples_from_dataset -from transformers import WhisperProcessor, pipeline, AutoTokenizer +from transformers import WhisperProcessor, AutoTokenizer from optimum.intel.openvino import OVModelForSpeechSeq2Seq import openvino_genai as ov_genai import openvino_tokenizers import openvino import pytest +# This test suite is designed specifically to validate the functionality +# and robustness of the WhisperStaticPipeline on NPUW:CPU. config = {"NPU_USE_NPUW" : "YES", "NPUW_DEVICES" : "CPU", "NPUW_ONLINE_PIPELINE" : "NONE"} @@ -47,11 +49,23 @@ def load_and_save_whisper_model(params, **tokenizer_kwargs): opt_model.save_pretrained(path) processor.save_pretrained(path) +def get_results_cpu_npu(model_path, audio_sample, **config_kwargs): + cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") + expected = cpu_pipe.generate(audio_sample, **config_kwargs) + + npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) + actual_out = npu_pipe.generate(audio_sample, **config_kwargs) + + return expected, actual_out + def compare_results_with_assert(expected, actual_out): - if expected.texts[0] != actual_out.texts[0]: - print(f'expected: {expected.texts[0]}\n') - print(f'actual_out: {actual_out.texts[0]}') - assert expected.texts[0] == actual_out.texts[0] + assert len(expected.texts) == len(actual_out.texts) + + for i in range(0, len(expected.texts)): + if expected.texts[i] != actual_out.texts[i]: + print(f'expected: {expected.texts[i]}\n') + print(f'actual_out: {actual_out.texts[i]}') + assert expected.texts[i] == actual_out.texts[i] @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @@ -61,12 +75,7 @@ def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample): model_id, model_path = model_descr load_and_save_whisper_model(model_descr) - cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") - expected = cpu_pipe.generate(test_sample) - # expected = None - - npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) - actual_out = npu_pipe.generate(test_sample) + expected, actual_out = get_results_cpu_npu(model_path, test_sample) compare_results_with_assert(expected, actual_out) @@ -74,20 +83,16 @@ def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample): @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) @pytest.mark.parametrize("test_sample", [ -# *get_samples_from_dataset(language="fr", length=2), # 1/2 failed + *get_samples_from_dataset(language="fr", length=1), *get_samples_from_dataset(language="de", length=2), -# *get_samples_from_dataset(language="es", length=2), # 1/2 failed + # *get_samples_from_dataset(language="es", length=2), # mismatch CPU/NPU pipelines ],) @pytest.mark.precommit def test_static_whisper_autodetect(model_descr, test_sample): model_id, model_path = model_descr load_and_save_whisper_model(model_descr) - cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") - expected = cpu_pipe.generate(test_sample) - - npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) - actual_out = npu_pipe.generate(test_sample) + expected, actual_out = get_results_cpu_npu(model_path, test_sample) compare_results_with_assert(expected, actual_out) @@ -101,11 +106,7 @@ def test_static_whisper_language_de(model_descr, test_sample): model_id, model_path = model_descr load_and_save_whisper_model(model_descr) - cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") - expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|de|>") - - npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) - actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|de|>") + expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|de|>") compare_results_with_assert(expected, actual_out) @@ -119,11 +120,7 @@ def test_static_whisper_language_fr(model_descr, test_sample): model_id, model_path = model_descr load_and_save_whisper_model(model_descr) - cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") - expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>") - - npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) - actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>") + expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|fr|>") compare_results_with_assert(expected, actual_out) @@ -137,10 +134,19 @@ def test_static_whisper_language_ru(model_descr, test_sample): model_id, model_path = model_descr load_and_save_whisper_model(model_descr) - cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU") - expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|ru|>") + expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|ru|>") - npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config) - actual_out = npu_pipe.generate(test_sample, max_new_tokens=30, language="<|ru|>") + compare_results_with_assert(expected, actual_out) + + +@pytest.mark.skip(reason="Mismatches in output") +@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True)) +@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1, long_form=True)) +@pytest.mark.precommit +def test_static_whisper_generation_long(model_descr, test_sample): + model_id, model_path = model_descr + load_and_save_whisper_model(model_descr) + + expected, actual_out = get_results_cpu_npu(model_path, test_sample) compare_results_with_assert(expected, actual_out)