Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests for Whisper static pipeline #1250

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,13 @@
- 'src/cpp/include/openvino/genai/whisper_pipeline.hpp'
- 'src/cpp/src/whisper/**/*'
- 'src/cpp/src/whisper_generation_config.cpp'
- 'src/cpp/src/whisper_pipeline_base.hpp'
- 'src/cpp/src/whisper_pipeline_static.cpp'
- 'src/cpp/src/whisper_pipeline_static.hpp'
- 'src/cpp/src/whisper_pipeline.cpp'
- 'src/python/py_whisper_pipeline.cpp'
- 'tests/python_tests/test_whisper_generate_api.py'
TolyaTalamanov marked this conversation as resolved.
Show resolved Hide resolved
- 'tests/python_tests/test_whisper_pipeline_static.py'

'category: Python API':
- 'src/python/**/*'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@ jobs:
matrix:
test:
- name: 'Whisper'
cmd: 'tests/python_tests/test_whisper_generate_api.py'
cmd: 'tests/python_tests/test_whisper_generate_api.py tests/python_tests/test_whisper_pipeline_static.py'
- name: 'LLM & VLM'
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_generate_api.py -k "not Qwen2-0.5B-Instruct"' # Skip failed tests Qwen2-0.5B-Instruct
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py -k "not Qwen2-0.5B-Instruct"' # Skip failed tests Qwen2-0.5B-Instruct
defaults:
run:
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/mac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ jobs:
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"

genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"

genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down Expand Up @@ -310,7 +310,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke"

genai_python_lib_vlm:
name: OpenVINO genai VLM tests (cmake + wheel)
Expand Down
6 changes: 3 additions & 3 deletions src/cpp/src/whisper_pipeline_static.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -555,9 +555,9 @@ WhisperPipeline::StaticWhisperPipeline::StaticWhisperPipeline(const std::filesys
preprocess_decoder(decoder_model);
preprocess_decoder(decoder_with_past_model);

m_models.encoder = core.compile_model(encoder_model, "NPU").create_infer_request();
m_models.decoder = core.compile_model(decoder_model, "NPU").create_infer_request();
m_models.decoder_with_past = core.compile_model(decoder_with_past_model, "NPU").create_infer_request();
m_models.encoder = core.compile_model(encoder_model, "NPU", properties).create_infer_request();
m_models.decoder = core.compile_model(decoder_model, "NPU", properties).create_infer_request();
m_models.decoder_with_past = core.compile_model(decoder_with_past_model, "NPU", properties).create_infer_request();

// If eos_token_id was not provided, take value
if (m_generation_config.eos_token_id == -1) {
Expand Down
152 changes: 152 additions & 0 deletions tests/python_tests/test_whisper_pipeline_static.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from ov_genai_test_utils import get_whisper_models_list
from test_whisper_generate_api import get_samples_from_dataset
from transformers import WhisperProcessor, AutoTokenizer
from optimum.intel.openvino import OVModelForSpeechSeq2Seq
import openvino_genai as ov_genai
import openvino_tokenizers
import openvino
import pytest

# This test suite is designed specifically to validate the functionality
# and robustness of the WhisperStaticPipeline on NPUW:CPU.
# NPUW_DEVICES=CPU makes the NPUW wrapper execute on CPU, so the tests
# run without physical NPU hardware.
config = {
    "NPU_USE_NPUW": "YES",
    "NPUW_DEVICES": "CPU",
    "NPUW_ONLINE_PIPELINE": "NONE",
}

def load_and_save_whisper_model(params, **tokenizer_kwargs):
    """Export a Whisper model to an OpenVINO IR folder, once per path.

    ``params`` is a ``(model_id, path)`` pair.  On first call the HF
    tokenizer is converted to OpenVINO tokenizer/detokenizer IRs, the
    seq2seq model is exported via optimum-intel, and all configs
    (generation config, model config, processor, tokenizer jsons) are
    saved next to the IRs so ``path`` can be passed straight to
    ``ov_genai.WhisperPipeline``.  Subsequent calls are no-ops: the
    presence of the encoder IR is used as the "already exported" marker.
    """
    model_id, path = params

    processor = WhisperProcessor.from_pretrained(model_id, trust_remote_code=True)

    # Export only once; the encoder IR acts as the cache marker.
    if not (path / "openvino_encoder_model.xml").exists():
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(
            tokenizer,
            with_detokenizer=True,
            clean_up_tokenization_spaces=False,
            **tokenizer_kwargs,
        )

        openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml")
        openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml")

        # to store tokenizer config jsons with special tokens
        tokenizer.save_pretrained(path)

        opt_model = OVModelForSpeechSeq2Seq.from_pretrained(
            model_id,
            export=True,
            trust_remote_code=True,
            compile=False,
            device="CPU",
            load_in_8bit=False,
        )
        opt_model.generation_config.save_pretrained(path)
        opt_model.config.save_pretrained(path)
        opt_model.save_pretrained(path)
        processor.save_pretrained(path)

def get_results_cpu_npu(model_path, audio_sample, **config_kwargs):
    """Run the same generation on CPU (reference) and the static NPU pipeline.

    Returns a ``(cpu_result, npu_result)`` pair for comparison; the NPU
    pipeline is created with the module-level NPUW-on-CPU ``config``.
    """
    reference_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
    reference_out = reference_pipe.generate(audio_sample, **config_kwargs)

    static_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
    static_out = static_pipe.generate(audio_sample, **config_kwargs)

    return reference_out, static_out

def compare_results_with_assert(expected, actual_out):
    """Assert the two generation results carry identical text lists.

    On a mismatch the differing pair is printed first so the failure is
    easy to diagnose in CI logs, then the assertion fires.
    """
    assert len(expected.texts) == len(actual_out.texts)

    for exp_text, act_text in zip(expected.texts, actual_out.texts):
        if exp_text != act_text:
            print(f'expected: {exp_text}\n')
            print(f'actual_out: {act_text}')
        assert exp_text == act_text


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1))
@pytest.mark.precommit
def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample):
    """English sample, default settings: static NPU output must match CPU."""
    _, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample",
    [
        *get_samples_from_dataset(language="fr", length=1),
        *get_samples_from_dataset(language="de", length=2),
        # *get_samples_from_dataset(language="es", length=2), # mismatch CPU/NPU pipelines
    ],
)
@pytest.mark.precommit
def test_static_whisper_autodetect(model_descr, test_sample):
    """Non-English samples without an explicit ``language`` option: both
    pipelines must auto-detect the spoken language and produce identical
    transcriptions.
    """
    _, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="de", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_de(model_descr, test_sample):
    """German samples with the language token pinned to ``<|de|>``.

    Unlike ``test_static_whisper_autodetect``, this bypasses language
    detection by forcing the language explicitly and caps generation at
    30 new tokens; CPU and static NPU outputs must still agree.
    """
    _, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(
        model_path, test_sample, max_new_tokens=30, language="<|de|>"
    )

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="fr", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_fr(model_descr, test_sample):
    """French samples with the language token pinned to ``<|fr|>``.

    Unlike ``test_static_whisper_autodetect``, this bypasses language
    detection by forcing the language explicitly and caps generation at
    30 new tokens; CPU and static NPU outputs must still agree.
    """
    _, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(
        model_path, test_sample, max_new_tokens=30, language="<|fr|>"
    )

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="ru", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_ru(model_descr, test_sample):
    """Russian samples with the language token pinned to ``<|ru|>``,
    capped at 30 new tokens; CPU and static NPU outputs must agree.
    """
    _, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(
        model_path, test_sample, max_new_tokens=30, language="<|ru|>"
    )

    compare_results_with_assert(expected, actual_out)


@pytest.mark.skip(reason="Mismatches in output")
@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1, long_form=True))
@pytest.mark.precommit
def test_static_whisper_generation_long(model_descr, test_sample):
    """Long-form English audio; currently skipped because the CPU and
    static NPU pipelines do not yet produce matching output.
    """
    _, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    compare_results_with_assert(expected, actual_out)
Loading