Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests for Whisper static pipeline #1250

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
- 'src/cpp/src/whisper_pipeline_static.hpp'
- 'src/python/py_whisper_pipeline.cpp'
- 'tests/python_tests/test_whisper_pipeline.py'
- 'tests/python_tests/test_whisper_pipeline_static.py'

'category: Python API':
- 'src/python/**/*'
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,10 @@ jobs:
matrix:
test:
- name: 'Whisper'
cmd: 'tests/python_tests/test_whisper_pipeline.py'
cmd: 'tests/python_tests/test_whisper_pipeline.py tests/python_tests/test_whisper_pipeline_static.py'
- name: 'LLM & VLM'
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py'
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py'

defaults:
run:
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/mac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ jobs:
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_whisper_pipeline_static.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"

genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down Expand Up @@ -316,7 +316,7 @@ jobs:
# this check enabled for windows only. Ticket: 160205.
python -m pip install git+https://github.com/huggingface/optimum-intel.git@753f84db6e0966580eb9eaa74a808213be730631

python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py ./tests/python_tests/test_whisper_pipeline_static.py -k "not test_smoke"

genai_python_lib_vlm:
name: OpenVINO genai VLM tests (cmake + wheel)
Expand Down
8 changes: 4 additions & 4 deletions src/cpp/src/whisper_pipeline_static.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ ov::InferRequest DecoderCache::get_model(uint8_t input_ids_size) {
reshape_input_ids(m_decoder_model, input_ids_size);

ov::Core core = utils::singleton_core();
ov::CompiledModel compiled_model = core.compile_model(m_decoder_model, "NPU");
ov::CompiledModel compiled_model = core.compile_model(m_decoder_model, "NPU", m_properties);
ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder model");
m_cache.emplace(input_ids_size, compiled_model.create_infer_request());
}
Expand Down Expand Up @@ -544,14 +544,14 @@ WhisperPipeline::StaticWhisperPipeline::StaticWhisperPipeline(const std::filesys
preprocess_decoder(decoder_with_past_model);

ov::CompiledModel compiled_model;
compiled_model = core.compile_model(encoder_model, "NPU");
compiled_model = core.compile_model(encoder_model, "NPU", properties);
ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper encoder model");
m_models.encoder = compiled_model.create_infer_request();

// Will compile decoder model when it's needed
m_decoder_cache = DecoderCache(decoder_model);
m_decoder_cache = DecoderCache(decoder_model, properties);

compiled_model = core.compile_model(decoder_with_past_model, "NPU");
compiled_model = core.compile_model(decoder_with_past_model, "NPU", properties);
ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder with past model");
m_models.decoder_with_past = compiled_model.create_infer_request();

Expand Down
5 changes: 4 additions & 1 deletion src/cpp/src/whisper_pipeline_static.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@ namespace genai {
// Lazily compiles the static Whisper decoder for NPU, caching one infer
// request per input_ids size so each shape is compiled at most once.
class DecoderCache {
public:
    DecoderCache() = default;
    // Sink parameters: model and compile-time properties are moved into the
    // cache and later forwarded to core.compile_model(..., "NPU", m_properties).
    DecoderCache(std::shared_ptr<ov::Model> model, ov::AnyMap properties)
        : m_decoder_model(std::move(model))
        , m_properties(std::move(properties)) {}

    // Returns the cached infer request for the given input_ids size,
    // compiling the reshaped decoder model on first use.
    ov::InferRequest get_model(uint8_t input_ids_size);
private:
    std::unordered_map<uint8_t, ov::InferRequest> m_cache;  // keyed by input_ids size
    std::shared_ptr<ov::Model> m_decoder_model;
    ov::AnyMap m_properties;  // compile properties forwarded to the NPU plugin
};

class WhisperPipeline::StaticWhisperPipeline : public WhisperPipeline::WhisperPipelineImplBase {
Expand Down
150 changes: 150 additions & 0 deletions tests/python_tests/test_whisper_pipeline_static.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from test_whisper_pipeline import get_whisper_models_list, get_samples_from_dataset
from transformers import WhisperProcessor, AutoTokenizer
from optimum.intel.openvino import OVModelForSpeechSeq2Seq
import openvino_genai as ov_genai
import openvino_tokenizers
import openvino
import pytest

# This test suite is designed specifically to validate the functionality
# and robustness of the WhisperStaticPipeline on NPUW:CPU.
# NPUW executes the "NPU" device on CPU, so the static pipeline can be
# tested without NPU hardware.
config = {"NPU_USE_NPUW" : "YES",
          "NPUW_DEVICES" : "CPU",
          "NPUW_ONLINE_PIPELINE" : "NONE"}

def load_and_save_whisper_model(params, **tokenizer_kwargs):
    """Export a HF Whisper model plus tokenizer/detokenizer to OpenVINO IR.

    params: (model_id, path) tuple -- HF hub model id and target directory.
    tokenizer_kwargs: forwarded to openvino_tokenizers.convert_tokenizer().
    The export is skipped when the encoder IR already exists at `path`.
    """
    model_id, path = params

    processor = WhisperProcessor.from_pretrained(model_id, trust_remote_code=True)

    # Presence of the encoder IR marks a previously completed export.
    if not (path / "openvino_encoder_model.xml").exists():
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(
            tokenizer,
            with_detokenizer=True,
            clean_up_tokenization_spaces=False,
            **tokenizer_kwargs,
        )

        openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml")
        openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml")

        # to store tokenizer config jsons with special tokens
        tokenizer.save_pretrained(path)

        # Export the seq2seq model itself to IR; compile=False defers device
        # compilation to the pipelines under test.
        opt_model = OVModelForSpeechSeq2Seq.from_pretrained(
            model_id,
            export=True,
            trust_remote_code=True,
            compile=False,
            device="CPU",
            load_in_8bit=False,
        )
        opt_model.generation_config.save_pretrained(path)
        opt_model.config.save_pretrained(path)
        opt_model.save_pretrained(path)
        processor.save_pretrained(path)

def get_results_cpu_npu(model_path, audio_sample, **config_kwargs):
    """Run generation on the CPU reference pipeline and the NPU (NPUW:CPU)
    static pipeline for the same sample; returns (cpu_result, npu_result)."""
    reference_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
    reference = reference_pipe.generate(audio_sample, **config_kwargs)

    static_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
    static_result = static_pipe.generate(audio_sample, **config_kwargs)

    return reference, static_result

def compare_results_with_assert(expected, actual_out):
    """Assert NPU transcriptions match the CPU reference, printing any
    mismatching pair before failing to aid debugging."""
    assert len(expected.texts) == len(actual_out.texts)

    for ref_text, out_text in zip(expected.texts, actual_out.texts):
        if ref_text != out_text:
            print(f'expected: {ref_text}\n')
            print(f'actual_out: {out_text}')
        assert ref_text == out_text


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1))
@pytest.mark.precommit
def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample):
    # Short English sample: the static NPU pipeline must reproduce the CPU
    # reference transcription exactly.
    _, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    cpu_out, npu_out = get_results_cpu_npu(model_path, test_sample)
    compare_results_with_assert(cpu_out, npu_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample",
                         [
                             *get_samples_from_dataset(language="fr", length=2),
                             *get_samples_from_dataset(language="de", length=2),
                             *get_samples_from_dataset(language="es", length=2),
                         ],)
@pytest.mark.precommit
def test_static_whisper_autodetect(model_descr, test_sample):
    # No explicit language is passed, so the pipeline first detects the audio
    # language (extra infer request, per PR discussion) before transcribing;
    # the result must still match the CPU reference.
    model_id, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample)

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="de", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_de(model_descr, test_sample):
    # Unlike test_static_whisper_autodetect, the language token is set
    # explicitly here, so no language-detection infer request is issued first.
    model_id, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|de|>")

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="fr", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_fr(model_descr, test_sample):
    # Explicit French language token; exercises the fixed-language path
    # (no autodetect infer request) against the CPU reference.
    model_id, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    expected, actual_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|fr|>")

    compare_results_with_assert(expected, actual_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize(
    "test_sample", get_samples_from_dataset(language="ru", length=3)
)
@pytest.mark.precommit
def test_static_whisper_language_ru(model_descr, test_sample):
    # Explicit Russian language token with a capped token budget; NPU output
    # must match the CPU reference.
    _, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    cpu_out, npu_out = get_results_cpu_npu(model_path, test_sample, max_new_tokens=30, language="<|ru|>")
    compare_results_with_assert(cpu_out, npu_out)


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1, long_form=True))
@pytest.mark.precommit
def test_static_whisper_generation_long(model_descr, test_sample):
    # Long-form English sample; the static pipeline must still match the CPU
    # reference transcription.
    _, model_path = model_descr
    load_and_save_whisper_model(model_descr)

    cpu_out, npu_out = get_results_cpu_npu(model_path, test_sample)
    compare_results_with_assert(cpu_out, npu_out)
Loading