Skip to content

Commit

Permalink
Download model if it's not found
Browse files Browse the repository at this point in the history
  • Loading branch information
eshiryae committed Jan 7, 2025
1 parent 25a950b commit 92e08dd
Showing 1 changed file with 44 additions and 1 deletion.
45 changes: 44 additions & 1 deletion tests/python_tests/test_whisper_pipeline_static.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,50 @@

from ov_genai_test_utils import get_whisper_models_list
from test_whisper_generate_api import get_samples_from_dataset
from transformers import WhisperProcessor, pipeline, AutoTokenizer
from optimum.intel.openvino import OVModelForSpeechSeq2Seq
import openvino_genai as ov_genai
import openvino_tokenizers
import openvino
import pytest

# Extra properties forwarded as keyword arguments when the tests construct the
# "NPU" WhisperPipeline.
# NOTE(review): presumably these route NPU execution through NPUW on the CPU
# device so the static pipeline can run without NPU hardware - confirm against
# the OpenVINO NPUW documentation.
config = {
    "NPU_USE_NPUW": "YES",
    "NPUW_DEVICES": "CPU",
    "NPUW_ONLINE_PIPELINE": "NONE",
}

def load_and_save_whisper_model(params, **tokenizer_kwargs):
    """Export a Whisper model to ``path`` unless an export is already there.

    The presence of ``openvino_encoder_model.xml`` in ``path`` is treated as
    the marker of a finished export. When it exists the function returns
    immediately without loading anything, so repeated calls from several
    tests are cheap.

    Args:
        params: ``(model_id, path)`` pair. ``model_id`` is handed to the
            ``from_pretrained`` loaders; ``path`` is the output directory and
            must support the ``/`` operator and ``.exists()`` (for example a
            ``pathlib.Path``).
        **tokenizer_kwargs: Extra keyword arguments forwarded to
            ``openvino_tokenizers.convert_tokenizer``.

    Returns:
        None. The function is called for its side effect of writing the
        tokenizer, detokenizer, model, configs and processor into ``path``.
    """
    model_id, path = params

    # Nothing to do when the model was exported earlier; in particular, do
    # not load the processor, which is only needed to be saved next to a
    # freshly exported model.
    if (path / "openvino_encoder_model.xml").exists():
        return

    processor = WhisperProcessor.from_pretrained(model_id, trust_remote_code=True)

    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(
        tokenizer,
        with_detokenizer=True,
        clean_up_tokenization_spaces=False,
        **tokenizer_kwargs,
    )

    openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml")
    openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml")

    # to store tokenizer config jsons with special tokens
    tokenizer.save_pretrained(path)

    opt_model = OVModelForSpeechSeq2Seq.from_pretrained(
        model_id,
        export=True,
        trust_remote_code=True,
        compile=False,
        device="CPU",
        load_in_8bit=False,
    )
    opt_model.generation_config.save_pretrained(path)
    opt_model.config.save_pretrained(path)
    opt_model.save_pretrained(path)
    processor.save_pretrained(path)

def compare_results_with_assert(expected, actual_out):
if expected.texts[0] != actual_out.texts[0]:
print(f'expected: {expected.texts[0]}\n')
Expand All @@ -18,13 +55,15 @@ def compare_results_with_assert(expected, actual_out):


@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(length=1))
@pytest.mark.parametrize("test_sample", get_samples_from_dataset(language="en", length=1))
@pytest.mark.precommit
def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample):
model_id, model_path = model_descr
load_and_save_whisper_model(model_descr)

cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
expected = cpu_pipe.generate(test_sample)
# expected = None

npu_pipe = ov_genai.WhisperPipeline(model_path, "NPU", **config)
actual_out = npu_pipe.generate(test_sample)
Expand All @@ -42,6 +81,7 @@ def test_static_whisper_generation_compare_with_cpu(model_descr, test_sample):
@pytest.mark.precommit
def test_static_whisper_autodetect(model_descr, test_sample):
model_id, model_path = model_descr
load_and_save_whisper_model(model_descr)

cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
expected = cpu_pipe.generate(test_sample)
Expand All @@ -59,6 +99,7 @@ def test_static_whisper_autodetect(model_descr, test_sample):
@pytest.mark.precommit
def test_static_whisper_language_de(model_descr, test_sample):
model_id, model_path = model_descr
load_and_save_whisper_model(model_descr)

cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|de|>")
Expand All @@ -76,6 +117,7 @@ def test_static_whisper_language_de(model_descr, test_sample):
@pytest.mark.precommit
def test_static_whisper_language_fr(model_descr, test_sample):
model_id, model_path = model_descr
load_and_save_whisper_model(model_descr)

cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>")
Expand All @@ -93,6 +135,7 @@ def test_static_whisper_language_fr(model_descr, test_sample):
@pytest.mark.precommit
def test_static_whisper_language_ru(model_descr, test_sample):
model_id, model_path = model_descr
load_and_save_whisper_model(model_descr)

cpu_pipe = ov_genai.WhisperPipeline(model_path, "CPU")
expected = cpu_pipe.generate(test_sample, max_new_tokens=30, language="<|ru|>")
Expand Down

0 comments on commit 92e08dd

Please sign in to comment.