From b353929fabdd4620f2c44b664ffbc0a474a88923 Mon Sep 17 00:00:00 2001
From: Xiake Sun
Date: Thu, 9 Jan 2025 13:20:07 +0800
Subject: [PATCH 1/6] Update openvino tokenizers (#1506)

To test Llama3 fix:
https://github.com/openvinotoolkit/openvino_tokenizers/pull/357
---
 thirdparty/openvino_tokenizers | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers
index bcfd3eda25..d5f0abf827 160000
--- a/thirdparty/openvino_tokenizers
+++ b/thirdparty/openvino_tokenizers
@@ -1 +1 @@
-Subproject commit bcfd3eda25ae3ec423502a4074e35c774506c732
+Subproject commit d5f0abf8271f3cd8fc98d747b3e569fbeacca532

From ca0babefd952ac78bcb0008ced94beb380a73496 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Thu, 9 Jan 2025 09:21:27 +0400
Subject: [PATCH 2/6] Replaced chatglm2-6b with chatglm3-6b (#1505)

CVS-159975
---
 SUPPORTED_MODELS.md                       | 1 -
 tests/python_tests/models/real_models     | 1 -
 tests/python_tests/ov_genai_test_utils.py | 2 +-
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/SUPPORTED_MODELS.md b/SUPPORTED_MODELS.md
index 6b45f47890..79333fa45c 100644
--- a/SUPPORTED_MODELS.md
+++ b/SUPPORTED_MODELS.md
@@ -14,7 +14,6 @@ ChatGLM
diff --git a/tests/python_tests/models/real_models b/tests/python_tests/models/real_models
index 420f8f53b6..5fd8fe0500 100644
--- a/tests/python_tests/models/real_models
+++ b/tests/python_tests/models/real_models
@@ -27,7 +27,6 @@ Salesforce/codegen-350M-multi
 Salesforce/codegen-350M-nl
 Salesforce/codegen2-1b
 # Salesforce/xgen-7b-8k-base: Transformers issue - Object of type method is not JSON serializable (https://huggingface.co/Salesforce/xgen-7b-8k-base/discussions/32)
-THUDM/chatglm2-6b
 THUDM/chatglm3-6b
 TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ
 TinyLlama/TinyLlama-1.1B-Chat-v0.6
diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py
index 66fb58f46d..ff55c3c378 100644
--- a/tests/python_tests/ov_genai_test_utils.py
+++ b/tests/python_tests/ov_genai_test_utils.py
@@ -26,7 +26,7 @@ def get_models_list():
         "facebook/opt-125m",
         "microsoft/phi-1_5",
         "microsoft/phi-2",
-        "THUDM/chatglm2-6b",
+        "THUDM/chatglm3-6b",
         "Qwen/Qwen2-0.5B-Instruct",
         "Qwen/Qwen-7B-Chat",
         "Qwen/Qwen1.5-7B-Chat",

From 2c6d67e039a22e32bc43b53533c3f5b27929eea6 Mon Sep 17 00:00:00 2001
From: Alexander Suvorov
Date: Thu, 9 Jan 2025 06:21:51 +0100
Subject: [PATCH 3/6] Whisper pipeline: refactor tests, disable `return_timestamps` check (#1496)

Ticket: 160055

---------

Co-authored-by: Ilya Lavrenov
---
 .github/workflows/windows.yml               |   6 +
 samples/export-requirements.txt             |   2 +-
 tests/python_tests/requirements.txt         |   2 +-
 tests/python_tests/test_whisper_pipeline.py | 434 ++++++++------------
 4 files changed, 169 insertions(+), 275 deletions(-)

diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 95a713d7a1..8f43af44ae 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -310,6 +310,12 @@ jobs:
           . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
           python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
           python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
+
+          # will install transformers 4.46.3 version
+          # transformers 4.46.3 will enable return_timestamps tests
+          # this check enabled for windows only. Ticket: 160205.
+          python -m pip install git+https://github.com/huggingface/optimum-intel.git@753f84db6e0966580eb9eaa74a808213be730631
+
           python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"

   genai_python_lib_vlm:
diff --git a/samples/export-requirements.txt b/samples/export-requirements.txt
index 2f71891b7b..af38558656 100644
--- a/samples/export-requirements.txt
+++ b/samples/export-requirements.txt
@@ -2,7 +2,7 @@
 --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
 --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
 openvino-tokenizers~=2025.0.0.0.dev
-optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@753f84db6e0966580eb9eaa74a808213be730631
+optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
 numpy<2.0.0; sys_platform == 'darwin'
 einops==0.8.0  # For Qwen
 transformers_stream_generator==0.0.5  # For Qwen
diff --git a/tests/python_tests/requirements.txt b/tests/python_tests/requirements.txt
index e23eaacc21..c851c71ee5 100644
--- a/tests/python_tests/requirements.txt
+++ b/tests/python_tests/requirements.txt
@@ -1,6 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 diffusers==0.32.1
-optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@753f84db6e0966580eb9eaa74a808213be730631
+optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
 numpy<2.0.0; platform_system == "Darwin" and platform_machine == "x86_64"
 onnx==1.17.0
 pytest
diff --git a/tests/python_tests/test_whisper_pipeline.py b/tests/python_tests/test_whisper_pipeline.py
index aa78666e32..c046d1ae2c 100644
--- a/tests/python_tests/test_whisper_pipeline.py
+++ b/tests/python_tests/test_whisper_pipeline.py
@@ -11,11 +11,13 @@ from optimum.intel.openvino import OVModelForSpeechSeq2Seq
 import gc
 import json
-import time
 import typing
 import numpy as np
 import os
 import pathlib
+import importlib.metadata as metadata
+from packaging.version import parse
+

 @pytest.fixture(scope="class", autouse=True)
 def run_gc_after_test():
@@ -27,36 +29,29 @@ def run_gc_after_test():
     gc.collect()


-def get_whisper_models_list(tiny_only=False, multilingual=False, en_only=False):
-    precommit_models = [
+def get_whisper_models_list(tiny_only=False):
+    model_ids = [
         "openai/whisper-tiny",
-        "openai/whisper-tiny.en",
         "distil-whisper/distil-small.en",
     ]
-    if multilingual:
-        precommit_models = ["openai/whisper-tiny"]
-    if en_only:
-        precommit_models = ["openai/whisper-tiny.en", "distil-whisper/distil-small.en"]
-    if tiny_only:
-        precommit_models = ["openai/whisper-tiny"]
-
-    nightly_models = []
-    if pytest.run_marker == "precommit":
-        model_ids = precommit_models
-    else:
-        model_ids = nightly_models
+
+    if tiny_only:
+        model_ids = ["openai/whisper-tiny"]

     if pytest.selected_model_ids:
-        model_ids = [model_id for model_id in model_ids if model_id in pytest.selected_model_ids.split(' ')]
+        model_ids = [
+            model_id
+            for model_id in model_ids
+            if model_id in pytest.selected_model_ids.split(" ")
+        ]

-    prefix = pathlib.Path(os.getenv('GENAI_MODELS_PATH_PREFIX', ''))
-    return [(model_id, prefix / model_id.split('/')[1]) for model_id in model_ids]
+    prefix = pathlib.Path(os.getenv("GENAI_MODELS_PATH_PREFIX", ""))
+    return [(model_id, prefix / model_id.split("/")[1]) for model_id in model_ids]


 # used whisper models are relatively small
 # cache them in memory to speedup tests
-@functools.lru_cache(3)
+@functools.lru_cache()
 def read_whisper_model(params, **tokenizer_kwargs):
     model_id, path = params
@@ -90,6 +85,7 @@ def read_whisper_model(params, **tokenizer_kwargs):
         model_id,
         export=True,
         trust_remote_code=True,
+        stateful=False,
         compile=False,
         device="CPU",
         load_in_8bit=False,
@@ -114,30 +110,39 @@ def read_whisper_model(params, **tokenizer_kwargs):
     )


-def compare_genai_and_opt_pipelines(opt_pipe, genai_pipe, dataset_id):
-    ds = datasets.load_dataset(dataset_id, "clean", split="validation")
-    opt_infer_time = 0
-    genai_infer_time = 0
-
-    for ds_row in ds:
-        audio_sample = ds_row["audio"]
+def run_huggingface(
+    pipeline,
+    sample,
+    config: ov_genai.WhisperGenerationConfig | None = None,
+):
+    if not config:
+        config = ov_genai.WhisperGenerationConfig()
+
+    return pipeline(
+        sample,
+        max_new_tokens=min(config.max_new_tokens, 444),
+        return_timestamps=config.return_timestamps,
+        generate_kwargs={"language": config.language, "task": config.task},
+    )

-        streamer_result = []

-        start = time.time()
-        genai_result = genai_pipe.generate(
-            audio_sample["array"].tolist(), streamer=lambda x: streamer_result.append(x)
-        )
-        genai_infer_time += time.time() - start
+def run_genai(
+    pipeline: ov_genai.WhisperPipeline,
+    sample,
+    config: ov_genai.WhisperGenerationConfig | None = None,
+    streamer: typing.Callable[[str], bool] | None = None,
+):
+    if not config:
+        config = ov_genai.WhisperGenerationConfig()

-        start = time.time()
-        result = opt_pipe(audio_sample)
-        opt_infer_time += time.time() - start
+    genai_config = pipeline.get_generation_config()

-        assert genai_result.texts[0] == result["text"]
-        assert "".join(streamer_result) == result["text"]
+    genai_config.max_new_tokens = config.max_new_tokens
+    genai_config.return_timestamps = config.return_timestamps
+    genai_config.task = config.task
+    genai_config.language = f"<|{config.language}|>" if config.language else None

-    print(f"Inference time\nOpt: {opt_infer_time}\nGenAI: {genai_infer_time}")
+    return pipeline.generate(sample, genai_config, streamer=streamer)


 def get_samples_from_dataset(
@@ -166,13 +171,50 @@ def get_samples_from_dataset(
     return [x["audio"]["array"] for x in ds]


-@pytest.mark.parametrize("model_descr", get_whisper_models_list())
-@pytest.mark.parametrize("dataset_id", ["hf-internal-testing/librispeech_asr_dummy"])
-@pytest.mark.precommit
-def test_whisper_on_hf_dataset(model_descr, dataset_id):
-    model_id, path, opt_pipe, genai_pipe = read_whisper_model(model_descr)
+def run_pipeline_with_ref(
+    model_id: str,
+    tmp_path: str,
+    sample: np.ndarray | list[np.ndarray],
+    generation_config: ov_genai.WhisperGenerationConfig | None = None,
+    streamer: typing.Callable[[str], bool] | None = None,
+):
+    _, _, hf_pipe, genai_pipe = read_whisper_model((model_id, tmp_path))
+
+    if type(sample) is np.ndarray and len(sample.shape) == 1:
+        sample = np.expand_dims(sample, 0)
+
+    for _sample in sample:
+        genai_result = run_genai(genai_pipe, _sample, generation_config, streamer)
+        hf_result = run_huggingface(hf_pipe, _sample, generation_config)
+
+        compare_results(hf_result, genai_result)
+

-    compare_genai_and_opt_pipelines(opt_pipe, genai_pipe, dataset_id)
+def compare_results(hf_result, genai_result):
+    assert genai_result.texts[0] == hf_result["text"]
+
+    # transformers 4.47 updated return_timestamps implementation
+    # remove once genai implementation aligned with transformers. Ticket 160205.
+    transformers_version_greater_4_47 = parse(
+        metadata.version("transformers")
+    ) >= parse("4.47.0")
+
+    if transformers_version_greater_4_47:
+        return
+
+    if "chunks" not in hf_result and genai_result.chunks is None:
+        return
+
+    assert len(genai_result.chunks) == len(hf_result["chunks"])
+
+    for opt_chunk, genai_chunk in zip(hf_result["chunks"], genai_result.chunks):
+        assert opt_chunk["text"] == genai_chunk.text
+        assert opt_chunk["timestamp"][0] == round(genai_chunk.start_ts, 2)
+        if opt_chunk["timestamp"][1]:
+            assert opt_chunk["timestamp"][1] == round(genai_chunk.end_ts, 2)
+        else:
+            assert opt_chunk["timestamp"][1] == None
+            assert round(genai_chunk.end_ts, 2) == -1.0


 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@@ -182,16 +224,11 @@ def test_whisper_on_hf_dataset(model_descr, dataset_id):
 )
 @pytest.mark.precommit
 def test_smoke(model_descr, test_sample):
-    model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-
-    expected = opt_pipe(test_sample)
-
-    genai_result = pipe.generate(test_sample)
-
-    assert genai_result.texts[0] == expected["text"]
-
-    assert "chunks" not in expected
-    assert genai_result.chunks == None
+    run_pipeline_with_ref(
+        model_id=model_descr[0],
+        tmp_path=model_descr[1],
+        sample=test_sample,
+    )


 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
@@ -259,79 +296,55 @@ def test_whisper_constructors(model_descr, test_sample):
 def test_max_new_tokens(model_descr, test_sample):
     model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)

-    expected = opt_pipe(test_sample, max_new_tokens=10)["text"]
+    expected = opt_pipe(test_sample, max_new_tokens=10)

     genai_result = pipe.generate(test_sample, max_new_tokens=10)

-    assert genai_result.texts[0] == expected
-
-    genai_result = pipe.generate(test_sample)
-
-    assert genai_result.texts[0] != expected
+    compare_results(expected, genai_result)

     config = pipe.get_generation_config()
     config.max_new_tokens = 10
     genai_result = pipe.generate(test_sample, config)

-    assert genai_result.texts[0] == expected
+    compare_results(expected, genai_result)


 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
 @pytest.mark.parametrize(
-    "test_sample", get_samples_from_dataset(language="fr", length=3)
+    "test_samples",
+    [
+        (get_samples_from_dataset(language="fr", length=1), "fr"),
+        (get_samples_from_dataset(language="de", length=1), "de"),
+    ],
 )
 @pytest.mark.precommit
-def test_language_mode_fr(model_descr, test_sample):
-    model_id, path = model_descr
+def test_language_mode(model_descr, test_samples):
     model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
+    samples, language = test_samples

     expected = opt_pipe(
-        test_sample, max_new_tokens=30, generate_kwargs={"language": "fr"}
+        samples[0], max_new_tokens=30, generate_kwargs={"language": language}
     )

-    genai_result = pipe.generate(test_sample, max_new_tokens=30, language="<|fr|>")
-
-    assert genai_result.texts[0] == expected["text"]
-
-    config = pipe.get_generation_config()
-    config.max_new_tokens = 30
-    config.language = "<|fr|>"
-    genai_result = pipe.generate(test_sample, config)
-
-    assert genai_result.texts[0] == expected["text"]
-
-
-@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
-@pytest.mark.parametrize(
-    "test_sample", get_samples_from_dataset(language="de", length=3)
-)
-@pytest.mark.precommit
-def test_language_mode_de(model_descr, test_sample):
-    model_id, path = model_descr
-    model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-
-    expected = opt_pipe(
-        test_sample, max_new_tokens=30, generate_kwargs={"language": "de"}
+    genai_result = pipe.generate(
+        samples[0], max_new_tokens=30, language=f"<|{language}|>"
     )

-    genai_result = pipe.generate(test_sample, max_new_tokens=30, language="<|de|>")
-
-    assert genai_result.texts[0] == expected["text"]
+    compare_results(expected, genai_result)

     config = pipe.get_generation_config()
     config.max_new_tokens = 30
-    config.language = "<|de|>"
-    genai_result = pipe.generate(test_sample, config)
+    config.language = f"<|{language}|>"
+    genai_result = pipe.generate(samples[0], config)

-    assert genai_result.texts[0] == expected["text"]
+    compare_results(expected, genai_result)


 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
 @pytest.mark.parametrize(
-    "test_sample", get_samples_from_dataset(language="fr", length=3)
+    "test_sample", get_samples_from_dataset(language="fr", length=1)
 )
 @pytest.mark.precommit
 def test_task_mode(model_descr, test_sample):
-    model_id, path = model_descr
     model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)

     expected = opt_pipe(
@@ -344,7 +357,7 @@ def test_task_mode(model_descr, test_sample):
         test_sample, max_new_tokens=30, language="<|fr|>", task="translate"
     )

-    assert genai_result.texts[0] == expected["text"]
+    compare_results(expected, genai_result)

     config = pipe.get_generation_config()
     config.max_new_tokens = 30
@@ -352,27 +365,7 @@ def test_task_mode(model_descr, test_sample):
     config.task = "translate"
     genai_result = pipe.generate(test_sample, config)

-    assert genai_result.texts[0] == expected["text"]
-
-    expected = opt_pipe(
-        test_sample,
-        max_new_tokens=30,
-        generate_kwargs={"language": "ru", "task": "translate"},
-    )
-
-    genai_result = pipe.generate(
-        test_sample, max_new_tokens=30, language="<|ru|>", task="translate"
-    )
-
-    assert genai_result.texts[0] == expected["text"]
-
-    config = pipe.get_generation_config()
-    config.max_new_tokens = 30
-    config.language = "<|ru|>"
-    config.task = "translate"
-    genai_result = pipe.generate(test_sample, config)
-
-    assert genai_result.texts[0] == expected["text"]
+    compare_results(expected, genai_result)

     # seems to be equivalent to translate task
     expected = opt_pipe(
@@ -385,7 +378,7 @@ def test_task_mode(model_descr, test_sample):
         test_sample, max_new_tokens=30, language="<|en|>", task="transcribe"
     )

-    assert genai_result.texts[0] == expected["text"]
+    compare_results(expected, genai_result)

     config = pipe.get_generation_config()
     config.max_new_tokens = 30
@@ -393,21 +386,20 @@ def test_task_mode(model_descr, test_sample):
     config.task = "transcribe"
     genai_result = pipe.generate(test_sample, config)

-    assert genai_result.texts[0] == expected["text"]
+    compare_results(expected, genai_result)


 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
 @pytest.mark.parametrize(
     "test_sample",
     [
-        *get_samples_from_dataset(language="fr", length=2),
-        *get_samples_from_dataset(language="de", length=2),
-        *get_samples_from_dataset(language="es", length=2),
+        *get_samples_from_dataset(language="fr", length=1),
+        *get_samples_from_dataset(language="de", length=1),
+        *get_samples_from_dataset(language="es", length=1),
     ],
 )
 @pytest.mark.precommit
 def test_language_autodetect(model_descr, test_sample):
-    model_id, path = model_descr
     model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)

     input_features = opt_pipe.feature_extractor(test_sample)
@@ -415,188 +407,84 @@ def test_language_autodetect(model_descr, test_sample):
     # ensure detected language us not english
     assert language_id != pipe.get_generation_config().lang_to_id["<|en|>"]

-    expected = opt_pipe(
-        test_sample,
-        max_new_tokens=30,
+    run_pipeline_with_ref(
+        model_id=model_descr[0],
+        tmp_path=model_descr[1],
+        sample=test_sample,
+        generation_config=ov_genai.WhisperGenerationConfig(max_new_tokens=30),
     )

-    genai_result = pipe.generate(test_sample, max_new_tokens=30)
-
-    assert genai_result.texts[0] == expected["text"]
-

 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
-@pytest.mark.parametrize(
-    "test_sample",
-    [
-        *get_samples_from_dataset(language="en", length=10, long_form=True),
-    ],
-)
+@pytest.mark.parametrize("test_sample", get_samples_from_dataset(length=1))
 @pytest.mark.precommit
 def test_return_timestamps_short_form(model_descr, test_sample):
-    model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-    # long form audio not supported yet
-    test_sample = test_sample[: 16000 * 30]
-
-    expected = opt_pipe(
-        test_sample,
-        return_timestamps=True,
-    )
-
-    genai_result = pipe.generate(
-        test_sample.tolist(),
-        return_timestamps=True,
+    run_pipeline_with_ref(
+        model_id=model_descr[0],
+        tmp_path=model_descr[1],
+        sample=test_sample,
+        generation_config=ov_genai.WhisperGenerationConfig(return_timestamps=True),
     )

-    assert genai_result.texts[0] == expected["text"]
-
-    assert len(genai_result.chunks) == len(expected["chunks"])
-
-    for opt_chunk, genai_chunk in zip(expected["chunks"], genai_result.chunks):
-        assert opt_chunk["text"] == genai_chunk.text
-        assert opt_chunk["timestamp"][0] == round(genai_chunk.start_ts, 2)
-        assert opt_chunk["timestamp"][1] == round(genai_chunk.end_ts, 2)
-

 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
-@pytest.mark.parametrize(
-    "test_sample",
-    [
-        *get_samples_from_dataset(language="en", length=10, long_form=True),
-    ],
-)
+@pytest.mark.parametrize("test_sample", get_samples_from_dataset(length=1))
 @pytest.mark.precommit
 def test_return_timestamps_max_new_tokens_short_form(model_descr, test_sample):
-    model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-    # long form audio not supported yet
-    test_sample = test_sample[: 16000 * 30]
-
-    expected = opt_pipe(
-        test_sample,
-        return_timestamps=True,
-        max_new_tokens=15,
-        generate_kwargs={"language": "en"},
-    )
-
-    genai_result = pipe.generate(
-        test_sample.tolist(),
-        max_new_tokens=15,
-        return_timestamps=True,
-        language="<|en|>",
+    run_pipeline_with_ref(
+        model_id=model_descr[0],
+        tmp_path=model_descr[1],
+        sample=test_sample,
+        generation_config=ov_genai.WhisperGenerationConfig(
+            return_timestamps=True, language="en", max_new_tokens=30
+        ),
     )

-    assert genai_result.texts[0] == expected["text"]
-
-    assert len(genai_result.chunks) == len(expected["chunks"])
-
-    for opt_chunk, genai_chunk in zip(expected["chunks"], genai_result.chunks):
-        assert opt_chunk["text"] == genai_chunk.text
-        assert opt_chunk["timestamp"][0] == round(genai_chunk.start_ts, 2)
-        if opt_chunk["timestamp"][1]:
-            assert opt_chunk["timestamp"][1] == round(genai_chunk.end_ts, 2)
-        else:
-            assert opt_chunk["timestamp"][1] == None
-            assert round(genai_chunk.end_ts, 2) == -1.0
-

-@pytest.mark.parametrize("model_descr", get_whisper_models_list(multilingual=True))
+@pytest.mark.parametrize("model_descr", get_whisper_models_list())
 @pytest.mark.parametrize(
-    "test_sample",
-    [
-        *get_samples_from_dataset(language="en", length=10, long_form=True),
-        *get_samples_from_dataset(language="fr", length=10, long_form=True),
-    ],
+    "test_sample", get_samples_from_dataset(length=10, long_form=True)
 )
 @pytest.mark.precommit
-def test_longform_audio_return_timestamps_multilingual(model_descr, test_sample):
-    model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-
-    expected = opt_pipe(
-        test_sample,
-        return_timestamps=True,
-    )
+def test_longform_audio(model_descr, test_sample):
+    _, _, hf_pipe, genai_pipe = read_whisper_model(model_descr)

     streamer_result = []

-    genai_result = pipe.generate(
+    genai_result = run_genai(
+        genai_pipe,
         test_sample,
-        return_timestamps=True,
+        config=ov_genai.WhisperGenerationConfig(return_timestamps=True),
         streamer=lambda x: streamer_result.append(x),
     )

-    assert genai_result.texts[0] == expected["text"]
-    assert "".join(streamer_result) == expected["text"]
-
-    assert len(genai_result.chunks) == len(expected["chunks"])
-
-    for opt_chunk, genai_chunk in zip(expected["chunks"], genai_result.chunks):
-        assert opt_chunk["text"] == genai_chunk.text
-        assert opt_chunk["timestamp"][0] == round(genai_chunk.start_ts, 2)
-        if opt_chunk["timestamp"][1]:
-            assert opt_chunk["timestamp"][1] == round(genai_chunk.end_ts, 2)
-        else:
-            assert opt_chunk["timestamp"][1] == None
-            assert round(genai_chunk.end_ts, 2) == -1.0
-
-
-@pytest.mark.parametrize("model_descr", get_whisper_models_list(en_only=True))
-@pytest.mark.parametrize(
-    "test_sample",
-    [
-        *get_samples_from_dataset(language="en", length=10, long_form=True),
-    ],
-)
-@pytest.mark.precommit
-def test_longform_audio_return_timestamps_en(model_descr, test_sample):
-    model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-
-    expected = opt_pipe(
-        test_sample,
-        return_timestamps=True,
-    )
-
-    streamer_result = []
-
-    genai_result = pipe.generate(
+    hf_result = run_huggingface(
+        hf_pipe,
         test_sample,
-        return_timestamps=True,
-        streamer=lambda x: streamer_result.append(x),
+        config=ov_genai.WhisperGenerationConfig(return_timestamps=True),
     )

-    assert genai_result.texts[0] == expected["text"]
-    assert "".join(streamer_result) == expected["text"]
-
-    assert len(genai_result.chunks) == len(expected["chunks"])
+    compare_results(hf_result, genai_result)

-    for opt_chunk, genai_chunk in zip(expected["chunks"], genai_result.chunks):
-        assert opt_chunk["text"] == genai_chunk.text
-        assert opt_chunk["timestamp"][0] == round(genai_chunk.start_ts, 2)
-        if opt_chunk["timestamp"][1]:
-            assert opt_chunk["timestamp"][1] == round(genai_chunk.end_ts, 2)
-        else:
-            assert opt_chunk["timestamp"][1] == None
-            assert round(genai_chunk.end_ts, 2) == -1.0
+    assert "".join(streamer_result) == hf_result["text"]


-@pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))
-@pytest.mark.parametrize(
-    "test_sample",
-    [
-        *get_samples_from_dataset(language="en", length=3, long_form=True),
-        *get_samples_from_dataset(language="sp", length=3, long_form=True),
-    ],
-)
+@pytest.mark.parametrize("model_descr", get_whisper_models_list())
 @pytest.mark.precommit
-def test_longform_audio(model_descr, test_sample):
-    model_id, path, opt_pipe, pipe = read_whisper_model(model_descr)
-
-    expected = opt_pipe(test_sample, return_timestamps=True)
-
-    genai_result = pipe.generate(test_sample)
+def test_shortform(model_descr):
+    samples = []
+    ds = datasets.load_dataset(
+        "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation"
+    )

-    assert genai_result.texts[0] == expected["text"]
+    for ds_row in ds:
+        samples.append(ds_row["audio"]["array"])

-    assert genai_result.chunks == None
+    run_pipeline_with_ref(
+        model_id=model_descr[0],
+        tmp_path=model_descr[1],
+        sample=samples,
+    )


 @pytest.mark.parametrize("model_descr", get_whisper_models_list(tiny_only=True))

From 5a82b84a643578c3b534e76088aa0f3125cad31e Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Thu, 9 Jan 2025 12:32:22 +0400
Subject: [PATCH 4/6] DOCS: unify package name usage across snippets in README.md (#1509)

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index c5cf799973..cea1e358bc 100644
--- a/README.md
+++ b/README.md
@@ -73,9 +73,9 @@ optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weigh
 ### Run generation using LLMPipeline API in Python

 ```python
-import openvino_genai as ov_genai
+import openvino_genai
 #Will run model on CPU, GPU or NPU are possible options
-pipe = ov_genai.LLMPipeline("./TinyLlama-1.1B-Chat-v1.0/", "CPU")
+pipe = openvino_genai.LLMPipeline("./TinyLlama-1.1B-Chat-v1.0/", "CPU")
 print(pipe.generate("The Sun is yellow because", max_new_tokens=100))
 ```
@@ -128,11 +128,11 @@ curl -O "https://storage.openvinotoolkit.org/test_data/images/dog.jpg"
 ```python
 import numpy as np
 import openvino as ov
-import openvino_genai as ov_genai
+import openvino_genai
 from PIL import Image

 # Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU
-pipe = ov_genai.VLMPipeline("./InternVL2-1B", "CPU")
+pipe = openvino_genai.VLMPipeline("./InternVL2-1B", "CPU")
 pipe.start_chat()

 image = Image.open("dog.jpg")

From 2d5911b13b2bfab8a0433eaa38394bb9d064680f Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Thu, 9 Jan 2025 12:33:27 +0400
Subject: [PATCH 5/6] GHA: use latest OpenVINO master (#1511)

Fix to PA has been merged
https://github.com/openvinotoolkit/openvino/pull/28332
---
 .github/workflows/genai-tools.yml              | 2 +-
 .github/workflows/linux.yml                    | 2 +-
 .github/workflows/mac.yml                      | 2 +-
 .github/workflows/stable_diffusion_1_5_cpp.yml | 4 ++--
 .github/workflows/windows.yml                  | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/genai-tools.yml b/.github/workflows/genai-tools.yml
index bd6cb46362..333bee3e11 100644
--- a/.github/workflows/genai-tools.yml
+++ b/.github/workflows/genai-tools.yml
@@ -44,7 +44,7 @@ jobs:
       with:
         platform: ubuntu22
         commit_packages_to_provide: wheels
-        revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
+        revision: latest_available_commit

   llm_bench:
     name: 'LLM bench tests'
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 0d7a5b7bae..0a991e2a54 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -52,7 +52,7 @@ jobs:
       with:
         platform: ubuntu22
        commit_packages_to_provide: wheels
-        revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
+        revision: latest_available_commit

     - name: Clone docker tag from OpenVINO repo
       uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index 062b83fc27..7cb0ff98d3 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -17,7 +17,7 @@ concurrency:

 env:
   PYTHON_VERSION: '3.10'
-  OV_BRANCH: 345163f87953fb0dd8dd590257eb7fc84378da8e
+  OV_BRANCH: 'master'
   OV_TARBALL: ''

 jobs:
diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml
index 3b01697f26..e0bf5371b3 100644
--- a/.github/workflows/stable_diffusion_1_5_cpp.yml
+++ b/.github/workflows/stable_diffusion_1_5_cpp.yml
@@ -45,7 +45,7 @@ jobs:
       with:
        platform: ubuntu22
        commit_packages_to_provide: wheels
-        revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
+        revision: latest_available_commit

   openvino_download_windows:
     name: Download OpenVINO for Windows
@@ -71,7 +71,7 @@ jobs:
       with:
        platform: windows
        commit_packages_to_provide: wheels
-        revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
+        revision: latest_available_commit

   stable_diffusion_1_5_cpp-linux:
     runs-on: ubuntu-22.04-8-cores
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 8f43af44ae..e65972110b 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -17,7 +17,7 @@ concurrency:

 env:
   PYTHON_VERSION: '3.11'
-  OV_BRANCH: 345163f87953fb0dd8dd590257eb7fc84378da8e
+  OV_BRANCH: 'master'
   OV_TARBALL: ''

 jobs:

From 7ef754c88e13f2970272628d59c9202e773ce5f1 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Thu, 9 Jan 2025 12:41:05 +0400
Subject: [PATCH 6/6] [GHA] Increase timeout for cpp-multinomial-greedy_causal_lm-ubuntu (#1510)

See
https://github.com/openvinotoolkit/openvino.genai/actions/runs/12676190622/job/35328859923?pr=1507

It fails from time to time by timeout. Let's increase it a bit to check
whether it will make GHA CI more stable

---------

Co-authored-by: Vladimir Zlobin
---
 .github/workflows/causal_lm_cpp.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index fb0c9c4b0b..b6abbefac0 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -53,17 +53,17 @@ jobs:
           wget https://huggingface.co/smangrul/tinyllama_lora_sql/resolve/main/adapter_model.safetensors?download=true -O adapter_model.safetensors
       - run: >
           . ./ov/setupvars.sh
-          && timeout 25s ./build/samples/cpp/multinomial_causal_lm/multinomial_causal_lm ./open_llama_3b_v2/ a
+          && timeout 35s ./build/samples/cpp/multinomial_causal_lm/multinomial_causal_lm ./open_llama_3b_v2/ a
         env:
           PYTHONPATH: "./build"
       - run: >
           . ./ov/setupvars.sh
-          && timeout 25s ./samples/python/multinomial_causal_lm/multinomial_causal_lm.py ./open_llama_3b_v2/ b
+          && timeout 35s ./samples/python/multinomial_causal_lm/multinomial_causal_lm.py ./open_llama_3b_v2/ b
         env:
           PYTHONPATH: "./build"
       - run: >
           . ./ov/setupvars.sh
-          && timeout 25s ./build/samples/cpp/text_generation/greedy_causal_lm ./open_llama_3b_v2/ "return 0"
+          && timeout 35s ./build/samples/cpp/text_generation/greedy_causal_lm ./open_llama_3b_v2/ "return 0"
           | diff <(timeout 25s samples/python/text_generation/greedy_causal_lm.py ./open_llama_3b_v2/ "return 0") -
         env:
           PYTHONPATH: "./build"