Skip to content

Commit

Permalink
Merge branch 'master' into at/static-llm-pipeline-enable-chat-test
Browse files Browse the repository at this point in the history
  • Loading branch information
TolyaTalamanov authored Dec 27, 2024
2 parents cc68e28 + 842c99e commit 5ed704a
Show file tree
Hide file tree
Showing 49 changed files with 2,398 additions and 1,953 deletions.
29 changes: 20 additions & 9 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@
- 'src/python/py_tokenizer.cpp'
- 'thirdparty/openvino_tokenizers'
- 'tests/python_tests/tokenizer_configs.py'
- 'tests/python_tests/test_tokenizer.py'

'category: LLM':
- 'src/cpp/include/openvino/genai/llm_pipeline.hpp'
- 'src/cpp/src/llm_pipeline.cpp'
- 'src/cpp/src/lm_encoding.hpp'
- 'src/cpp/src/lm_encoding.cpp'
- 'src/cpp/src/llm_pipeline_base.hpp'
- 'src/cpp/src/llm_pipeline_static.hpp'
- 'src/cpp/src/llm_pipeline_static.cpp'
- 'src/cpp/src/text_callback_streamer.cpp'
- 'src/cpp/src/text_callback_streamer.hpp'
- 'src/python/py_llm_pipeline.cpp'
- 'tests/python_tests/test_generate_api.py'
- 'tests/python_tests/test_chat_generate_api.py'
- 'tests/python_tests/test_llm_pipeline.py'

'category: sampling':
- 'src/cpp/include/openvino/genai/generation_config.hpp'
Expand All @@ -35,6 +38,7 @@
- 'tests/cpp/logit_filtering.cpp'
- 'tests/cpp/generate_config.cpp'
- 'tests/cpp/sampler.cpp'
- 'tests/python_tests/test_sampling.py'

'category: LoRA':
- 'src/cpp/include/openvino/genai/lora_adapter.hpp'
Expand All @@ -54,9 +58,12 @@
- 'src/cpp/include/openvino/genai/whisper_pipeline.hpp'
- 'src/cpp/src/whisper/**/*'
- 'src/cpp/src/whisper_generation_config.cpp'
- 'src/cpp/src/whisper_pipeline_base.hpp'
- 'src/cpp/src/whisper_pipeline.cpp'
- 'src/cpp/src/whisper_pipeline_static.cpp'
- 'src/cpp/src/whisper_pipeline_static.hpp'
- 'src/python/py_whisper_pipeline.cpp'
- 'tests/python_tests/test_whisper_generate_api.py'
- 'tests/python_tests/test_whisper_pipeline.py'

'category: Python API':
- 'src/python/**/*'
Expand All @@ -65,10 +72,14 @@
- 'src/include/openvino/genai/visual_language/**/*'
- 'src/cpp/src/visual_language/**/*'
- 'src/python/py_vlm_pipeline.cpp'
- 'tests/python_tests/test_vlm_api.py'
- 'tests/python_tests/test_vlm_pipeline.py'

'category: speculative decoding':
- 'src/cpp/src/speculative_decoding/**/*'
- 'tests/cpp/speculative_decoding.cpp'

'category: prompt lookup':
- 'src/cpp/src/prompt_lookup/**/*'

'category: continuous batching':
- 'src/cpp/include/openvino/genai/cache_eviction.hpp'
Expand All @@ -91,19 +102,19 @@
- 'src/cpp/src/generation_handle.cpp'
- 'src/cpp/src/generation_stream.hpp'
- 'src/cpp/src/model_runner.hpp'
- 'src/cpp/src/paged_attention_transformations.cpp'
- 'src/cpp/src/paged_attention_transformations.hpp'
- 'src/cpp/src/utils/paged_attention_transformations.cpp'
- 'src/cpp/src/utils/paged_attention_transformations.hpp'
- 'src/cpp/src/scheduler.hpp'
- 'src/cpp/src/sequence_group.cpp'
- 'src/cpp/src/sequence_group.hpp'
- 'src/cpp/src/timer.hpp'
- 'src/python/py_continuous_batching_pipeline.cpp'
- 'tests/python_tests/test_cache_optimizations.py'
- 'tests/python_tests/test_preemption.py'
- 'tests/python_tests/test_sampling.py'
- 'tests/python_tests/test_continuous_batching.py'
- 'tests/python_tests/test_kv_cache_eviction.py'
- 'tests/cpp/block_allocator.cpp'
- 'tests/cpp/block_hash_store.cpp'
- 'tests/cpp/block_manager.cpp'
- 'tests/cpp/cache_eviction.cpp'
- 'tests/cpp/cache_manager.cpp'
- 'tests/cpp/device_config.cpp'
- 'tests/cpp/scheduler.cpp'
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/causal_lm_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ concurrency:
cancel-in-progress: true

env:
l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/l_openvino_toolkit_ubuntu20_2025.0.0.dev20241205_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241205_x86_64.tgz
m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/m_openvino_toolkit_macos_12_6_2025.0.0.dev20241205_x86_64.tgz
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/w_openvino_toolkit_windows_2025.0.0.dev20241205_x86_64.zip
l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu20_2025.0.0.dev20241224_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz
m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/m_openvino_toolkit_macos_12_6_2025.0.0.dev20241224_x86_64.tgz
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/w_openvino_toolkit_windows_2025.0.0.dev20241224_x86_64.zip
jobs:
cpp-multinomial-greedy_causal_lm-ubuntu:
runs-on: ubuntu-20.04-8-cores
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/job_vlm_sample_llava.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ on:
type: string

env:
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241205_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz

jobs:
visual_language_chat_sample-ubuntu-llava:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/lcm_dreamshaper_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ concurrency:

env:
PYTHON_VERSION: '3.9'
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241205_x86_64.tgz
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/w_openvino_toolkit_windows_2025.0.0.dev20241205_x86_64.zip
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/w_openvino_toolkit_windows_2025.0.0.dev20241224_x86_64.zip
OV_INSTALL_DIR: ${{ github.workspace }}/ov

jobs:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@ jobs:
matrix:
test:
- name: 'Whisper'
cmd: 'tests/python_tests/test_whisper_generate_api.py'
cmd: 'tests/python_tests/test_whisper_pipeline.py'
- name: 'LLM & VLM'
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_generate_api.py -k "not Qwen2-0.5B-Instruct"' # Skip failed tests Qwen2-0.5B-Instruct
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py'
defaults:
run:
shell: bash
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/llm_bench-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,14 @@ jobs:
- name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux Optimum Intel
run: |
huggingface-cli download OpenVINO/LCM_Dreamshaper_v7-int8-ov --local-dir ov_models/lcm_dreamshaper_v7
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --optimum -ic 4
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --optimum --num_steps 4
- name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux with GenAI
run: |
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 -ic 4
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --num_steps 4
- name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux with GenAI and LoRA
run: |
wget -O ./ov_models/soulcard.safetensors https://civitai.com/api/download/models/72591
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --lora ./ov_models/soulcard.safetensors --lora_alphas 0.7 -ic 4
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --lora ./ov_models/soulcard.safetensors --lora_alphas 0.7 --num_steps 4
rm -rf ./ov_models/lcm_dreamshaper_v7/
    - name: Test TinyLlama-1.1B-Chat-v1.0 in Speculative Decoding mode on Linux
run: |
Expand Down Expand Up @@ -151,7 +151,7 @@ jobs:
rm -rf ./ov_models/internvl2-1B
- name: WWB Tests
run: |
pip install git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a
pip install git+https://github.com/huggingface/optimum-intel.git
GIT_CLONE_PROTECTION_ACTIVE=false PIP_PRE=1 PIP_EXTRA_INDEX_URL=https://storage.openvinotoolkit.org/simple/wheels/nightly pip install ${{ env.WWB_PATH }}
python -m pytest -v ${{ env.WWB_PATH }}/tests
stateful:
Expand Down Expand Up @@ -190,7 +190,7 @@ jobs:
- name: WWB Tests
run: |
pip install pytest
pip install git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a
pip install git+https://github.com/huggingface/optimum-intel.git
GIT_CLONE_PROTECTION_ACTIVE=false PIP_PRE=1 PIP_EXTRA_INDEX_URL=https://storage.openvinotoolkit.org/simple/wheels/nightly pip install ${{ env.WWB_PATH }}
python -m pytest -v ${{ env.WWB_PATH }}/tests
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/mac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ concurrency:

env:
PYTHON_VERSION: '3.9'
OV_BRANCH: 0080d90974ca84f9a6d359da3388a2a18a93b753
OV_BRANCH: master
OV_TARBALL: ''

jobs:
Expand Down Expand Up @@ -178,7 +178,7 @@ jobs:
if: |
always() &&
(needs.openvino_download.outputs.status == 'success' || needs.openvino_build.result == 'success')
timeout-minutes: 90
timeout-minutes: 120
defaults:
run:
shell: bash
Expand Down Expand Up @@ -225,7 +225,7 @@ jobs:
run: |
source ${OV_INSTALL_DIR}/setupvars.sh
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_chat_generate_api.py::test_set_chat_template
python -m pytest -v ./tests/python_tests/test_tokenizer.py::test_set_chat_template
env:
PYTHONPATH: "./build/:$PYTHONPATH"

Expand All @@ -235,7 +235,7 @@ jobs:
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down Expand Up @@ -290,7 +290,7 @@ jobs:
run: |
source ${OV_INSTALL_DIR}/setupvars.sh
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k test_smoke
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k test_smoke
env:
PYTHONPATH: "./build/:$PYTHONPATH"

Expand All @@ -300,7 +300,7 @@ jobs:
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
genai_package:
name: OpenVINO genai extension (install to OpenVINO package)
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ concurrency:

env:
PYTHON_VERSION: '3.11'
OV_BRANCH: 0080d90974ca84f9a6d359da3388a2a18a93b753
OV_BRANCH: master
OV_TARBALL: ''

jobs:
Expand Down Expand Up @@ -236,7 +236,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_chat_generate_api.py::test_set_chat_template
python -m pytest -v ./tests/python_tests/test_tokenizer.py::test_set_chat_template
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

Expand All @@ -245,7 +245,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down Expand Up @@ -301,7 +301,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k test_smoke
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k test_smoke
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

Expand All @@ -310,7 +310,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
genai_python_lib_vlm:
name: OpenVINO genai VLM tests (cmake + wheel)
Expand Down Expand Up @@ -366,7 +366,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_vlm_api.py
python -m pytest -v ./tests/python_tests/test_vlm_pipeline.py
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,10 @@ int main(int argc, char* argv[]) try {

std::string device = "CPU";

ov::genai::SchedulerConfig scheduler_config;
scheduler_config.cache_size = 5;

ov::genai::LLMPipeline pipe(
model_path,
device,
ov::genai::prompt_lookup(true),
ov::genai::scheduler_config(scheduler_config));
ov::genai::prompt_lookup(true));

auto streamer = [](std::string subword) {
std::cout << subword << std::flush;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,10 @@ int main(int argc, char* argv[]) try {
    // Please set the device for the main model in the `LLMPipeline` constructor and in `ov::genai::draft_model` for the draft model.
std::string main_device = "CPU", draft_device = "CPU";

ov::genai::SchedulerConfig scheduler_config;
scheduler_config.cache_size = 5;

ov::genai::LLMPipeline pipe(
main_model_path,
main_device,
ov::genai::draft_model(draft_model_path, draft_device),
ov::genai::scheduler_config(scheduler_config));
ov::genai::draft_model(draft_model_path, draft_device));

auto streamer = [](std::string subword) {
std::cout << subword << std::flush;
Expand Down
4 changes: 2 additions & 2 deletions samples/export-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
openvino-tokenizers~=2025.0.0.0.dev
optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a
optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
numpy<2.0.0; sys_platform == 'darwin'
einops==0.8.0 # For Qwen
transformers_stream_generator==0.0.5 # For Qwen
diffusers==0.31.0 # For image generation pipelines
diffusers==0.32.1 # For image generation pipelines
timm==1.0.12 # For exporting InternVL2
torchvision # For visual language models
transformers>=4.43 # For Whisper
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,8 @@ def main():
args = parser.parse_args()

device = 'CPU'
scheduler_config = openvino_genai.SchedulerConfig()
# cache params
scheduler_config.cache_size = 2

pipe = openvino_genai.LLMPipeline(args.model_dir, device, scheduler_config=scheduler_config, prompt_lookup=True)
pipe = openvino_genai.LLMPipeline(args.model_dir, device, prompt_lookup=True)

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,9 @@ def main():
main_device = 'CPU' # GPU can be used as well
draft_device = 'CPU'

scheduler_config = openvino_genai.SchedulerConfig()
# cache params
scheduler_config.cache_size = 2

draft_model = openvino_genai.draft_model(args.draft_model_dir, draft_device)

pipe = openvino_genai.LLMPipeline(args.model_dir, main_device, scheduler_config=scheduler_config, draft_model=draft_model)
pipe = openvino_genai.LLMPipeline(args.model_dir, main_device, draft_model=draft_model)

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
Expand Down
Loading

0 comments on commit 5ed704a

Please sign in to comment.