Skip to content

Commit

Permalink
Merge branch 'master' into at/static-llm-pipeline-enable-chat-test
Browse files Browse the repository at this point in the history
  • Loading branch information
TolyaTalamanov authored Dec 27, 2024
2 parents cc68e28 + 842c99e commit 5ed704a
Show file tree
Hide file tree
Showing 49 changed files with 2,398 additions and 1,953 deletions.
29 changes: 20 additions & 9 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@
- 'src/python/py_tokenizer.cpp'
- 'thirdparty/openvino_tokenizers'
- 'tests/python_tests/tokenizer_configs.py'
- 'tests/python_tests/test_tokenizer.py'

'category: LLM':
- 'src/cpp/include/openvino/genai/llm_pipeline.hpp'
- 'src/cpp/src/llm_pipeline.cpp'
- 'src/cpp/src/lm_encoding.hpp'
- 'src/cpp/src/lm_encoding.cpp'
- 'src/cpp/src/llm_pipeline_base.hpp'
- 'src/cpp/src/llm_pipeline_static.hpp'
- 'src/cpp/src/llm_pipeline_static.cpp'
- 'src/cpp/src/text_callback_streamer.cpp'
- 'src/cpp/src/text_callback_streamer.hpp'
- 'src/python/py_llm_pipeline.cpp'
- 'tests/python_tests/test_generate_api.py'
- 'tests/python_tests/test_chat_generate_api.py'
- 'tests/python_tests/test_llm_pipeline.py'

'category: sampling':
- 'src/cpp/include/openvino/genai/generation_config.hpp'
Expand All @@ -35,6 +38,7 @@
- 'tests/cpp/logit_filtering.cpp'
- 'tests/cpp/generate_config.cpp'
- 'tests/cpp/sampler.cpp'
- 'tests/python_tests/test_sampling.py'

'category: LoRA':
- 'src/cpp/include/openvino/genai/lora_adapter.hpp'
Expand All @@ -54,9 +58,12 @@
- 'src/cpp/include/openvino/genai/whisper_pipeline.hpp'
- 'src/cpp/src/whisper/**/*'
- 'src/cpp/src/whisper_generation_config.cpp'
- 'src/cpp/src/whisper_pipeline_base.hpp'
- 'src/cpp/src/whisper_pipeline.cpp'
- 'src/cpp/src/whisper_pipeline_static.cpp'
- 'src/cpp/src/whisper_pipeline_static.hpp'
- 'src/python/py_whisper_pipeline.cpp'
- 'tests/python_tests/test_whisper_generate_api.py'
- 'tests/python_tests/test_whisper_pipeline.py'

'category: Python API':
- 'src/python/**/*'
Expand All @@ -65,10 +72,14 @@
- 'src/include/openvino/genai/visual_language/**/*'
- 'src/cpp/src/visual_language/**/*'
- 'src/python/py_vlm_pipeline.cpp'
- 'tests/python_tests/test_vlm_api.py'
- 'tests/python_tests/test_vlm_pipeline.py'

'category: speculative decoding':
- 'src/cpp/src/speculative_decoding/**/*'
- 'tests/cpp/speculative_decoding.cpp'

'category: prompt lookup':
- 'src/cpp/src/prompt_lookup/**/*'

'category: continuous batching':
- 'src/cpp/include/openvino/genai/cache_eviction.hpp'
Expand All @@ -91,19 +102,19 @@
- 'src/cpp/src/generation_handle.cpp'
- 'src/cpp/src/generation_stream.hpp'
- 'src/cpp/src/model_runner.hpp'
- 'src/cpp/src/paged_attention_transformations.cpp'
- 'src/cpp/src/paged_attention_transformations.hpp'
- 'src/cpp/src/utils/paged_attention_transformations.cpp'
- 'src/cpp/src/utils/paged_attention_transformations.hpp'
- 'src/cpp/src/scheduler.hpp'
- 'src/cpp/src/sequence_group.cpp'
- 'src/cpp/src/sequence_group.hpp'
- 'src/cpp/src/timer.hpp'
- 'src/python/py_continuous_batching_pipeline.cpp'
- 'tests/python_tests/test_cache_optimizations.py'
- 'tests/python_tests/test_preemption.py'
- 'tests/python_tests/test_sampling.py'
- 'tests/python_tests/test_continuous_batching.py'
- 'tests/python_tests/test_kv_cache_eviction.py'
- 'tests/cpp/block_allocator.cpp'
- 'tests/cpp/block_hash_store.cpp'
- 'tests/cpp/block_manager.cpp'
- 'tests/cpp/cache_eviction.cpp'
- 'tests/cpp/cache_manager.cpp'
- 'tests/cpp/device_config.cpp'
- 'tests/cpp/scheduler.cpp'
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/causal_lm_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ concurrency:
cancel-in-progress: true

env:
l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/l_openvino_toolkit_ubuntu20_2025.0.0.dev20241205_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241205_x86_64.tgz
m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/m_openvino_toolkit_macos_12_6_2025.0.0.dev20241205_x86_64.tgz
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/w_openvino_toolkit_windows_2025.0.0.dev20241205_x86_64.zip
l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu20_2025.0.0.dev20241224_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz
m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/m_openvino_toolkit_macos_12_6_2025.0.0.dev20241224_x86_64.tgz
w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/w_openvino_toolkit_windows_2025.0.0.dev20241224_x86_64.zip
jobs:
cpp-multinomial-greedy_causal_lm-ubuntu:
runs-on: ubuntu-20.04-8-cores
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/job_vlm_sample_llava.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ on:
type: string

env:
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241205_x86_64.tgz
l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz

jobs:
visual_language_chat_sample-ubuntu-llava:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/lcm_dreamshaper_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ concurrency:

env:
PYTHON_VERSION: '3.9'
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241205_x86_64.tgz
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17539-6abe2e39391/w_openvino_toolkit_windows_2025.0.0.dev20241205_x86_64.zip
LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/l_openvino_toolkit_ubuntu22_2025.0.0.dev20241224_x86_64.tgz
WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.0.0-17709-688f0428cfc/w_openvino_toolkit_windows_2025.0.0.dev20241224_x86_64.zip
OV_INSTALL_DIR: ${{ github.workspace }}/ov

jobs:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@ jobs:
matrix:
test:
- name: 'Whisper'
cmd: 'tests/python_tests/test_whisper_generate_api.py'
cmd: 'tests/python_tests/test_whisper_pipeline.py'
- name: 'LLM & VLM'
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_generate_api.py -k "not Qwen2-0.5B-Instruct"' # Skip failed tests Qwen2-0.5B-Instruct
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py'
defaults:
run:
shell: bash
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/llm_bench-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,14 @@ jobs:
- name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux Optimum Intel
run: |
huggingface-cli download OpenVINO/LCM_Dreamshaper_v7-int8-ov --local-dir ov_models/lcm_dreamshaper_v7
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --optimum -ic 4
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --optimum --num_steps 4
- name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux with GenAI
run: |
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 -ic 4
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --num_steps 4
- name: Test OpenVINO/LCM_Dreamshaper_v7-int8-ov on Linux with GenAI and LoRA
run: |
wget -O ./ov_models/soulcard.safetensors https://civitai.com/api/download/models/72591
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --lora ./ov_models/soulcard.safetensors --lora_alphas 0.7 -ic 4
python ./tools/llm_bench/benchmark.py -m ./ov_models/lcm_dreamshaper_v7/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --lora ./ov_models/soulcard.safetensors --lora_alphas 0.7 --num_steps 4
rm -rf ./ov_models/lcm_dreamshaper_v7/
    - name: Test TinyLlama-1.1B-Chat-v1.0 in Speculative Decoding mode on Linux
run: |
Expand Down Expand Up @@ -151,7 +151,7 @@ jobs:
rm -rf ./ov_models/internvl2-1B
- name: WWB Tests
run: |
pip install git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a
pip install git+https://github.com/huggingface/optimum-intel.git
GIT_CLONE_PROTECTION_ACTIVE=false PIP_PRE=1 PIP_EXTRA_INDEX_URL=https://storage.openvinotoolkit.org/simple/wheels/nightly pip install ${{ env.WWB_PATH }}
python -m pytest -v ${{ env.WWB_PATH }}/tests
stateful:
Expand Down Expand Up @@ -190,7 +190,7 @@ jobs:
- name: WWB Tests
run: |
pip install pytest
pip install git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a
pip install git+https://github.com/huggingface/optimum-intel.git
GIT_CLONE_PROTECTION_ACTIVE=false PIP_PRE=1 PIP_EXTRA_INDEX_URL=https://storage.openvinotoolkit.org/simple/wheels/nightly pip install ${{ env.WWB_PATH }}
python -m pytest -v ${{ env.WWB_PATH }}/tests
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/mac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ concurrency:

env:
PYTHON_VERSION: '3.9'
OV_BRANCH: 0080d90974ca84f9a6d359da3388a2a18a93b753
OV_BRANCH: master
OV_TARBALL: ''

jobs:
Expand Down Expand Up @@ -178,7 +178,7 @@ jobs:
if: |
always() &&
(needs.openvino_download.outputs.status == 'success' || needs.openvino_build.result == 'success')
timeout-minutes: 90
timeout-minutes: 120
defaults:
run:
shell: bash
Expand Down Expand Up @@ -225,7 +225,7 @@ jobs:
run: |
source ${OV_INSTALL_DIR}/setupvars.sh
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_chat_generate_api.py::test_set_chat_template
python -m pytest -v ./tests/python_tests/test_tokenizer.py::test_set_chat_template
env:
PYTHONPATH: "./build/:$PYTHONPATH"

Expand All @@ -235,7 +235,7 @@ jobs:
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down Expand Up @@ -290,7 +290,7 @@ jobs:
run: |
source ${OV_INSTALL_DIR}/setupvars.sh
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k test_smoke
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k test_smoke
env:
PYTHONPATH: "./build/:$PYTHONPATH"

Expand All @@ -300,7 +300,7 @@ jobs:
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
genai_package:
name: OpenVINO genai extension (install to OpenVINO package)
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ concurrency:

env:
PYTHON_VERSION: '3.11'
OV_BRANCH: 0080d90974ca84f9a6d359da3388a2a18a93b753
OV_BRANCH: master
OV_TARBALL: ''

jobs:
Expand Down Expand Up @@ -236,7 +236,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_chat_generate_api.py::test_set_chat_template
python -m pytest -v ./tests/python_tests/test_tokenizer.py::test_set_chat_template
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

Expand All @@ -245,7 +245,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down Expand Up @@ -301,7 +301,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k test_smoke
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k test_smoke
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

Expand All @@ -310,7 +310,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
genai_python_lib_vlm:
name: OpenVINO genai VLM tests (cmake + wheel)
Expand Down Expand Up @@ -366,7 +366,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_vlm_api.py
python -m pytest -v ./tests/python_tests/test_vlm_pipeline.py
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,10 @@ int main(int argc, char* argv[]) try {

std::string device = "CPU";

ov::genai::SchedulerConfig scheduler_config;
scheduler_config.cache_size = 5;

ov::genai::LLMPipeline pipe(
model_path,
device,
ov::genai::prompt_lookup(true),
ov::genai::scheduler_config(scheduler_config));
ov::genai::prompt_lookup(true));

auto streamer = [](std::string subword) {
std::cout << subword << std::flush;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,10 @@ int main(int argc, char* argv[]) try {
    // Please set the device for the main model in the `LLMPipeline` constructor and in `ov::genai::draft_model` for the draft model.
std::string main_device = "CPU", draft_device = "CPU";

ov::genai::SchedulerConfig scheduler_config;
scheduler_config.cache_size = 5;

ov::genai::LLMPipeline pipe(
main_model_path,
main_device,
ov::genai::draft_model(draft_model_path, draft_device),
ov::genai::scheduler_config(scheduler_config));
ov::genai::draft_model(draft_model_path, draft_device));

auto streamer = [](std::string subword) {
std::cout << subword << std::flush;
Expand Down
4 changes: 2 additions & 2 deletions samples/export-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
openvino-tokenizers~=2025.0.0.0.dev
optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a
optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
numpy<2.0.0; sys_platform == 'darwin'
einops==0.8.0 # For Qwen
transformers_stream_generator==0.0.5 # For Qwen
diffusers==0.31.0 # For image generation pipelines
diffusers==0.32.1 # For image generation pipelines
timm==1.0.12 # For exporting InternVL2
torchvision # For visual language models
transformers>=4.43 # For Whisper
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,8 @@ def main():
args = parser.parse_args()

device = 'CPU'
scheduler_config = openvino_genai.SchedulerConfig()
# cache params
scheduler_config.cache_size = 2

pipe = openvino_genai.LLMPipeline(args.model_dir, device, scheduler_config=scheduler_config, prompt_lookup=True)
pipe = openvino_genai.LLMPipeline(args.model_dir, device, prompt_lookup=True)

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,9 @@ def main():
main_device = 'CPU' # GPU can be used as well
draft_device = 'CPU'

scheduler_config = openvino_genai.SchedulerConfig()
# cache params
scheduler_config.cache_size = 2

draft_model = openvino_genai.draft_model(args.draft_model_dir, draft_device)

pipe = openvino_genai.LLMPipeline(args.model_dir, main_device, scheduler_config=scheduler_config, draft_model=draft_model)
pipe = openvino_genai.LLMPipeline(args.model_dir, main_device, draft_model=draft_model)

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
Expand Down
Loading

0 comments on commit 5ed704a

Please sign in to comment.