diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index fc63129281..67c6cc8fdb 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -232,11 +232,10 @@ jobs: cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j - - name: Test bindings + - name: Install tokenizers run: | . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1" python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels - python -m pytest -v ./tests/python_tests/test_tokenizer.py::test_set_chat_template env: PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that. diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp index 3e378e78cf..b34217beb8 100644 --- a/src/cpp/src/llm_pipeline.cpp +++ b/src/cpp/src/llm_pipeline.cpp @@ -545,7 +545,8 @@ class ContinuousBatchingAdapter final : public LLMPipelineImplBase { tokenizer, scheduler_config, device, - plugin_config} { + plugin_config + } { m_generation_config = m_impl.get_config(); } @@ -576,7 +577,8 @@ class ContinuousBatchingAdapter final : public LLMPipelineImplBase { m_tokenizer, scheduler_config, device, - plugin_config} { + plugin_config + } { m_generation_config = m_impl.get_config(); } @@ -703,6 +705,7 @@ ov::genai::LLMPipeline::LLMPipeline( const ov::InferRequest& request, const ov::genai::Tokenizer& tokenizer, OptionalGenerationConfig generation_config) { + OPENVINO_THROW("Not supported"); auto start_time = std::chrono::steady_clock::now(); m_pimpl = std::make_unique<StatefulLLMPipeline>(request, tokenizer, generation_config); auto stop_time = std::chrono::steady_clock::now(); @@ -722,6 +725,17 @@ ov::genai::LLMPipeline::LLMPipeline( m_pimpl = std::make_unique<ContinuousBatchingAdapter>(models_path, tokenizer, scheduler_config, device, plugin_config); } else if (device == "NPU") { m_pimpl = 
std::make_unique<StaticLLMPipeline>(models_path, tokenizer, device, properties); + } else if (true) { + SchedulerConfig scheduler_config; + scheduler_config.cache_size = 1; + scheduler_config.enable_prefix_caching = false; + m_pimpl = std::make_unique<ContinuousBatchingAdapter>( + models_path, + tokenizer, + scheduler_config, + device, + properties + ); } else { m_pimpl = std::make_unique<StatefulLLMPipeline>(models_path, tokenizer, device, properties); } @@ -742,6 +756,16 @@ ov::genai::LLMPipeline::LLMPipeline( m_pimpl = std::make_unique<ContinuousBatchingAdapter>(models_path, scheduler_config, device, plugin_config); } else if (device == "NPU") { m_pimpl = std::make_unique<StaticLLMPipeline>(models_path, device, config); + } else if (true) { + SchedulerConfig scheduler_config; + scheduler_config.cache_size = 1; + scheduler_config.enable_prefix_caching = false; + m_pimpl = std::make_unique<ContinuousBatchingAdapter>( + models_path, + scheduler_config, + device, + config + ); } else { m_pimpl = std::make_unique<StatefulLLMPipeline>(models_path, device, config); }