diff --git a/.github/workflows/genai-tools.yml b/.github/workflows/genai-tools.yml
index 333bee3e11..bd6cb46362 100644
--- a/.github/workflows/genai-tools.yml
+++ b/.github/workflows/genai-tools.yml
@@ -44,7 +44,7 @@ jobs:
with:
platform: ubuntu22
commit_packages_to_provide: wheels
- revision: latest_available_commit
+ revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
llm_bench:
name: 'LLM bench tests'
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 0a991e2a54..0d7a5b7bae 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -52,7 +52,7 @@ jobs:
with:
platform: ubuntu22
commit_packages_to_provide: wheels
- revision: latest_available_commit
+ revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
- name: Clone docker tag from OpenVINO repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index 5402b79e70..062b83fc27 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -17,7 +17,7 @@ concurrency:
env:
PYTHON_VERSION: '3.10'
- OV_BRANCH: master
+ OV_BRANCH: 345163f87953fb0dd8dd590257eb7fc84378da8e
OV_TARBALL: ''
jobs:
diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml
index e0bf5371b3..3b01697f26 100644
--- a/.github/workflows/stable_diffusion_1_5_cpp.yml
+++ b/.github/workflows/stable_diffusion_1_5_cpp.yml
@@ -45,7 +45,7 @@ jobs:
with:
platform: ubuntu22
commit_packages_to_provide: wheels
- revision: latest_available_commit
+ revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
openvino_download_windows:
name: Download OpenVINO for Windows
@@ -71,7 +71,7 @@ jobs:
with:
platform: windows
commit_packages_to_provide: wheels
- revision: latest_available_commit
+ revision: 345163f87953fb0dd8dd590257eb7fc84378da8e
stable_diffusion_1_5_cpp-linux:
runs-on: ubuntu-22.04-8-cores
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index e396671b2c..95a713d7a1 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -17,7 +17,7 @@ concurrency:
env:
PYTHON_VERSION: '3.11'
- OV_BRANCH: master
+ OV_BRANCH: 345163f87953fb0dd8dd590257eb7fc84378da8e
OV_TARBALL: ''
jobs:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 181132e210..3a67a24bab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,6 +60,7 @@ if(NOT OpenVINODeveloperPackage_FOUND)
endif()
include(cmake/features.cmake)
+include(cmake/version.cmake)
if(ENABLE_PYTHON)
# the following two calls are required for cross-compilation
diff --git a/README.md b/README.md
index be3de5e8ce..c5cf799973 100644
--- a/README.md
+++ b/README.md
@@ -133,13 +133,15 @@ from PIL import Image
# Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU
pipe = ov_genai.VLMPipeline("./InternVL2-1B", "CPU")
+pipe.start_chat()
image = Image.open("dog.jpg")
image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)
image_data = ov.Tensor(image_data)
prompt = "Can you describe the image?"
-print(pipe.generate(prompt, image=image_data, max_new_tokens=100))
+result = pipe.generate(prompt, image=image_data, max_new_tokens=100)
+print(result.texts[0])
```
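The snippet above now opens a chat session with `pipe.start_chat()` but leaves it open. A minimal follow-up, assuming the session should be closed explicitly once the conversation is over:

```python
# Close the chat session opened by start_chat() above (sketch; finish_chat() is the matching call).
pipe.finish_chat()
```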
### Run generation using VLMPipeline in C++
@@ -392,7 +394,7 @@ See [here](https://openvinotoolkit.github.io/openvino_notebooks/?search=Automati
## Additional materials
-- [List of supported models](https://github.com/openvinotoolkit/openvino.genai/blob/master/src/docs/SUPPORTED_MODELS.md) (NOTE: models can work, but were not tried yet)
+- [List of supported models](https://github.com/openvinotoolkit/openvino.genai/blob/master/SUPPORTED_MODELS.md) (NOTE: models can work, but were not tried yet)
- [OpenVINO Generative AI workflow](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html)
- [Optimum-intel and OpenVINO](https://huggingface.co/docs/optimum/intel/openvino/export)
diff --git a/src/docs/SUPPORTED_MODELS.md b/SUPPORTED_MODELS.md
similarity index 95%
rename from src/docs/SUPPORTED_MODELS.md
rename to SUPPORTED_MODELS.md
index 44da29ced4..6b45f47890 100644
--- a/src/docs/SUPPORTED_MODELS.md
+++ b/SUPPORTED_MODELS.md
@@ -147,6 +147,8 @@
+> [!NOTE]
+> LoRA adapters are supported.
The pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature. The model is required to have the following inputs after the conversion:
1. `input_ids` contains the tokens.
@@ -165,12 +167,14 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
Latent Consistency Model |
Supported |
Supported |
+ Supported |
SimianLuo/LCM_Dreamshaper_v7
@@ -181,6 +185,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
Stable Diffusion |
Supported |
Supported |
+ Supported |
CompVis/stable-diffusion-v1-1
@@ -213,6 +218,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
Stable Diffusion XL |
Supported |
Supported |
+ Supported |
stabilityai/stable-diffusion-xl-base-0.9
@@ -225,6 +231,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
Stable Diffusion 3 |
Supported |
Not supported |
+ Not supported |
stabilityai/stable-diffusion-3-medium-diffusers
@@ -237,6 +244,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
Flux |
Supported |
Not supported |
+ Not supported |
black-forest-labs/FLUX.1-schnell
@@ -260,10 +268,12 @@ In addition to image generation models, `InpaintingPipeline` supports specialize
Architecture |
+ LoRA support |
Example HuggingFace Models |
Stable Diffusion |
+ Supported |
|
Stable Diffusion XL |
+ Supported |
|
-
+
@@ -292,11 +311,13 @@ In addition to image generation models, `InpaintingPipeline` supports specialize
Architecture |
Models |
+ LoRA support |
Example HuggingFace Models |
InternVL2 |
InternVL2 |
+ Not supported |
OpenGVLab/InternVL2-1B
@@ -309,6 +330,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize
LLaVA |
LLaVA-v1.5 |
+ Not supported |
llava-hf/llava-1.5-7b-hf
@@ -318,6 +340,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize
LLaVA-NeXT |
LLaVa-v1.6 |
+ Not supported |
llava-hf/llava-v1.6-mistral-7b-hf
@@ -329,6 +352,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize
MiniCPMV |
MiniCPM-V-2_6 |
+ Not supported |
openbmb/MiniCPM-V-2_6
@@ -345,11 +369,13 @@ In addition to image generation models, `InpaintingPipeline` supports specialize
Architecture |
Models |
+ LoRA support |
Example HuggingFace Models |
WhisperForConditionalGeneration |
Whisper |
+ Not supported |
openai/whisper-tiny
@@ -366,6 +392,7 @@ In addition to image generation models, `InpaintingPipeline` supports specialize
|
Distil-Whisper |
+ Not supported |
distil-whisper/distil-small.en
diff --git a/cmake/templates/__version__.py.in b/cmake/templates/__version__.py.in
deleted file mode 100644
index ce8e01a246..0000000000
--- a/cmake/templates/__version__.py.in
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-# Will be overwritten by cmake.
-__version__ = "@OpenVINOGenAI_VERSION@"
diff --git a/cmake/templates/version.cpp.in b/cmake/templates/version.cpp.in
new file mode 100644
index 0000000000..f6015832f9
--- /dev/null
+++ b/cmake/templates/version.cpp.in
@@ -0,0 +1,19 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "openvino/genai/version.hpp"
+
+namespace ov {
+namespace genai {
+
+const Version get_version() {
+ const static Version version = {
+ "@OpenVINOGenAI_FULL_VERSION@",
+ "OpenVINO GenAI version",
+ };
+
+ return version;
+}
+
+} // namespace genai
+} // namespace ov
diff --git a/cmake/templates/version.hpp.in b/cmake/templates/version.hpp.in
new file mode 100644
index 0000000000..34120ef632
--- /dev/null
+++ b/cmake/templates/version.hpp.in
@@ -0,0 +1,34 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "openvino/core/version.hpp"
+#include "openvino/genai/visibility.hpp"
+
+/**
+ * OpenVINO GenAI major version
+ */
+#define OPENVINO_GENAI_VERSION_MAJOR @OpenVINOGenAI_VERSION_MAJOR@
+
+/**
+ * OpenVINO GenAI minor version
+ */
+#define OPENVINO_GENAI_VERSION_MINOR @OpenVINOGenAI_VERSION_MINOR@
+
+/**
+ * OpenVINO GenAI patch version
+ */
+#define OPENVINO_GENAI_VERSION_PATCH @OpenVINOGenAI_VERSION_PATCH@
+
+namespace ov {
+namespace genai {
+
+/**
+ * Returns OpenVINO GenAI full version including git commit and hash information in form of:
+ * <MAJOR>.<MINOR>.<PATCH>.<REVISION>-<COMMIT NUMBER>-<COMMIT HASH>[-<BRANCH NAME>]
+ */
+OPENVINO_EXTERN_C OPENVINO_GENAI_EXPORTS const ov::Version OPENVINO_CDECL get_version();
+
+} // namespace genai
+} // namespace ov
diff --git a/cmake/version.cmake b/cmake/version.cmake
new file mode 100644
index 0000000000..b9b51e8fe2
--- /dev/null
+++ b/cmake/version.cmake
@@ -0,0 +1,72 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+find_package(Git QUIET)
+
+function(ov_genai_branch_name VAR)
+ if(GIT_FOUND)
+ execute_process(
+ COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD
+ WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR}
+ OUTPUT_VARIABLE GIT_BRANCH
+ RESULT_VARIABLE EXIT_CODE
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+ if(EXIT_CODE EQUAL 0)
+ set(${VAR} ${GIT_BRANCH} PARENT_SCOPE)
+ endif()
+ endif()
+endfunction()
+
+function(ov_genai_commit_hash VAR)
+ if(GIT_FOUND)
+ execute_process(
+ COMMAND ${GIT_EXECUTABLE} rev-parse --short=11 HEAD
+ WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR}
+ OUTPUT_VARIABLE GIT_COMMIT_HASH
+ RESULT_VARIABLE EXIT_CODE
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+ if(EXIT_CODE EQUAL 0)
+ set(${VAR} ${GIT_COMMIT_HASH} PARENT_SCOPE)
+ endif()
+ endif()
+endfunction()
+
+function(ov_genai_commit_number VAR)
+ set(GIT_COMMIT_NUMBER_FOUND OFF)
+ if(GIT_FOUND)
+ execute_process(
+ COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
+ WORKING_DIRECTORY ${OpenVINOGenAI_SOURCE_DIR}
+ OUTPUT_VARIABLE GIT_COMMIT_NUMBER
+ RESULT_VARIABLE EXIT_CODE
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+ if(EXIT_CODE EQUAL 0)
+ set(GIT_COMMIT_NUMBER_FOUND ON)
+ set(${VAR} ${GIT_COMMIT_NUMBER} PARENT_SCOPE)
+ endif()
+ endif()
+ if(NOT GIT_COMMIT_NUMBER_FOUND)
+ # set zeros since git is not available
+ set(${VAR} "000" PARENT_SCOPE)
+ endif()
+endfunction()
+
+function(ov_genai_full_version full_version)
+ if(GIT_FOUND)
+ ov_genai_branch_name(GIT_BRANCH)
+ ov_genai_commit_hash(GIT_COMMIT_HASH)
+ ov_genai_commit_number(GIT_COMMIT_NUMBER)
+
+ if(NOT GIT_BRANCH MATCHES "^(master|HEAD)$")
+ set(GIT_BRANCH_POSTFIX "-${GIT_BRANCH}")
+ endif()
+
+ set(${full_version} "${OpenVINOGenAI_VERSION}-${GIT_COMMIT_NUMBER}-${GIT_COMMIT_HASH}${GIT_BRANCH_POSTFIX}" PARENT_SCOPE)
+ else()
+ set(${full_version} "${OpenVINOGenAI_VERSION}" PARENT_SCOPE)
+ endif()
+endfunction()
+
+ov_genai_full_version(OpenVINOGenAI_FULL_VERSION)
+message(STATUS "OpenVINO GenAI full version: ${OpenVINOGenAI_FULL_VERSION}")
diff --git a/llm_bench/python/README.md b/llm_bench/python/README.md
deleted file mode 100644
index 272ed11d1b..0000000000
--- a/llm_bench/python/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Benchmarking Script for Large Language Models
-
-> [!IMPORTANT]
-> LLM bench code was moved to [tools](../../tools/llm_bench/) directory. Please navigate to the new directory for continue of tool usage.
\ No newline at end of file
diff --git a/llm_bench/python/who_what_benchmark/README.md b/llm_bench/python/who_what_benchmark/README.md
deleted file mode 100644
index 414b4d9342..0000000000
--- a/llm_bench/python/who_what_benchmark/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Simple Accuracy Benchmark for Generative AI models
-
-> [!IMPORTANT]
-> Who What Benchmark code was moved to [tools](../../../tools/who_what_benchmark/) directory. Please navigate to the new directory for continue of tool usage.
\ No newline at end of file
diff --git a/samples/cpp/visual_language_chat/README.md b/samples/cpp/visual_language_chat/README.md
index 39364d51ee..73baf0088a 100644
--- a/samples/cpp/visual_language_chat/README.md
+++ b/samples/cpp/visual_language_chat/README.md
@@ -29,7 +29,7 @@ Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/o
Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model `llava-hf/llava-v1.6-mistral-7b-hf` can benefit from being run on a dGPU. Modify the source code to change the device for inference to the `GPU`.
-See [SUPPORTED_MODELS.md](../../../src/docs/SUPPORTED_MODELS.md#visual-language-models) for the list of supported models.
+See [SUPPORTED_MODELS.md](../../../SUPPORTED_MODELS.md#visual-language-models) for the list of supported models.
## Run benchmark:
diff --git a/samples/cpp/whisper_speech_recognition/README.md b/samples/cpp/whisper_speech_recognition/README.md
index d649266613..2ea3322dee 100644
--- a/samples/cpp/whisper_speech_recognition/README.md
+++ b/samples/cpp/whisper_speech_recognition/README.md
@@ -31,7 +31,7 @@ Output:
timestamps: [0, 2] text: How are you doing today?
```
-See [SUPPORTED_MODELS.md](../../../src/docs/SUPPORTED_MODELS.md#whisper-models) for the list of supported models.
+See [SUPPORTED_MODELS.md](../../../SUPPORTED_MODELS.md#whisper-models) for the list of supported models.
# Whisper pipeline usage
diff --git a/samples/python/multinomial_causal_lm/multinomial_causal_lm.py b/samples/python/multinomial_causal_lm/multinomial_causal_lm.py
index 953388ed6a..5ec9d54601 100755
--- a/samples/python/multinomial_causal_lm/multinomial_causal_lm.py
+++ b/samples/python/multinomial_causal_lm/multinomial_causal_lm.py
@@ -90,7 +90,7 @@ def put(self, token_id: int) -> bool:
word = text[self.print_len:]
self.tokens_cache = []
self.print_len = 0
- elif len(text) >= 3 and text[-3:] == chr(65533):
+ elif len(text) >= 3 and text[-1] == chr(65533):
# Don't print incomplete text.
pass
elif len(text) > self.print_len:
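`chr(65533)` is the single Unicode replacement character (U+FFFD), so comparing a three-character slice against it could never be true and the "incomplete text" branch was unreachable; checking only the last character restores the intended behaviour. A quick illustration of the difference:

```python
# U+FFFD marks bytes the detokenizer could not yet decode into a complete character.
text = "Hel" + chr(65533)
print(text[-3:] == chr(65533))  # False - the old check never fires
print(text[-1] == chr(65533))   # True  - the new check detects the incomplete tail
```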
diff --git a/samples/python/whisper_speech_recognition/README.md b/samples/python/whisper_speech_recognition/README.md
index aeb46444bf..5f373df2b7 100644
--- a/samples/python/whisper_speech_recognition/README.md
+++ b/samples/python/whisper_speech_recognition/README.md
@@ -38,7 +38,7 @@ Output:
timestamps: [0, 2] text: How are you doing today?
```
-See [SUPPORTED_MODELS.md](../../../src/docs/SUPPORTED_MODELS.md#whisper-models) for the list of supported models.
+See [SUPPORTED_MODELS.md](../../../SUPPORTED_MODELS.md#whisper-models) for the list of supported models.
# Whisper pipeline usage
diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt
index 24367c17ce..e954037daf 100644
--- a/src/cpp/CMakeLists.txt
+++ b/src/cpp/CMakeLists.txt
@@ -54,9 +54,18 @@ FetchContent_MakeAvailable(safetensors.h)
ov_genai_build_jinja2cpp()
+# generate version files
+
+configure_file("${OpenVINOGenAI_SOURCE_DIR}/cmake/templates/version.hpp.in"
+ "${CMAKE_CURRENT_BINARY_DIR}/openvino/genai/version.hpp" @ONLY)
+
+configure_file("${OpenVINOGenAI_SOURCE_DIR}/cmake/templates/version.cpp.in"
+ "${CMAKE_CURRENT_BINARY_DIR}/version.cpp" @ONLY)
+
# Library
file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c")
+list(APPEND SOURCE_FILES "${CMAKE_CURRENT_BINARY_DIR}/version.cpp")
set(TARGET_NAME openvino_genai)
@@ -68,7 +77,9 @@ if(TARGET openvino_tokenizers)
endif()
target_include_directories(${TARGET_NAME}
-    PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>" "$<INSTALL_INTERFACE:runtime/include>"
+    PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
+           "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>"
+           "$<INSTALL_INTERFACE:runtime/include>"
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")
target_include_directories(${TARGET_NAME} SYSTEM PRIVATE "${safetensors.h_SOURCE_DIR}")
@@ -101,7 +112,7 @@ endif()
if(OpenVINODeveloperPackage_FOUND)
# must be called after all target_link_libraries
- # ov_add_api_validator_post_build_step(TARGET ${TARGET_NAME})
+ ov_add_api_validator_post_build_step(TARGET ${TARGET_NAME})
ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME}
SOURCE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include")
@@ -145,6 +156,9 @@ install(TARGETS ${TARGET_NAME} EXPORT OpenVINOGenAITargets
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
DESTINATION runtime/include COMPONENT core_genai_dev)
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/openvino/genai/version.hpp
+ DESTINATION runtime/include/openvino/genai COMPONENT core_genai_dev)
+
install(EXPORT OpenVINOGenAITargets FILE OpenVINOGenAITargets.cmake
NAMESPACE openvino:: DESTINATION runtime/cmake
COMPONENT core_genai_dev)
diff --git a/src/cpp/src/device_config.hpp b/src/cpp/src/device_config.hpp
index cc2e21b9a1..fee6c7abd1 100644
--- a/src/cpp/src/device_config.hpp
+++ b/src/cpp/src/device_config.hpp
@@ -117,22 +117,22 @@ class DeviceConfig {
}
for (size_t layer_id = 0; layer_id < m_num_decoder_layers; layer_id++) {
- m_key_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(),
- ov::Dimension(m_num_kv_heads[layer_id]),
- ov::Dimension(m_block_size),
- ov::Dimension(m_head_size)});
-
m_value_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(),
ov::Dimension(m_num_kv_heads[layer_id]),
ov::Dimension(m_block_size),
ov::Dimension(m_head_size)});
- if (m_device.find("GPU") != std::string::npos) {
+ if (m_device.find("GPU") == std::string::npos) {
+ m_key_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(),
+ ov::Dimension(m_num_kv_heads[layer_id]),
+ ov::Dimension(m_block_size),
+ ov::Dimension(m_head_size)});
+ } else if (m_device.find("GPU") != std::string::npos) {
// Update key shape, as the key's shape is different from the value's shape
m_key_cache_shape.push_back(ov::PartialShape{ov::Dimension::dynamic(),
- ov::Dimension(m_num_kv_heads[layer_id]),
- ov::Dimension(m_head_size),
- ov::Dimension(m_block_size)});
+ ov::Dimension(m_num_kv_heads[layer_id]),
+ ov::Dimension(m_head_size),
+ ov::Dimension(m_block_size)});
}
}
}
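With the original code the key-cache shape was pushed once unconditionally and then pushed a second time for GPU devices, leaving GPU configurations with two key-cache entries per decoder layer; the reordered branches make the CPU and GPU layouts mutually exclusive, so exactly one key-cache shape is recorded per layer.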
diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp
index 94aa6e19fe..c98b571179 100644
--- a/src/cpp/src/llm_pipeline_static.cpp
+++ b/src/cpp/src/llm_pipeline_static.cpp
@@ -739,7 +739,10 @@ std::shared_ptr<ov::CompiledModel> StatefulLLMPipeline::setupAndCompileModel(
rename_key(pipeline_config, "PREFILL_CONFIG", "NPUW_LLM_PREFILL_CONFIG");
rename_key(pipeline_config, "GENERATE_CONFIG", "NPUW_LLM_GENERATE_CONFIG");
-
+
+ // Replace CACHE_DIR option if NPUW is enabled
+ set_npuw_cache_dir(pipeline_config);
+
return std::make_shared<ov::CompiledModel>(genai::utils::singleton_core().compile_model(model, "NPU", pipeline_config));
}
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index 75a2fd59a7..1293246260 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -34,9 +34,6 @@ file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.py"
"${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/py_openvino_genai.pyi"
DESTINATION "${CMAKE_BINARY_DIR}/openvino_genai/")
-configure_file("${OpenVINOGenAI_SOURCE_DIR}/cmake/templates/__version__.py.in"
- "${CMAKE_BINARY_DIR}/openvino_genai/__version__.py" @ONLY)
-
if(OpenVINODeveloperPackage_FOUND)
# TODO: commit changes separately
# ov_add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME})
@@ -69,18 +66,12 @@ endif()
install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.py"
"${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.pyi"
"${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/py_openvino_genai.pyi"
- "${CMAKE_BINARY_DIR}/openvino_genai/__version__.py"
DESTINATION python/openvino_genai
COMPONENT pygenai_${Python3_VERSION_MAJOR}_${Python3_VERSION_MINOR})
install(TARGETS ${TARGET_NAME}
LIBRARY DESTINATION python/openvino_genai
COMPONENT pygenai_${Python3_VERSION_MAJOR}_${Python3_VERSION_MINOR})
-install(FILES "${CMAKE_BINARY_DIR}/openvino_genai/__version__.py"
- DESTINATION openvino_genai
- COMPONENT wheel_genai
- EXCLUDE_FROM_ALL)
-
install(FILES "${OpenVINOGenAI_SOURCE_DIR}/LICENSE"
"${OpenVINOGenAI_SOURCE_DIR}/third-party-programs.txt"
"${OpenVINOGenAI_SOURCE_DIR}/SECURITY.md"
@@ -154,7 +145,8 @@ if(pybind11_stubgen_AVAILABLE)
endif()
set(stub_files_location "${OpenVINOGenAI_BINARY_DIR}/src/python")
- set(generated_files ${stub_files_location}/openvino_genai/__init__.pyi
+ set(init_pyi_file "${stub_files_location}/openvino_genai/__init__.pyi")
+ set(generated_files ${init_pyi_file}
${stub_files_location}/openvino_genai/py_openvino_genai.pyi)
set_source_files_properties(${generated_files} PROPERTIES GENERATED ON)
@@ -184,6 +176,9 @@ if(pybind11_stubgen_AVAILABLE)
"${CMAKE_BINARY_DIR}/openvino_genai/py_openvino_genai.pyi"
COMMAND "${CMAKE_COMMAND}" -E env PYTHONPATH=${CMAKE_BINARY_DIR}:${openvino_pythonpath}:$ENV{PYTHONPATH}
${pybind11_stubgen} --output-dir ${stub_files_location} openvino_genai
+ COMMAND "${CMAKE_COMMAND}"
+ -D init_pyi_file=${init_pyi_file}
+ -P "${CMAKE_CURRENT_SOURCE_DIR}/clean_version.cmake"
${validation_command}
${copy_to_source_command}
COMMAND "${CMAKE_COMMAND}" -E copy ${generated_files} "${CMAKE_BINARY_DIR}/openvino_genai/"
@@ -192,6 +187,7 @@ if(pybind11_stubgen_AVAILABLE)
${python_sources}
${validation_dependencies}
"${CMAKE_CURRENT_SOURCE_DIR}/openvino_genai/__init__.py"
+ "${CMAKE_CURRENT_SOURCE_DIR}/clean_version.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/compare_pyi.cmake"
COMMENT "[${pybind11_stubgen_dep}] Generate .pyi files"
VERBATIM)
diff --git a/src/python/clean_version.cmake b/src/python/clean_version.cmake
new file mode 100644
index 0000000000..f02e293493
--- /dev/null
+++ b/src/python/clean_version.cmake
@@ -0,0 +1,21 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+foreach(var IN ITEMS init_pyi_file)
+ if(NOT DEFINED ${var})
+ message(FATAL_ERROR "Variable ${var} is not defined")
+ endif()
+endforeach()
+
+file(STRINGS ${init_pyi_file} file_lines)
+
+foreach(file_line IN LISTS file_lines)
+ if(file_line MATCHES "^__version__.*")
+ set(file_line "__version__: str")
+ endif()
+
+ set(file_content "${file_content}${file_line}\n")
+endforeach()
+
+file(WRITE ${init_pyi_file} ${file_content})
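The script's effect on the generated `__init__.pyi` is roughly the following Python (an illustrative sketch, not part of the build):

```python
from pathlib import Path

def clean_version(init_pyi_file: str) -> None:
    # Replace the stub-generated "__version__: str = '...'" line with a bare annotation,
    # since the actual value is now resolved at runtime via get_version().
    lines = Path(init_pyi_file).read_text().splitlines()
    cleaned = ["__version__: str" if line.startswith("__version__") else line
               for line in lines]
    Path(init_pyi_file).write_text("\n".join(cleaned) + "\n")
```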
diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py
index a0b0faf58c..0ad7ba3f12 100644
--- a/src/python/openvino_genai/__init__.py
+++ b/src/python/openvino_genai/__init__.py
@@ -5,8 +5,6 @@
import openvino # add_dll_directory for openvino lib
import os
-from .__version__ import __version__
-
if hasattr(os, "add_dll_directory"):
os.add_dll_directory(os.path.dirname(__file__))
@@ -17,8 +15,11 @@
RawPerfMetrics,
PerfMetrics,
StreamerBase,
+ get_version,
)
+__version__ = get_version()
+
# VLM pipeline
from .py_openvino_genai import (
diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi
index 187e0a0a06..0a401ae958 100644
--- a/src/python/openvino_genai/__init__.pyi
+++ b/src/python/openvino_genai/__init__.pyi
@@ -42,7 +42,8 @@ from openvino_genai.py_openvino_genai import WhisperPerfMetrics
from openvino_genai.py_openvino_genai import WhisperPipeline
from openvino_genai.py_openvino_genai import WhisperRawPerfMetrics
from openvino_genai.py_openvino_genai import draft_model
+from openvino_genai.py_openvino_genai import get_version
import os as os
from . import py_openvino_genai
-__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'openvino', 'os', 'py_openvino_genai']
-__version__: str = '2025.0.0.0'
+__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai']
+__version__: str
diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi
index d405cd9bbf..5adde32db4 100644
--- a/src/python/openvino_genai/py_openvino_genai.pyi
+++ b/src/python/openvino_genai/py_openvino_genai.pyi
@@ -5,7 +5,7 @@ from __future__ import annotations
import openvino._pyopenvino
import os
import typing
-__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model']
+__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version']
class Adapter:
"""
Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier.
@@ -2204,3 +2204,7 @@ def draft_model(models_path: os.PathLike, device: str = '', **kwargs) -> openvin
"""
device on which inference will be performed
"""
+def get_version() -> str:
+ """
+ OpenVINO GenAI version
+ """
diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp
index 429f48f30d..f8e577d5c8 100644
--- a/src/python/py_openvino_genai.cpp
+++ b/src/python/py_openvino_genai.cpp
@@ -11,6 +11,7 @@
#include
#include "openvino/genai/llm_pipeline.hpp"
+#include "openvino/genai/version.hpp"
#include "py_utils.hpp"
@@ -21,6 +22,7 @@ using ov::genai::DecodedResults;
using ov::genai::EncodedResults;
using ov::genai::StreamerBase;
using ov::genai::StringInputs;
+using ov::genai::get_version;
void init_lora_adapter(py::module_& m);
void init_perf_metrics(py::module_& m);
@@ -82,7 +84,12 @@ class ConstructableStreamer: public StreamerBase {
PYBIND11_MODULE(py_openvino_genai, m) {
m.doc() = "Pybind11 binding for OpenVINO GenAI library";
+ m.def("get_version", [] () -> py::str {
+ return get_version().buildNumber;
+ }, get_version().description);
+
init_perf_metrics(m);
+
    py::class_<DecodedResults>(m, "DecodedResults", decoded_results_docstring)
.def(py::init<>())
        .def_property_readonly("texts", [](const DecodedResults &dr) -> py::typing::List<py::str> { return pyutils::handle_utf8((std::vector<std::string>)dr); })
diff --git a/tools/llm_bench/llm_bench_utils/config_class.py b/tools/llm_bench/llm_bench_utils/config_class.py
index 7dd27b198b..9c149c98b6 100644
--- a/tools/llm_bench/llm_bench_utils/config_class.py
+++ b/tools/llm_bench/llm_bench_utils/config_class.py
@@ -102,7 +102,9 @@
"olmo",
"phi3",
"starcoder",
- "instruct-gpt"
+ "instruct-gpt",
+ "granite",
+ "granitemoe",
],
'ldm_super_resolution': ['ldm-super-resolution'],
}
diff --git a/tools/llm_bench/llm_bench_utils/ov_utils.py b/tools/llm_bench/llm_bench_utils/ov_utils.py
index 316c9d0b89..596da8cb3a 100644
--- a/tools/llm_bench/llm_bench_utils/ov_utils.py
+++ b/tools/llm_bench/llm_bench_utils/ov_utils.py
@@ -701,7 +701,7 @@ def put(self, token_id: int) -> bool:
word = text[self.print_len:]
self.tokens_cache = []
self.print_len = 0
- elif len(text) >= 3 and text[-3:] == chr(65533):
+ elif len(text) >= 3 and text[-1] == chr(65533):
# Don't print incomplete text.
pass
elif len(text) > self.print_len:
diff --git a/tools/who_what_benchmark/whowhatbench/model_loaders.py b/tools/who_what_benchmark/whowhatbench/model_loaders.py
index 8a00c70852..c792a3c0b2 100644
--- a/tools/who_what_benchmark/whowhatbench/model_loaders.py
+++ b/tools/who_what_benchmark/whowhatbench/model_loaders.py
@@ -41,8 +41,19 @@ def load_text_genai_pipeline(model_dir, device="CPU", ov_config=None):
return GenAIModelWrapper(openvino_genai.LLMPipeline(model_dir, device=device, **ov_config), model_dir, "text")
+def load_text_llamacpp_pipeline(model_dir):
+ try:
+ from llama_cpp import Llama
+ except ImportError:
+ logger.error(
+ "Failed to import llama_cpp package. Please install llama-cpp-python.")
+ exit(-1)
+ model = Llama(model_dir)
+ return model
+
+
def load_text_model(
- model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False
+ model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False, use_llamacpp=False,
):
if use_hf:
logger.info("Using HF Transformers API")
@@ -53,6 +64,9 @@ def load_text_model(
elif use_genai:
logger.info("Using OpenVINO GenAI API")
model = load_text_genai_pipeline(model_id, device, ov_config)
+ elif use_llamacpp:
+ logger.info("Using llama.cpp API")
+ model = load_text_llamacpp_pipeline(model_id)
else:
logger.info("Using Optimum API")
from optimum.intel.openvino import OVModelForCausalLM
@@ -276,7 +290,7 @@ def load_inpainting_model(
def load_model(
- model_type, model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False
+ model_type, model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False, use_llamacpp=False
):
if model_id is None:
return None
@@ -288,7 +302,7 @@ def load_model(
ov_options = {}
if model_type == "text":
- return load_text_model(model_id, device, ov_options, use_hf, use_genai)
+ return load_text_model(model_id, device, ov_options, use_hf, use_genai, use_llamacpp)
elif model_type == "text-to-image":
return load_text2image_model(
model_id, device, ov_options, use_hf, use_genai
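The new loader only wraps model construction; generation itself goes through `llamacpp_gen_text` in `wwb.py` below. Roughly, the underlying llama-cpp-python calls used there look like this (a sketch with a hypothetical GGUF path):

```python
from llama_cpp import Llama

model = Llama("model.gguf")  # hypothetical local GGUF file

# Chat-template path (matches the --chat-template branch):
out = model.create_chat_completion(
    messages=[{"role": "user", "content": "What is OpenVINO?"}],
    max_tokens=128,
    temperature=0.0,
)
print(out["choices"][0]["message"]["content"])

# Plain completion path:
out = model("What is OpenVINO?", max_tokens=128, echo=True, temperature=0.0)
print(out["choices"][0]["text"])
```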
diff --git a/tools/who_what_benchmark/whowhatbench/text_evaluator.py b/tools/who_what_benchmark/whowhatbench/text_evaluator.py
index 50ce224def..433521a186 100644
--- a/tools/who_what_benchmark/whowhatbench/text_evaluator.py
+++ b/tools/who_what_benchmark/whowhatbench/text_evaluator.py
@@ -108,6 +108,7 @@ def __init__(
generation_config=None,
generation_config_base=None,
seqs_per_request=None,
+ use_chat_template=None,
) -> None:
assert (
base_model is not None or gt_data is not None
@@ -123,6 +124,7 @@ def __init__(
self.generation_config_base = generation_config
self.seqs_per_request = seqs_per_request
self.generation_fn = gen_answer_fn
+ self.use_chat_template = use_chat_template
if self.generation_config is not None:
assert self.seqs_per_request is not None
@@ -202,15 +204,21 @@ def worst_examples(self, top_k: int = 5, metric="similarity"):
return res
def _generate_data(self, model, gen_answer_fn=None, generation_config=None):
- def default_gen_answer(model, tokenizer, prompt, max_new_tokens, crop_question):
- inputs = self.tokenizer(prompt, return_tensors="pt")
-
- tokens = model.generate(**inputs, do_sample=False, max_new_tokens=max_new_tokens)
-
- if crop_question:
- tokens = tokens[:, inputs["input_ids"].shape[-1] :]
-
- return self.tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
+ def default_gen_answer(model, tokenizer, prompt, max_new_tokens, crop_question, use_chat_template=False):
+ if use_chat_template:
+ message = [{"role": "user", "content": prompt}]
+ inputs = tokenizer.apply_chat_template(message, tokenize=True, add_generation_prompt=True, return_tensors="pt")
+ tokens = model.generate(inputs, do_sample=False, max_new_tokens=max_new_tokens)
+ if crop_question:
+ tokens = tokens[:, inputs.shape[-1]:]
+ res = self.tokenizer.decode(tokens[0], skip_special_tokens=True)
+ return res
+ else:
+ inputs = self.tokenizer(prompt, return_tensors="pt")
+ tokens = model.generate(**inputs, do_sample=False, max_new_tokens=max_new_tokens)
+ if crop_question:
+ tokens = tokens[:, inputs["input_ids"].shape[-1] :]
+ return self.tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
gen_answer_fn = gen_answer_fn or default_gen_answer
@@ -250,6 +258,7 @@ def default_gen_answer(model, tokenizer, prompt, max_new_tokens, crop_question):
p,
self.max_new_tokens,
self._crop_question,
+ self.use_chat_template
)
)
else:
diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py
index 7acf3cf5aa..7d4354f846 100644
--- a/tools/who_what_benchmark/whowhatbench/wwb.py
+++ b/tools/who_what_benchmark/whowhatbench/wwb.py
@@ -40,6 +40,11 @@ def parse_args():
default=None,
help="Tokenizer for divergency metric. If not provided, it will be load from base_model or target_model.",
)
+ parser.add_argument(
+ "--chat-template",
+ action="store_true",
+ help="Whether apply the default chat template.",
+ )
parser.add_argument(
"--gt-data",
default=None,
@@ -137,6 +142,11 @@ def parse_args():
action="store_true",
help="Use LLMPipeline from transformers library to instantiate the model.",
)
+ parser.add_argument(
+ "--llamacpp",
+ action="store_true",
+ help="Use llama-cpp-python to instantiate the model.",
+ )
parser.add_argument(
"--image-size",
type=int,
@@ -190,9 +200,13 @@ def load_prompts(args):
def load_tokenizer(args):
tokenizer = None
if args.tokenizer is not None:
- tokenizer = AutoTokenizer.from_pretrained(
- args.tokenizer, trust_remote_code=True
- )
+ if args.llamacpp:
+ from llama_cpp.llama_tokenizer import LlamaHFTokenizer
+ tokenizer = LlamaHFTokenizer.from_pretrained(args.tokenizer)
+ else:
+ tokenizer = AutoTokenizer.from_pretrained(
+ args.tokenizer, trust_remote_code=True
+ )
elif args.base_model is not None:
tokenizer = AutoTokenizer.from_pretrained(
args.base_model, trust_remote_code=True
@@ -246,8 +260,29 @@ def diff_strings(a: str, b: str, *, use_loguru_colors: bool = False) -> str:
return "".join(output)
-def genai_gen_text(model, tokenizer, question, max_new_tokens, skip_question):
- return model.generate(question, do_sample=False, max_new_tokens=max_new_tokens)
+def genai_gen_text(model, tokenizer, question, max_new_tokens, skip_question, use_chat_template=False):
+ if use_chat_template:
+ model.start_chat()
+ result = model.generate(question, do_sample=False, max_new_tokens=max_new_tokens)
+ model.finish_chat()
+ return result
+ else:
+ return model.generate(question, do_sample=False, max_new_tokens=max_new_tokens)
+
+
+def llamacpp_gen_text(model, tokenizer, question, max_new_tokens, skip_question, use_chat_template=False):
+ if use_chat_template:
+ output = model.create_chat_completion(messages=[{"role": "user", "content": question}], max_tokens=max_new_tokens, temperature=0.0)
+ text = output["choices"][0]["message"]["content"]
+ if skip_question:
+ text = text[len(question):]
+ return text
+ else:
+ output = model(question, max_tokens=max_new_tokens, echo=True, temperature=0.0)
+ text = output["choices"][0]["text"]
+ if skip_question:
+ text = text[len(question):]
+ return text
def genai_gen_image(model, prompt, num_inference_steps, generator=None):
@@ -322,7 +357,15 @@ def create_evaluator(base_model, args):
prompts = load_prompts(args)
if task == "text":
- tokenizer = load_tokenizer(args)
+ tokenizer = load_tokenizer(args) if not args.llamacpp else None
+
+ if args.genai:
+ gen_answer_fn = genai_gen_text
+ elif args.llamacpp:
+ gen_answer_fn = llamacpp_gen_text
+ else:
+ gen_answer_fn = None
+
return EvaluatorCLS(
base_model=base_model,
gt_data=args.gt_data,
@@ -331,7 +374,8 @@ def create_evaluator(base_model, args):
similarity_model_id=args.data_encoder,
num_samples=args.num_samples,
language=args.language,
- gen_answer_fn=genai_gen_text if args.genai else None,
+ gen_answer_fn=gen_answer_fn,
+ use_chat_template=args.chat_template,
)
elif task == "text-to-image":
return EvaluatorCLS(
@@ -467,10 +511,11 @@ def main():
args.ov_config,
args.hf,
args.genai,
+ args.llamacpp
)
all_metrics_per_question, all_metrics = evaluator.score(
target_model,
- evaluator.get_generation_fn() if args.genai else None,
+ evaluator.get_generation_fn() if args.genai or args.llamacpp else None,
output_dir=args.output
)
logger.info("Metrics for model: %s", args.target_model)
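Taken together, passing `--llamacpp` routes model loading through llama-cpp-python and generation through `llamacpp_gen_text`, while `--chat-template` switches the GenAI path, the llama.cpp path, and the default Optimum/HF path in `text_evaluator.py` to chat-style prompting. When `--llamacpp` is set, a tokenizer given via `--tokenizer` is loaded through `LlamaHFTokenizer`, and the evaluator's generation function is used for the target model just as with `--genai`.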