Skip to content

Commit

Permalink
Merge branch 'master' into ak/wwb_inpainting
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexKoff88 authored Dec 27, 2024
2 parents 08fe455 + 82b44fa commit 8820b1f
Show file tree
Hide file tree
Showing 16 changed files with 420 additions and 388 deletions.
29 changes: 20 additions & 9 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@
- 'src/python/py_tokenizer.cpp'
- 'thirdparty/openvino_tokenizers'
- 'tests/python_tests/tokenizer_configs.py'
- 'tests/python_tests/test_tokenizer.py'

'category: LLM':
- 'src/cpp/include/openvino/genai/llm_pipeline.hpp'
- 'src/cpp/src/llm_pipeline.cpp'
- 'src/cpp/src/lm_encoding.hpp'
- 'src/cpp/src/lm_encoding.cpp'
- 'src/cpp/src/llm_pipeline_base.hpp'
- 'src/cpp/src/llm_pipeline_static.hpp'
- 'src/cpp/src/llm_pipeline_static.cpp'
- 'src/cpp/src/text_callback_streamer.cpp'
- 'src/cpp/src/text_callback_streamer.hpp'
- 'src/python/py_llm_pipeline.cpp'
- 'tests/python_tests/test_generate_api.py'
- 'tests/python_tests/test_chat_generate_api.py'
- 'tests/python_tests/test_llm_pipeline.py'

'category: sampling':
- 'src/cpp/include/openvino/genai/generation_config.hpp'
Expand All @@ -35,6 +38,7 @@
- 'tests/cpp/logit_filtering.cpp'
- 'tests/cpp/generate_config.cpp'
- 'tests/cpp/sampler.cpp'
- 'tests/python_tests/test_sampling.py'

'category: LoRA':
- 'src/cpp/include/openvino/genai/lora_adapter.hpp'
Expand All @@ -54,9 +58,12 @@
- 'src/cpp/include/openvino/genai/whisper_pipeline.hpp'
- 'src/cpp/src/whisper/**/*'
- 'src/cpp/src/whisper_generation_config.cpp'
- 'src/cpp/src/whisper_pipeline_base.hpp'
- 'src/cpp/src/whisper_pipeline.cpp'
- 'src/cpp/src/whisper_pipeline_static.cpp'
- 'src/cpp/src/whisper_pipeline_static.hpp'
- 'src/python/py_whisper_pipeline.cpp'
- 'tests/python_tests/test_whisper_generate_api.py'
- 'tests/python_tests/test_whisper_pipeline.py'

'category: Python API':
- 'src/python/**/*'
Expand All @@ -65,10 +72,14 @@
- 'src/include/openvino/genai/visual_language/**/*'
- 'src/cpp/src/visual_language/**/*'
- 'src/python/py_vlm_pipeline.cpp'
- 'tests/python_tests/test_vlm_api.py'
- 'tests/python_tests/test_vlm_pipeline.py'

'category: speculative decoding':
- 'src/cpp/src/speculative_decoding/**/*'
- 'tests/cpp/speculative_decoding.cpp'

'category: prompt lookup':
- 'src/cpp/src/prompt_lookup/**/*'

'category: continuous batching':
- 'src/cpp/include/openvino/genai/cache_eviction.hpp'
Expand All @@ -91,19 +102,19 @@
- 'src/cpp/src/generation_handle.cpp'
- 'src/cpp/src/generation_stream.hpp'
- 'src/cpp/src/model_runner.hpp'
- 'src/cpp/src/paged_attention_transformations.cpp'
- 'src/cpp/src/paged_attention_transformations.hpp'
- 'src/cpp/src/utils/paged_attention_transformations.cpp'
- 'src/cpp/src/utils/paged_attention_transformations.hpp'
- 'src/cpp/src/scheduler.hpp'
- 'src/cpp/src/sequence_group.cpp'
- 'src/cpp/src/sequence_group.hpp'
- 'src/cpp/src/timer.hpp'
- 'src/python/py_continuous_batching_pipeline.cpp'
- 'tests/python_tests/test_cache_optimizations.py'
- 'tests/python_tests/test_preemption.py'
- 'tests/python_tests/test_sampling.py'
- 'tests/python_tests/test_continuous_batching.py'
- 'tests/python_tests/test_kv_cache_eviction.py'
- 'tests/cpp/block_allocator.cpp'
- 'tests/cpp/block_hash_store.cpp'
- 'tests/cpp/block_manager.cpp'
- 'tests/cpp/cache_eviction.cpp'
- 'tests/cpp/cache_manager.cpp'
- 'tests/cpp/device_config.cpp'
- 'tests/cpp/scheduler.cpp'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@ jobs:
matrix:
test:
- name: 'Whisper'
cmd: 'tests/python_tests/test_whisper_generate_api.py'
cmd: 'tests/python_tests/test_whisper_pipeline.py'
- name: 'LLM & VLM'
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_generate_api.py'
cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py'
defaults:
run:
shell: bash
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/mac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ jobs:
if: |
always() &&
(needs.openvino_download.outputs.status == 'success' || needs.openvino_build.result == 'success')
timeout-minutes: 90
timeout-minutes: 120
defaults:
run:
shell: bash
Expand Down Expand Up @@ -235,7 +235,7 @@ jobs:
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down Expand Up @@ -290,7 +290,7 @@ jobs:
run: |
source ${OV_INSTALL_DIR}/setupvars.sh
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k test_smoke
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k test_smoke
env:
PYTHONPATH: "./build/:$PYTHONPATH"

Expand All @@ -300,7 +300,7 @@ jobs:
python -m pip install . --verbose --find-links ${OV_INSTALL_DIR}/wheels
python -c "from openvino_genai import LLMPipeline"
python -m pip install ./tools/who_what_benchmark --find-links ${OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
genai_package:
name: OpenVINO genai extension (install to OpenVINO package)
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_generate_api.py --ignore ./tests/python_tests/test_vlm_api.py -k "not test_set_chat_template"
python -m pytest -v ./tests/python_tests/ --ignore ./tests/python_tests/test_whisper_pipeline.py --ignore ./tests/python_tests/test_vlm_pipeline.py -k "not test_set_chat_template"
genai_python_lib_whisper:
name: OpenVINO genai extension whisper tests (cmake + wheel)
Expand Down Expand Up @@ -301,7 +301,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k test_smoke
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k test_smoke
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

Expand All @@ -310,7 +310,7 @@ jobs:
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install . --verbose --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pip install ./tools/who_what_benchmark --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_whisper_generate_api.py -k "not test_smoke"
python -m pytest -v ./tests/python_tests/test_whisper_pipeline.py -k "not test_smoke"
genai_python_lib_vlm:
name: OpenVINO genai VLM tests (cmake + wheel)
Expand Down Expand Up @@ -366,7 +366,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels
python -m pytest -v ./tests/python_tests/test_vlm_api.py
python -m pytest -v ./tests/python_tests/test_vlm_pipeline.py
env:
PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.

Expand Down
12 changes: 4 additions & 8 deletions src/cpp/src/llm_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -703,8 +703,7 @@ std::pair<ov::AnyMap, ov::genai::ModelConfigDesc> split_model_descr(const ov::An
ov::genai::LLMPipeline::LLMPipeline(
const ov::InferRequest& request,
const ov::genai::Tokenizer& tokenizer,
OptionalGenerationConfig generation_config
) {
OptionalGenerationConfig generation_config) {
auto start_time = std::chrono::steady_clock::now();
m_pimpl = std::make_unique<StatefulLLMPipeline>(request, tokenizer, generation_config);
auto stop_time = std::chrono::steady_clock::now();
Expand All @@ -715,8 +714,7 @@ ov::genai::LLMPipeline::LLMPipeline(
const std::filesystem::path& models_path,
const ov::genai::Tokenizer& tokenizer,
const std::string& device,
const ov::AnyMap& properties
){
const ov::AnyMap& properties) {
auto start_time = std::chrono::steady_clock::now();
if (properties.find(ov::genai::scheduler_config.name()) != properties.end() ||
properties.find(utils::DRAFT_MODEL_ARG_NAME) != properties.end() ||
Expand All @@ -735,8 +733,7 @@ ov::genai::LLMPipeline::LLMPipeline(
ov::genai::LLMPipeline::LLMPipeline(
const std::filesystem::path& models_path,
const std::string& device,
const ov::AnyMap& config
){
const ov::AnyMap& config) {
auto start_time = std::chrono::steady_clock::now();

if (config.find(ov::genai::scheduler_config.name()) != config.end() ||
Expand All @@ -759,8 +756,7 @@ ov::genai::LLMPipeline::LLMPipeline(
const ov::genai::Tokenizer& tokenizer,
const std::string& device,
const ov::AnyMap& config,
const ov::genai::GenerationConfig& generation_config
){
const ov::genai::GenerationConfig& generation_config) {
auto [core_properties, plugin_config] = ov::genai::utils::split_core_compile_config(config);

auto start_time = std::chrono::steady_clock::now();
Expand Down
2 changes: 2 additions & 0 deletions src/docs/SUPPORTED_MODELS.md
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
<li><a href="https://huggingface.co/Freepik/flux.1-lite-8B-alpha"><code>Freepik/flux.1-lite-8B-alpha</code></a></li>
<li><a href="https://huggingface.co/black-forest-labs/FLUX.1-dev"><code>black-forest-labs/FLUX.1-dev</code></a></li>
<li><a href="https://huggingface.co/shuttleai/shuttle-3-diffusion"><code>shuttleai/shuttle-3-diffusion</code></a></li>
<li><a href="https://huggingface.co/shuttleai/shuttle-3.1-aesthetic"><code>shuttleai/shuttle-3.1-aesthetic</code></a></li>
<li><a href="https://huggingface.co/Shakker-Labs/AWPortrait-FL"><code>Shakker-Labs/AWPortrait-FL</code></a></li>
</ul>
</td>
</tr>
Expand Down
14 changes: 1 addition & 13 deletions tests/python_tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,18 +364,6 @@ def run_continuous_batching(
return output


def read_models_list(file_name: str):
    """Read model names from a model-scope file, one name per line.

    Surrounding whitespace is stripped from every line. Lines whose first
    non-blank character is ``#`` are treated as comments and skipped. Blank
    lines are skipped as well, so they never yield an empty model name.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list of model names in the order they appear in the file.
    """
    with open(file_name) as f:
        stripped_lines = (line.strip() for line in f)
        # Build the list inside the `with` block: the generator reads lazily,
        # so the file must still be open while it is consumed.
        return [name for name in stripped_lines if name and not name.startswith('#')]


def compare_results(hf_result: GenerationResult, ov_result: GenerationResult, generation_config: GenerationConfig):
if generation_config.is_beam_search():
assert len(hf_result.m_scores) == len(ov_result.m_scores)
Expand Down Expand Up @@ -447,7 +435,7 @@ def generate_and_compare_with_reference_text(models_path: Path, prompts: List[st
assert ref_text == ov_text


def run_test_pipeline(tmp_path: str, model_id: str, scheduler_params: dict = None, generation_config = None):
def run_continuous_batching_pipeline_test(tmp_path: str, model_id: str, scheduler_params: dict = None, generation_config = None):
prompts, generation_configs = get_test_dataset()
scheduler_config = get_scheduler_config(scheduler_params)

Expand Down
29 changes: 15 additions & 14 deletions tests/python_tests/ov_genai_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def get_models_list():
"HuggingFaceH4/zephyr-7b-beta",
"ikala/redpajama-3b-chat",
"mistralai/Mistral-7B-v0.1",

# "meta-llama/Llama-2-7b-chat-hf", # Cannot be downloaded without access token
# "google/gemma-2b-it", # Cannot be downloaded without access token.
# "google/gemma-7b-it", # Cannot be downloaded without access token.
Expand All @@ -49,7 +49,7 @@ def get_models_list():
model_ids = precommit_models
else:
model_ids = nightly_models

if pytest.selected_model_ids:
model_ids = [model_id for model_id in model_ids if model_id in pytest.selected_model_ids.split(' ')]
# pytest.set_trace()
Expand Down Expand Up @@ -82,30 +82,30 @@ def get_chat_models_list():
@functools.lru_cache(1)
def read_model(params, **tokenizer_kwargs):
model_id, path = params

from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer
hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

if (path / "openvino_model.xml").exists():
opt_model = OVModelForCausalLM.from_pretrained(path, trust_remote_code=True,
opt_model = OVModelForCausalLM.from_pretrained(path, trust_remote_code=True,
compile=False, device='CPU')
else:
ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(hf_tokenizer,
ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(hf_tokenizer,
with_detokenizer=True,
**tokenizer_kwargs)
openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml")
openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml")

# to store tokenizer config jsons with special tokens
hf_tokenizer.save_pretrained(path)
opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True,

opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True,
compile=False, device='CPU', load_in_8bit=False)
opt_model.generation_config.save_pretrained(path)
opt_model.config.save_pretrained(path)
opt_model.save_pretrained(path)

return (
model_id,
path,
Expand All @@ -116,11 +116,11 @@ def read_model(params, **tokenizer_kwargs):


# in OpenVINO GenAI this parameter is called stop_criteria,
# while in HF it's called early_stopping.
# while in HF it's called early_stopping.
# HF values True, False and "never" correspond to OV GenAI values "EARLY", "HEURISTIC" and "NEVER"
STOP_CRITERIA_MAP = {
ov_genai.StopCriteria.NEVER: "never",
ov_genai.StopCriteria.EARLY: True,
ov_genai.StopCriteria.NEVER: "never",
ov_genai.StopCriteria.EARLY: True,
ov_genai.StopCriteria.HEURISTIC: False
}

Expand All @@ -137,6 +137,7 @@ def model_tmp_path(tmpdir_factory):
shutil.copy(src_file, temp_path / src_file.name)
yield model_id, Path(temp_path)


@pytest.fixture(scope="module")
def model_tokenizers_path_tmp_path(tmpdir_factory):
model_id, path, _, _, _ = read_model(get_models_list()[0])
Expand All @@ -146,7 +147,7 @@ def model_tokenizers_path_tmp_path(tmpdir_factory):
# There was no easy way to add tokens to IR in tests, so we remove them
# and set tokens in configs to check if they are read and validated correctly.
import openvino as ov

# copy openvino converted model and tokenizers
for pattern in ['*.xml', '*.bin']:
for src_file in path.glob(pattern):
Expand All @@ -162,7 +163,7 @@ def model_tokenizers_path_tmp_path(tmpdir_factory):
ov_model.set_rt_info("eos_token_id", "")
ov_model.set_rt_info("chat_template", "")
ov.save_model(ov_model, str(temp_path / src_file.name))

if src_file in ['openvino_tokenizer.bin', 'openvino_detokenizer.bin']:
continue
if src_file.is_file():
Expand Down
Loading

0 comments on commit 8820b1f

Please sign in to comment.