From 264e99f08d2c476b48aaf92b89cf199fa2a05200 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Wed, 15 May 2024 12:04:36 +0200
Subject: [PATCH] apply comments

---
 .../include/openvino/genai/llm_pipeline.hpp   | 14 ++-
 src/cpp/src/llm_pipeline.cpp                  |  2 +
 src/cpp/src/text_callback_streamer.cpp        |  8 +-
 src/cpp/src/text_callback_streamer.hpp        |  2 +-
 src/tests/python_tests/test_cpp_samples.py    | 85 +++++++++++++++++++
 text_generation/causal_lm/cpp/CMakeLists.txt  |  4 +-
 .../cpp/generate_pipeline/chat_sample.cpp     |  1 +
 7 files changed, 102 insertions(+), 14 deletions(-)
 create mode 100644 src/tests/python_tests/test_cpp_samples.py

diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp
index 5c3e23aa7e..b25d11ecd4 100644
--- a/src/cpp/include/openvino/genai/llm_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -11,8 +11,6 @@
 #include "openvino/genai/tokenizer.hpp"
 #include "openvino/genai/streamer_base.hpp"
 
-using namespace std;
-
 namespace ov {
 
 using StreamerVariant = std::variant<std::function<void (std::string)>, std::shared_ptr<StreamerBase>>;
@@ -82,7 +80,7 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
     * @param streamer optional streamer
     * @return std::string decoded resulting text
     */
-    std::string generate(std::string text, OptionalGenerationConfig generation_config=nullopt, OptionalStreamerVariant streamer=nullopt);
+    std::string generate(std::string text, OptionalGenerationConfig generation_config=std::nullopt, OptionalStreamerVariant streamer=std::nullopt);
 
     template <typename... Properties>
     util::EnableIfAllStringAny<std::string, Properties...> generate(
@@ -124,8 +122,8 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
     */
     EncodedResults generate(ov::Tensor input_ids, std::optional<ov::Tensor> attention_mask,
-                            OptionalGenerationConfig generation_config=nullopt,
-                            OptionalStreamerVariant streamer=nullopt);
+                            OptionalGenerationConfig generation_config=std::nullopt,
+                            OptionalStreamerVariant streamer=std::nullopt);
 
     template <typename... Properties>
     util::EnableIfAllStringAny<std::string, Properties...> operator()(
@@ -134,11 +132,11 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
         return generate(text, AnyMap{std::forward<Properties>(properties)...});
     }
 
-    DecodedResults operator()(std::vector<std::string> text, OptionalGenerationConfig generation_config=nullopt);
-    DecodedResults operator()(std::initializer_list<std::string> text, OptionalGenerationConfig generation_config=nullopt);
+    DecodedResults operator()(std::vector<std::string> text, OptionalGenerationConfig generation_config=std::nullopt);
+    DecodedResults operator()(std::initializer_list<std::string> text, OptionalGenerationConfig generation_config=std::nullopt);
 
     // generate with streamers
-    std::string operator()(std::string text, OptionalGenerationConfig generation_config=nullopt, OptionalStreamerVariant streamer=nullopt);
+    std::string operator()(std::string text, OptionalGenerationConfig generation_config=std::nullopt, OptionalStreamerVariant streamer=std::nullopt);
     std::string operator()(std::string text, OptionalStreamerVariant streamer);
 
     ov::Tokenizer get_tokenizer();
diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp
index 47bf3495d5..9d4161f859 100644
--- a/src/cpp/src/llm_pipeline.cpp
+++ b/src/cpp/src/llm_pipeline.cpp
@@ -107,6 +107,8 @@ ov::LLMPipeline::LLMPipelineImpl::LLMPipelineImpl(std::string& path, std::string
         nlohmann::json data = nlohmann::json::parse(f);
         m_chat_template = data.value("chat_template", "");
     }
+
+    m_device = device;
diff --git a/src/cpp/src/text_callback_streamer.cpp b/src/cpp/src/text_callback_streamer.cpp
index a1d2f3b01d..f9b3ad8ccd 100644
--- a/src/cpp/src/text_callback_streamer.cpp
+++ b/src/cpp/src/text_callback_streamer.cpp
@@ -5,7 +5,7 @@ namespace ov {
 TextCallbackStreamer::TextCallbackStreamer(const Tokenizer& tokenizer, std::function<void (std::string)> callback, bool print_eos_token) {
     m_tokenizer = tokenizer;
     m_print_eos_token = print_eos_token;
-    m_callback = callback;
+    on_decoded_text_callback = callback;
     m_enabled = true;
 }
 
@@ -55,18 +55,18 @@ void TextCallbackStreamer::set_tokenizer(Tokenizer tokenizer) {
 }
 
 void TextCallbackStreamer::set_callback(std::function<void (std::string)> callback) {
-    m_callback = callback;
+    on_decoded_text_callback = callback;
     m_enabled = true;
 }
 
 void TextCallbackStreamer::set_callback() {
-    m_callback = [](std::string words){ ;};
+    on_decoded_text_callback = [](std::string words){};
     m_enabled = false;
 }
 
 void TextCallbackStreamer::on_finalized_text(const std::string& subword) {
     if (m_enabled) {
-        m_callback(subword);
+        on_decoded_text_callback(subword);
     }
 }
 
diff --git a/src/cpp/src/text_callback_streamer.hpp b/src/cpp/src/text_callback_streamer.hpp
index f3d8773fb4..d9c1ba3ee5 100644
--- a/src/cpp/src/text_callback_streamer.hpp
+++ b/src/cpp/src/text_callback_streamer.hpp
@@ -21,7 +21,7 @@ class TextCallbackStreamer: public StreamerBase {
     void set_callback(std::function<void (std::string)> callback);
     void set_callback();
 
-    std::function<void (std::string)> m_callback = [](std::string words){ ;};
+    std::function<void (std::string)> on_decoded_text_callback = [](std::string words){};
     bool m_enabled = false;
     int64_t m_eos_token;
 private:
diff --git a/src/tests/python_tests/test_cpp_samples.py b/src/tests/python_tests/test_cpp_samples.py
new file mode 100644
index 0000000000..85ab4a9fbd
--- /dev/null
+++ b/src/tests/python_tests/test_cpp_samples.py
@@ -0,0 +1,85 @@
+
+import pytest
+
+model_ids = [
+    # ("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama-1.1B-Chat-v1.0-skip-special-tokens"),
+
+    ("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/"),
+    ("google/gemma-2b-it", "gemma-2b-it/pytorch/dldt/FP16/"),
+    # ("meta-llama/Llama-2-7b-chat-hf", "Llama-2-7b-chat-hf/pytorch/dldt/FP16/"),
+]
+
+def run_cpp_sample_command(command, cwd):
+    import subprocess
+    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd, text=True)
+    stdout, stderr = process.communicate()
+    return stdout, stderr, process.returncode
+
+def run_transformers_model(model_id, prompt, config=None, add_special_tokens=True):
+    import transformers
+
+    tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
+    model = transformers.AutoModelForCausalLM.from_pretrained(model_id)
+    tokenized = tokenizer.encode(prompt, return_tensors='pt', add_special_tokens=add_special_tokens)
+
+    default_config = dict(
+        num_beam_groups=3,
+        num_beams=15,
+        diversity_penalty=1.0,
+        num_return_sequences=15,
+        max_new_tokens=20,
+        early_stopping=False,
+        length_penalty=1.0,
+        no_repeat_ngram_size=9**9,
+        do_sample=False
+    )
+
+    if config is None:
+        config = default_config
+    print(tokenized)
+    beams = model.generate(tokenized, **config)
+    return map(lambda beam: tokenizer.decode(beam[tokenized.numel():], skip_special_tokens=True), beams)
+
+@pytest.mark.parametrize("param", model_ids)
+def test_model(param):
+    model_id, path = param
+
+    prompts = ["table is made of", "The Sun is yellow because"]
+    # prompt = " ".join([f'"{item}"' for item in prompts])
+
+    prompt = "table is made of"
+
+    # cmd = 'build-Debug/greedy_causal_lm' // for old samples
+    cmd = 'build-Debug/text_generation/causal_lm/cpp/'
+
+    # beam search old
+    cmd = 'build-Debug/beam_search_causal_lm'
+    cwd = '/home/epavel/devel/openvino.genai_'
+    config = None  # None means greedy
+
+    # greedy new
+    cwd = '/home/epavel/devel/openvino.genai'
+    cmd = 'build-Debug/text_generation/causal_lm/cpp/greedy_causal_lm'
+    config = dict(max_new_tokens=75, do_sample=False)
+
+    # beam search new
+    cwd = '/home/epavel/devel/openvino.genai'
+    cmd = 'build-Debug/text_generation/causal_lm/cpp/beam_search_causal_lm'
+    config = None
+
+    predictions, _, _ = run_cpp_sample_command([cmd, '/home/epavel/devel/openvino.genai/text_generation/causal_lm/' + path, prompt], cwd)
+    print(predictions)
+
+    beams = run_transformers_model(model_id, prompt, config)
+    for beam in beams:
+        idx = predictions.find(beam)
+        if -1 == idx and beam and predictions:
+            raise RuntimeError(f'Missing "{beam=}" from predictions')
+        predictions = predictions[:idx] + predictions[idx + len(beam):]
+
+    return True
+    # with open('pred.txt', 'r') as file:
+    #     predictions = file.read()
+
+for model_id, path in model_ids:
+    test_model((model_id, path))
diff --git a/text_generation/causal_lm/cpp/CMakeLists.txt b/text_generation/causal_lm/cpp/CMakeLists.txt
index 30678f3ad5..8b6281f50e 100644
--- a/text_generation/causal_lm/cpp/CMakeLists.txt
+++ b/text_generation/causal_lm/cpp/CMakeLists.txt
@@ -4,7 +4,9 @@
 cmake_minimum_required(VERSION 3.15)
 project(causal_lm)
 
-# add_subdirectory(../../../thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
+if(NOT TARGET openvino_tokenizers)
+add_subdirectory(../../../thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
+endif()
 
 add_executable(greedy_causal_lm greedy_causal_lm.cpp)
 target_compile_definitions(greedy_causal_lm PRIVATE OPENVINO_TOKENIZERS_PATH=\"$<TARGET_FILE:openvino_tokenizers>\")
diff --git a/text_generation/causal_lm/cpp/generate_pipeline/chat_sample.cpp b/text_generation/causal_lm/cpp/generate_pipeline/chat_sample.cpp
index c7460dd337..b1ecb5f5f4 100644
--- a/text_generation/causal_lm/cpp/generate_pipeline/chat_sample.cpp
+++ b/text_generation/causal_lm/cpp/generate_pipeline/chat_sample.cpp
@@ -4,6 +4,7 @@
 #include 
 #include "openvino/genai/llm_pipeline.hpp"
 
+using namespace std;
 
 std::vector<std::string> questions = {
     "1+1=",