apply comments
pavel-esir committed May 15, 2024
1 parent 11fbaa2 commit 264e99f
Showing 7 changed files with 102 additions and 14 deletions.
14 changes: 6 additions & 8 deletions src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -11,8 +11,6 @@
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/streamer_base.hpp"

-using namespace std;

namespace ov {

using StreamerVariant = std::variant<std::function<void (std::string)>, std::shared_ptr<StreamerBase>>;
@@ -82,7 +80,7 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
* @param streamer optional streamer
* @return std::string decoded resulting text
*/
-std::string generate(std::string text, OptionalGenerationConfig generation_config=nullopt, OptionalStreamerVariant streamer=nullopt);
+std::string generate(std::string text, OptionalGenerationConfig generation_config=std::nullopt, OptionalStreamerVariant streamer=std::nullopt);

template <typename... Properties>
util::EnableIfAllStringAny<std::string, Properties...> generate(
@@ -124,8 +122,8 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
*/
EncodedResults generate(ov::Tensor input_ids,
std::optional<ov::Tensor> attention_mask,
-OptionalGenerationConfig generation_config=nullopt,
-OptionalStreamerVariant streamer=nullopt);
+OptionalGenerationConfig generation_config=std::nullopt,
+OptionalStreamerVariant streamer=std::nullopt);

template <typename InputsType, typename... Properties>
util::EnableIfAllStringAny<std::string, Properties...> operator()(
@@ -134,11 +132,11 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
return generate(text, AnyMap{std::forward<Properties>(properties)...});
}

-DecodedResults operator()(std::vector<std::string> text, OptionalGenerationConfig generation_config=nullopt);
-DecodedResults operator()(std::initializer_list<std::string> text, OptionalGenerationConfig generation_config=nullopt);
+DecodedResults operator()(std::vector<std::string> text, OptionalGenerationConfig generation_config=std::nullopt);
+DecodedResults operator()(std::initializer_list<std::string> text, OptionalGenerationConfig generation_config=std::nullopt);

// generate with streamers
-std::string operator()(std::string text, OptionalGenerationConfig generation_config=nullopt, OptionalStreamerVariant streamer=nullopt);
+std::string operator()(std::string text, OptionalGenerationConfig generation_config=std::nullopt, OptionalStreamerVariant streamer=std::nullopt);
std::string operator()(std::string text, OptionalStreamerVariant streamer);

ov::Tokenizer get_tokenizer();
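For context, a minimal usage sketch of the overloads touched above. The constructor arguments (converted model directory plus device string) are an assumption based on the LLMPipelineImpl(std::string& path, std::string ...) signature in llm_pipeline.cpp, and the model path is the one used by the new test; everything else follows the declarations shown in this header.

#include <functional>
#include <iostream>
#include <string>
#include "openvino/genai/llm_pipeline.hpp"

int main() {
    // Assumed constructor: converted model directory + device (not part of this diff).
    std::string model_path = "TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/";
    std::string device = "CPU";
    ov::LLMPipeline pipe(model_path, device);

    // Both optionals now default to std::nullopt instead of the unqualified
    // nullopt that relied on the removed `using namespace std;`.
    std::string result = pipe.generate("The Sun is yellow because");

    // Streaming through the std::function alternative of StreamerVariant.
    ov::StreamerVariant streamer = std::function<void(std::string)>(
        [](std::string subword) { std::cout << subword << std::flush; });
    result = pipe("The Sun is yellow because", std::nullopt, streamer);
    return 0;
}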
2 changes: 2 additions & 0 deletions src/cpp/src/llm_pipeline.cpp
@@ -107,6 +107,8 @@ ov::LLMPipeline::LLMPipelineImpl::LLMPipelineImpl(std::string& path, std::string
nlohmann::json data = nlohmann::json::parse(f);
m_chat_template = data.value("chat_template", "");
}



m_device = device;

8 changes: 4 additions & 4 deletions src/cpp/src/text_callback_streamer.cpp
@@ -5,7 +5,7 @@ namespace ov {
TextCallbackStreamer::TextCallbackStreamer(const Tokenizer& tokenizer, std::function<void (std::string)> callback, bool print_eos_token) {
m_tokenizer = tokenizer;
m_print_eos_token = print_eos_token;
-m_callback = callback;
+on_decoded_text_callback = callback;
m_enabled = true;
}

@@ -55,18 +55,18 @@ void TextCallbackStreamer::set_tokenizer(Tokenizer tokenizer) {
}

void TextCallbackStreamer::set_callback(std::function<void (std::string)> callback) {
-m_callback = callback;
+on_decoded_text_callback = callback;
m_enabled = true;
}

void TextCallbackStreamer::set_callback() {
-m_callback = [](std::string words){ ;};
+on_decoded_text_callback = [](std::string words){};
m_enabled = false;
}

void TextCallbackStreamer::on_finalized_text(const std::string& subword) {
if (m_enabled) {
-m_callback(subword);
+on_decoded_text_callback(subword);
}
}

2 changes: 1 addition & 1 deletion src/cpp/src/text_callback_streamer.hpp
@@ -21,7 +21,7 @@ class TextCallbackStreamer: public StreamerBase {
void set_callback(std::function<void (std::string)> callback);
void set_callback();

-std::function<void (std::string)> m_callback = [](std::string words){ ;};
+std::function<void (std::string)> on_decoded_text_callback = [](std::string words){};
bool m_enabled = false;
int64_t m_eos_token;
private:
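A short sketch of the renamed member in use. This assumes ov::Tokenizer can be built from a converted model directory (its constructor is not part of this diff) and that the calling code can include the internal text_callback_streamer.hpp, which is normally used only by LLMPipeline itself.

#include <functional>
#include <iostream>
#include <string>
#include "text_callback_streamer.hpp"

int main() {
    ov::Tokenizer tokenizer("TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/");  // assumed constructor

    // The callback lands in on_decoded_text_callback (formerly m_callback).
    ov::TextCallbackStreamer streamer(
        tokenizer,
        [](std::string subword) { std::cout << subword << std::flush; },
        /*print_eos_token=*/false);

    // set_callback() with no arguments installs a no-op callback and clears
    // m_enabled, so on_finalized_text() stops forwarding decoded text.
    streamer.set_callback();

    // Supplying a new callback re-enables forwarding.
    streamer.set_callback([](std::string subword) { std::cerr << subword; });
    return 0;
}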
85 changes: 85 additions & 0 deletions src/tests/python_tests/test_cpp_samples.py
@@ -0,0 +1,85 @@

import pytest

model_ids = [
# ("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama-1.1B-Chat-v1.0-skip-special-tokens"),

("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/"),
("google/gemma-2b-it", "gemma-2b-it/pytorch/dldt/FP16/"),
# ("meta-llama/Llama-2-7b-chat-hf", "Llama-2-7b-chat-hf/pytorch/dldt/FP16/"),
]

def run_cpp_sample_command(command, cwd):
import subprocess
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd, text=True)
stdout, stderr = process.communicate()
return stdout, stderr, process.returncode

def run_transformers_model(model_id, prompt, config=None, add_special_tokens=True):
import transformers

tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
model = transformers.AutoModelForCausalLM.from_pretrained(model_id)
tokenized = tokenizer.encode(prompt, return_tensors='pt', add_special_tokens=add_special_tokens)

default_config = dict(
num_beam_groups=3,
num_beams=15,
diversity_penalty=1.0,
num_return_sequences=15,
max_new_tokens=20,
early_stopping=False,
length_penalty=1.0,
no_repeat_ngram_size=9**9,
do_sample=False
)

if config is None:
config = default_config
print(tokenized)
beams = model.generate(tokenized, **config)
return map(lambda beam: tokenizer.decode(beam[tokenized.numel():], skip_special_tokens=True), beams)

@pytest.mark.parametrize("param", model_ids)
def test_model(param):
model_id, path = param

prompts = ["table is made of", "The Sun is yellow because"]
# prompt = " ".join([f'"{item}"' for item in prompts])

prompt = "table is made of"

# cmd = 'build-Debug/greedy_causal_lm' // for old samples
cmd = 'build-Debug/text_generation/causal_lm/cpp/'

# beam search old
cmd = 'build-Debug/beam_search_causal_lm'
cwd = '/home/epavel/devel/openvino.genai_'
config = None # None means greedy

# greedy new
cwd = '/home/epavel/devel/openvino.genai'
cmd = 'build-Debug/text_generation/causal_lm/cpp/greedy_causal_lm'
config = dict(max_new_tokens=75, do_sample=False)

# beam search new
cwd = '/home/epavel/devel/openvino.genai'
cmd = 'build-Debug/text_generation/causal_lm/cpp/beam_search_causal_lm'
config = None

predictions, _, _ = run_cpp_sample_command([cmd, '/home/epavel/devel/openvino.genai/text_generation/causal_lm/' + path, prompt], cwd)
print(predictions)

beams = run_transformers_model(model_id, prompt, config)
for beam in beams:
idx = predictions.find(beam)
if -1 == idx and beam and predictions:
raise RuntimeError(f'Missing "{beam=}" from predictions')
predictions = predictions[:idx] + predictions[idx + len(beam):]

return True
# with open('pred.txt', 'r') as file:
# predictions = file.read()

for model_id, path in model_ids:
test_model((model_id, path))
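As committed, only the last cwd/cmd/config assignments in test_model take effect, so the test exercises the new beam_search_causal_lm sample with the default beam-search reference settings from run_transformers_model. The check treats the sample's stdout as a single string: every reference completion produced by transformers must appear in it as a substring, and each match is cut out of predictions so repeated beams have to be found separately.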
4 changes: 3 additions & 1 deletion text_generation/causal_lm/cpp/CMakeLists.txt
@@ -4,7 +4,9 @@
cmake_minimum_required(VERSION 3.15)
project(causal_lm)

-# add_subdirectory(../../../thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
+if(NOT TARGET openvino_tokenizers)
+add_subdirectory(../../../thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
+endif()

add_executable(greedy_causal_lm greedy_causal_lm.cpp)
target_compile_definitions(greedy_causal_lm PRIVATE OPENVINO_TOKENIZERS_PATH=\"$<TARGET_FILE:openvino_tokenizers>\")
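The if(NOT TARGET openvino_tokenizers) guard lets this CMakeLists be configured both standalone and from a parent build that has already defined the openvino_tokenizers target; without the guard, adding the same subdirectory a second time would fail at configure time with a duplicate-target error.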
@@ -4,6 +4,7 @@
#include <openvino/openvino.hpp>
#include "openvino/genai/llm_pipeline.hpp"

+using namespace std;

std::vector<string> questions = {
"1+1=",
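The added using namespace std; keeps the sample's unqualified string in std::vector<string> questions compiling now that the directive has been removed from the public llm_pipeline.hpp header.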
