apply comments
pavel-esir committed May 15, 2024
1 parent 11fbaa2 commit 264e99f
Showing 7 changed files with 102 additions and 14 deletions.
14 changes: 6 additions & 8 deletions src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -11,8 +11,6 @@
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/streamer_base.hpp"

-using namespace std;

namespace ov {

using StreamerVariant = std::variant<std::function<void (std::string)>, std::shared_ptr<StreamerBase>>;
@@ -82,7 +80,7 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
* @param streamer optional streamer
* @return std::string decoded resulting text
*/
-std::string generate(std::string text, OptionalGenerationConfig generation_config=nullopt, OptionalStreamerVariant streamer=nullopt);
+std::string generate(std::string text, OptionalGenerationConfig generation_config=std::nullopt, OptionalStreamerVariant streamer=std::nullopt);

template <typename... Properties>
util::EnableIfAllStringAny<std::string, Properties...> generate(
@@ -124,8 +122,8 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
*/
EncodedResults generate(ov::Tensor input_ids,
std::optional<ov::Tensor> attention_mask,
-OptionalGenerationConfig generation_config=nullopt,
-OptionalStreamerVariant streamer=nullopt);
+OptionalGenerationConfig generation_config=std::nullopt,
+OptionalStreamerVariant streamer=std::nullopt);

template <typename InputsType, typename... Properties>
util::EnableIfAllStringAny<std::string, Properties...> operator()(
@@ -134,11 +132,11 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
return generate(text, AnyMap{std::forward<Properties>(properties)...});
}

-DecodedResults operator()(std::vector<std::string> text, OptionalGenerationConfig generation_config=nullopt);
-DecodedResults operator()(std::initializer_list<std::string> text, OptionalGenerationConfig generation_config=nullopt);
+DecodedResults operator()(std::vector<std::string> text, OptionalGenerationConfig generation_config=std::nullopt);
+DecodedResults operator()(std::initializer_list<std::string> text, OptionalGenerationConfig generation_config=std::nullopt);

// generate with streamers
-std::string operator()(std::string text, OptionalGenerationConfig generation_config=nullopt, OptionalStreamerVariant streamer=nullopt);
+std::string operator()(std::string text, OptionalGenerationConfig generation_config=std::nullopt, OptionalStreamerVariant streamer=std::nullopt);
std::string operator()(std::string text, OptionalStreamerVariant streamer);

ov::Tokenizer get_tokenizer();
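For context, a minimal usage sketch of the overloads touched above. The constructor arguments (converted model directory plus device string) are an assumption based on the LLMPipelineImpl(std::string& path, std::string ...) signature in llm_pipeline.cpp, and the model path is the one used by the new test; everything else follows the declarations shown in this header.

#include <functional>
#include <iostream>
#include <string>
#include "openvino/genai/llm_pipeline.hpp"

int main() {
    // Assumed constructor: converted model directory + device (not part of this diff).
    std::string model_path = "TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/";
    std::string device = "CPU";
    ov::LLMPipeline pipe(model_path, device);

    // Both optionals now default to std::nullopt instead of the unqualified
    // nullopt that relied on the removed `using namespace std;`.
    std::string result = pipe.generate("The Sun is yellow because");

    // Streaming through the std::function alternative of StreamerVariant.
    ov::StreamerVariant streamer = std::function<void(std::string)>(
        [](std::string subword) { std::cout << subword << std::flush; });
    result = pipe("The Sun is yellow because", std::nullopt, streamer);
    return 0;
}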
2 changes: 2 additions & 0 deletions src/cpp/src/llm_pipeline.cpp
@@ -107,6 +107,8 @@ ov::LLMPipeline::LLMPipelineImpl::LLMPipelineImpl(std::string& path, std::string
nlohmann::json data = nlohmann::json::parse(f);
m_chat_template = data.value("chat_template", "");
}



m_device = device;

8 changes: 4 additions & 4 deletions src/cpp/src/text_callback_streamer.cpp
@@ -5,7 +5,7 @@ namespace ov {
TextCallbackStreamer::TextCallbackStreamer(const Tokenizer& tokenizer, std::function<void (std::string)> callback, bool print_eos_token) {
m_tokenizer = tokenizer;
m_print_eos_token = print_eos_token;
-m_callback = callback;
+on_decoded_text_callback = callback;
m_enabled = true;
}

@@ -55,18 +55,18 @@ void TextCallbackStreamer::set_tokenizer(Tokenizer tokenizer) {
}

void TextCallbackStreamer::set_callback(std::function<void (std::string)> callback) {
-m_callback = callback;
+on_decoded_text_callback = callback;
m_enabled = true;
}

void TextCallbackStreamer::set_callback() {
-m_callback = [](std::string words){ ;};
+on_decoded_text_callback = [](std::string words){};
m_enabled = false;
}

void TextCallbackStreamer::on_finalized_text(const std::string& subword) {
if (m_enabled) {
-m_callback(subword);
+on_decoded_text_callback(subword);
}
}

2 changes: 1 addition & 1 deletion src/cpp/src/text_callback_streamer.hpp
@@ -21,7 +21,7 @@ class TextCallbackStreamer: public StreamerBase {
void set_callback(std::function<void (std::string)> callback);
void set_callback();

-std::function<void (std::string)> m_callback = [](std::string words){ ;};
+std::function<void (std::string)> on_decoded_text_callback = [](std::string words){};
bool m_enabled = false;
int64_t m_eos_token;
private:
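A short sketch of the renamed member in use. This assumes ov::Tokenizer can be built from a converted model directory (its constructor is not part of this diff) and that the calling code can include the internal text_callback_streamer.hpp, which is normally used only by LLMPipeline itself.

#include <functional>
#include <iostream>
#include <string>
#include "text_callback_streamer.hpp"

int main() {
    ov::Tokenizer tokenizer("TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/");  // assumed constructor

    // The callback lands in on_decoded_text_callback (formerly m_callback).
    ov::TextCallbackStreamer streamer(
        tokenizer,
        [](std::string subword) { std::cout << subword << std::flush; },
        /*print_eos_token=*/false);

    // set_callback() with no arguments installs a no-op callback and clears
    // m_enabled, so on_finalized_text() stops forwarding decoded text.
    streamer.set_callback();

    // Supplying a new callback re-enables forwarding.
    streamer.set_callback([](std::string subword) { std::cerr << subword; });
    return 0;
}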
85 changes: 85 additions & 0 deletions src/tests/python_tests/test_cpp_samples.py
@@ -0,0 +1,85 @@

import pytest

model_ids = [
# ("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama-1.1B-Chat-v1.0-skip-special-tokens"),

("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/"),
("google/gemma-2b-it", "gemma-2b-it/pytorch/dldt/FP16/"),
# ("meta-llama/Llama-2-7b-chat-hf", "Llama-2-7b-chat-hf/pytorch/dldt/FP16/"),
]

def run_cpp_sample_command(command, cwd):
import subprocess
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd, text=True)
stdout, stderr = process.communicate()
return stdout, stderr, process.returncode

def run_transformers_model(model_id, prompt, config=None, add_special_tokens=True):
import transformers

tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
model = transformers.AutoModelForCausalLM.from_pretrained(model_id)
tokenized = tokenizer.encode(prompt, return_tensors='pt', add_special_tokens=add_special_tokens)

default_config = dict(
num_beam_groups=3,
num_beams=15,
diversity_penalty=1.0,
num_return_sequences=15,
max_new_tokens=20,
early_stopping=False,
length_penalty=1.0,
no_repeat_ngram_size=9**9,
do_sample=False
)

if config is None:
config = default_config
print(tokenized)
beams = model.generate(tokenized, **config)
return map(lambda beam: tokenizer.decode(beam[tokenized.numel():], skip_special_tokens=True), beams)

@pytest.mark.parametrize("param", model_ids)
def test_model(param):
model_id, path = param

prompts = ["table is made of", "The Sun is yellow because"]
# prompt = " ".join([f'"{item}"' for item in prompts])

prompt = "table is made of"

# cmd = 'build-Debug/greedy_causal_lm' // for old samples
cmd = 'build-Debug/text_generation/causal_lm/cpp/'

# beam search old
cmd = 'build-Debug/beam_search_causal_lm'
cwd = '/home/epavel/devel/openvino.genai_'
config = None # None means greedy

# greedy new
cwd = '/home/epavel/devel/openvino.genai'
cmd = 'build-Debug/text_generation/causal_lm/cpp/greedy_causal_lm'
config = dict(max_new_tokens=75, do_sample=False)

# beam search new
cwd = '/home/epavel/devel/openvino.genai'
cmd = 'build-Debug/text_generation/causal_lm/cpp/beam_search_causal_lm'
config = None

predictions, _, _ = run_cpp_sample_command([cmd, '/home/epavel/devel/openvino.genai/text_generation/causal_lm/' + path, prompt], cwd)
print(predictions)

beams = run_transformers_model(model_id, prompt, config)
for beam in beams:
idx = predictions.find(beam)
if -1 == idx and beam and predictions:
raise RuntimeError(f'Missing "{beam=}" from predictions')
predictions = predictions[:idx] + predictions[idx + len(beam):]

return True
# with open('pred.txt', 'r') as file:
# predictions = file.read()

for model_id, path in model_ids:
test_model((model_id, path))
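As committed, only the last cwd/cmd/config assignments in test_model take effect, so the test exercises the new beam_search_causal_lm sample with the default beam-search reference settings from run_transformers_model. The check treats the sample's stdout as a single string: every reference completion produced by transformers must appear in it as a substring, and each match is cut out of predictions so repeated beams have to be found separately.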
4 changes: 3 additions & 1 deletion text_generation/causal_lm/cpp/CMakeLists.txt
@@ -4,7 +4,9 @@
cmake_minimum_required(VERSION 3.15)
project(causal_lm)

-# add_subdirectory(../../../thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
+if(NOT TARGET openvino_tokenizers)
+add_subdirectory(../../../thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
+endif()

add_executable(greedy_causal_lm greedy_causal_lm.cpp)
target_compile_definitions(greedy_causal_lm PRIVATE OPENVINO_TOKENIZERS_PATH=\"$<TARGET_FILE:openvino_tokenizers>\")
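The if(NOT TARGET openvino_tokenizers) guard lets this CMakeLists be configured both standalone and from a parent build that has already defined the openvino_tokenizers target; without the guard, adding the same subdirectory a second time would fail at configure time with a duplicate-target error.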
@@ -4,6 +4,7 @@
#include <openvino/openvino.hpp>
#include "openvino/genai/llm_pipeline.hpp"

+using namespace std;

std::vector<string> questions = {
"1+1=",
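The added using namespace std; keeps the sample's unqualified string in std::vector<string> questions compiling now that the directive has been removed from the public llm_pipeline.hpp header.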
