Commit 11fbaa2: tokenizer minor fixes

pavel-esir committed on May 15, 2024
1 parent 72c045e

Showing 2 changed files with 7 additions and 9 deletions.
src/cpp/src/llm_pipeline.cpp (8 changes: 6 additions & 2 deletions)
```diff
@@ -143,7 +143,7 @@ std::string ov::LLMPipeline::LLMPipelineImpl::generate(
 
     auto [input_ids, attention_mask] = m_tokenizer.encode(text);
 
-    // todo: W/A If sentence begins with a special tokens (<bos>, <s>, etc.) openvino_tokenizer inserts 2 special extra tokens <bos> and "▁",
+    // todo: W/A If sentence begins with a specfial tokens (<bos>, <s>, etc.) openvino_tokenizer inserts 2 special extra tokens <bos> and "▁",
     // but HF does not do that. Moreover openvino_tokenizer always inserts <bos> but in chat scenario HF does not do that because skip_special_tokens=True.
     // Need to remove both of that tokens manually to get exact token by token alignment with HF
     auto size = input_ids.get_shape();
```
```diff
@@ -155,7 +155,7 @@ std::string ov::LLMPipeline::LLMPipelineImpl::generate(
     std::vector<float> tmp_attn_mask(attention_mask_data, attention_mask_data + attention_mask.get_size());
     // tmp_attn_mask.erase(tmp_attn_mask.begin());
 
-    std::vector<std::string> prefixes_to_exclude = {"<s>", "</s>"};  // todo: for TinyLlama, need to get them form generation_config
+    std::vector<std::string> prefixes_to_exclude = {config.eos_token, config.bos_token};
     auto prefix_match = [&text](std::string prefix) { return text.substr(0, prefix.length()) == prefix; };
     if (std::any_of(prefixes_to_exclude.begin(), prefixes_to_exclude.end(), prefix_match)) {
         tmp_ids.erase(tmp_ids.begin());
```
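Both hunks above belong to one workaround: when the prompt begins with a special token, openvino_tokenizer emits two extra leading tokens that HF's tokenizer would not, so the pipeline strips the first entry from both the token ids and the attention mask. Switching the prefix list from the hardcoded "<s>"/"</s>" to config.eos_token/config.bos_token resolves the old TinyLlama-specific todo. A minimal self-contained sketch of the check, with a hypothetical prompt and token ids (the real code operates on ov::Tensor data), might look like:

```cpp
// Sketch of the prefix-exclusion check; ids and prompt are hypothetical.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
    std::string text = "<s>Hello";                     // prompt as passed to encode()
    std::vector<int64_t> tmp_ids = {1, 29871, 15043};  // hypothetical ids: <bos>, "▁", "Hello"
    std::vector<int64_t> tmp_attn_mask = {1, 1, 1};

    // In the pipeline these come from the generation config (bos/eos tokens).
    std::vector<std::string> prefixes_to_exclude = {"<s>", "</s>"};

    auto prefix_match = [&text](const std::string& prefix) {
        return text.substr(0, prefix.length()) == prefix;
    };

    if (std::any_of(prefixes_to_exclude.begin(), prefixes_to_exclude.end(), prefix_match)) {
        // Drop the extra leading token so the ids align with HF's output.
        tmp_ids.erase(tmp_ids.begin());
        tmp_attn_mask.erase(tmp_attn_mask.begin());
    }

    for (int64_t id : tmp_ids) std::cout << id << ' ';  // prints: 29871 15043
    std::cout << '\n';
}
```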
```diff
@@ -221,6 +221,10 @@ ov::EncodedResults ov::LLMPipeline::LLMPipelineImpl::generate(
     } else if (auto callback = std::get_if<std::function<void(std::string)>>(&*streamer)) {
         streamer_ptr = std::make_shared<TextCallbackStreamer>(m_tokenizer, *callback);
     }
+    auto batch_size = input_ids.get_shape().at(0);
+    if ((batch_size != 1 || !config_helper.is_greedy_decoding()) && streamer_ptr) {
+        OPENVINO_THROW("Currently streaming is possible only with batch size=1 and greedy decoding");
+    }
 
     auto attention_mask_data = attention_mask.has_value() ? *attention_mask : ov::generate_utils::init_attention_mask(input_ids);
```
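The guard added in this hunk fails fast when a streamer is supplied in a configuration that streaming does not support. A rough standalone equivalent, with OPENVINO_THROW swapped for std::runtime_error and the config helper and streamer stubbed out (all names here are hypothetical), could be:

```cpp
// Standalone approximation of the streaming guard; StreamerStub and
// check_streaming_supported are hypothetical stand-ins.
#include <cstddef>
#include <memory>
#include <stdexcept>

struct StreamerStub {};

void check_streaming_supported(std::size_t batch_size,
                               bool is_greedy_decoding,
                               const std::shared_ptr<StreamerStub>& streamer_ptr) {
    if ((batch_size != 1 || !is_greedy_decoding) && streamer_ptr) {
        throw std::runtime_error(
            "Currently streaming is possible only with batch size=1 and greedy decoding");
    }
}

int main() {
    auto streamer = std::make_shared<StreamerStub>();
    check_streaming_supported(1, true, streamer);      // OK: batch of 1, greedy
    try {
        check_streaming_supported(2, true, streamer);  // rejected: batch size > 1
    } catch (const std::runtime_error&) {
        // expected
    }
}
```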
src/cpp/src/tokenizer.cpp (8 changes: 1 addition & 7 deletions)
```diff
@@ -93,13 +93,7 @@ class Tokenizer::TokenizerImpl {
     m_tokenize_request.set_input_tensor(ov::Tensor{ov::element::string, {prompts.size()}, prompts.data()});
     auto size_ = m_tokenize_request.get_input_tensor().get_shape();
     m_tokenize_request.infer();
-
-    ::pad_left(m_tokenize_request.get_tensor("input_ids"), m_tokenize_request.get_tensor("attention_mask"), m_pad_token_id);
-    // todo: fix mask filled with '2' instead of '0'
-    ov::Tensor attention_mask = m_tokenize_request.get_tensor("attention_mask");
-    int64_t* attention_mask_data = attention_mask.data<int64_t>();
-    std::replace(attention_mask_data, attention_mask_data + attention_mask.get_size(), 2, 0);
-
+    pad_left(m_tokenize_request.get_tensor("input_ids"), m_tokenize_request.get_tensor("attention_mask"), m_pad_token_id);
     return {m_tokenize_request.get_tensor("input_ids"), m_tokenize_request.get_tensor("attention_mask")};
 }
```
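The deleted block called a file-local ::pad_left and then patched the resulting attention mask in place, rewriting stray '2' values to '0'; after this fix a single pad_left call is trusted to produce a correct mask. For illustration, a left-padding routine over plain vectors (a sketch only, assuming this shape of data; the real pad_left works on ov::Tensor) could look like:

```cpp
// Illustrative left-padding over vectors; not the actual pad_left.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

void pad_left(std::vector<std::vector<int64_t>>& input_ids,
              std::vector<std::vector<int64_t>>& attention_mask,
              int64_t pad_token_id) {
    std::size_t max_len = 0;
    for (const auto& row : input_ids)
        max_len = std::max(max_len, row.size());

    for (std::size_t i = 0; i < input_ids.size(); ++i) {
        const std::size_t pad = max_len - input_ids[i].size();
        // Prepend pad tokens to the ids and zeros to the mask, so real
        // tokens stay right-aligned and keep their mask value of 1.
        input_ids[i].insert(input_ids[i].begin(), pad, pad_token_id);
        attention_mask[i].insert(attention_mask[i].begin(), pad, 0);
    }
}
```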

