Skip to content

Commit

Permalink
Added i64 support for text encoders inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
ilya-lavrenov committed Dec 23, 2024
1 parent 05d01ac commit e6b39a3
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 28 deletions.
16 changes: 11 additions & 5 deletions src/cpp/src/image_generation/models/clip_text_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,20 @@ ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string
const size_t text_embedding_batch_size = do_classifier_free_guidance ? 2 : 1;

// Tokenizes `prompt` and stores the resulting ids in `input_ids`, padding every
// remaining element with `pad_token_id`.
//
// @param prompt     text to encode with m_clip_tokenizer
// @param input_ids  destination tensor; its element type (i32 or anything else,
//                   which is treated as i64) selects the typed accessor used
//
// The typed accessor is only requested after the element type has been checked,
// so an i64 destination is never touched through data<int32_t>() (and vice
// versa). The tokenizer output is always read as int64.
auto perform_tokenization = [&](const std::string& prompt, ov::Tensor input_ids) {
    ov::Tensor input_ids_token = m_clip_tokenizer.encode(prompt).input_ids;

    // Clamp the copy to the destination size: a prompt that tokenizes to more
    // ids than the destination holds must not write past the end of the buffer.
    const size_t copy_length = std::min(input_ids.get_size(), input_ids_token.get_size());

    if (input_ids.get_element_type() == ov::element::i32) {
        std::fill_n(input_ids.data<int32_t>(), input_ids.get_size(), pad_token_id);
        // int64 -> int32 narrowing happens element-wise inside copy_n.
        std::copy_n(input_ids_token.data<int64_t>(), copy_length, input_ids.data<int32_t>());
    } else {
        std::fill_n(input_ids.data<int64_t>(), input_ids.get_size(), pad_token_id);
        std::copy_n(input_ids_token.data<int64_t>(), copy_length, input_ids.data<int64_t>());
    }
};

ov::Tensor input_ids(ov::element::i32, {text_embedding_batch_size, m_config.max_position_embeddings});
ov::Tensor input_ids = m_request.get_input_tensor();
input_ids.set_shape({text_embedding_batch_size, m_config.max_position_embeddings});

size_t current_batch_idx = 0;

if (do_classifier_free_guidance) {
Expand All @@ -141,7 +148,6 @@ ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string
{current_batch_idx + 1, m_config.max_position_embeddings}));

// text embeddings
m_request.set_tensor("input_ids", input_ids);
m_request.infer();

return m_request.get_output_tensor(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,20 @@ ov::Tensor CLIPTextModelWithProjection::infer(const std::string& pos_prompt, con
const size_t text_embedding_batch_size = do_classifier_free_guidance ? 2 : 1;

// Tokenizes `prompt` and stores the resulting ids in `input_ids`, padding every
// remaining element with `pad_token_id`.
//
// @param prompt     text to encode with m_clip_tokenizer
// @param input_ids  destination tensor; its element type (i32 or anything else,
//                   which is treated as i64) selects the typed accessor used
//
// The typed accessor is only requested after the element type has been checked,
// so an i32 destination is never touched through data<int64_t>() (and vice
// versa). The tokenizer output is always read as int64.
auto perform_tokenization = [&](const std::string& prompt, ov::Tensor input_ids) {
    ov::Tensor input_ids_token = m_clip_tokenizer.encode(prompt).input_ids;

    // Clamp the copy to the destination size: a prompt that tokenizes to more
    // ids than the destination holds must not write past the end of the buffer.
    const size_t copy_length = std::min(input_ids.get_size(), input_ids_token.get_size());

    if (input_ids.get_element_type() == ov::element::i32) {
        std::fill_n(input_ids.data<int32_t>(), input_ids.get_size(), pad_token_id);
        // int64 -> int32 narrowing happens element-wise inside copy_n.
        std::copy_n(input_ids_token.data<int64_t>(), copy_length, input_ids.data<int32_t>());
    } else {
        std::fill_n(input_ids.data<int64_t>(), input_ids.get_size(), pad_token_id);
        std::copy_n(input_ids_token.data<int64_t>(), copy_length, input_ids.data<int64_t>());
    }
};

ov::Tensor input_ids(ov::element::i64, {text_embedding_batch_size, m_config.max_position_embeddings});
ov::Tensor input_ids = m_request.get_input_tensor();
input_ids.set_shape({text_embedding_batch_size, m_config.max_position_embeddings});

size_t current_batch_idx = 0;

if (do_classifier_free_guidance) {
Expand All @@ -132,7 +139,6 @@ ov::Tensor CLIPTextModelWithProjection::infer(const std::string& pos_prompt, con
{current_batch_idx + 1, m_config.max_position_embeddings}));

// text embeddings
m_request.set_tensor("input_ids", input_ids);
m_request.infer();

return m_request.get_output_tensor(0);
Expand Down
11 changes: 7 additions & 4 deletions src/cpp/src/image_generation/models/t5_encoder_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,14 @@ ov::Tensor T5EncoderModel::infer(const std::string& pos_prompt, const std::strin

// Tokenizes `prompt` and stores the resulting ids in `input_ids`, padding every
// remaining element with `pad_token_id`.
//
// @param prompt     text to encode with m_tokenizer
// @param input_ids  destination tensor; its element type (i32 or anything else,
//                   which is treated as i64) selects the typed accessor used
//
// The typed accessor is only requested after the element type has been checked,
// and the tokenizer output is always read as int64.
auto perform_tokenization = [&](const std::string& prompt, ov::Tensor input_ids) {
    ov::Tensor input_ids_token = m_tokenizer.encode(prompt).input_ids;

    // Both copies below are bounded by min_length so that a prompt producing
    // more ids than the destination holds cannot write past the buffer.
    const size_t min_length = std::min(input_ids.get_size(), input_ids_token.get_size());

    if (input_ids.get_element_type() == ov::element::i32) {
        std::fill_n(input_ids.data<int32_t>(), input_ids.get_size(), pad_token_id);
        // int64 -> int32 narrowing happens element-wise inside copy_n.
        std::copy_n(input_ids_token.data<int64_t>(), min_length, input_ids.data<int32_t>());
    } else {
        std::fill_n(input_ids.data<int64_t>(), input_ids.get_size(), pad_token_id);
        std::copy_n(input_ids_token.data<int64_t>(), min_length, input_ids.data<int64_t>());
    }
};

ov::Tensor input_ids = m_request.get_input_tensor();
Expand Down Expand Up @@ -114,7 +118,6 @@ ov::Tensor T5EncoderModel::infer(const std::string& pos_prompt, const std::strin
{current_batch_idx + 1, input_ids.get_shape()[1]}));

// text embeddings
m_request.set_tensor("input_ids", input_ids);
m_request.infer();

return m_request.get_output_tensor(0);
Expand Down
16 changes: 4 additions & 12 deletions src/cpp/src/image_generation/models/unet_inference_dynamic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,26 @@ namespace genai {


class UNet2DConditionModel::UNetInferenceDynamic : public UNet2DConditionModel::UNetInference {

public:

virtual void compile(std::shared_ptr<ov::Model> model, const std::string& device, const ov::AnyMap& properties) override
{
virtual void compile(std::shared_ptr<ov::Model> model, const std::string& device, const ov::AnyMap& properties) override {
ov::Core core = utils::singleton_core();

ov::CompiledModel compiled_model = core.compile_model(model, device, properties);
ov::genai::utils::print_compiled_model_properties(compiled_model, "UNet 2D Condition dynamic model");
m_request = compiled_model.create_infer_request();
}

virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) override
{
virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) override {
OPENVINO_ASSERT(m_request, "UNet model must be compiled first");
m_request.set_tensor(tensor_name, encoder_hidden_states);
}

virtual void set_adapters(AdapterController &adapter_controller, const AdapterConfig& adapters) override
{
virtual void set_adapters(AdapterController &adapter_controller, const AdapterConfig& adapters) override {
OPENVINO_ASSERT(m_request, "UNet model must be compiled first");
adapter_controller.apply(m_request, adapters);
}

virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep) override
{
virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep) override {
OPENVINO_ASSERT(m_request, "UNet model must be compiled first. Cannot infer non-compiled model");

m_request.set_tensor("sample", sample);
Expand All @@ -49,10 +43,8 @@ class UNet2DConditionModel::UNetInferenceDynamic : public UNet2DConditionModel::
}

private:

ov::InferRequest m_request;
};


} // namespace genai
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel
ov::CompiledModel compiled_model = core.compile_model(model, device, properties);
ov::genai::utils::print_compiled_model_properties(compiled_model, "UNet 2D Condition batch-1 model");

for (int i = 0; i < m_native_batch_size; i++)
{
for (int i = 0; i < m_native_batch_size; i++) {
m_requests[i] = compiled_model.create_infer_request();
}
}
Expand Down

0 comments on commit e6b39a3

Please sign in to comment.