Skip to content

Commit

Permalink
Merge branch 'master' into dm/npu_llm_stateful_default
Browse files Browse the repository at this point in the history
  • Loading branch information
dmatveev authored Jan 17, 2025
2 parents f33df3c + bb6138e commit 049732a
Show file tree
Hide file tree
Showing 13 changed files with 743 additions and 9 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/causal_lm_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,36 @@ jobs:
<<< $'Who drew this painting?\nWhen did the painter live?'
timeout-minutes: 4

# CI job: runs the C++ visual_language_chat sample against the Qwen/Qwen2-VL-2B-Instruct model.
# Steps: check out the repo with submodules, set up Python 3.11, install OpenVINO from
# env.l_u22_ov_link, build the `visual_language_chat` and `py_openvino_genai` targets,
# install Python dependencies, export the model to ./qwen2_vl_2b_ov/ with optimum-cli,
# download monalisa.jpg, then run the sample with two prompts supplied on stdin via a
# bash here-string.
# NOTE(review): `timeout-minutes: 4` appears to belong to the final run step; indentation
# is not visible in this view — confirm against the workflow file.
visual_language_chat_sample-ubuntu-qwen2vl:
runs-on: ubuntu-22.04-16-cores
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: actions/setup-python@v4
with:
python-version: 3.11
- uses: ./.github/actions/install_openvino
with:
ov_link: ${{ env.l_u22_ov_link }}
- uses: ./.github/actions/build_app
with:
build_target: 'visual_language_chat py_openvino_genai'
- uses: ./.github/actions/install_python_deps
- name: Download and convert Qwen2VL model
run: |
source ./ov/setupvars.sh
optimum-cli export openvino --model Qwen/Qwen2-VL-2B-Instruct ./qwen2_vl_2b_ov/ --trust-remote-code
- name: Download images
run: |
wget https://llava-vl.github.io/static/images/monalisa.jpg
- name: Run visual_language_chat C++ sample - Qwen2VL
run: >
source ./ov/setupvars.sh
&& ./build/samples/cpp/visual_language_chat/visual_language_chat ./qwen2_vl_2b_ov/ monalisa.jpg
<<< $'Who drew this painting?\nWhen did the painter live?'
timeout-minutes: 4

cpp-continuous-batching-ubuntu:
runs-on: ubuntu-20.04-8-cores
defaults:
Expand Down
11 changes: 11 additions & 0 deletions SUPPORTED_MODELS.md
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,17 @@ In addition to image generation models, `InpaintingPipeline` supports specialize
</ul>
</td>
</tr>
<tr>
<td><code>Qwen2-VL</code></td>
<td>Qwen2-VL</td>
<td>Not supported</td>
<td>
<ul>
<li><a href="https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct"><code>Qwen/Qwen2-VL-2B-Instruct</code></a></li>
<li><a href="https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct"><code>Qwen/Qwen2-VL-7B-Instruct</code></a></li>
</ul>
</td>
</tr>
</tbody>
</table>

Expand Down
26 changes: 24 additions & 2 deletions src/cpp/src/lm_encoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,23 @@ void update_position_ids(ov::Tensor&& position_ids, const ov::Tensor&& attention
}
}

/**
 * @brief Resets `position_ids` to a {3, batch, 1} tensor holding the next position index.
 *
 * The batch size and current sequence length are read from dimensions 0 and 1 of
 * `attention_mask`'s shape. Every element of the reshaped `position_ids` tensor is set to
 * `sequence_length - 1 + rope_delta`; the contents of the mask are not inspected.
 *
 * @param position_ids   Tensor to reshape and overwrite; accessed as int64_t data.
 * @param attention_mask Tensor whose shape supplies {batch, sequence_length}.
 * @param rope_delta     Offset added to the last position index.
 *
 * NOTE(review): the leading dimension of 3 presumably corresponds to temporal/height/width
 * rotary components (the original local was named `thw_dim_size`) — confirm with the model.
 */
void update_3d_position_ids(ov::Tensor&& position_ids, const ov::Tensor& attention_mask, const int64_t rope_delta) {
    constexpr size_t kPositionRows = 3;

    const auto& mask_shape = attention_mask.get_shape();
    const size_t num_sequences = mask_shape.at(0);
    const size_t mask_length = mask_shape.at(1);

    position_ids.set_shape({kPositionRows, num_sequences, 1});

    const int64_t next_position = static_cast<int64_t>(mask_length) - 1 + rope_delta;

    // All rows and all batch entries share the same value, so the {3, batch, 1}
    // buffer can be filled as one flat run of 3 * batch elements.
    int64_t* out = position_ids.data<int64_t>();
    const size_t element_count = kPositionRows * num_sequences;
    for (size_t i = 0; i < element_count; ++i) {
        out[i] = next_position;
    }
}

void update_attention_mask_with_beams(ov::Tensor&& attention_mask, std::vector<int32_t> next_beams) {
ov::Tensor original_mask{ov::element::i64, attention_mask.get_shape()};
ov::Shape original_shape = original_mask.get_shape();
Expand Down Expand Up @@ -58,7 +75,8 @@ std::pair<EncodedResults, std::optional<int64_t>> get_lm_encoded_results(
Sampler& sampler,
std::vector<SequenceGroup::Ptr> sequence_groups,
std::optional<ov::Tensor> position_ids,
std::optional<EmbeddingsModel> m_embedding
std::optional<EmbeddingsModel> m_embedding,
std::optional<int64_t> rope_delta
) {
std::vector<GenerationHandle> generations;
for (SequenceGroup::Ptr sequence_group : sequence_groups) {
Expand Down Expand Up @@ -196,7 +214,11 @@ std::pair<EncodedResults, std::optional<int64_t>> get_lm_encoded_results(
update_attention_mask_with_beams(m_llm.get_tensor("attention_mask"), next_beams);

if (position_ids.has_value()) {
update_position_ids(m_llm.get_tensor("position_ids"), m_llm.get_tensor("attention_mask"));
if (position_ids->get_shape().size() == 3 && rope_delta.has_value()) {
update_3d_position_ids(m_llm.get_tensor("position_ids"), m_llm.get_tensor("attention_mask"), rope_delta.value());
} else {
update_position_ids(m_llm.get_tensor("position_ids"), m_llm.get_tensor("attention_mask"));
}
}

m_llm.set_tensor("beam_idx", ov::Tensor{ov::element::i32, {total_num_tokens}, next_beams.data()});
Expand Down
2 changes: 1 addition & 1 deletion src/cpp/src/lm_encoding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace genai {

std::pair<EncodedResults, std::optional<int64_t>> get_lm_encoded_results(ov::InferRequest& m_llm, const ov::Tensor& input_ids, const ov::Tensor& attention_mask,
const std::shared_ptr<StreamerBase>& streamer_ptr, Sampler& sampler, std::vector<SequenceGroup::Ptr> sequence_groups,
std::optional<ov::Tensor> position_ids, std::optional<EmbeddingsModel> m_embedding);
std::optional<ov::Tensor> position_ids, std::optional<EmbeddingsModel> m_embedding, std::optional<int64_t> rope_delta = std::nullopt);

}
}
Loading

0 comments on commit 049732a

Please sign in to comment.