Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into properties
Browse files — browse the repository at this point in the history
  • Loading branch information
ilya-lavrenov committed Dec 19, 2024
2 parents e0c2f17 + 499096a commit 7201694
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/cpp/src/llm_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
const std::string& device,
const ov::AnyMap& plugin_config
) : StatefulLLMPipeline{
utils::singleton_core().read_model(models_path, {}, plugin_config),
utils::singleton_core().read_model(models_path / "openvino_model.xml", {}, plugin_config),
tokenizer,
device,
plugin_config,
Expand Down
2 changes: 1 addition & 1 deletion tools/llm_bench/task/speech_to_text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list):
- np.array(perf_metrics.raw_metrics.m_new_token_times[:-1])
).tolist()
tm_list = (np.array([first_token_time] + second_tokens_durations) / 1000).tolist()
tm_infer_list = None
tm_infer_list = (np.array(perf_metrics.raw_metrics.token_infer_durations) / 1000 / 1000).tolist()
result_text = result_text.texts[0]
else:
start = time.perf_counter()
Expand Down
3 changes: 2 additions & 1 deletion tools/llm_bench/task/text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ def token_printer():
).tolist()

tm_list = np.array([first_token_time] + second_tokens_durations) / 1000
inference_durations = (np.array(perf_metrics.raw_metrics.token_infer_durations) / 1000 / 1000).tolist()
log.debug('latency of all tokens:')
[log.debug('[{}]{:.4f}'.format(idx, tm)) for idx, tm in enumerate(tm_list)]
iter_data = gen_output_data.gen_iterate_data(
Expand All @@ -323,7 +324,7 @@ def token_printer():
num,
iter_data,
tm_list.tolist(),
None,
inference_durations.tolist(),
warm_up=(num == 0),
max_rss_mem=max_rss_mem_consumption,
max_shared_mem=max_shared_mem_consumption,
Expand Down
3 changes: 2 additions & 1 deletion tools/llm_bench/task/visual_language_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,12 @@ def run_visual_language_generation_genai(
mm_embeddings_preparation_time=perf_metrics.get_prepare_embeddings_duration().mean
)
iter_data_list.append(iter_data)
inference_durations = np.array(perf_metrics.raw_metrics.token_infer_durations) / 1000 / 1000
metrics_print.print_metrics(
num,
iter_data,
tm_list.tolist(),
None,
inference_durations.tolist(),
warm_up=(num == 0),
max_rss_mem=max_rss_mem_consumption,
max_shared_mem=max_shared_mem_consumption,
Expand Down

0 comments on commit 7201694

Please sign in to comment.