diff --git a/src/cpp/src/whisper_pipeline_static.cpp b/src/cpp/src/whisper_pipeline_static.cpp index 91de478b1c..3c5c3c1a9a 100644 --- a/src/cpp/src/whisper_pipeline_static.cpp +++ b/src/cpp/src/whisper_pipeline_static.cpp @@ -509,7 +509,7 @@ ov::InferRequest DecoderCache::get_model(uint8_t input_ids_size) { reshape_input_ids(m_decoder_model, input_ids_size); ov::Core core = utils::singleton_core(); - ov::CompiledModel compiled_model = core.compile_model(m_decoder_model, "NPU"); + ov::CompiledModel compiled_model = core.compile_model(m_decoder_model, "NPU", m_properties); ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder model"); m_cache.emplace(input_ids_size, compiled_model.create_infer_request()); } @@ -544,14 +544,14 @@ WhisperPipeline::StaticWhisperPipeline::StaticWhisperPipeline(const std::filesys preprocess_decoder(decoder_with_past_model); ov::CompiledModel compiled_model; - compiled_model = core.compile_model(encoder_model, "NPU"); + compiled_model = core.compile_model(encoder_model, "NPU", properties); ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper encoder model"); m_models.encoder = compiled_model.create_infer_request(); // Will compile decoder model when it's needed - m_decoder_cache = DecoderCache(decoder_model); + m_decoder_cache = DecoderCache(decoder_model, properties); - compiled_model = core.compile_model(decoder_with_past_model, "NPU"); + compiled_model = core.compile_model(decoder_with_past_model, "NPU", properties); ov::genai::utils::print_compiled_model_properties(compiled_model, "Static Whisper decoder with past model"); m_models.decoder_with_past = compiled_model.create_infer_request(); diff --git a/src/cpp/src/whisper_pipeline_static.hpp b/src/cpp/src/whisper_pipeline_static.hpp index b0618452d4..48425356b2 100644 --- a/src/cpp/src/whisper_pipeline_static.hpp +++ b/src/cpp/src/whisper_pipeline_static.hpp @@ -18,12 +18,15 @@ namespace genai { class DecoderCache { 
public: DecoderCache() = default; - DecoderCache(std::shared_ptr<ov::Model> model) : m_decoder_model(model) {} + DecoderCache(std::shared_ptr<ov::Model> model, ov::AnyMap properties) + : m_decoder_model(model) + , m_properties(properties) {} ov::InferRequest get_model(uint8_t input_ids_size); private: std::unordered_map<uint8_t, ov::InferRequest> m_cache; std::shared_ptr<ov::Model> m_decoder_model; + ov::AnyMap m_properties; }; class WhisperPipeline::StaticWhisperPipeline : public WhisperPipeline::WhisperPipelineImplBase {