Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Static llm pipeline dynamic shape model #1240

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
12 changes: 6 additions & 6 deletions src/cpp/src/llm_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,13 +678,13 @@ class ContinuousBatchingAdapter final : public LLMPipelineImplBase {

/*
* NPU reads some properties from the config file, but when LLMPipeline is initialized
* from the model_str and weights_tensor, there are not files.
* from the model_str and weights_tensor, there are no files.
* In the latter case ModelDesc is stored in properties.
* This function pops ModelDescr from the properties and returns a pair of updated properties and ModelDescr.
*/
std::pair<ov::AnyMap, ov::genai::ModelConfigDesc> split_model_descr(const ov::AnyMap& properties) {
std::pair<ov::AnyMap, ov::genai::static_llm::ModelConfigDesc> split_model_descr(const ov::AnyMap& properties) {
ov::AnyMap main_properties = properties;
ov::genai::ModelConfigDesc model_descr;
ov::genai::static_llm::ModelConfigDesc model_descr;

auto pop_property = [](ov::AnyMap& orig_propertis, const std::string& key, auto& value) {
if (orig_propertis.find(key) != orig_propertis.end()) {
Expand Down Expand Up @@ -722,7 +722,7 @@ ov::genai::LLMPipeline::LLMPipeline(
auto [plugin_config, scheduler_config] = utils::split_scheduler_config(properties);
m_pimpl = std::make_unique<ContinuousBatchingAdapter>(models_path, tokenizer, scheduler_config, device, plugin_config);
} else if (device == "NPU") {
m_pimpl = std::make_unique<StaticLLMPipeline>(models_path, tokenizer, device, properties);
m_pimpl = static_llm::LLMPipelineFactory::create(models_path, tokenizer, device, properties);
} else {
m_pimpl = std::make_unique<StatefulLLMPipeline>(models_path, tokenizer, device, properties);
}
Expand All @@ -741,7 +741,7 @@ ov::genai::LLMPipeline::LLMPipeline(
auto [plugin_config, scheduler_config] = utils::split_scheduler_config(config);
m_pimpl = std::make_unique<ContinuousBatchingAdapter>(models_path, scheduler_config, device, plugin_config);
} else if (device == "NPU") {
m_pimpl = std::make_unique<StaticLLMPipeline>(models_path, device, config);
m_pimpl = static_llm::LLMPipelineFactory::create(models_path, device, config);
} else {
m_pimpl = std::make_unique<StatefulLLMPipeline>(models_path, device, config);
}
Expand Down Expand Up @@ -778,7 +778,7 @@ ov::genai::LLMPipeline::LLMPipeline(
// This will convert from AnyMap to ModelDesc.
auto [properties, model_descr] = split_model_descr(plugin_config);

m_pimpl = std::make_unique<StaticLLMPipeline>(
m_pimpl = static_llm::LLMPipelineFactory::create(
utils::singleton_core().read_model(model_str, weights_tensor),
model_descr,
tokenizer,
Expand Down
Loading
Loading