From 7c8ff06cb020a5b070e1038b91b769f420c84edf Mon Sep 17 00:00:00 2001
From: Anastasiya Pronina
Date: Sat, 30 Nov 2024 00:43:38 +0000
Subject: [PATCH] Removed testing definitions in sample

---
 samples/cpp/chat_sample/chat_sample.cpp | 2 +-
 src/cpp/src/llm_pipeline_static.cpp     | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/samples/cpp/chat_sample/chat_sample.cpp b/samples/cpp/chat_sample/chat_sample.cpp
index 70ecb93821..41d63fc0f1 100644
--- a/samples/cpp/chat_sample/chat_sample.cpp
+++ b/samples/cpp/chat_sample/chat_sample.cpp
@@ -10,7 +10,7 @@ int main(int argc, char* argv[]) try {
     std::string prompt;
 
     std::string models_path = argv[1];
-    std::string device = "NPU"; // GPU, NPU can be used as well
+    std::string device = "CPU"; // GPU, NPU can be used as well
     ov::genai::LLMPipeline pipe(models_path, device);
 
     ov::genai::GenerationConfig config;
diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp
index e4300f9248..113a422bcf 100644
--- a/src/cpp/src/llm_pipeline_static.cpp
+++ b/src/cpp/src/llm_pipeline_static.cpp
@@ -644,6 +644,10 @@ SMStaticLLMPipeline::SMStaticLLMPipeline(
     update_config(properties, {"NPUW_LLM_MAX_PROMPT_LEN", kMaxPromptLen});
     update_config(properties, {"NPUW_LLM_MIN_RESPONSE_LEN", kMinResponseLen});
     update_config(properties, {"NPUW_LLM_GENERATE_HINT", generate_hint});
+
+    // FIXME: Support CACHE_DIR in future
+    drop_cache_dir(properties);
+
     auto compiled = core.compile_model(model, "NPU", properties);
     m_request = compiled.create_infer_request();
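
Note: drop_cache_dir() is called in the second hunk but its definition is not part of this patch. A minimal sketch of what such a helper could look like, assuming `properties` is an ov::AnyMap and the intent is only to strip the unsupported cache option before the NPU compile; the name, signature, and body below are illustrative, not the repository's actual definition:

    // Illustrative sketch only -- the real drop_cache_dir() lives elsewhere in
    // src/cpp/src/llm_pipeline_static.cpp and may differ.
    #include <openvino/core/any.hpp>
    #include <openvino/runtime/properties.hpp>

    static void drop_cache_dir(ov::AnyMap& properties) {
        // ov::cache_dir.name() == "CACHE_DIR"; erasing an absent key is a no-op,
        // so this is safe whether or not the caller passed a cache directory.
        properties.erase(ov::cache_dir.name());
    }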