Skip to content

Commit

Permalink
[LLM] [NPU] StaticLLMPipeline: Compiler DQ update (#1515)
Browse files (browse the repository at this point in the history)
  • Loading branch information
smirnov-alexey authored Jan 13, 2025
1 parent 5146984 commit 3d226ec
Showing 1 changed file with 27 additions and 20 deletions.
47 changes: 27 additions & 20 deletions src/cpp/src/llm_pipeline_static.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,17 +475,16 @@ std::optional<NPUDesc> extract_npu_descriptor(ov::Core& core) {
}
const auto arch = core.get_property("NPU", ov::device::architecture);
const auto max_tiles = core.get_property("NPU", ov::intel_npu::max_tiles);

bool compiler_dq = false;
const auto device_caps = core.get_property("NPU", ov::device::capabilities);
if (std::find(device_caps.begin(), device_caps.end(),
"COMPILER_DYNAMIC_QUANTIZATION") != device_caps.end()) {
const auto supported_properties = core.get_property("NPU", ov::supported_properties);
if (std::find(supported_properties.begin(), supported_properties.end(),
"NPU_COMPILER_DYNAMIC_QUANTIZATION") != supported_properties.end()) {
compiler_dq = true;
}
return std::make_optional(NPUDesc{arch, max_tiles, compiler_dq});
}

ov::AnyMap get_baseline_common_config() {
ov::AnyMap get_baseline_common_config(const std::optional<NPUDesc>& npudesc) {
ov::AnyMap config = {
{ "NPU_COMPILATION_MODE_PARAMS", "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add_RMSNorm" },
{ "NPUW_DEVICES", "NPU" },
Expand All @@ -497,11 +496,20 @@ ov::AnyMap get_baseline_common_config() {
{ "NPUW_SLICE_OUT", "YES" },
{ "NPUW_FUNCALL_ASYNC", "YES" }
};
// FIXME: this config logic is getting more and more complex
if (npudesc.has_value() && npudesc->compiler_dq) {
config.emplace("NPUW_DQ", "YES");
config.emplace("NPUW_DQ_FULL", "NO");
config.emplace("NPU_COMPILER_DYNAMIC_QUANTIZATION", "YES");
config.erase("NPUW_DCOFF_TYPE");
config.erase("NPUW_DCOFF_SCALE");
}
return config;
}

ov::AnyMap get_default_common_config(const std::shared_ptr<ov::Model>& model) {
auto config = get_baseline_common_config();
ov::AnyMap get_default_common_config(const std::shared_ptr<ov::Model>& model,
const std::optional<NPUDesc>& npudesc) {
auto config = get_baseline_common_config(npudesc);
const char* npu_l0 = std::getenv("DISABLE_OPENVINO_GENAI_NPU_L0");
if (npu_l0 && std::atoi(npu_l0) == 1) {
config.emplace("NPUW_WEIGHTS_BANK_ALLOC", "CPU");
Expand All @@ -513,40 +521,39 @@ ov::AnyMap get_default_common_config(const std::shared_ptr<ov::Model>& model) {

ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model,
const std::optional<NPUDesc>& npudesc) {
auto config = get_default_common_config(model);
if (is_cw_compressed(model)) {
config.emplace("NPUW_DQ", "YES");
} else {
config.emplace("NPUW_PMM", "NO");
}
auto config = get_default_common_config(model, npudesc);
if (npudesc.has_value() &&
npudesc->arch == "4000" &&
npudesc->max_tiles != -1) {
config.emplace("NPU_DPU_GROUPS", npudesc->max_tiles);
}
if (npudesc.has_value() && npudesc->compiler_dq) {
config.emplace("NPUW_DQ_FULL", "NO");
// Specify NPUW DQ if Compiler DQ is not enabled
if (!npudesc.has_value() || !npudesc->compiler_dq) {
if (is_cw_compressed(model)) {
config.emplace("NPUW_DQ", "YES");
} else {
config.emplace("NPUW_PMM", "NO");
}
}
return config;
}

ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model,
const std::optional<NPUDesc>& npudesc,
const GenerateHint hint) {
auto config = get_default_common_config(model);
auto config = get_default_common_config(model, npudesc);
if (hint == GenerateHint::BEST_PERF) {
config.emplace("NPUW_ONLINE_PIPELINE", "NONE");
}
// NB: Unconditionally set for generation model
config.emplace("NPUW_DQ", "YES");
if (npudesc.has_value() && npudesc->arch == "4000") {
config.emplace("NPU_DPU_GROUPS", 4);
}
if (hint == GenerateHint::FAST_COMPILE) {
config.emplace("NPUW_UNFOLD_IREQS", "YES");
}
if (npudesc.has_value() && npudesc->compiler_dq) {
config.emplace("NPUW_DQ_FULL", "NO");
// Specify NPUW DQ if Compiler DQ is not enabled
if (!npudesc.has_value() || !npudesc->compiler_dq) {
config.emplace("NPUW_DQ", "YES");
}
return config;
}
Expand Down

0 comments on commit 3d226ec

Please sign in to comment.