Skip to content

Commit

Permalink
[GPU] fix property overwritten issue (#28209)
Browse files Browse the repository at this point in the history
### Details:
- Prevent `ov::hint::dynamic_quantization_group_size` and
`ov::hint::kv_cache_precision` from being overwritten with their default
values when `ExecutionConfig::apply_user_properties` is called twice.
 
 - For example
If the user sets `ov::hint::dynamic_quantization_group_size` to 128, the
second call to `ExecutionConfig::apply_user_properties` resets it
to 32, which causes a performance drop on MTL 125H.
- This issue was introduced by PR:
#26940


 - Performance  before and after this PR:


![image](https://github.com/user-attachments/assets/190048f7-fb77-490a-ba32-162f465bb233)

   
Test result on master branch:


![image](https://github.com/user-attachments/assets/37e9dd5d-b9ac-40e9-9db4-3ffe45425777)



### Tickets:
 - *[CVS-159322](https://jira.devtools.intel.com/browse/CVS-159322)*
  • Loading branch information
riverlijunjie authored Jan 3, 2025
1 parent 782accc commit 2e24dfa
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ class ExecutionConfig {
void apply_performance_hints(const cldnn::device_info& info);
void apply_priority_hints(const cldnn::device_info& info);
void apply_debug_options(const cldnn::device_info& info);
void update_specific_default_properties(const cldnn::device_info& info);

template <typename T, PropertyMutability mutability>
void apply_rt_info_property(const ov::Property<T, mutability>& property, const ov::RTMap& rt_info) {
Expand All @@ -167,6 +168,8 @@ class ExecutionConfig {

std::map<std::string, PropertyVisibility> supported_properties;
std::map<std::string, BaseValidator::Ptr> property_validators;

bool specific_default_properties_is_set = false;
};

} // namespace intel_gpu
Expand Down
30 changes: 20 additions & 10 deletions src/plugins/intel_gpu/src/runtime/execution_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,27 @@ void ExecutionConfig::apply_hints(const cldnn::device_info& info) {
apply_debug_options(info);
}

// Applies device-dependent defaults for the KV-cache precision and the dynamic
// quantization group size. The specific_default_properties_is_set flag makes
// this a one-shot operation: once it has run on this config, further calls
// return without reading or writing any property.
void ExecutionConfig::update_specific_default_properties(const cldnn::device_info& info) {
    if (!specific_default_properties_is_set) {
        // Record the flag first so the defaults below are applied only one time.
        specific_default_properties_is_set = true;

        // Non-systolic platforms get KV-cache compression by default,
        // provided the precision is still undefined at this point.
        const bool kv_cache_precision_unset = get_property(ov::hint::kv_cache_precision) == ov::element::undefined;
        if (kv_cache_precision_unset && !info.supports_immad) {
            set_property(ov::hint::kv_cache_precision(ov::element::i8));
        }

        // Non-systolic platforms get dynamic quantization by default,
        // provided the group size is still 0 at this point.
        const bool dyn_quant_group_size_unset = get_property(ov::hint::dynamic_quantization_group_size) == 0;
        if (dyn_quant_group_size_unset && !info.supports_immad) {
            set_property(ov::hint::dynamic_quantization_group_size(32));
        }
    }
}

void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
// Update specific default properties, call once before internal_properties updated.
update_specific_default_properties(info);

// Copy internal properties before applying hints to ensure that
// a property set by hint won't be overriden by a value in user config.
// E.g num_streams=AUTO && hint=THROUGHPUT
Expand All @@ -249,16 +269,6 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
}

// Enable KV-cache compression by default for non-systolic platforms
if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) {
set_property(ov::hint::kv_cache_precision(ov::element::i8));
}

// Enable dynamic quantization by default for non-systolic platforms
if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) {
set_property(ov::hint::dynamic_quantization_group_size(32));
}

user_properties.clear();
}

Expand Down

0 comments on commit 2e24dfa

Please sign in to comment.