From 589a540dcc3cd4a98533eba24b7cc9dcc19ae153 Mon Sep 17 00:00:00 2001 From: Eldar Kurtic Date: Fri, 31 Jan 2025 16:08:13 +0000 Subject: [PATCH 1/4] Properly match fused layers --- .../quantization/compressed_tensors/utils.py | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py index 8fcbda377428e..afafd9d2ce90b 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py @@ -102,8 +102,8 @@ def find_matched_target(layer_name: Optional[str], module: Module, layer_name = "" matched_target = (_find_first_match(layer_name, targets) - or _find_first_match(module.__class__.__name__, targets, - True)) + or _find_first_match(module.__class__.__name__, targets, True) + or _match_fused_layer(layer_name, targets)) if matched_target is None: raise ValueError(f"Unable to find matching target for {module} in the " @@ -152,3 +152,38 @@ def _is_equal_or_regex_match(value: str, elif target == value: return True return False + + +def _match_fused_layer(layer_name: str, + target_layers: Iterable[str]) -> Optional[str]: + """ + Match a fused layer name to its corresponding individual layer in target_layers. + + Examples: + layer_name = "model.layers.0.self_attn.qkv_proj" + target_layers = ["model.layers.0.self_attn.q_proj", + "model.layers.0.self_attn.k_proj", + "model.layers.0.self_attn.v_proj"] + """ + # Split into parent path and layer type + # e.g., "model.layers.0.self_attn" and "qkv_proj" + parent_path = ".".join(layer_name.split(".")[:-1]) + layer_type = layer_name.split(".")[-1] + + if layer_type not in FUSED_LAYER_NAME_MAPPING: + return None + + possible_layer_types = FUSED_LAYER_NAME_MAPPING[layer_type] + + # Look for a target layer that: + # 1. Has the same parent path + # 2. 
Ends with one of the possible individual layer types + for target in target_layers: + is_same_parent = parent_path in target + is_matching_type = any(type_suffix in target + for type_suffix in possible_layer_types) + + if is_same_parent and is_matching_type: + return target + + return None From e2f9563c125f2a523b99c9d7e79ad9dde35952ad Mon Sep 17 00:00:00 2001 From: Eldar Kurtic Date: Fri, 31 Jan 2025 16:49:46 +0000 Subject: [PATCH 2/4] fix line length --- .../layers/quantization/compressed_tensors/utils.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py index afafd9d2ce90b..e2e7966de8456 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py @@ -101,9 +101,11 @@ def find_matched_target(layer_name: Optional[str], module: Module, if layer_name is None: layer_name = "" - matched_target = (_find_first_match(layer_name, targets) - or _find_first_match(module.__class__.__name__, targets, True) - or _match_fused_layer(layer_name, targets)) + matched_target = ( + _find_first_match(layer_name, targets) + or _find_first_match(module.__class__.__name__, targets, True) + or _match_fused_layer(layer_name, targets) + ) if matched_target is None: raise ValueError(f"Unable to find matching target for {module} in the " @@ -157,7 +159,8 @@ def _is_equal_or_regex_match(value: str, def _match_fused_layer(layer_name: str, target_layers: Iterable[str]) -> Optional[str]: """ - Match a fused layer name to its corresponding individual layer in target_layers. + Match a fused layer name to its corresponding individual layer in + target_layers. 
Examples: layer_name = "model.layers.0.self_attn.qkv_proj" From 777baaed4c89022390867d255fd9d7a4623fb8bd Mon Sep 17 00:00:00 2001 From: Eldar Kurtic Date: Fri, 31 Jan 2025 20:11:05 +0000 Subject: [PATCH 3/4] fix style --- .../layers/quantization/compressed_tensors/utils.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py index e2e7966de8456..1da7cc123d43f 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py @@ -101,11 +101,10 @@ def find_matched_target(layer_name: Optional[str], module: Module, if layer_name is None: layer_name = "" - matched_target = ( - _find_first_match(layer_name, targets) - or _find_first_match(module.__class__.__name__, targets, True) - or _match_fused_layer(layer_name, targets) - ) + matched_target = (_find_first_match(layer_name, targets) + or _find_first_match(module.__class__.__name__, targets, + True) + or _match_fused_layer(layer_name, targets)) if matched_target is None: raise ValueError(f"Unable to find matching target for {module} in the " @@ -184,7 +183,7 @@ def _match_fused_layer(layer_name: str, for target in target_layers: is_same_parent = parent_path in target is_matching_type = any(type_suffix in target - for type_suffix in possible_layer_types) + for type_suffix in possible_layer_types) if is_same_parent and is_matching_type: return target From cf2511c5b21756e7d84c1f87ab440e8c2b7e8b81 Mon Sep 17 00:00:00 2001 From: Eldar Kurtic Date: Fri, 31 Jan 2025 21:39:10 +0000 Subject: [PATCH 4/4] make sure all fused layers have the same quant --- .../layers/quantization/compressed_tensors/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py 
b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py index 1da7cc123d43f..5bab5a02d83b7 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py @@ -185,7 +185,9 @@ def _match_fused_layer(layer_name: str, is_matching_type = any(type_suffix in target for type_suffix in possible_layer_types) - if is_same_parent and is_matching_type: + if is_same_parent and is_matching_type and all( + '.'.join([parent_path, type_suffix]) in target_layers + for type_suffix in possible_layer_types): return target return None