From 1eacf30b5d30bcfbe1b4102149f0527df0948c9c Mon Sep 17 00:00:00 2001
From: Felix Marty
Date: Mon, 3 Feb 2025 09:29:19 +0100
Subject: [PATCH] fix undefined variables

Signed-off-by: Felix Marty
---
 .../layers/quantization/quark/schemes/quark_w8a8_fp8.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py b/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py
index 888753932d162..2713a9d8cdfc0 100644
--- a/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py
+++ b/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py
@@ -37,9 +37,10 @@ def process_weights_after_loading(self, layer) -> None:
                     weight=layer.weight,
                     weight_scale=layer.weight_scale,
                     input_scale=layer.input_scale)
-                if input_scale is not None:
-                    layer.input_scale = Parameter(input_scale,
-                                                  requires_grad=False)
+            else:
+                max_w_scale = layer.weight_scale
+                weight = layer.weight
+                input_scale = layer.input_scale
 
             max_w_scale, weight = requantize_with_max_scale(
                 weight=weight,
@@ -49,6 +50,8 @@ def process_weights_after_loading(self, layer) -> None:
 
             layer.weight = Parameter(weight.t(), requires_grad=False)
             layer.weight_scale = Parameter(max_w_scale, requires_grad=False)
+            if input_scale is not None:
+                layer.input_scale = Parameter(input_scale, requires_grad=False)
 
         # If channelwise, scales are already lined up, so just transpose.
         elif self.qscheme == "per_channel":
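
Context for the fix: before this patch, `weight`, `max_w_scale` and `input_scale` were only bound inside the `current_platform.is_rocm()` branch, so the subsequent `requantize_with_max_scale(...)` call raised a NameError on non-ROCm platforms. The added `else:` branch binds all three names on every platform before that call. For readers unfamiliar with the helper being fed here, the sketch below illustrates what a max-scale requantization of a fused per-tensor module (e.g. QKV) does: each logical shard loads its own FP8 scale, but the kernel wants a single per-tensor scale. This is a minimal illustration under assumed semantics, not vLLM's implementation; the function name, shard layout, and comments are hypothetical.

import torch

def requantize_with_max_scale_sketch(
        weight: torch.Tensor,        # FP8 weight, logical shards stacked on dim 0
        weight_scale: torch.Tensor,  # one scale per logical shard
        logical_widths: list[int],   # output rows owned by each shard
) -> tuple[torch.Tensor, torch.Tensor]:
    # Pick the largest shard scale so a single per-tensor scale can
    # represent every shard without overflow.
    max_w_scale = weight_scale.max()
    start = 0
    for idx, width in enumerate(logical_widths):
        end = start + width
        # Dequantize the shard with its own scale...
        dq = weight[start:end].to(torch.float32) * weight_scale[idx]
        # ...and requantize it against the shared max scale.
        weight[start:end] = (dq / max_w_scale).to(torch.float8_e4m3fn)
        start = end
    return max_w_scale, weight

Under these assumed semantics, the return values map onto the patched code: `max_w_scale` becomes `layer.weight_scale` and the requantized `weight` is transposed and stored as `layer.weight`.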