From 1eacf30b5d30bcfbe1b4102149f0527df0948c9c Mon Sep 17 00:00:00 2001
From: Felix Marty
Date: Mon, 3 Feb 2025 09:29:19 +0100
Subject: [PATCH] fix undefined variables

Signed-off-by: Felix Marty
---
 .../layers/quantization/quark/schemes/quark_w8a8_fp8.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py b/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py
index 888753932d162..2713a9d8cdfc0 100644
--- a/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py
+++ b/vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py
@@ -37,9 +37,10 @@ def process_weights_after_loading(self, layer) -> None:
                     weight=layer.weight,
                     weight_scale=layer.weight_scale,
                     input_scale=layer.input_scale)
-                if input_scale is not None:
-                    layer.input_scale = Parameter(input_scale,
-                                                  requires_grad=False)
+            else:
+                max_w_scale = layer.weight_scale
+                weight = layer.weight
+                input_scale = layer.input_scale
 
             max_w_scale, weight = requantize_with_max_scale(
                 weight=weight,
@@ -49,6 +50,8 @@ def process_weights_after_loading(self, layer) -> None:
 
             layer.weight = Parameter(weight.t(), requires_grad=False)
             layer.weight_scale = Parameter(max_w_scale, requires_grad=False)
+            if input_scale is not None:
+                layer.input_scale = Parameter(input_scale, requires_grad=False)
 
         # If channelwise, scales are already lined up, so just transpose.
         elif self.qscheme == "per_channel":
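
Context for the fix: before this patch, `weight`, `max_w_scale` and `input_scale` were only bound inside the `current_platform.is_rocm()` branch, so the subsequent `requantize_with_max_scale(...)` call raised a NameError on non-ROCm platforms. The added `else:` branch binds all three names on every platform before that call. For readers unfamiliar with the helper being fed here, the sketch below illustrates what a max-scale requantization of a fused per-tensor module (e.g. QKV) does: each logical shard loads its own FP8 scale, but the kernel wants a single per-tensor scale. This is a minimal illustration under assumed semantics, not vLLM's implementation; the function name, shard layout, and comments are hypothetical.

import torch

def requantize_with_max_scale_sketch(
        weight: torch.Tensor,        # FP8 weight, logical shards stacked on dim 0
        weight_scale: torch.Tensor,  # one scale per logical shard
        logical_widths: list[int],   # output rows owned by each shard
) -> tuple[torch.Tensor, torch.Tensor]:
    # Pick the largest shard scale so a single per-tensor scale can
    # represent every shard without overflow.
    max_w_scale = weight_scale.max()
    start = 0
    for idx, width in enumerate(logical_widths):
        end = start + width
        # Dequantize the shard with its own scale...
        dq = weight[start:end].to(torch.float32) * weight_scale[idx]
        # ...and requantize it against the shared max scale.
        weight[start:end] = (dq / max_w_scale).to(torch.float8_e4m3fn)
        start = end
    return max_w_scale, weight

Under these assumed semantics, the return values map onto the patched code: `max_w_scale` becomes `layer.weight_scale` and the requantized `weight` is transposed and stored as `layer.weight`.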