
Commit

format
tlrmchlsmth committed Jul 18, 2024
1 parent 5b05175 commit 586dd80
Showing 1 changed file with 2 additions and 3 deletions.
vllm/model_executor/layers/quantization/utils/w8a8_utils.py
@@ -143,17 +143,16 @@ def apply_fp8_linear(
 # Fallback for channelwise case, where the weight scales are
 # applied separately.

-
 # Symmetric quantized GEMM by definition computes the following:
 # C = (s_x * X) (s_w * W) + bias
 # This is equivalent to dequantizing the weights and activations
 # before applying a GEMM.
 #
-# In order to compute quantized operands, a quantized kernel 
+# In order to compute quantized operands, a quantized kernel
 # will rewrite the above like so:
 # C = s_w * s_x * (X * W) + bias
 #
-# For the scaled_mm fallback case, we break this down, since it 
+# For the scaled_mm fallback case, we break this down, since it
 # does not support s_w being a vector.

 # This computes C = sx * (X * W).
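For context, the decomposition described in the comment can be sketched in a few lines of PyTorch. This is a minimal illustration only, not vLLM's actual apply_fp8_linear implementation: the function and argument names are hypothetical, and a plain float matmul stands in for the quantized scaled_mm kernel.

import torch

def channelwise_fallback(
    x_q: torch.Tensor,              # quantized activations, shape (M, K)
    w_q: torch.Tensor,              # quantized weights, shape (K, N)
    s_x: torch.Tensor,              # per-tensor activation scale (scalar)
    s_w: torch.Tensor,              # per-channel weight scales, shape (N,)
    bias: torch.Tensor | None = None,
) -> torch.Tensor:
    # Step 1: C' = s_x * (X @ W). This is the piece the scaled_mm kernel
    # can compute, since it only accepts scalar (per-tensor) scales.
    c = s_x * (x_q.float() @ w_q.float())
    # Step 2: apply the per-channel weight scales separately, broadcasting
    # s_w across the N output channels, then add the bias:
    #     C = s_w * C' + bias
    c = c * s_w.view(1, -1)
    if bias is not None:
        c = c + bias
    return c

When s_w is a scalar (per-tensor weight scale), both steps collapse back into the single fused form C = s_w * s_x * (X * W) + bias and no fallback is needed.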
