Remove smoothquant from examples (#1121)
## Purpose ##
* When writing the multimodal blog, I initially included SmoothQuant in
the examples. However, I later decided to remove it and focus on W4A16
alone to keep the blog concise. This PR updates the examples to reflect
that decision; the recipe change is sketched below.
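
For a quick view of the change, here is a before/after sketch of the Whisper recipe; both forms are copied verbatim from the hunks below.

```python
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier

# Before: smooth activation outliers first, then quantize weights with GPTQ
recipe = [
    SmoothQuantModifier(smoothing_strength=0.8),
    GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
]

# After: a single weight-only W4A16 GPTQ pass
recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])
```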

Signed-off-by: Kyle Sayers <[email protected]>
Co-authored-by: Dipika Sikka <[email protected]>
kylesayrs and dsikka authored Feb 4, 2025
1 parent 206d894 commit 9129872
Showing 2 changed files with 7 additions and 15 deletions.
6 changes: 1 addition & 5 deletions examples/multimodal_audio/whisper_example.py
@@ -3,7 +3,6 @@
 from transformers import WhisperProcessor

 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import oneshot
 from llmcompressor.transformers.tracing import TraceableWhisperForConditionalGeneration
@@ -78,10 +77,7 @@ def data_collator(batch):


 # Recipe
-recipe = [
-    SmoothQuantModifier(smoothing_strength=0.8),
-    GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
-]
+recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])

 # Apply algorithms.
 oneshot(
16 changes: 6 additions & 10 deletions examples/multimodal_vision/phi3_vision_example.py
@@ -3,7 +3,6 @@
 from transformers import AutoModelForCausalLM, AutoProcessor

 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import oneshot

 # Load model.
@@ -67,15 +66,12 @@ def data_collator(batch):


 # Recipe
-recipe = [
-    SmoothQuantModifier(smoothing_strength=0.8),
-    GPTQModifier(
-        targets="Linear",
-        scheme="W4A16",
-        sequential_targets=["Phi3DecoderLayer"],
-        ignore=["lm_head", "re:model.vision_embed_tokens.*"],
-    ),
-]
+recipe = GPTQModifier(
+    targets="Linear",
+    scheme="W4A16",
+    sequential_targets=["Phi3DecoderLayer"],
+    ignore=["lm_head", "re:model.vision_embed_tokens.*"],
+)

 # Perform oneshot
 oneshot(
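
For context, each example passes its recipe to `oneshot`; the call sites are truncated in the hunks above. Below is a minimal sketch of the simplified Whisper flow. The model id, dataset, and calibration settings are placeholders assumed for illustration, since the diff does not show them.

```python
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.transformers import oneshot
from llmcompressor.transformers.tracing import TraceableWhisperForConditionalGeneration

# Placeholder checkpoint; the example's actual model id is not shown in this diff.
model = TraceableWhisperForConditionalGeneration.from_pretrained(
    "openai/whisper-large-v2", device_map="auto", torch_dtype="auto"
)

# The simplified one-modifier recipe from the Whisper example.
recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])

# Apply algorithms. The dataset name and calibration sizes below are
# assumptions; the real values are defined earlier in the example file.
oneshot(
    model=model,
    dataset="<calibration-dataset>",  # placeholder, not a real dataset id
    recipe=recipe,
    max_seq_length=2048,
    num_calibration_samples=512,
)
```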
