Remove smoothquant from examples (#1121)
## Purpose ##
* When writing the multimodal blog, I initially included SmoothQuant in
the examples. However, I later decided to remove it and focus on W4A16
alone to keep the blog concise. This PR updates the examples to reflect
that decision; the recipe change is sketched below.
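
For a quick view of the change, here is a before/after sketch of the Whisper recipe; both forms are copied verbatim from the hunks below.

```python
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier

# Before: smooth activation outliers first, then quantize weights with GPTQ
recipe = [
    SmoothQuantModifier(smoothing_strength=0.8),
    GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
]

# After: a single weight-only W4A16 GPTQ pass
recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])
```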

Signed-off-by: Kyle Sayers <[email protected]>
Co-authored-by: Dipika Sikka <[email protected]>
kylesayrs and dsikka authored Feb 4, 2025
1 parent 206d894 commit 9129872
Showing 2 changed files with 7 additions and 15 deletions.
6 changes: 1 addition & 5 deletions examples/multimodal_audio/whisper_example.py
@@ -3,7 +3,6 @@
 from transformers import WhisperProcessor

 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import oneshot
 from llmcompressor.transformers.tracing import TraceableWhisperForConditionalGeneration
@@ -78,10 +77,7 @@ def data_collator(batch):


 # Recipe
-recipe = [
-    SmoothQuantModifier(smoothing_strength=0.8),
-    GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
-]
+recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])

 # Apply algorithms.
 oneshot(
16 changes: 6 additions & 10 deletions examples/multimodal_vision/phi3_vision_example.py
@@ -3,7 +3,6 @@
 from transformers import AutoModelForCausalLM, AutoProcessor

 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import oneshot

 # Load model.
@@ -67,15 +66,12 @@ def data_collator(batch):


 # Recipe
-recipe = [
-    SmoothQuantModifier(smoothing_strength=0.8),
-    GPTQModifier(
-        targets="Linear",
-        scheme="W4A16",
-        sequential_targets=["Phi3DecoderLayer"],
-        ignore=["lm_head", "re:model.vision_embed_tokens.*"],
-    ),
-]
+recipe = GPTQModifier(
+    targets="Linear",
+    scheme="W4A16",
+    sequential_targets=["Phi3DecoderLayer"],
+    ignore=["lm_head", "re:model.vision_embed_tokens.*"],
+)

 # Perform oneshot
 oneshot(
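
For context, each example passes its recipe to `oneshot`; the call sites are truncated in the hunks above. Below is a minimal sketch of the simplified Whisper flow. The model id, dataset, and calibration settings are placeholders assumed for illustration, since the diff does not show them.

```python
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.transformers import oneshot
from llmcompressor.transformers.tracing import TraceableWhisperForConditionalGeneration

# Placeholder checkpoint; the example's actual model id is not shown in this diff.
model = TraceableWhisperForConditionalGeneration.from_pretrained(
    "openai/whisper-large-v2", device_map="auto", torch_dtype="auto"
)

# The simplified one-modifier recipe from the Whisper example.
recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])

# Apply algorithms. The dataset name and calibration sizes below are
# assumptions; the real values are defined earlier in the example file.
oneshot(
    model=model,
    dataset="<calibration-dataset>",  # placeholder, not a real dataset id
    recipe=recipe,
    max_seq_length=2048,
    num_calibration_samples=512,
)
```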
