
Commit 46ae8eb
Merge branch 'main' into kylesayrs/hooks-mixin-keep
dsikka authored Feb 4, 2025
2 parents: ea4f2a2 + 9129872
Showing 4 changed files with 16 additions and 26 deletions.
6 changes: 1 addition & 5 deletions examples/multimodal_audio/whisper_example.py
@@ -3,7 +3,6 @@
 from transformers import WhisperProcessor
 
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import oneshot
 from llmcompressor.transformers.tracing import TraceableWhisperForConditionalGeneration
 
@@ -78,10 +77,7 @@ def data_collator(batch):
 
 
 # Recipe
-recipe = [
-    SmoothQuantModifier(smoothing_strength=0.8),
-    GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
-]
+recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])
 
 # Apply algorithms.
 oneshot(
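For context, the before and after recipes from this hunk can be written out side by side. This is a sketch for illustration only (model loading and the oneshot call are omitted), and it assumes the recipe argument accepts either a list of modifiers or a single modifier, as the two versions of the example suggest:

# Illustrative sketch only; mirrors the before/after recipes shown in the diff above.
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier

# Before: SmoothQuant activation smoothing followed by GPTQ weight quantization
old_recipe = [
    SmoothQuantModifier(smoothing_strength=0.8),
    GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
]

# After: GPTQ-only W4A16 quantization, still skipping the lm_head
new_recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])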
16 changes: 6 additions & 10 deletions examples/multimodal_vision/phi3_vision_example.py
@@ -3,7 +3,6 @@
 from transformers import AutoModelForCausalLM, AutoProcessor
 
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import oneshot
 
 # Load model.
@@ -67,15 +66,12 @@ def data_collator(batch):
 
 
 # Recipe
-recipe = [
-    SmoothQuantModifier(smoothing_strength=0.8),
-    GPTQModifier(
-        targets="Linear",
-        scheme="W4A16",
-        sequential_targets=["Phi3DecoderLayer"],
-        ignore=["lm_head", "re:model.vision_embed_tokens.*"],
-    ),
-]
+recipe = GPTQModifier(
+    targets="Linear",
+    scheme="W4A16",
+    sequential_targets=["Phi3DecoderLayer"],
+    ignore=["lm_head", "re:model.vision_embed_tokens.*"],
+)
 
 # Perform oneshot
 oneshot(
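The ignore entries above mix an exact module name with a "re:"-prefixed pattern. A rough sketch of the assumed matching behavior follows; this is not llm-compressor's actual implementation, and the module names below are invented:

# Hypothetical matcher sketching the assumed "re:" ignore semantics.
import re

ignore = ["lm_head", "re:model.vision_embed_tokens.*"]

def is_ignored(module_name: str) -> bool:
    for pattern in ignore:
        if pattern.startswith("re:"):
            # regex patterns are matched against the submodule's dotted name
            if re.match(pattern[len("re:"):], module_name):
                return True
        elif module_name == pattern:
            return True
    return False

print(is_ignored("model.vision_embed_tokens.img_projection.0"))  # True
print(is_ignored("model.layers.0.mlp.down_proj"))                # False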
6 changes: 4 additions & 2 deletions examples/multimodal_vision/pixtral_example.py
@@ -16,18 +16,20 @@
 
 # Oneshot arguments
 DATASET_ID = "flickr30k"
-DATASET_SPLIT = {"calibration": "test[:512]"}
 NUM_CALIBRATION_SAMPLES = 512
+DATASET_SPLIT = {"calibration": f"test[:{NUM_CALIBRATION_SAMPLES}]"}
 MAX_SEQUENCE_LENGTH = 2048
 
 
 # Define a oneshot data collator for multimodal inputs.
+# NOTE: for transformers<4.48.0, please squeeze the first dimension of `pixel_values`
+# by appending `[0]` to the end of line 32
 def data_collator(batch):
     assert len(batch) == 1
     return {
         "input_ids": torch.LongTensor(batch[0]["input_ids"]),
         "attention_mask": torch.tensor(batch[0]["attention_mask"]),
-        "pixel_values": torch.tensor(batch[0]["pixel_values"])[0],
+        "pixel_values": torch.tensor(batch[0]["pixel_values"]),
     }
 
 
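A small standalone sketch (with fabricated input shapes) of why the [0] index was dropped: assuming transformers>=4.48.0 already stores pixel_values with a leading batch dimension, the collator can pass the tensor through unchanged:

# Standalone sketch; the sample below is fabricated and only mimics the expected structure.
import torch

sample = {
    "input_ids": [[1, 2, 3, 4]],
    "attention_mask": [[1, 1, 1, 1]],
    # leading batch dimension already present, as assumed for transformers>=4.48.0
    "pixel_values": torch.rand(1, 3, 16, 16).tolist(),
}

def data_collator(batch):
    assert len(batch) == 1
    return {
        "input_ids": torch.LongTensor(batch[0]["input_ids"]),
        "attention_mask": torch.tensor(batch[0]["attention_mask"]),
        "pixel_values": torch.tensor(batch[0]["pixel_values"]),
    }

out = data_collator([sample])
print(out["pixel_values"].shape)  # torch.Size([1, 3, 16, 16]); older versions would index [0] here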
14 changes: 5 additions & 9 deletions tests/e2e/vLLM/test_vllm.py
@@ -2,12 +2,12 @@
 import re
 import shutil
 from pathlib import Path
-from typing import Callable
 
 import pytest
 import yaml
 from huggingface_hub import HfApi
 from loguru import logger
+from parameterized import parameterized_class
 
 from llmcompressor.core import active_session
 from tests.e2e.e2e_utils import run_oneshot_for_e2e_testing
@@ -34,15 +34,10 @@
 ]
 
 
-@pytest.fixture
-def record_config_file(record_testsuite_property: Callable[[str, object], None]):
-    test_data_file_name = TEST_DATA_FILE.split("configs/")[-1]
-    record_testsuite_property("TEST_DATA_FILE_NAME", test_data_file_name)
-
-
 # Will run each test case in its own process through run_tests.sh
 # emulating vLLM CI testing
 @requires_gpu_count(1)
+@parameterized_class("test_data_file", [(TEST_DATA_FILE,)])
 @pytest.mark.skipif(not vllm_installed, reason="vLLM is not installed, skipping test")
 class TestvLLM:
     """
@@ -62,7 +57,9 @@ class TestvLLM:
     """  # noqa: E501
 
     def set_up(self):
-        eval_config = yaml.safe_load(Path(TEST_DATA_FILE).read_text(encoding="utf-8"))
+        eval_config = yaml.safe_load(
+            Path(self.test_data_file).read_text(encoding="utf-8")
+        )
 
         if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
             pytest.skip("Skipping test; cadence mismatch")
@@ -90,7 +87,6 @@ def set_up(self):
         ]
         self.api = HfApi()
 
-    @pytest.mark.usefixtures("record_config_file")
     def test_vllm(self):
        # Run vLLM with saved model
        import torch
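For reference, a minimal standalone illustration of the parameterized_class mechanism that replaces the removed record_config_file fixture: it stamps the listed values onto generated copies of the class, so each test can read self.test_data_file directly. The config paths here are placeholders, not files from this repository:

# Standalone illustration using unittest; the config paths are invented placeholders.
import unittest

from parameterized import parameterized_class

@parameterized_class(
    "test_data_file",
    [("configs/example_w4a16.yaml",), ("configs/example_fp8.yaml",)],
)
class TestConfigPlumbing(unittest.TestCase):
    def test_attribute_is_set(self):
        # each generated test class sees one of the configured paths
        self.assertTrue(self.test_data_file.endswith(".yaml"))

if __name__ == "__main__":
    unittest.main()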
