diff --git a/examples/multimodal_audio/whisper_example.py b/examples/multimodal_audio/whisper_example.py
index 303c9e935..917ae2a35 100644
--- a/examples/multimodal_audio/whisper_example.py
+++ b/examples/multimodal_audio/whisper_example.py
@@ -3,7 +3,6 @@
 from transformers import WhisperProcessor
 
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import oneshot
 from llmcompressor.transformers.tracing import TraceableWhisperForConditionalGeneration
 
@@ -78,10 +77,7 @@ def data_collator(batch):
 
 
 # Recipe
-recipe = [
-    SmoothQuantModifier(smoothing_strength=0.8),
-    GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
-]
+recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])
 
 # Apply algorithms.
 oneshot(
diff --git a/examples/multimodal_vision/phi3_vision_example.py b/examples/multimodal_vision/phi3_vision_example.py
index 73de4a356..6c9580b17 100644
--- a/examples/multimodal_vision/phi3_vision_example.py
+++ b/examples/multimodal_vision/phi3_vision_example.py
@@ -3,7 +3,6 @@
 from transformers import AutoModelForCausalLM, AutoProcessor
 
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import oneshot
 
 # Load model.
@@ -67,15 +66,12 @@ def data_collator(batch):
 
 
 # Recipe
-recipe = [
-    SmoothQuantModifier(smoothing_strength=0.8),
-    GPTQModifier(
-        targets="Linear",
-        scheme="W4A16",
-        sequential_targets=["Phi3DecoderLayer"],
-        ignore=["lm_head", "re:model.vision_embed_tokens.*"],
-    ),
-]
+recipe = GPTQModifier(
+    targets="Linear",
+    scheme="W4A16",
+    sequential_targets=["Phi3DecoderLayer"],
+    ignore=["lm_head", "re:model.vision_embed_tokens.*"],
+)
 
 # Perform oneshot
 oneshot(
diff --git a/examples/multimodal_vision/pixtral_example.py b/examples/multimodal_vision/pixtral_example.py
index ebb18df12..891819bc6 100644
--- a/examples/multimodal_vision/pixtral_example.py
+++ b/examples/multimodal_vision/pixtral_example.py
@@ -16,18 +16,20 @@
 
 # Oneshot arguments
 DATASET_ID = "flickr30k"
-DATASET_SPLIT = {"calibration": "test[:512]"}
 NUM_CALIBRATION_SAMPLES = 512
+DATASET_SPLIT = {"calibration": f"test[:{NUM_CALIBRATION_SAMPLES}]"}
 MAX_SEQUENCE_LENGTH = 2048
 
 
 # Define a oneshot data collator for multimodal inputs.
+# NOTE: for transformers<4.48.0, please squeeze the first dimension of `pixel_values`
+# by appending `[0]` to the end of line 32
 def data_collator(batch):
     assert len(batch) == 1
     return {
         "input_ids": torch.LongTensor(batch[0]["input_ids"]),
         "attention_mask": torch.tensor(batch[0]["attention_mask"]),
-        "pixel_values": torch.tensor(batch[0]["pixel_values"])[0],
+        "pixel_values": torch.tensor(batch[0]["pixel_values"]),
     }
 
 
diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py
index 70a6a35e4..d233f5ee1 100644
--- a/tests/e2e/vLLM/test_vllm.py
+++ b/tests/e2e/vLLM/test_vllm.py
@@ -2,12 +2,12 @@
 import re
 import shutil
 from pathlib import Path
-from typing import Callable
 
 import pytest
 import yaml
 from huggingface_hub import HfApi
 from loguru import logger
+from parameterized import parameterized_class
 
 from llmcompressor.core import active_session
 from tests.e2e.e2e_utils import run_oneshot_for_e2e_testing
@@ -34,15 +34,10 @@
 ]
 
 
-@pytest.fixture
-def record_config_file(record_testsuite_property: Callable[[str, object], None]):
-    test_data_file_name = TEST_DATA_FILE.split("configs/")[-1]
-    record_testsuite_property("TEST_DATA_FILE_NAME", test_data_file_name)
-
-
 # Will run each test case in its own process through run_tests.sh
 # emulating vLLM CI testing
 @requires_gpu_count(1)
+@parameterized_class("test_data_file", [(TEST_DATA_FILE,)])
 @pytest.mark.skipif(not vllm_installed, reason="vLLM is not installed, skipping test")
 class TestvLLM:
     """
@@ -62,7 +57,9 @@ class TestvLLM:
     """  # noqa: E501
 
     def set_up(self):
-        eval_config = yaml.safe_load(Path(TEST_DATA_FILE).read_text(encoding="utf-8"))
+        eval_config = yaml.safe_load(
+            Path(self.test_data_file).read_text(encoding="utf-8")
+        )
 
         if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
             pytest.skip("Skipping test; cadence mismatch")
@@ -90,7 +87,6 @@ def set_up(self):
         ]
         self.api = HfApi()
 
-    @pytest.mark.usefixtures("record_config_file")
     def test_vllm(self):
         # Run vLLM with saved model
         import torch
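
A note on the `parameterized_class` usage added above: the decorator generates one copy of the decorated test class per parameter tuple and sets the named attribute (`test_data_file`) on each copy, which is why `set_up` can now read `self.test_data_file` instead of the module-level `TEST_DATA_FILE`. The sketch below only illustrates that mechanism and is not part of the change; the config paths and class name are hypothetical.

import unittest

from parameterized import parameterized_class

# Hypothetical config paths, used only to demonstrate the decorator.
CONFIG_FILES = ["configs/model_a.yaml", "configs/model_b.yaml"]


# parameterized_class creates one subclass per tuple and assigns the named
# attribute ("test_data_file") on each, so every config file runs as its own
# test class, mirroring how test_vllm.py now receives TEST_DATA_FILE.
@parameterized_class("test_data_file", [(path,) for path in CONFIG_FILES])
class TestConfigDriven(unittest.TestCase):
    def test_config_path_is_yaml(self):
        self.assertTrue(self.test_data_file.endswith(".yaml"))


if __name__ == "__main__":
    unittest.main()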