
Commit 46ae8eb
Merge branch 'main' into kylesayrs/hooks-mixin-keep
dsikka authored Feb 4, 2025
2 parents: ea4f2a2 + 9129872
Showing 4 changed files with 16 additions and 26 deletions.
6 changes: 1 addition & 5 deletions examples/multimodal_audio/whisper_example.py
@@ -3,7 +3,6 @@
 from transformers import WhisperProcessor
 
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import oneshot
 from llmcompressor.transformers.tracing import TraceableWhisperForConditionalGeneration
 
@@ -78,10 +77,7 @@ def data_collator(batch):
 
 
 # Recipe
-recipe = [
-    SmoothQuantModifier(smoothing_strength=0.8),
-    GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
-]
+recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])
 
 # Apply algorithms.
 oneshot(
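For context, the before and after recipes from this hunk can be written out side by side. This is a sketch for illustration only (model loading and the oneshot call are omitted), and it assumes the recipe argument accepts either a list of modifiers or a single modifier, as the two versions of the example suggest:

# Illustrative sketch only; mirrors the before/after recipes shown in the diff above.
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier

# Before: SmoothQuant activation smoothing followed by GPTQ weight quantization
old_recipe = [
    SmoothQuantModifier(smoothing_strength=0.8),
    GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
]

# After: GPTQ-only W4A16 quantization, still skipping the lm_head
new_recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])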
16 changes: 6 additions & 10 deletions examples/multimodal_vision/phi3_vision_example.py
@@ -3,7 +3,6 @@
 from transformers import AutoModelForCausalLM, AutoProcessor
 
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import oneshot
 
 # Load model.
@@ -67,15 +66,12 @@ def data_collator(batch):
 
 
 # Recipe
-recipe = [
-    SmoothQuantModifier(smoothing_strength=0.8),
-    GPTQModifier(
-        targets="Linear",
-        scheme="W4A16",
-        sequential_targets=["Phi3DecoderLayer"],
-        ignore=["lm_head", "re:model.vision_embed_tokens.*"],
-    ),
-]
+recipe = GPTQModifier(
+    targets="Linear",
+    scheme="W4A16",
+    sequential_targets=["Phi3DecoderLayer"],
+    ignore=["lm_head", "re:model.vision_embed_tokens.*"],
+)
 
 # Perform oneshot
 oneshot(
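The ignore entries above mix an exact module name with a "re:"-prefixed pattern. A rough sketch of the assumed matching behavior follows; this is not llm-compressor's actual implementation, and the module names below are invented:

# Hypothetical matcher sketching the assumed "re:" ignore semantics.
import re

ignore = ["lm_head", "re:model.vision_embed_tokens.*"]

def is_ignored(module_name: str) -> bool:
    for pattern in ignore:
        if pattern.startswith("re:"):
            # regex patterns are matched against the submodule's dotted name
            if re.match(pattern[len("re:"):], module_name):
                return True
        elif module_name == pattern:
            return True
    return False

print(is_ignored("model.vision_embed_tokens.img_projection.0"))  # True
print(is_ignored("model.layers.0.mlp.down_proj"))                # False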
6 changes: 4 additions & 2 deletions examples/multimodal_vision/pixtral_example.py
@@ -16,18 +16,20 @@
 
 # Oneshot arguments
 DATASET_ID = "flickr30k"
-DATASET_SPLIT = {"calibration": "test[:512]"}
 NUM_CALIBRATION_SAMPLES = 512
+DATASET_SPLIT = {"calibration": f"test[:{NUM_CALIBRATION_SAMPLES}]"}
 MAX_SEQUENCE_LENGTH = 2048
 
 
 # Define a oneshot data collator for multimodal inputs.
+# NOTE: for transformers<4.48.0, please squeeze the first dimension of `pixel_values`
+# by appending `[0]` to the end of line 32
 def data_collator(batch):
     assert len(batch) == 1
     return {
         "input_ids": torch.LongTensor(batch[0]["input_ids"]),
         "attention_mask": torch.tensor(batch[0]["attention_mask"]),
-        "pixel_values": torch.tensor(batch[0]["pixel_values"])[0],
+        "pixel_values": torch.tensor(batch[0]["pixel_values"]),
     }
 
 
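A small standalone sketch (with fabricated input shapes) of why the [0] index was dropped: assuming transformers>=4.48.0 already stores pixel_values with a leading batch dimension, the collator can pass the tensor through unchanged:

# Standalone sketch; the sample below is fabricated and only mimics the expected structure.
import torch

sample = {
    "input_ids": [[1, 2, 3, 4]],
    "attention_mask": [[1, 1, 1, 1]],
    # leading batch dimension already present, as assumed for transformers>=4.48.0
    "pixel_values": torch.rand(1, 3, 16, 16).tolist(),
}

def data_collator(batch):
    assert len(batch) == 1
    return {
        "input_ids": torch.LongTensor(batch[0]["input_ids"]),
        "attention_mask": torch.tensor(batch[0]["attention_mask"]),
        "pixel_values": torch.tensor(batch[0]["pixel_values"]),
    }

out = data_collator([sample])
print(out["pixel_values"].shape)  # torch.Size([1, 3, 16, 16]); older versions would index [0] here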
14 changes: 5 additions & 9 deletions tests/e2e/vLLM/test_vllm.py
@@ -2,12 +2,12 @@
 import re
 import shutil
 from pathlib import Path
-from typing import Callable
 
 import pytest
 import yaml
 from huggingface_hub import HfApi
 from loguru import logger
+from parameterized import parameterized_class
 
 from llmcompressor.core import active_session
 from tests.e2e.e2e_utils import run_oneshot_for_e2e_testing
@@ -34,15 +34,10 @@
 ]
 
 
-@pytest.fixture
-def record_config_file(record_testsuite_property: Callable[[str, object], None]):
-    test_data_file_name = TEST_DATA_FILE.split("configs/")[-1]
-    record_testsuite_property("TEST_DATA_FILE_NAME", test_data_file_name)
-
-
 # Will run each test case in its own process through run_tests.sh
 # emulating vLLM CI testing
 @requires_gpu_count(1)
+@parameterized_class("test_data_file", [(TEST_DATA_FILE,)])
 @pytest.mark.skipif(not vllm_installed, reason="vLLM is not installed, skipping test")
 class TestvLLM:
     """
@@ -62,7 +57,9 @@ class TestvLLM:
     """  # noqa: E501
 
     def set_up(self):
-        eval_config = yaml.safe_load(Path(TEST_DATA_FILE).read_text(encoding="utf-8"))
+        eval_config = yaml.safe_load(
+            Path(self.test_data_file).read_text(encoding="utf-8")
+        )
 
         if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
             pytest.skip("Skipping test; cadence mismatch")
@@ -90,7 +87,6 @@ def set_up(self):
         ]
         self.api = HfApi()
 
-    @pytest.mark.usefixtures("record_config_file")
     def test_vllm(self):
        # Run vLLM with saved model
        import torch
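For reference, a minimal standalone illustration of the parameterized_class mechanism that replaces the removed record_config_file fixture: it stamps the listed values onto generated copies of the class, so each test can read self.test_data_file directly. The config paths here are placeholders, not files from this repository:

# Standalone illustration using unittest; the config paths are invented placeholders.
import unittest

from parameterized import parameterized_class

@parameterized_class(
    "test_data_file",
    [("configs/example_w4a16.yaml",), ("configs/example_fp8.yaml",)],
)
class TestConfigPlumbing(unittest.TestCase):
    def test_attribute_is_set(self):
        # each generated test class sees one of the configured paths
        self.assertTrue(self.test_data_file.endswith(".yaml"))

if __name__ == "__main__":
    unittest.main()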
