WIP
Signed-off-by: Kyle Sayers <[email protected]>
kylesayrs committed Jan 30, 2025
1 parent 29f93d3 commit de38a64
Showing 7 changed files with 43 additions and 19 deletions.
5 changes: 4 additions & 1 deletion examples/quantization_w4a16/llama3_example.py
@@ -5,7 +5,8 @@
from llmcompressor.transformers import oneshot

# Select model and load it.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
#MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

model = AutoModelForCausalLM.from_pretrained(
MODEL_ID,
@@ -22,6 +23,7 @@
# Increasing the number of samples can improve accuracy.
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 2048
BATCH_SIZE = 2

# Load dataset and preprocess.
ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)
@@ -64,6 +66,7 @@ def tokenize(sample):
recipe=recipe,
max_seq_length=MAX_SEQUENCE_LENGTH,
num_calibration_samples=NUM_CALIBRATION_SAMPLES,
per_device_oneshot_batch_size=BATCH_SIZE,
)

# Confirm generations of the quantized model look sane.
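For orientation, here is a minimal sketch of how the updated example feeds the new batch size into `oneshot`. Only the keyword arguments visible in this diff are confirmed; the wrapper function and its `model`, `ds`, and `recipe` parameters stand in for objects the example builds earlier and are illustrative assumptions.

from llmcompressor.transformers import oneshot

NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 2048
BATCH_SIZE = 2  # new in this commit


def run_calibration(model, ds, recipe):
    """Invoke oneshot the way the updated example does (sketch).

    `model`, `ds`, and `recipe` are the loaded model, tokenized calibration
    dataset, and quantization recipe prepared earlier in the example file.
    """
    oneshot(
        model=model,
        dataset=ds,
        recipe=recipe,
        max_seq_length=MAX_SEQUENCE_LENGTH,
        num_calibration_samples=NUM_CALIBRATION_SAMPLES,
        per_device_oneshot_batch_size=BATCH_SIZE,  # calibrate in batches of two samples
    )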
2 changes: 2 additions & 0 deletions src/llmcompressor/modifiers/quantization/gptq/base.py
@@ -254,6 +254,8 @@ def on_initialize(self, state: State, **kwargs) -> bool:
)
if isinstance(exception, unfixable_errors):
raise exception

raise exception

warnings.warn("Falling back to layer_sequential pipeline")
try:
1 change: 1 addition & 0 deletions src/llmcompressor/pipelines/sequential/helpers.py
@@ -71,6 +71,7 @@ def trace_subgraphs(
concrete_args = populate_concrete_args(model, sample_input)

# trace
breakpoint()
with (
calibration_forward_context(model),
HooksMixin.disable_hooks(),
8 changes: 2 additions & 6 deletions src/llmcompressor/transformers/finetune/data/base.py
@@ -53,11 +53,7 @@ def __init__(
self.tokenizer = getattr(self.processor, "tokenizer", self.processor)

if self.tokenizer is not None:
# fill in pad token
if not self.tokenizer.pad_token:
self.tokenizer.pad_token = self.tokenizer.eos_token

# configure sequence length
# resolve sequence length
max_seq_length = data_args.max_seq_length
if data_args.max_seq_length > self.tokenizer.model_max_length:
logger.warning(
@@ -69,7 +65,7 @@ def __init__(
data_args.max_seq_length, self.tokenizer.model_max_length
)

# configure padding
# resolve padding
self.padding = (
False
if self.data_args.concatenate_data
15 changes: 9 additions & 6 deletions src/llmcompressor/transformers/finetune/data/data_helpers.py
@@ -30,7 +30,7 @@ def format_calibration_data(
batch_size: int = 1,
do_shuffle: bool = True,
processor: Optional[Processor] = None,
collate_fn: Callable = default_data_collator,
collate_fn: Optional[Callable] = None,
accelerator: Optional[Any] = None,
) -> List[torch.Tensor]:
"""
@@ -41,7 +41,9 @@ def format_calibration_data(
:param num_calibration_samples: number of data samples to convert
:param do_shuffle: whether to shuffle the dataset before selecting calibration
samples, true by default
:param collate_fn: optional custom collate function, or use default
:param collate_fn: optional custom collate function; defaults to
`DataCollatorWithPadding` if None is provided and a tokenizer can be
resolved, otherwise `default_data_collator` is used
:param accelerator: optional accelerator used when preparing in FSDP mode
:return: list of trimmed calibration data tensors
"""
@@ -61,16 +63,17 @@ def format_calibration_data(
tokenized_calibration = tokenized_dataset.select(range(safe_calibration_samples))

# collate data
breakpoint()
if collate_fn is None:
    tokenizer = getattr(processor, "tokenizer", processor)
    if hasattr(tokenizer, "pad"):
        collate_fn = DataCollatorWithPadding(tokenizer)
    else:
        warnings.warn(
            "Could not find processor, attempting to collate without padding "
            "(may fail for batch_size > 1)"
        )
        collate_fn = default_data_collator

dataloader_params = {
"batch_size": batch_size,
7 changes: 4 additions & 3 deletions src/llmcompressor/transformers/finetune/runner.py
@@ -49,14 +49,15 @@ def __init__(
data_args: "DataTrainingArguments",
model_args: "ModelArguments",
training_args: "TrainingArguments",
processor: Processor,
):
self._data_args = data_args
self._model_args = model_args
self._training_args = training_args

self.datasets = {}
self.trainer = None
self.processor = None
self.processor = processor
self.parent_output_dir = self._training_args.output_dir
self._output_dir = self._training_args.output_dir

@@ -68,8 +69,8 @@ def populate_datasets(self, processor: Processor, add_labels: bool = True):
:param processor: processor or tokenizer to use for dataset tokenization
:param add_labels: if True, add labels column to dataset splits
"""
# TODO: remove `processor` arg in favor of self.processor
if self._data_args.dataset is None:
self.processor = self._model_args.processor
logger.info(
"Running oneshot without calibration data. This is expected for "
"weight-only and dynamic quantization"
@@ -110,7 +111,7 @@ def _get_split_name(inp_str):
registry_id,
data_args=self._data_args,
split=split_str,
processor=processor,
processor=self.processor,
)
tokenized_datasets[split_name] = dataset_manager(add_labels=add_labels)

24 changes: 21 additions & 3 deletions src/llmcompressor/transformers/finetune/text_generation.py
@@ -20,6 +20,7 @@
import os
import warnings
from pathlib import PosixPath
from types import NoneType

from loguru import logger
from transformers import (
@@ -286,6 +287,20 @@ def initialize_processor_from_path(
return processor


def configure_processor(processor: Processor):
    # configure tokenizer pad_token, required for padding and data collation
    tokenizer = getattr(processor, "tokenizer", processor)
    if getattr(tokenizer, "pad_token", None) is None:
        if hasattr(tokenizer, "eos_token"):
            logger.debug("Tokenizer is missing pad_token, using eos_token instead")
            tokenizer.pad_token = tokenizer.eos_token
        else:
            logger.debug(
                "Tokenizer is missing pad_token and eos_token, "
                "this may lead to issues when padding"
            )


def main(
model_args: ModelArguments,
data_args: DataTrainingArguments,
@@ -361,8 +376,9 @@ def main(
teacher.eval()

processor = model_args.processor
if isinstance(processor, str) or processor is None:
if isinstance(processor, (str, NoneType)):
processor = initialize_processor_from_path(model_args, model, teacher)
configure_processor(processor)

pre_initialize_structure(model=model)

@@ -371,10 +387,12 @@ def main(

# Load datasets
stage_runner = StageRunner(
model_args=model_args, data_args=data_args, training_args=training_args
model_args=model_args, data_args=data_args, training_args=training_args, processor=processor
)
add_labels = training_args.do_train or training_args.run_stages
stage_runner.populate_datasets(processor=processor, add_labels=add_labels)
stage_runner.populate_datasets(
processor=processor, add_labels=add_labels
)
train_dataset = stage_runner.get_dataset_split("train")
eval_dataset = stage_runner.get_dataset_split("validation")
calib_dataset = stage_runner.get_dataset_split("calibration")

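The pad_token fallback added by configure_processor can be exercised in isolation. A small sketch follows; gpt2 is chosen only because its tokenizer ships with an eos_token but no pad_token, and the logic is inlined to mirror the diff rather than importing the WIP function.

from transformers import AutoTokenizer

# gpt2's tokenizer has an eos_token but no pad_token, so it hits the fallback path
tokenizer = AutoTokenizer.from_pretrained("gpt2")
assert tokenizer.pad_token is None

# mirror of configure_processor: reuse eos_token when pad_token is unset
if getattr(tokenizer, "pad_token", None) is None and hasattr(tokenizer, "eos_token"):
    tokenizer.pad_token = tokenizer.eos_token

print(tokenizer.pad_token)  # '<|endoftext|>'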