Fix End Epoch Default #39

Merged: 2 commits, Jul 25, 2024
src/llmcompressor/modifiers/distillation/output/base.py (12 changes: 6 additions & 6 deletions)
@@ -44,7 +44,7 @@ def on_initialize(self, state: State, **kwargs) -> bool:
         hidden_size = (
             kwargs.get("metadata").get("per_device_train_batch_size", 1),
             kwargs.get("metadata").get("max_seq_length", 512),
-            state.model.model.config.hidden_size,
+            state.model.config.hidden_size,
         )

         for target in (

@@ -77,18 +77,18 @@ def on_initialize(self, state: State, **kwargs) -> bool:
             )
             self.wrappers_[key] = (student_wrapper, teacher_wrapper)

-        with summon_full_params_context(state.teacher_model.model, offload_to_cpu=True):
+        with summon_full_params_context(state.teacher_model, offload_to_cpu=True):
             for key, (student_wrapper, teacher_wrapper) in self.wrappers_.items():
                 set_layer(key, student_wrapper, state.model)
                 set_layer(key, teacher_wrapper, state.teacher_model)

         self.wrapped_kd_model_ = self._create_model_wrapper(
             student_model=maybe_get_wrapped(state.model),
-            teacher_model=state.teacher_model.model,
+            teacher_model=state.teacher_model,
             state=state,
         )

-        set_wrapped_model(state.model, self.wrapped_kd_model_)
+        set_wrapped_model(state, self.wrapped_kd_model_)

         # for square-head distillation we want to scale the loss by the number of
         # layers if the user doesn't alter the default scale. This is done so the

@@ -99,9 +99,9 @@ def on_initialize(self, state: State, **kwargs) -> bool:
         return True

     def on_finalize(self, state: State, **kwargs) -> bool:
-        set_wrapped_model(state.model, self.wrapped_kd_model_.student_model)
+        set_wrapped_model(state, self.wrapped_kd_model_.student_model)

-        with summon_full_params_context(state.teacher_model.model, offload_to_cpu=True):
+        with summon_full_params_context(state.teacher_model, offload_to_cpu=True):
             for key, (student_wrapper, teacher_wrapper) in self.wrappers_.items():
                 set_layer(key, student_wrapper.layer, state.model)
                 set_layer(key, teacher_wrapper.layer, state.teacher_model)

@@ -184,7 +184,7 @@ def recursive_combine(
     val_two: TensorOrCollectionType,
     func: Callable[[Tensor, Tensor], Tensor],
 ):
-    if isinstance(val_one, type(val_two)):
+    if not isinstance(val_one, type(val_two)):
         raise ValueError(
             f"val_one type of {type(val_one)} must match "
             f"val_two type of {type(val_two)}"

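Note on the guard above: the old check raised exactly when the two values already had matching types and silently accepted mismatches; the corrected check raises only on a mismatch. A minimal standalone sketch of the corrected behavior (the helper name and simplified signature are illustrative, not part of the library):

from typing import Any

def check_matching_types(val_one: Any, val_two: Any) -> None:
    # Corrected guard: raise only when the two values have different types.
    if not isinstance(val_one, type(val_two)):
        raise ValueError(
            f"val_one type of {type(val_one)} must match "
            f"val_two type of {type(val_two)}"
        )

check_matching_types([1.0, 2.0], [3.0])   # passes: both are lists
# check_matching_types([1.0], (1.0,))     # raises ValueError: list vs. tuple
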
src/llmcompressor/modifiers/modifier.py (4 changes: 2 additions & 2 deletions)
@@ -25,8 +25,8 @@ class Modifier(BaseModel, ModifierInterface):

     index: Optional[int] = None
     group: Optional[str] = None
-    start: Optional[float] = -1
-    end: Optional[float] = -1
+    start: Optional[float] = None
+    end: Optional[float] = None
     update: Optional[float] = None

     initialized_structure_: bool = False

src/llmcompressor/recipe/recipe.py (2 changes: 1 addition & 1 deletion)
@@ -667,7 +667,7 @@ def create_recipe_string_from_modifiers(
     recipe_dict = {
         f"{modifier_group_name}_stage": {
             f"{default_group_name}_modifiers": {
-                modifier.__class__.__name__: modifier.model_dump()
+                modifier.__class__.__name__: modifier.model_dump(exclude_unset=True)
                 for modifier in modifiers
             }
         }

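Taken together with the Modifier change above, this is the core of the fix: start and end now default to None instead of -1, and model_dump(exclude_unset=True) keeps fields the user never set out of the generated recipe, so a serialized recipe no longer pins end: -1 on every modifier. A rough, self-contained sketch of the Pydantic behavior being relied on (the DummyModifier class is illustrative only):

from typing import Optional

from pydantic import BaseModel


class DummyModifier(BaseModel):
    start: Optional[float] = None
    end: Optional[float] = None
    scale: float = 1.0


untouched = DummyModifier()
explicit = DummyModifier(end=3.0)

# Unset fields are dropped entirely, so defaults never leak into the recipe.
print(untouched.model_dump(exclude_unset=True))  # {}
print(explicit.model_dump(exclude_unset=True))   # {'end': 3.0}

# A plain dump would serialize every field, as the old -1 defaults did.
print(untouched.model_dump())  # {'start': None, 'end': None, 'scale': 1.0}

This is also why the dummy modifiers in tests/llmcompressor/recipe/test_recipe.py further down no longer need to override model_dump to return an empty dict: a modifier with no explicitly set fields already dumps to {} under exclude_unset=True.
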
src/llmcompressor/utils/fsdp/helpers.py (17 changes: 8 additions & 9 deletions)
@@ -16,6 +16,7 @@
 import torch
 from torch.nn import Module

+from llmcompressor.core.state import State
 from llmcompressor.pytorch.model_load.helpers import save_model_and_recipe
 from llmcompressor.utils.pytorch import set_layer

@@ -56,20 +57,18 @@ def maybe_get_wrapped(model: Module) -> Module:
     return model


-def set_wrapped_model(model: Module, wrapped_model: Module):
+def set_wrapped_model(state: State, wrapped_model: Module):
     """
-    Given a model that may or may not have a distributed wrapper, set the underlying
-    wrapped model.
-
-    #TODO: will probably have to fix this
+    Given a state with a model that may or may not have a distributed wrapper, set
+    the underlying wrapped model.

-    :param input_model: input model to be updated
+    :param state: state to update model of
     :param updated_wrapped: model to inject into input_model
     """
-    if is_fsdp_model(model):
-        model._fsdp_wrapped_module = wrapped_model
+    if is_fsdp_model(state.model):
+        state.model._fsdp_wrapped_module = wrapped_model
     else:
-        model = wrapped_model
+        state.model = wrapped_model


 def unwrap_and_export_model(model, accelerator, output_dir, tokenizer):

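The non-FSDP branch is the subtle part of this change: assigning to the model parameter (model = wrapped_model) only rebinds a local name inside the function, so the caller never sees the wrapped model, while assigning to state.model mutates the shared state object that callers hold. A minimal sketch of the difference, independent of llmcompressor's actual classes:

from dataclasses import dataclass


@dataclass
class DummyState:
    model: object


def set_by_parameter(model: object, wrapped: object) -> None:
    # Rebinding the parameter only changes the local name; the caller's
    # reference is untouched.
    model = wrapped


def set_by_state(state: DummyState, wrapped: object) -> None:
    # Mutating an attribute on the shared state object is visible to callers.
    state.model = wrapped


state = DummyState(model="original")
set_by_parameter(state.model, "wrapped")
print(state.model)  # original

set_by_state(state, "wrapped")
print(state.model)  # wrapped
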
tests/llmcompressor/recipe/test_recipe.py (6 changes: 2 additions & 4 deletions)
@@ -101,13 +101,11 @@ def test_recipe_can_be_created_from_modifier_instances():


 class A_FirstDummyModifier(Modifier):
-    def model_dump(self):
-        return {}
+    pass


 class B_SecondDummyModifier(Modifier):
-    def model_dump(self):
-        return {}
+    pass


 def test_create_recipe_string_from_modifiers_with_default_group_name():

@@ -1,8 +1,11 @@
+import os
 import shutil
 import unittest

 import pytest
+from compressed_tensors.compressors.model_compressor import ModelCompressor
 from parameterized import parameterized_class
+from transformers import AutoConfig

 from tests.testing_utils import parse_params, requires_gpu, requires_torch

@@ -35,6 +38,18 @@ def _test_oneshot_and_finetune(self):
             dataset_config_name=self.dataset_config_name,
         )

+        config_os = ModelCompressor.parse_sparsity_config(
+            AutoConfig.from_pretrained(
+                os.path.join(self.output, "stage_test_oneshot")
+            ).compression_config
+        )
+        config_ft = ModelCompressor.parse_sparsity_config(
+            AutoConfig.from_pretrained(
+                os.path.join(self.output, "stage_test_oneshot")
+            ).compression_config
+        )
+        assert config_ft["global_sparsity"] >= config_os["global_sparsity"]
+
     def tearDown(self):
         # TODO: we get really nice stats from finetune that we should log
         # stored in results.json

@@ -1,7 +1,6 @@
 from typing import Any, Dict, Optional, Union

 import pytest
-from datasets import load_dataset
 from torch.nn import Module
 from transformers import AutoModelForCausalLM, Trainer

@@ -44,8 +43,8 @@ def mixin_trainer():
     model_state_path = "Xenova/llama2.c-stories15M"
     model = AutoModelForCausalLM.from_pretrained(model_state_path)
     recipe = "tests/llmcompressor/transformers/finetune/test_quantization.yaml"
-    train_dataset = load_dataset("garage-bAInd/Open-Platypus", split="train[:5%]")
-    eval_dataset = load_dataset("garage-bAInd/Open-Platypus", split="train[5%:6%]")
+    train_dataset = "open-platypus"
+    eval_dataset = "open-platypus"

     return MixInTest(
         model=model,