merge with main

Signed-off-by: Kyle Sayers <[email protected]>
kylesayrs committed Feb 13, 2025
1 parent 4093df7 commit 5b384bb

Showing 58 changed files with 986 additions and 340 deletions.
13 changes: 0 additions & 13 deletions examples/automodelforcausallm/README.md

This file was deleted.

11 changes: 0 additions & 11 deletions examples/automodelforcausallm/run_automodelforcausallm.py

This file was deleted.

12 changes: 4 additions & 8 deletions examples/finetuning/example_alternating_recipe.yaml
@@ -4,12 +4,10 @@ initial_sparsity_stage:
SparseGPTModifier:
sparsity: 0.5
block_size: 128
sequential_update: False
percdamp: 0.01
mask_structure: "0:0"
targets: [
"re:model.layers.\\d+$"
]
targets: ["Linear"]
ignore: ["re:.*lm_head"]
initial_training_stage:
run_type: train
pruning_modifiers:
@@ -22,12 +20,10 @@ next_sparsity_stage:
SparseGPTModifier:
sparsity: 0.7
block_size: 128
sequential_update: False
percdamp: 0.01
mask_structure: "0:0"
targets: [
"re:model.layers.\\d+$"
]
targets: ["Linear"]
ignore: ["re:.*lm_head"]
next_training_stage:
run_type: train
pruning_modifiers:
@@ -4,7 +4,8 @@ sparsity_stage:
SparseGPTModifier:
sparsity: 0.5
mask_structure: "2:4"
sequential_update: false
targets: ["Linear"]
ignore: ["re:.*lm_head"]
finetuning_stage:
run_type: train
finetuning_modifiers:
@@ -4,7 +4,8 @@ sparsity_stage:
SparseGPTModifier:
sparsity: 0.5
mask_structure: "2:4"
sequential_update: false
targets: ["Linear"]
ignore: ["re:.*lm_head"]
finetuning_stage:
run_type: train
finetuning_modifiers:
4 changes: 4 additions & 0 deletions examples/quantizing_moe/deepseek_moe_w4a16.py
@@ -5,6 +5,10 @@
from llmcompressor.transformers import oneshot
from llmcompressor.transformers.compression.helpers import calculate_offload_device_map

# NOTE: transformers 4.48.0 has an import error with DeepSeek.
# Please consider downgrading to an earlier transformers release or
# upgrading to a version in which this bug is fixed.

# select a Mixture of Experts model for quantization
MODEL_ID = "deepseek-ai/DeepSeek-V2.5"

4 changes: 4 additions & 0 deletions examples/quantizing_moe/deepseek_moe_w8a8_fp8.py
@@ -4,6 +4,10 @@
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.transformers import oneshot

# NOTE: transformers 4.48.0 has an import error with DeepSeek.
# Please consider downgrading to an earlier transformers release or
# upgrading to a version in which this bug is fixed.

# select a Mixture of Experts model for quantization
MODEL_ID = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"

4 changes: 4 additions & 0 deletions examples/quantizing_moe/deepseek_moe_w8a8_int8.py
@@ -6,6 +6,10 @@
from llmcompressor.transformers import oneshot
from llmcompressor.transformers.compression.helpers import calculate_offload_device_map

# NOTE: transformers 4.48.0 has an import error with DeepSeek.
# Please consider downgrading to an earlier transformers release or
# upgrading to a version in which this bug is fixed.

# select a Mixture of Experts model for quantization
MODEL_ID = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"

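The note repeated in the three DeepSeek examples above recommends avoiding transformers 4.48.0. As an illustration only (not part of this commit), a small guard along these lines could fail fast with a clearer message; the exact check and wording are assumptions:

```python
import transformers
from packaging import version

# transformers 4.48.0 is reported to break DeepSeek imports; raise a clear error early.
if version.parse(transformers.__version__) == version.parse("4.48.0"):
    raise RuntimeError(
        "transformers 4.48.0 has a known import error with DeepSeek models; "
        "install an earlier or later transformers release before running this example."
    )
```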
13 changes: 12 additions & 1 deletion examples/sparse_2of4_quantization_fp8/README.md
@@ -93,7 +93,7 @@ oneshot(
)
```

3. **Save the Compressed Model**
### Saving the Compressed Model

The compressed model and tokenizer are saved to the output directory:

@@ -106,6 +106,17 @@ Output Directories:
- Without FP8: `Meta-Llama-3-8B-Instruct-2of4-sparse`
- With FP8: `Meta-Llama-3-8B-Instruct-2of4-W8A8-FP8-Dynamic-Per-Token`

#### Saving Without Sparse Compression

To save the model on disk without sparse compression:

```python
model.save_pretrained(save_dir, save_compressed=True, disable_sparse_compression=True)
tokenizer.save_pretrained(save_dir)
```

> **Note:** Saving a model with both `save_compressed` and `disable_sparse_compression` set still compresses the model with the quantization compressor; however, instead of the more disk-efficient sparsity compressor(s), the `dense` sparsity compressor is used. The `dense` compressor stores model parameters as-is and does not leverage sparsity for disk-efficient storage. These options only affect how the model is saved on disk; they do not change the actual pruning or quantization.

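For contrast, a minimal sketch of the default save path with sparse compression left enabled; it simply omits the `disable_sparse_compression` flag shown above and is illustrative rather than taken from the example script:

```python
# Default: the sparsity compressor is used, producing the more
# disk-efficient sparse representation on disk.
model.save_pretrained(save_dir, save_compressed=True)
tokenizer.save_pretrained(save_dir)
```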
### Validation

After compression, the script validates the model by generating a sample output:
4 changes: 3 additions & 1 deletion examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py
@@ -115,5 +115,7 @@ def get_recipe(fp8_enabled):
print("==========================================\n")

# Save compressed model and tokenizer
model.save_pretrained(save_dir, save_compressed=args.fp8)
model.save_pretrained(
save_dir, save_compressed=args.fp8, disable_sparse_compression=True
)
tokenizer.save_pretrained(save_dir)
2 changes: 1 addition & 1 deletion examples/trl_mixin/ex_trl_constant.py
@@ -3,7 +3,7 @@
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DataCollatorForCompletionOnlyLM

from llmcompressor.transformers import TrainingArguments
from llmcompressor.args import TrainingArguments

model_path = "neuralmagic/Llama-2-7b-pruned50-retrained"
output_dir = "./output_trl_sft_test_7b_gsm8k_sft_data"
9 changes: 3 additions & 6 deletions examples/trl_mixin/ex_trl_distillation.py
@@ -1,11 +1,8 @@
from sft_trainer import SFTTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, DefaultDataCollator

from llmcompressor.transformers import (
DataTrainingArguments,
TextGenerationDataset,
TrainingArguments,
)
from llmcompressor.args import DatasetArguments, TrainingArguments
from llmcompressor.transformers import TextGenerationDataset

model_path = "neuralmagic/Llama-2-7b-pruned50-retrained"
teacher_path = "neuralmagic/Llama-2-7b-gsm8k"
@@ -21,7 +18,7 @@
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load gsm8k using SparseML dataset tools
data_args = DataTrainingArguments(
data_args = DatasetArguments(
dataset="gsm8k", dataset_config_name="main", max_seq_length=512
)
dataset_manager = TextGenerationDataset.load_from_registry(
2 changes: 1 addition & 1 deletion examples/trl_mixin/sft_trainer.py
@@ -1,7 +1,7 @@
from trl import SFTConfig as TRLSFTConfig
from trl import SFTTrainer as TRLSFTTrainer

from llmcompressor.transformers import TrainingArguments
from llmcompressor.args import TrainingArguments
from llmcompressor.transformers.finetune.session_mixin import SessionManagerMixIn

__all__ = ["SFTTrainer"]
7 changes: 4 additions & 3 deletions src/llmcompressor/transformers/finetune/README.md
@@ -74,9 +74,10 @@ train(

Finetuning arguments are split into the following four groups (a usage sketch follows the list):

* ModelArguments: `src/llmcompressor/transformers/finetune/model_args.py`
* TrainingArguments: `src/llmcompressor/transformers/finetune/training_args.py`
* DataTrainingArguments: `src/llmcompressor/transformers/finetune/data/data_training_args.py`
* ModelArguments: `src/llmcompressor/transformers/utils/arg_parser/model_arguments.py`
* TrainingArguments: `src/llmcompressor/transformers/utils/arg_parser/training_arguments.py`
* DatasetArguments: `src/llmcompressor/transformers/utils/arg_parser/dataset_arguments.py`
* RecipeArguments: `src/llmcompressor/transformers/utils/arg_parser/recipe_arguments.py`
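A rough sketch of how these argument groups are constructed; the import path follows the examples updated in this commit, while the specific field values are illustrative assumptions:

```python
from llmcompressor.args import DatasetArguments, TrainingArguments

# Dataset configuration, mirroring the TRL distillation example in this commit.
data_args = DatasetArguments(
    dataset="gsm8k", dataset_config_name="main", max_seq_length=512
)

# Standard HF-style training arguments; output_dir and max_steps are placeholders.
training_args = TrainingArguments(output_dir="./finetune_output", max_steps=100)
```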


## Running One-Shot with FSDP
@@ -4,7 +4,8 @@ sparsity_stage:
SparseGPTModifier:
sparsity: 0.5
mask_structure: "2:4"
sequential_update: false
targets: ["Linear"]
ignore: ["re:.*lm_head"]
quantization_stage:
run_type: oneshot
quantization_modifiers:
3 changes: 2 additions & 1 deletion tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_recipe.yaml
@@ -4,7 +4,8 @@ sparsity_stage:
SparseGPTModifier:
sparsity: 0.5
mask_structure: "2:4"
sequential_update: false
targets: ["Linear"]
ignore: ["re:.*lm_head"]
quantization_stage:
run_type: oneshot
quantization_modifiers:
86 changes: 72 additions & 14 deletions tests/e2e/vLLM/test_vllm.py
@@ -1,12 +1,13 @@
import os
import re
import shutil
from pathlib import Path
from typing import Callable

import pytest
import yaml
from huggingface_hub import HfApi
from loguru import logger
from parameterized import parameterized_class

from llmcompressor.core import active_session
from tests.e2e.e2e_utils import run_oneshot_for_e2e_testing
@@ -20,19 +21,24 @@
vllm_installed = False
logger.warning("vllm is not installed. This test will be skipped")


HF_MODEL_HUB_NAME = "nm-testing"
TEST_DATA_FILE = os.environ.get("TEST_DATA_FILE", "")

TEST_DATA_FILE = os.environ.get("TEST_DATA_FILE", "")
SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "")

@pytest.fixture
def record_config_file(record_testsuite_property: Callable[[str, object], None]):
test_data_file_name = TEST_DATA_FILE.split("configs/")[-1]
record_testsuite_property("TEST_DATA_FILE_NAME", test_data_file_name)
EXPECTED_SAVED_FILES = [
"config.json",
r"^model(?:-\d{5}-of-\d{5})?\.safetensors$",
"recipe.yaml",
"tokenizer.json",
]
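The safetensors entry above is a regular expression so that both single-file and sharded checkpoints are accepted; a quick standalone check of the pattern (an editor illustration, not part of this diff):

```python
import re

PATTERN = r"^model(?:-\d{5}-of-\d{5})?\.safetensors$"

# Single-file and sharded checkpoint names both match the pattern ...
assert re.fullmatch(PATTERN, "model.safetensors")
assert re.fullmatch(PATTERN, "model-00001-of-00002.safetensors")
# ... while unrelated weight files do not.
assert re.fullmatch(PATTERN, "pytorch_model.bin") is None
```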


# Will run each test case in its own process through run_tests.sh
# emulating vLLM CI testing
@requires_gpu_count(1)
@parameterized_class("test_data_file", [(TEST_DATA_FILE,)])
@pytest.mark.skipif(not vllm_installed, reason="vLLM is not installed, skipping test")
class TestvLLM:
"""
@@ -52,7 +58,9 @@ class TestvLLM:
""" # noqa: E501

def set_up(self):
eval_config = yaml.safe_load(Path(TEST_DATA_FILE).read_text(encoding="utf-8"))
eval_config = yaml.safe_load(
Path(self.test_data_file).read_text(encoding="utf-8")
)

if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
pytest.skip("Skipping test; cadence mismatch")
@@ -65,6 +73,7 @@ def set_up(self):
self.recipe = eval_config.get("recipe")
self.quant_type = eval_config.get("quant_type")
self.save_dir = eval_config.get("save_dir")
self.save_compressed = eval_config.get("save_compressed", True)

logger.info("========== RUNNING ==============")
logger.info(self.scheme)
@@ -79,7 +88,6 @@ def set_up(self):
]
self.api = HfApi()

@pytest.mark.usefixtures("record_config_file")
def test_vllm(self):
# Run vLLM with saved model
import torch
@@ -100,11 +108,19 @@
quant_type=self.quant_type,
)

# check that session contains recipe
self._check_session_contains_recipe()

logger.info("================= SAVING TO DISK ======================")
oneshot_model.save_pretrained(self.save_dir)
oneshot_model.save_pretrained(
self.save_dir, save_compressed=self.save_compressed
)
tokenizer.save_pretrained(self.save_dir)
recipe_path = os.path.join(self.save_dir, "recipe.yaml")

# check that expected files exist
self._check_save_dir_has_expected_files()

# Use the session to fetch the recipe;
# Reset session for next test case
session = active_session()
@@ -113,12 +129,22 @@ def test_vllm(self):
fp.write(recipe_yaml_str)
session.reset()

logger.info("================= UPLOADING TO HUB ======================")
if SKIP_HF_UPLOAD.lower() != "yes":
logger.info("================= UPLOADING TO HUB ======================")

self.api.upload_folder(
repo_id=f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e",
folder_path=self.save_dir,
)
stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e"

self.api.create_repo(
repo_id=stub,
exist_ok=True,
repo_type="model",
private=False,
)

self.api.upload_folder(
repo_id=stub,
folder_path=self.save_dir,
)

logger.info("================= RUNNING vLLM =========================")

@@ -146,3 +172,35 @@ def test_vllm(self):
def tear_down(self):
if self.save_dir is not None:
shutil.rmtree(self.save_dir)

def _check_session_contains_recipe(self) -> None:
session = active_session()
recipe_yaml_str = session.get_serialized_recipe()
assert recipe_yaml_str is not None

def _check_save_dir_has_expected_files(self):
files = os.listdir(self.save_dir)
logger.debug("Saved files: {}", files)

matched_patterns = set()

for expected in EXPECTED_SAVED_FILES:
# Find all files matching the expected pattern
matches = [
file
for file in files
if (
re.fullmatch(expected, file)
if expected.startswith("^")
else file == expected
)
]
if len(matches) > 0:
matched_patterns.add(expected)

assert len(matched_patterns) == len(EXPECTED_SAVED_FILES), (
"expected: ",
EXPECTED_SAVED_FILES,
"\n saved: ",
list(matched_patterns),
)
7 changes: 6 additions & 1 deletion tests/examples/utils.py
@@ -68,7 +68,10 @@ def copy_and_run_command(


def copy_and_run_script(
tmp_path: Path, example_dir: str, script_filename: str
tmp_path: Path,
example_dir: str,
script_filename: str,
flags: Optional[list[str]] = None,
) -> Tuple[List[str], CompletedProcess[str]]:
"""
Copies the contents of example_dir (relative to the current working directory) to
@@ -81,6 +84,8 @@ def copy_and_run_script(
:return: subprocess.CompletedProcess object
"""
command = [sys.executable, script_filename]
if flags:
command.extend(flags)
return command, copy_and_run_command(tmp_path, example_dir, command)


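A hedged sketch of how the new `flags` parameter of `copy_and_run_script` might be used from a test; the example directory, script name, and `--fp8` flag are assumptions for illustration, not values taken from this commit:

```python
from pathlib import Path

from tests.examples.utils import copy_and_run_script  # import path assumed


def test_example_with_extra_flag(tmp_path: Path):
    # Forward an extra CLI flag to the example script via the new `flags` parameter.
    command, result = copy_and_run_script(
        tmp_path,
        "sparse_2of4_quantization_fp8",
        "llama3_8b_2of4.py",
        flags=["--fp8"],
    )
    assert result.returncode == 0, f"command failed: {command}"
```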