Skip to content

Commit

Permalink
Supports W8A8 quantization for more models
Browse files Browse the repository at this point in the history
  • Loading branch information
AllentDan committed Dec 3, 2024
1 parent 3913ead commit 9627436
Show file tree
Hide file tree
Showing 9 changed files with 15 additions and 5,357 deletions.
3 changes: 3 additions & 0 deletions lmdeploy/lite/apis/calibrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
'ChatGLMForConditionalGeneration': 'GLMBlock',
'MixtralForCausalLM': 'MixtralDecoderLayer',
'Qwen2VLForConditionalGeneration': 'Qwen2VLDecoderLayer',
'MistralForCausalLM': 'MistralDecoderLayer',
}

NORM_TYPE_MAP = {
Expand All @@ -44,6 +45,7 @@
'ChatGLMForConditionalGeneration': 'RMSNorm',
'MixtralForCausalLM': 'MixtralRMSNorm',
'Qwen2VLForConditionalGeneration': 'Qwen2RMSNorm',
'MistralForCausalLM': 'MistralRMSNorm',
}

HEAD_NAME_MAP = {
Expand All @@ -61,6 +63,7 @@
'ChatGLMForConditionalGeneration': 'output_layer',
'MixtralForCausalLM': 'lm_head',
'Qwen2VLForConditionalGeneration': 'lm_head',
'MistralForCausalLM': 'lm_head',
}


Expand Down
66 changes: 2 additions & 64 deletions lmdeploy/lite/apis/smooth_quant.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,15 @@
# Copyright (c) OpenMMLab. All rights reserved.

import os.path as osp
import shutil

import fire
import torch
from torch import nn

import lmdeploy
from lmdeploy.lite.apis.calibrate import calibrate
from lmdeploy.lite.apis.calibrate import (LAYER_TYPE_MAP, NORM_TYPE_MAP,
calibrate)
from lmdeploy.lite.quantization.awq import (FC_FCS_MAP, NORM_FCS_MAP,
awq_layers, smooth_layers)
from lmdeploy.lite.utils import collect_target_modules
from lmdeploy.pytorch.models import QLinear, QRMSNorm

# Lookup table keyed by model class name; each value is the class name of
# that model's decoder layer / transformer block.
LAYER_TYPE_MAP = {
    'InternLMForCausalLM': 'InternLMDecoderLayer',
    'InternLM2ForCausalLM': 'InternLM2DecoderLayer',
    'QWenLMHeadModel': 'QWenBlock',
    'BaiChuanForCausalLM': 'DecoderLayer',
    'LlamaForCausalLM': 'LlamaDecoderLayer',
    'ChatGLMForConditionalGeneration': 'GLMBlock',
}
# Lookup table keyed by model class name; each value is the class name of
# the normalization layer used by that model.
NORM_TYPE_MAP = {
    'InternLMForCausalLM': 'InternLMRMSNorm',
    'InternLM2ForCausalLM': 'InternLM2RMSNorm',
    'QWenLMHeadModel': 'RMSNorm',
    'BaiChuanForCausalLM': 'RMSNorm',
    'LlamaForCausalLM': 'LlamaRMSNorm',
    'ChatGLMForConditionalGeneration': 'RMSNorm',
}

# First entry of the installed lmdeploy package's search path; used as the
# base directory for the bundled modeling files below.
LMDEPLOY_ROOT = lmdeploy.__path__[0]

# Model class name -> path of the modeling source file shipped inside the
# lmdeploy package. smooth_quant copies the matching file into the output
# work_dir after saving the quantized model.
MODEL_PATH_MAP = {
    'InternLMForCausalLM':
        osp.join(LMDEPLOY_ROOT, 'pytorch/modeling/modeling_internlm.py'),
    'InternLM2ForCausalLM':
        osp.join(LMDEPLOY_ROOT, 'pytorch/modeling/modeling_internlm2.py'),
    'LlamaForCausalLM':
        osp.join(LMDEPLOY_ROOT, 'pytorch/modeling/modeling_llama.py'),
    'BaiChuanForCausalLM':
        osp.join(LMDEPLOY_ROOT, 'pytorch/modeling/modeling_baichuan.py'),
}

# Model class name -> `auto_map` entries that smooth_quant merges into (or
# assigns as) `model.config.auto_map` before saving. Each inner value has
# the form '<module>.<class>'.
AUTO_MAP = {
    'InternLMForCausalLM': {
        'AutoConfig': 'configuration_internlm.InternLMConfig',
        'AutoModel': 'modeling_internlm.InternLMForCausalLM',
        'AutoModelForCausalLM': 'modeling_internlm.InternLMForCausalLM',
    },
    'InternLM2ForCausalLM': {
        # NOTE(review): module is `configuration_internlm2` but the class is
        # spelled `InternLMConfig` (no "2") -- confirm this matches the
        # class actually defined in that module.
        'AutoConfig': 'configuration_internlm2.InternLMConfig',
        'AutoModelForCausalLM': 'modeling_internlm2.InternLM2ForCausalLM',
        'AutoModel': 'modeling_internlm2.InternLM2ForCausalLM',
    },
    'LlamaForCausalLM': {
        'AutoModel': 'modeling_llama.LlamaForCausalLM',
        'AutoModelForCausalLM': 'modeling_llama.LlamaForCausalLM',
    },
    'BaiChuanForCausalLM': {
        'AutoConfig': 'configuration_baichuan.BaiChuanConfig',
        'AutoModelForCausalLM': 'modeling_baichuan.BaiChuanForCausalLM',
    },
}


def smooth_quant(model: str,
work_dir: str = './work_dir',
Expand Down Expand Up @@ -146,11 +91,6 @@ def smooth_quant(model: str,
setattr(parent, child_name, q_norm)
norm.to('cpu')

if hasattr(model.config, 'auto_map'):
model.config.auto_map.update(AUTO_MAP[type(model).__name__])
else:
model.config.auto_map = AUTO_MAP[type(model).__name__]

if vl_model:
from .auto_awq import save_vl_model
save_vl_model(vl_model, model_path, work_dir)
Expand All @@ -162,8 +102,6 @@ def smooth_quant(model: str,
safe_serialization=False)
tokenizer.save_pretrained(work_dir)

shutil.copy(MODEL_PATH_MAP[type(model).__name__], work_dir)


# Script entry point: hand smooth_quant to fire.Fire when this module is
# executed directly.
if __name__ == '__main__':
    fire.Fire(smooth_quant)
11 changes: 10 additions & 1 deletion lmdeploy/lite/quantization/awq.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@
'input_layernorm':
['self_attn.k_proj', 'self_attn.q_proj', 'self_attn.v_proj'],
'post_attention_layernorm': ['mlp.gate_proj', 'mlp.up_proj']
}
},
'MistralDecoderLayer': {
'input_layernorm':
['self_attn.k_proj', 'self_attn.q_proj', 'self_attn.v_proj'],
'post_attention_layernorm': ['mlp.gate_proj', 'mlp.up_proj']
},
}

FC_FCS_MAP = {
Expand Down Expand Up @@ -92,6 +97,10 @@
'Qwen2VLDecoderLayer': {
'self_attn.v_proj': ['self_attn.o_proj'],
'mlp.up_proj': ['mlp.down_proj']
},
'MistralDecoderLayer': {
'self_attn.v_proj': ['self_attn.o_proj'],
'mlp.up_proj': ['mlp.down_proj']
}
}

Expand Down
1 change: 0 additions & 1 deletion lmdeploy/pytorch/modeling/__init__.py

This file was deleted.

59 changes: 0 additions & 59 deletions lmdeploy/pytorch/modeling/convert_to_qmodules.py

This file was deleted.

Loading

0 comments on commit 9627436

Please sign in to comment.