add qwen2-moe
lzhangzz committed Nov 7, 2024
1 parent 71f1d0f commit e0b221e
Showing 16 changed files with 1,777 additions and 227 deletions.
2 changes: 2 additions & 0 deletions lmdeploy/turbomind/deploy/config.py
@@ -50,6 +50,8 @@ class ModelConfig:
expert_num: int = 0
expert_inter_size: int = 0
experts_per_token: int = 0
moe_shared_gate: bool = False
moe_norm_topk: bool = False

def verify(self):
invalid = {}
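
Not part of the commit: a note on the two new flags. moe_shared_gate marks architectures (e.g. Qwen2-MoE) whose shared-expert output is scaled by a per-token sigmoid gate, and moe_norm_topk marks architectures (e.g. Mixtral) that renormalize the selected top-k routing weights so they sum to 1. An illustrative PyTorch-style sketch of how such flags are commonly interpreted (this helper is not lmdeploy code):

import torch

def moe_ffn(hidden, router_w, experts, experts_per_token,
            norm_topk=False, shared_ffn=None, shared_gate_w=None):
    """hidden: [T, D]; router_w: [E, D]; experts: callables [t, D] -> [t, D]."""
    probs = torch.softmax(hidden @ router_w.t(), dim=-1)    # router scores [T, E]
    topk_p, topk_i = probs.topk(experts_per_token, dim=-1)  # [T, K]
    if norm_topk:  # moe_norm_topk: renormalize the selected routing weights
        topk_p = topk_p / topk_p.sum(-1, keepdim=True)
    out = torch.zeros_like(hidden)
    for k in range(experts_per_token):
        for e, expert in enumerate(experts):
            mask = topk_i[:, k] == e
            if mask.any():
                out[mask] += topk_p[mask, k, None] * expert(hidden[mask])
    if shared_gate_w is not None:  # moe_shared_gate: sigmoid-gated shared expert
        out = out + torch.sigmoid(hidden @ shared_gate_w.t()) * shared_ffn(hidden)
    return out
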
20 changes: 16 additions & 4 deletions lmdeploy/turbomind/deploy/module.py
@@ -140,14 +140,18 @@ class MoeFfn(Ffn):
requires:
r.moe_ffn_expert(e, i, kind)
r.moe_ffn_gate(i)
r.moe_ffn_shared_gate(i)
"""

_moe_ffn_expert = 'layers.{0}.moe_ffn.experts.E.{1}.{2}'
_moe_ffn_gate = 'layers.{0}.moe_ffn.gate.{1}'
_moe_ffn_gate = 'layers.{0}.moe_ffn.gate.weight'
_moe_ffn_shared_gate = 'layers.{0}.moe_ffn.shared_gate.weight'

def __init__(self, model: BaseOutputModel):
super().__init__(model)
self.expert_num = model.model_config.expert_num
self.inter_size = model.model_config.expert_inter_size
self.shared_gate = model.model_config.moe_shared_gate

def apply(self, i: int, r: BaseReader):
for p in get_params(r.moe_ffn_expert()):
@@ -157,7 +161,12 @@ def apply(self, i: int, r: BaseReader):
i)

gate = transpose(r.moe_ffn_gate(i))
self.model.save_split(gate, self._moe_ffn_gate.format(i, 'weight'))
self.model.save_split(gate, self._moe_ffn_gate.format(i))

if self.shared_gate:
shared_gate = transpose(r.moe_ffn_shared_gate(i))
self.model.save_split(shared_gate, self._moe_ffn_shared_gate.format(i))
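
Not part of the diff: the router gate name now has '.weight' baked into the format string (no kind argument), and the shared gate follows the same pattern. For layer 3, for example:

>>> 'layers.{0}.moe_ffn.gate.weight'.format(3)
'layers.3.moe_ffn.gate.weight'
>>> 'layers.{0}.moe_ffn.shared_gate.weight'.format(3)
'layers.3.moe_ffn.shared_gate.weight'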


class Attn(Module):
@@ -248,8 +257,11 @@ class Transformer:

def __init__(self, model: BaseOutputModel):
self.model = model
ffn = MoeFfn if model.model_config.expert_num else Ffn
modules = [Attn, LayerNorm, ffn]
modules = [Attn, LayerNorm]
if model.model_config.inter_size:
modules.append(Ffn)
if model.model_config.expert_num:
modules.append(MoeFfn)
self.modules = [c(model) for c in modules]
self.misc = Misc(model)
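
Not part of the diff: the dense FFN and the expert FFN are now registered independently rather than being mutually exclusive, roughly:

# Illustrative only: effective FFN modules per architecture after this change.
# Mixtral   : inter_size == 0, expert_num > 0  -> [Attn, LayerNorm, MoeFfn]
# Qwen2-MoE : inter_size > 0 (shared expert), expert_num > 0
#                                              -> [Attn, LayerNorm, Ffn, MoeFfn]
# dense LLM : inter_size > 0, expert_num == 0  -> [Attn, LayerNorm, Ffn]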

2 changes: 2 additions & 0 deletions lmdeploy/turbomind/deploy/source_model/mixtral.py
@@ -33,4 +33,6 @@ def model_info(self):
info['expert_num'] = cfg['num_local_experts']
info['expert_inter_size'] = cfg['intermediate_size']
info['experts_per_token'] = cfg['num_experts_per_tok']
info['moe_norm_topk'] = True
info['inter_size'] = 0
return info
64 changes: 64 additions & 0 deletions lmdeploy/turbomind/deploy/source_model/qwen.py
@@ -120,3 +120,67 @@ def model_info(self):
cfg = super().model_info()
cfg['attn_bias'] = 1
return cfg



class Qwen2MoeReader(LlamaReader):

ffn_pattern = r'shared_expert\.'

def moe_ffn_expert(self, e=None, i=None, kind=None):
if not kind:
return self.filter(r'experts')
result = []
for key in ['gate', 'down', 'up']:
name = f'model.layers.{i}.mlp.experts.{e}.{key}_proj.{kind}'
tensor = self.params.get(name)
tensor = self.transform(tensor, kind)
result.append(tensor)
return (*result, )

def moe_ffn_gate(self, i):
return self.params.get(
f'model.layers.{i}.mlp.gate.weight')

def _ffn(self, i: int, kind: str):
"""Get ffn kind for layer i."""
if not kind:
return self.filter(self.ffn_pattern)
result = []
for key in ['gate', 'down', 'up']:
tensor = self.params[
f'model.layers.{i}.mlp.shared_expert.{key}_proj.{kind}']
tensor = self.transform(tensor, kind)
result.append(tensor)
return (*result, )

def moe_ffn_shared_gate(self, i):
return self.params.get(
f'model.layers.{i}.mlp.shared_expert_gate.weight'
)
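
Not part of the diff: for reference, the HF checkpoint tensor names this reader resolves for layer 0, expert 3, kind 'weight':

model.layers.0.mlp.experts.3.gate_proj.weight      # moe_ffn_expert
model.layers.0.mlp.experts.3.down_proj.weight
model.layers.0.mlp.experts.3.up_proj.weight
model.layers.0.mlp.gate.weight                     # moe_ffn_gate (router)
model.layers.0.mlp.shared_expert.gate_proj.weight  # _ffn (shared expert; down/up likewise)
model.layers.0.mlp.shared_expert_gate.weight       # moe_ffn_shared_gate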

@INPUT_MODELS.register_module(name='qwen2-moe')
class Qwen2MoeModel(LlamaModel):

Reader = Qwen2MoeReader

def tokenizer_info(self):
"""
https://huggingface.co/Qwen/Qwen1.5-7B-Chat/blob/main/generation_config.json
""" # noqa: E501
n_words = 152064
bos_id = 151643
eos_id = 151645
return n_words, bos_id, eos_id

def model_info(self):
cfg = self.model_config
info = super().model_info()
info['expert_num'] = cfg['num_experts']
info['expert_inter_size'] = cfg['moe_intermediate_size']
info['experts_per_token'] = cfg['num_experts_per_tok']
info['inter_size'] = cfg['shared_expert_intermediate_size']
info['moe_shared_gate'] = True
info['moe_norm_topk'] = cfg['norm_topk_prob']
info['attn_bias'] = 1
return info
1 change: 1 addition & 0 deletions lmdeploy/turbomind/supported_models.py
@@ -20,6 +20,7 @@
QWenLMHeadModel='qwen',
# Qwen2
Qwen2ForCausalLM='qwen2',
Qwen2MoeForCausalLM='qwen2-moe',
# mistral
MistralForCausalLM='llama',
# llava
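
Not part of the commit: with Qwen2MoeForCausalLM registered, a Qwen2-MoE checkpoint should load through the usual TurboMind pipeline. A minimal usage sketch; the checkpoint name is only an example:

from lmdeploy import pipeline

# Any HF model whose architecture is Qwen2MoeForCausalLM maps to the 'qwen2-moe' input model.
pipe = pipeline('Qwen/Qwen1.5-MoE-A2.7B-Chat')
print(pipe(['Hello, who are you?']))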
