From 4769ef81f5dc8204b27439d9ab66f3408a9404e9 Mon Sep 17 00:00:00 2001 From: Li Zhang Date: Wed, 23 Oct 2024 05:04:21 +0000 Subject: [PATCH] fix inter_size config --- lmdeploy/turbomind/deploy/source_model/deepseek_vl.py | 4 +++- lmdeploy/turbomind/deploy/source_model/internvl.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lmdeploy/turbomind/deploy/source_model/deepseek_vl.py b/lmdeploy/turbomind/deploy/source_model/deepseek_vl.py index 2b60454767..19a7d185e0 100644 --- a/lmdeploy/turbomind/deploy/source_model/deepseek_vl.py +++ b/lmdeploy/turbomind/deploy/source_model/deepseek_vl.py @@ -47,7 +47,8 @@ def model_info(self): 'language_config'].get('model_type', None) == 'llama': model_arg = model_arg['language_config'] # depseek-vl num_layer = model_arg['num_hidden_layers'] - hidden_units = model_arg['hidden_size'] + hidden_units = model_arg.get('hidden_size', 4096) + inter_size = model_arg.get('intermediate_size', 11008) norm_eps = model_arg.get('rms_norm_eps', 1e-06) attn_head_num = model_arg.get('num_attention_heads', 32) if 'num_key_value_heads' in model_arg: @@ -71,6 +72,7 @@ def model_info(self): head_num=attn_head_num, kv_head_num=kv_head_num, hidden_units=hidden_units, + inter_size=inter_size, rope_theta=rope_theta, max_position_embeddings=max_position_embeddings, use_dynamic_ntk=use_dynamic_ntk, diff --git a/lmdeploy/turbomind/deploy/source_model/internvl.py b/lmdeploy/turbomind/deploy/source_model/internvl.py index 83161adb15..82c98a7af8 100644 --- a/lmdeploy/turbomind/deploy/source_model/internvl.py +++ b/lmdeploy/turbomind/deploy/source_model/internvl.py @@ -62,6 +62,7 @@ def model_info(self): num_layer = model_arg['num_hidden_layers'] norm_eps = model_arg['rms_norm_eps'] hidden_units = model_arg['hidden_size'] + inter_size = model_arg['intermediate_size'] attn_head_num = model_arg['num_attention_heads'] if 'num_key_value_heads' in model_arg: kv_head_num = model_arg['num_key_value_heads'] @@ -82,6 +83,7 @@ def model_info(self): return dict(num_layer=num_layer, norm_eps=norm_eps, hidden_units=hidden_units, + inter_size=inter_size, head_num=attn_head_num, kv_head_num=kv_head_num, rope_theta=rope_theta,