lint: fix lint warning
deepindeed2022 committed Nov 5, 2024
1 parent eea842c commit 20c7476
Showing 6 changed files with 38 additions and 36 deletions.
2 changes: 1 addition & 1 deletion examples/python/README.md
@@ -12,4 +12,4 @@
 
 `python3 offline_vl.py models/llava-interleave-qwen-7b-hf`
 
-`python3 offline_vl.py models/llava-interleave-qwen-7b-hf/awq --model-format awq`
+`python3 offline_vl.py models/llava-interleave-qwen-7b-hf/awq --model-format awq`
46 changes: 24 additions & 22 deletions examples/python/offline_vl.py
@@ -1,32 +1,34 @@
-from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig
-from lmdeploy.vl import load_image
 import argparse
 
-if __name__ == "__main__":
+from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
+from lmdeploy.vl import load_image
+
+if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='test model')
     parser.add_argument('model_path',
                         type=str,
                         help='the path of the model in localhost or '
                         'the repo_id of the model in huggingface.co',
-                        default="llava-hf/llava-interleave-qwen-7b-hf")
-    parser.add_argument(
-        '--model-format',
-        type=str,
-        help='model format',
-        default='hf',
-        choices=['hf', 'awq'])
-    parser.add_argument(
-        '--max-new-tokens',
-        type=int,
-        help='output max tokens number',
-        default=128)
+                        default='llava-hf/llava-interleave-qwen-7b-hf')
+    parser.add_argument('--model-format',
+                        type=str,
+                        help='model format',
+                        default='hf',
+                        choices=['hf', 'awq'])
+    parser.add_argument('--max-new-tokens',
+                        type=int,
+                        help='output max tokens number',
+                        default=128)
     args = parser.parse_args()
-    pipe = pipeline(args.model_path,
-                    backend_config=TurbomindEngineConfig(cache_max_entry_count=0.5, model_format=args.model_format),
-                    gen_config=GenerationConfig(max_new_tokens=args.max_new_tokens))
+    pipe = pipeline(
+        args.model_path,
+        backend_config=TurbomindEngineConfig(cache_max_entry_count=0.5,
+                                             model_format=args.model_format),
+        gen_config=GenerationConfig(max_new_tokens=args.max_new_tokens))
 
-    image = load_image('https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg')
-    for prompt in ["Describe the image.", "How many people in the image?"]:
-        print(f"prompt:{prompt}")
+    image = load_image('https://qianwen-res.oss-cn-beijing.aliyuncs.com/' +
+                       'Qwen-VL/assets/demo.jpeg')
+    for prompt in ['Describe the image.', 'How many people in the image?']:
+        print(f'prompt:{prompt}')
         response = pipe((prompt, image))
-        print(response)
+        print(response)
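
Note on the tooling: the import reordering and re-wrapped call sites above are the kind of output isort and yapf produce, which is a reasonable guess at what the project's lint hooks run (the commit message only says "fix lint warning", so treat the tool names as an assumption). A minimal sketch of applying both to a source string:

```python
# Sketch: apply isort (import grouping/ordering) and yapf (line wrapping,
# spacing) to a code string. Tool choice is assumed, not stated by the commit;
# the quote-style changes in this diff would come from a separate check
# (e.g. a flake8 quotes plugin), not from yapf.
import isort
from yapf.yapflib.yapf_api import FormatCode

SOURCE = '''
from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig
import argparse
'''

sorted_source = isort.code(SOURCE)  # stdlib imports first, then third-party
formatted, changed = FormatCode(sorted_source, style_config='pep8')
print(formatted)
```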
2 changes: 1 addition & 1 deletion lmdeploy/turbomind/deploy/source_model/__init__.py
@@ -5,9 +5,9 @@
 from .internlm2 import InternLM2Model  # noqa: F401
 from .internvl import InternVLModel  # noqa: F401
 from .llama import LlamaModel  # noqa: F401
+from .llava_qwen2 import LlavaQwen2Model  # noqa: F401
 from .meta_llama import MetaLlamaModel  # noqa: F401
 from .minicpmv import MiniCPMVModel  # noqa: F401
 from .mixtral import MixtralModel  # noqa: F401
 from .qwen import QwenModel  # noqa: F401
 from .xcomposer2 import Xcomposer2Model  # noqa: F401
-from .llava_qwen2 import LlavaQwen2Model  # noqa: F401
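
The `# noqa: F401` imports here are not dead code: importing each module executes its `@INPUT_MODELS.register_module(...)` decorator, which is what makes the llava_qwen2 source model discoverable; the change above only moves the import into alphabetical order. A rough sketch of that registration pattern (a generic stand-in, not lmdeploy's actual Registry implementation):

```python
# Generic decorator-based registry, for illustration only.
INPUT_MODELS = {}


def register_module(name):
    def _wrap(cls):
        INPUT_MODELS[name] = cls  # record the class under its registry name
        return cls
    return _wrap


@register_module(name='llava_qwen2')
class LlavaQwen2Model:
    pass


# Importing the defining module (as the __init__.py above does) is what
# populates the registry, hence the apparently unused imports.
print(INPUT_MODELS['llava_qwen2'])
```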
15 changes: 7 additions & 8 deletions lmdeploy/turbomind/deploy/source_model/llava_qwen2.py
@@ -1,8 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import json
 import os.path as osp
 
 from .base import INPUT_MODELS
-from .llama import LlamaReader, LlamaModel
+from .llama import LlamaModel, LlamaReader
 
+
 class LlavaQwen2Reader(LlamaReader):
@@ -17,7 +18,8 @@ class LlavaQwen2Reader(LlamaReader):
     def __init__(self, new_params: dict, unused_params: dict, last_bin: bool,
                  model_cfg: dict, policy):
         model_cfg = model_cfg.get('text_config')
-        super().__init__(new_params, unused_params, last_bin, model_cfg, policy)
+        super().__init__(new_params, unused_params, last_bin, model_cfg,
+                         policy)
 
 
 @INPUT_MODELS.register_module(name='llava_qwen2')
@@ -26,9 +28,6 @@ class LlavaQwen2Model(LlamaModel):
 
     def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
         super().__init__(model_path, tokenizer_path, **kwargs)
-        from transformers import AutoConfig
-        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
-        arch = config.text_config.architectures[0]
         self.Reader = LlavaQwen2Reader
 
     def model_info(self):
@@ -53,7 +52,7 @@ def model_info(self):
         # special for the model: llava-hf/llava-interleave-qwen-7b-hf
         hidden_units = model_arg.get('hidden_size', 4096)
         vocab_size = model_arg.get('vocab_size', 152000)
-        intermediate_size = model_arg.get("intermediate_size", 11008)
+        intermediate_size = model_arg.get('intermediate_size', 11008)
         attn_bias = int(model_arg.get('attn_bias', 1))
         use_logn_attn = int(model_arg.get('use_logn_attn', 0))
 
@@ -73,6 +72,6 @@ def model_info(self):
                     use_dynamic_ntk=use_dynamic_ntk,
                     rope_scaling_factor=scaling_factor,
                     inter_size=intermediate_size,
-                    use_logn_attn = use_logn_attn,
+                    use_logn_attn=use_logn_attn,
                     attn_bias=attn_bias,
-                    vocab_size=vocab_size)
+                    vocab_size=vocab_size)
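
For context on the `model_cfg.get('text_config')` call and the removed `AutoConfig` lines: llava-interleave checkpoints nest the Qwen2 language-model settings under a `text_config` section of the composite Hugging Face config, and the `arch` lookup deleted above was not used afterwards. A small sketch of what that nesting looks like (assumes hub access; attribute values come from whatever the checkpoint ships):

```python
# Inspect the nested text_config that LlavaQwen2Reader unwraps.
from transformers import AutoConfig

config = AutoConfig.from_pretrained('llava-hf/llava-interleave-qwen-7b-hf')
text_cfg = config.text_config  # the Qwen2 language-model section
print(text_cfg.hidden_size, text_cfg.intermediate_size, text_cfg.vocab_size)
```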
7 changes: 4 additions & 3 deletions lmdeploy/turbomind/generate_gemm_config.py
@@ -60,12 +60,13 @@ def main(head_num: int = 32,
             inter_size = config.intermediate_size
             vocab_size = config.vocab_size
         except AttributeError as e:
-            if hasattr(config, "text_config"):
+            if hasattr(config, 'text_config'):
                 config = config.text_config
-            elif hasattr(config, "llm_config"):
+            elif hasattr(config, 'llm_config'):
                 config = config.llm_config
             else:
-                raise AttributeError(f"not found attribute in {config}, please check your model config file. {e}")
+                raise AttributeError(f'not found attribute in {config},\
+                    please check your model config file.{e}')
             head_num = config.num_attention_heads
             size_per_head = config.hidden_size // head_num
             inter_size = config.intermediate_size
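
Quote fixes aside, the hunk above implements a fallback: multimodal configs often nest the LLM hyperparameters one level down (`text_config` for llava-style models, `llm_config` for internvl-style ones), so the attribute lookup is retried on the nested config. The same idea as a standalone helper (a hypothetical function written for illustration, not part of the module):

```python
# Hypothetical helper mirroring the fallback logic in the diff above.
def unwrap_llm_config(config):
    """Return the nested language-model config if the top level lacks one."""
    for name in ('text_config', 'llm_config'):
        if hasattr(config, name):
            return getattr(config, name)
    raise AttributeError(
        f'not found attribute in {config}, please check your model config file.')
```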
2 changes: 1 addition & 1 deletion lmdeploy/turbomind/supported_models.py
@@ -26,7 +26,7 @@
     LlavaLlamaForCausalLM='llama',
     LlavaMistralForCausalLM='llama',
     # Llava_interleave
-    LlavaForConditionalGeneration="llava_qwen2",
+    LlavaForConditionalGeneration='llava_qwen2',
     # xcomposer2
     InternLMXComposer2ForCausalLM='xcomposer2',
     # internvl
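
The entry changed here is the key used to recognize llava-interleave checkpoints: the `architectures` field of a Hugging Face config is matched against this table to pick the turbomind source-model name. A minimal sketch of that resolution (the dict and helper are illustrative, not the module's real lookup code):

```python
# Map an HF architecture string to a turbomind source-model name.
from transformers import AutoConfig

ARCH_MAP = {'LlavaForConditionalGeneration': 'llava_qwen2'}


def source_model_name(model_path: str) -> str:
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    arch = config.architectures[0]
    return ARCH_MAP.get(arch, 'llama')


print(source_model_name('llava-hf/llava-interleave-qwen-7b-hf'))
```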
