diff --git a/examples/python/offline_vl.py b/examples/python/offline_vl.py index 54d881e013..715ba65878 100644 --- a/examples/python/offline_vl.py +++ b/examples/python/offline_vl.py @@ -21,12 +21,16 @@ help='output max tokens number', default=128) args = parser.parse_args() - pipe = pipeline(args.model_path, - backend_config=TurbomindEngineConfig(cache_max_entry_count=0.5, model_format=args.model_format), - gen_config=GenerationConfig(max_new_tokens=args.max_new_tokens)) + pipe = pipeline(args.model_path, + backend_config=TurbomindEngineConfig( + cache_max_entry_count=0.5, + model_format=args.model_format), + gen_config=GenerationConfig( + max_new_tokens=args.max_new_tokens)) - image = load_image('https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg') + image = load_image('https://qianwen-res.oss-cn-beijing.aliyuncs.com/' + 'Qwen-VL/assets/demo.jpeg') for prompt in ["Describe the image.", "How many people in the image?"]: print(f"prompt:{prompt}") response = pipe((prompt, image)) - print(response) \ No newline at end of file + print(response) diff --git a/lmdeploy/turbomind/deploy/source_model/llava_qwen2.py b/lmdeploy/turbomind/deploy/source_model/llava_qwen2.py index 16d662cf06..4b81da994b 100644 --- a/lmdeploy/turbomind/deploy/source_model/llava_qwen2.py +++ b/lmdeploy/turbomind/deploy/source_model/llava_qwen2.py @@ -1,3 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
import json import os.path as osp @@ -17,7 +18,8 @@ class LlavaQwen2Reader(LlamaReader): def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_cfg: dict, policy): model_cfg = model_cfg.get('text_config') - super().__init__(new_params, unused_params, last_bin, model_cfg, policy) + super().__init__(new_params, unused_params, last_bin, + model_cfg, policy) @INPUT_MODELS.register_module(name='llava_qwen2') @@ -26,9 +28,6 @@ class LlavaQwen2Model(LlamaModel): def __init__(self, model_path: str, tokenizer_path: str, **kwargs): super().__init__(model_path, tokenizer_path, **kwargs) - from transformers import AutoConfig - config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) - arch = config.text_config.architectures[0] self.Reader = LlavaQwen2Reader def model_info(self): @@ -73,6 +72,6 @@ def model_info(self): use_dynamic_ntk=use_dynamic_ntk, rope_scaling_factor=scaling_factor, inter_size=intermediate_size, - use_logn_attn = use_logn_attn, + use_logn_attn=use_logn_attn, attn_bias=attn_bias, - vocab_size=vocab_size) \ No newline at end of file + vocab_size=vocab_size) diff --git a/lmdeploy/turbomind/generate_gemm_config.py b/lmdeploy/turbomind/generate_gemm_config.py index b697218fd7..dde661525d 100644 --- a/lmdeploy/turbomind/generate_gemm_config.py +++ b/lmdeploy/turbomind/generate_gemm_config.py @@ -65,7 +65,8 @@ def main(head_num: int = 32, elif hasattr(config, "llm_config"): config = config.llm_config else: - raise AttributeError(f"not found attribute in {config}, please check your model config file. {e}") + raise AttributeError(f"not found attribute in {config}, " + f"please check your model config file. {e}") head_num = config.num_attention_heads size_per_head = config.hidden_size // head_num inter_size = config.intermediate_size