lint: fix lint warning
deepindeed2022 committed Nov 5, 2024
1 parent eea842c commit 20c7476
Showing 6 changed files with 38 additions and 36 deletions.
2 changes: 1 addition & 1 deletion examples/python/README.md
@@ -12,4 +12,4 @@
 
 `python3 offline_vl.py models/llava-interleave-qwen-7b-hf`
 
-`python3 offline_vl.py models/llava-interleave-qwen-7b-hf/awq --model-format awq`
+`python3 offline_vl.py models/llava-interleave-qwen-7b-hf/awq --model-format awq`
46 changes: 24 additions & 22 deletions examples/python/offline_vl.py
@@ -1,32 +1,34 @@
-from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig
-from lmdeploy.vl import load_image
 import argparse
 
-if __name__ == "__main__":
+from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline
+from lmdeploy.vl import load_image
+
+if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='test model')
     parser.add_argument('model_path',
                         type=str,
                         help='the path of the model in localhost or '
                         'the repo_id of the model in huggingface.co',
-                        default="llava-hf/llava-interleave-qwen-7b-hf")
-    parser.add_argument(
-        '--model-format',
-        type=str,
-        help='model format',
-        default='hf',
-        choices=['hf', 'awq'])
-    parser.add_argument(
-        '--max-new-tokens',
-        type=int,
-        help='output max tokens number',
-        default=128)
+                        default='llava-hf/llava-interleave-qwen-7b-hf')
+    parser.add_argument('--model-format',
+                        type=str,
+                        help='model format',
+                        default='hf',
+                        choices=['hf', 'awq'])
+    parser.add_argument('--max-new-tokens',
+                        type=int,
+                        help='output max tokens number',
+                        default=128)
     args = parser.parse_args()
-    pipe = pipeline(args.model_path,
-                    backend_config=TurbomindEngineConfig(cache_max_entry_count=0.5, model_format=args.model_format),
-                    gen_config=GenerationConfig(max_new_tokens=args.max_new_tokens))
+    pipe = pipeline(
+        args.model_path,
+        backend_config=TurbomindEngineConfig(cache_max_entry_count=0.5,
+                                             model_format=args.model_format),
+        gen_config=GenerationConfig(max_new_tokens=args.max_new_tokens))
 
-    image = load_image('https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg')
-    for prompt in ["Describe the image.", "How many people in the image?"]:
-        print(f"prompt:{prompt}")
+    image = load_image('https://qianwen-res.oss-cn-beijing.aliyuncs.com/' +
+                       'Qwen-VL/assets/demo.jpeg')
+    for prompt in ['Describe the image.', 'How many people in the image?']:
+        print(f'prompt:{prompt}')
         response = pipe((prompt, image))
-        print(response)
+        print(response)
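
Note on the tooling: the import reordering and re-wrapped call sites above are the kind of output isort and yapf produce, which is a reasonable guess at what the project's lint hooks run (the commit message only says "fix lint warning", so treat the tool names as an assumption). A minimal sketch of applying both to a source string:

```python
# Sketch: apply isort (import grouping/ordering) and yapf (line wrapping,
# spacing) to a code string. Tool choice is assumed, not stated by the commit;
# the quote-style changes in this diff would come from a separate check
# (e.g. a flake8 quotes plugin), not from yapf.
import isort
from yapf.yapflib.yapf_api import FormatCode

SOURCE = '''
from lmdeploy import pipeline, TurbomindEngineConfig, GenerationConfig
import argparse
'''

sorted_source = isort.code(SOURCE)  # stdlib imports first, then third-party
formatted, changed = FormatCode(sorted_source, style_config='pep8')
print(formatted)
```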
2 changes: 1 addition & 1 deletion lmdeploy/turbomind/deploy/source_model/__init__.py
@@ -5,9 +5,9 @@
 from .internlm2 import InternLM2Model  # noqa: F401
 from .internvl import InternVLModel  # noqa: F401
 from .llama import LlamaModel  # noqa: F401
+from .llava_qwen2 import LlavaQwen2Model  # noqa: F401
 from .meta_llama import MetaLlamaModel  # noqa: F401
 from .minicpmv import MiniCPMVModel  # noqa: F401
 from .mixtral import MixtralModel  # noqa: F401
 from .qwen import QwenModel  # noqa: F401
 from .xcomposer2 import Xcomposer2Model  # noqa: F401
-from .llava_qwen2 import LlavaQwen2Model  # noqa: F401
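
The `# noqa: F401` imports here are not dead code: importing each module executes its `@INPUT_MODELS.register_module(...)` decorator, which is what makes the llava_qwen2 source model discoverable; the change above only moves the import into alphabetical order. A rough sketch of that registration pattern (a generic stand-in, not lmdeploy's actual Registry implementation):

```python
# Generic decorator-based registry, for illustration only.
INPUT_MODELS = {}


def register_module(name):
    def _wrap(cls):
        INPUT_MODELS[name] = cls  # record the class under its registry name
        return cls
    return _wrap


@register_module(name='llava_qwen2')
class LlavaQwen2Model:
    pass


# Importing the defining module (as the __init__.py above does) is what
# populates the registry, hence the apparently unused imports.
print(INPUT_MODELS['llava_qwen2'])
```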
15 changes: 7 additions & 8 deletions lmdeploy/turbomind/deploy/source_model/llava_qwen2.py
@@ -1,8 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import json
 import os.path as osp
 
 from .base import INPUT_MODELS
-from .llama import LlamaReader, LlamaModel
+from .llama import LlamaModel, LlamaReader
 
+
 class LlavaQwen2Reader(LlamaReader):
@@ -17,7 +18,8 @@ class LlavaQwen2Reader(LlamaReader):
     def __init__(self, new_params: dict, unused_params: dict, last_bin: bool,
                  model_cfg: dict, policy):
         model_cfg = model_cfg.get('text_config')
-        super().__init__(new_params, unused_params, last_bin, model_cfg, policy)
+        super().__init__(new_params, unused_params, last_bin, model_cfg,
+                         policy)
 
 
 @INPUT_MODELS.register_module(name='llava_qwen2')
@@ -26,9 +28,6 @@ class LlavaQwen2Model(LlamaModel):
 
     def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
         super().__init__(model_path, tokenizer_path, **kwargs)
-        from transformers import AutoConfig
-        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
-        arch = config.text_config.architectures[0]
         self.Reader = LlavaQwen2Reader
 
     def model_info(self):
@@ -53,7 +52,7 @@ def model_info(self):
         # special for the model: llava-hf/llava-interleave-qwen-7b-hf
         hidden_units = model_arg.get('hidden_size', 4096)
         vocab_size = model_arg.get('vocab_size', 152000)
-        intermediate_size = model_arg.get("intermediate_size", 11008)
+        intermediate_size = model_arg.get('intermediate_size', 11008)
         attn_bias = int(model_arg.get('attn_bias', 1))
         use_logn_attn = int(model_arg.get('use_logn_attn', 0))
 
@@ -73,6 +72,6 @@ def model_info(self):
                     use_dynamic_ntk=use_dynamic_ntk,
                     rope_scaling_factor=scaling_factor,
                     inter_size=intermediate_size,
-                    use_logn_attn = use_logn_attn,
+                    use_logn_attn=use_logn_attn,
                     attn_bias=attn_bias,
-                    vocab_size=vocab_size)
+                    vocab_size=vocab_size)
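
For context on the `model_cfg.get('text_config')` call and the removed `AutoConfig` lines: llava-interleave checkpoints nest the Qwen2 language-model settings under a `text_config` section of the composite Hugging Face config, and the `arch` lookup deleted above was not used afterwards. A small sketch of what that nesting looks like (assumes hub access; attribute values come from whatever the checkpoint ships):

```python
# Inspect the nested text_config that LlavaQwen2Reader unwraps.
from transformers import AutoConfig

config = AutoConfig.from_pretrained('llava-hf/llava-interleave-qwen-7b-hf')
text_cfg = config.text_config  # the Qwen2 language-model section
print(text_cfg.hidden_size, text_cfg.intermediate_size, text_cfg.vocab_size)
```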
7 changes: 4 additions & 3 deletions lmdeploy/turbomind/generate_gemm_config.py
@@ -60,12 +60,13 @@ def main(head_num: int = 32,
             inter_size = config.intermediate_size
             vocab_size = config.vocab_size
         except AttributeError as e:
-            if hasattr(config, "text_config"):
+            if hasattr(config, 'text_config'):
                 config = config.text_config
-            elif hasattr(config, "llm_config"):
+            elif hasattr(config, 'llm_config'):
                 config = config.llm_config
             else:
-                raise AttributeError(f"not found attribute in {config}, please check your model config file. {e}")
+                raise AttributeError(f'not found attribute in {config},\
+                    please check your model config file.{e}')
             head_num = config.num_attention_heads
             size_per_head = config.hidden_size // head_num
             inter_size = config.intermediate_size
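
Quote fixes aside, the hunk above implements a fallback: multimodal configs often nest the LLM hyperparameters one level down (`text_config` for llava-style models, `llm_config` for internvl-style ones), so the attribute lookup is retried on the nested config. The same idea as a standalone helper (a hypothetical function written for illustration, not part of the module):

```python
# Hypothetical helper mirroring the fallback logic in the diff above.
def unwrap_llm_config(config):
    """Return the nested language-model config if the top level lacks one."""
    for name in ('text_config', 'llm_config'):
        if hasattr(config, name):
            return getattr(config, name)
    raise AttributeError(
        f'not found attribute in {config}, please check your model config file.')
```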
2 changes: 1 addition & 1 deletion lmdeploy/turbomind/supported_models.py
@@ -26,7 +26,7 @@
     LlavaLlamaForCausalLM='llama',
     LlavaMistralForCausalLM='llama',
     # Llava_interleave
-    LlavaForConditionalGeneration="llava_qwen2",
+    LlavaForConditionalGeneration='llava_qwen2',
     # xcomposer2
     InternLMXComposer2ForCausalLM='xcomposer2',
     # internvl
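
The entry changed here is the key used to recognize llava-interleave checkpoints: the `architectures` field of a Hugging Face config is matched against this table to pick the turbomind source-model name. A minimal sketch of that resolution (the dict and helper are illustrative, not the module's real lookup code):

```python
# Map an HF architecture string to a turbomind source-model name.
from transformers import AutoConfig

ARCH_MAP = {'LlavaForConditionalGeneration': 'llava_qwen2'}


def source_model_name(model_path: str) -> str:
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    arch = config.architectures[0]
    return ARCH_MAP.get(arch, 'llama')


print(source_model_name('llava-hf/llava-interleave-qwen-7b-hf'))
```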
