diff --git a/examples/python/offline_vl.py b/examples/python/offline_vl.py index 54d881e013..715ba65878 100644 --- a/examples/python/offline_vl.py +++ b/examples/python/offline_vl.py @@ -21,12 +21,16 @@ help='output max tokens number', default=128) args = parser.parse_args() - pipe = pipeline(args.model_path, - backend_config=TurbomindEngineConfig(cache_max_entry_count=0.5, model_format=args.model_format), - gen_config=GenerationConfig(max_new_tokens=args.max_new_tokens)) + pipe = pipeline(args.model_path, + backend_config=TurbomindEngineConfig( + cache_max_entry_count=0.5, + model_format=args.model_format), + gen_config=GenerationConfig( + max_new_tokens=args.max_new_tokens)) - image = load_image('https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg') + image = load_image('https://qianwen-res.oss-cn-beijing.aliyuncs.com/' + 'Qwen-VL/assets/demo.jpeg') for prompt in ["Describe the image.", "How many people in the image?"]: print(f"prompt:{prompt}") response = pipe((prompt, image)) - print(response) \ No newline at end of file + print(response) diff --git a/lmdeploy/turbomind/deploy/source_model/llava_qwen2.py b/lmdeploy/turbomind/deploy/source_model/llava_qwen2.py index 16d662cf06..4b81da994b 100644 --- a/lmdeploy/turbomind/deploy/source_model/llava_qwen2.py +++ b/lmdeploy/turbomind/deploy/source_model/llava_qwen2.py @@ -1,3 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
import json import os.path as osp @@ -17,7 +18,8 @@ class LlavaQwen2Reader(LlamaReader): def __init__(self, new_params: dict, unused_params: dict, last_bin: bool, model_cfg: dict, policy): model_cfg = model_cfg.get('text_config') - super().__init__(new_params, unused_params, last_bin, model_cfg, policy) + super().__init__(new_params, unused_params, last_bin, + model_cfg, policy) @INPUT_MODELS.register_module(name='llava_qwen2') @@ -26,9 +28,6 @@ class LlavaQwen2Model(LlamaModel): def __init__(self, model_path: str, tokenizer_path: str, **kwargs): super().__init__(model_path, tokenizer_path, **kwargs) - from transformers import AutoConfig - config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) - arch = config.text_config.architectures[0] self.Reader = LlavaQwen2Reader def model_info(self): @@ -73,6 +72,6 @@ def model_info(self): use_dynamic_ntk=use_dynamic_ntk, rope_scaling_factor=scaling_factor, inter_size=intermediate_size, - use_logn_attn = use_logn_attn, + use_logn_attn=use_logn_attn, attn_bias=attn_bias, - vocab_size=vocab_size) \ No newline at end of file + vocab_size=vocab_size) diff --git a/lmdeploy/turbomind/generate_gemm_config.py b/lmdeploy/turbomind/generate_gemm_config.py index b697218fd7..dde661525d 100644 --- a/lmdeploy/turbomind/generate_gemm_config.py +++ b/lmdeploy/turbomind/generate_gemm_config.py @@ -65,7 +65,8 @@ def main(head_num: int = 32, elif hasattr(config, "llm_config"): config = config.llm_config else: - raise AttributeError(f"not found attribute in {config}, please check your model config file. {e}") + raise AttributeError(f"not found attribute in {config}, " + f"please check your model config file. {e}") head_num = config.num_attention_heads size_per_head = config.hidden_size // head_num inter_size = config.intermediate_size