From a21def9e7722058689e8a4eeeb1c957c2a948b88 Mon Sep 17 00:00:00 2001
From: RunningLeon
Date: Thu, 14 Nov 2024 11:34:12 +0800
Subject: [PATCH] Support chemvlm (#2738)

* update to support chemvlm

* update docs

* add ut
---
 README.md                                       |  1 +
 README_ja.md                                    |  2 ++
 README_zh-CN.md                                 |  1 +
 docs/en/supported_models/supported_models.md    |  2 ++
 .../supported_models/supported_models.md        |  2 ++
 lmdeploy/model.py                               |  3 +++
 lmdeploy/vl/model/internvl.py                   | 12 +++++++++---
 tests/test_lmdeploy/test_model.py               | 19 +++++++++++++++++++
 8 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index efbb87a22e..5b6ad47bdf 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,7 @@ For detailed inference benchmarks in more devices and more settings, please refe
   <li>InternVL-Chat (v1.1-v1.5)</li>
   <li>InternVL2 (1B-76B)</li>
   <li>Mono-InternVL (2B)</li>
+  <li>ChemVLM (8B-26B)</li>
   <li>MiniGeminiLlama (7B)</li>
   <li>CogVLM-Chat (17B)</li>
   <li>CogVLM2-Chat (19B)</li>
diff --git a/README_ja.md b/README_ja.md
index df4647d868..bdd9ddb02d 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -152,6 +152,8 @@ LMDeploy TurboMindエンジンは卓越した推論能力を持ち、さまざ
   <li>DeepSeek-VL (7B)</li>
   <li>InternVL-Chat (v1.1-v1.5)</li>
   <li>InternVL2 (1B-76B)</li>
+  <li>Mono-InternVL (2B)</li>
+  <li>ChemVLM (8B-26B)</li>
   <li>MiniGeminiLlama (7B)</li>
   <li>CogVLM-Chat (17B)</li>
   <li>CogVLM2-Chat (19B)</li>
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 477fed6f79..550922d081 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -158,6 +158,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力，在各种规模的模型
   <li>InternVL-Chat (v1.1-v1.5)</li>
   <li>InternVL2 (1B-76B)</li>
   <li>Mono-InternVL (2B)</li>
+  <li>ChemVLM (8B-26B)</li>
   <li>MiniGeminiLlama (7B)</li>
   <li>CogVLM-Chat (17B)</li>
   <li>CogVLM2-Chat (19B)</li>
diff --git a/docs/en/supported_models/supported_models.md b/docs/en/supported_models/supported_models.md
index 371e4968e0..90ca90388b 100644
--- a/docs/en/supported_models/supported_models.md
+++ b/docs/en/supported_models/supported_models.md
@@ -30,6 +30,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
 | LLaVA(1.5,1.6) | 7B - 34B | MLLM | Yes | Yes | Yes | Yes |
 | InternVL | v1.1 - v1.5 | MLLM | Yes | Yes | Yes | Yes |
 | InternVL2 | 2B, 8B - 76B | MLLM | Yes | Yes | Yes | Yes |
+| ChemVLM | 8B - 26B | MLLM | Yes | Yes | Yes | Yes |
 | MiniCPM-Llama3-V-2_5 | - | MLLM | Yes | Yes | Yes | Yes |
 | MiniCPM-V-2_6 | - | MLLM | Yes | Yes | Yes | Yes |
 | MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
@@ -81,6 +82,7 @@ The TurboMind engine doesn't support window attention. Therefore, for models tha
 | InternVL(v1.5) | 2B-26B | MLLM | Yes | Yes | Yes | No | Yes |
 | InternVL2 | 1B-40B | MLLM | Yes | Yes | Yes | No | - |
 | Mono-InternVL | 2B | MLLM | Yes\* | Yes | Yes | No | - |
+| ChemVLM | 8B-26B | MLLM | Yes | Yes | No | No | - |
 | Gemma2 | 9B-27B | LLM | Yes | Yes | Yes | No | - |
 | GLM4 | 9B | LLM | Yes | Yes | Yes | No | No |
 | GLM-4V | 9B | MLLM | Yes | Yes | Yes | No | No |
diff --git a/docs/zh_cn/supported_models/supported_models.md b/docs/zh_cn/supported_models/supported_models.md
index 7d59a59899..fecfdee200 100644
--- a/docs/zh_cn/supported_models/supported_models.md
+++ b/docs/zh_cn/supported_models/supported_models.md
@@ -30,6 +30,7 @@
 | LLaVA(1.5,1.6) | 7B - 34B | MLLM | Yes | Yes | Yes | Yes |
 | InternVL | v1.1 - v1.5 | MLLM | Yes | Yes | Yes | Yes |
 | InternVL2 | 2B, 8B - 76B | MLLM | Yes | Yes | Yes | Yes |
+| ChemVLM | 8B - 26B | MLLM | Yes | Yes | Yes | Yes |
 | MiniCPM-Llama3-V-2_5 | - | MLLM | Yes | Yes | Yes | Yes |
 | MiniCPM-V-2_6 | - | MLLM | Yes | Yes | Yes | Yes |
 | MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
@@ -81,6 +82,7 @@ turbomind 引擎不支持 window attention。所以，对于应用了 window att
 | InternVL(v1.5) | 2B-26B | MLLM | Yes | Yes | Yes | No | Yes |
 | InternVL2 | 1B-40B | MLLM | Yes | Yes | Yes | No | - |
 | Mono-InternVL | 2B | MLLM | Yes\* | Yes | Yes | No | - |
+| ChemVLM | 8B-26B | MLLM | Yes | Yes | No | No | - |
 | Gemma2 | 9B-27B | LLM | Yes | Yes | Yes | No | - |
 | GLM4 | 9B | LLM | Yes | Yes | Yes | No | No |
 | GLM-4V | 9B | MLLM | Yes | Yes | Yes | No | No |
diff --git a/lmdeploy/model.py b/lmdeploy/model.py
index 1872502334..db864a8344 100644
--- a/lmdeploy/model.py
+++ b/lmdeploy/model.py
@@ -565,6 +565,9 @@ def match(cls, model_path: str) -> Optional[str]:
                 return None
             return 'internvl-internlm2'
 
+        if 'chemvlm' in path:
+            return 'internvl-internlm2'
+
 
 @MODELS.register_module(name='internvl2-internlm2')
 class InternVL2InternLM2(InternLM2Chat7B):
diff --git a/lmdeploy/vl/model/internvl.py b/lmdeploy/vl/model/internvl.py
index d85fe30939..fa67192f11 100644
--- a/lmdeploy/vl/model/internvl.py
+++ b/lmdeploy/vl/model/internvl.py
@@ -108,8 +108,15 @@ def build_model(self):
         # avoid randomness in inference.
         self.model = model.eval()
         self.config = config
+        dynamic_image_size = getattr(self.config, 'dynamic_image_size', False)
+        image_processor = None
+        try:
+            image_processor = CLIPImageProcessor.from_pretrained(
+                self.model_path)
+        except OSError:
+            pass
 
-        if getattr(self.config, 'dynamic_image_size', False):
+        if dynamic_image_size or image_processor is None:
             logger.info('using InternVL-Chat-V1-5 vision preprocess')
             MEAN = (0.485, 0.456, 0.406)
             STD = (0.229, 0.224, 0.225)
@@ -126,8 +133,7 @@ def build_model(self):
             ])
             self._forward_func = self._forward_v1_5
         else:
-            self.image_processor = CLIPImageProcessor.from_pretrained(
-                self.model_path)
+            self.image_processor = image_processor
             self._forward_func = self._forward
 
     def _preprocess_v1_5(self, images: List[Image], params: List[Dict] = None):
diff --git a/tests/test_lmdeploy/test_model.py b/tests/test_lmdeploy/test_model.py
index a38971e4d0..7e3e71793d 100644
--- a/tests/test_lmdeploy/test_model.py
+++ b/tests/test_lmdeploy/test_model.py
@@ -475,6 +475,25 @@ def test_internvl2():
     assert res == expected
 
 
+def test_chemvlm():
+    deduced_name = best_match_model('AI4Chem/ChemVLM-8B')
+
+    assert deduced_name == 'internvl-internlm2'
+    model = MODELS.get(deduced_name)()
+    messages = [{
+        'role': 'user',
+        'content': 'who are you'
+    }, {
+        'role': 'assistant',
+        'content': 'I am an AI'
+    }]
+    expected = '<|im_start|>system\nYou are an AI assistant whose name is '\
+        'InternLM (书生·浦语).<|im_end|>\n<|im_start|>user\nwho are you'\
+        '<|im_end|>\n<|im_start|>assistant\nI am an AI'
+    res = model.messages2prompt(messages)
+    assert res == expected
+
+
 def test_codegeex4():
     model_path_and_name = 'THUDM/codegeex4-all-9b'
     deduced_name = best_match_model(model_path_and_name)
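
The core of the patch is the new match rule in lmdeploy/model.py: any model path containing 'chemvlm' now resolves to the existing internvl-internlm2 chat template, which is exactly what the new unit test asserts. A minimal sketch of that resolution, reusing the AI4Chem/ChemVLM-8B name and the same APIs the test imports:

```python
from lmdeploy.model import MODELS, best_match_model

# With this patch, any path containing 'chemvlm' maps to the
# InternLM2-style chat template already used by InternVL-Chat models.
name = best_match_model('AI4Chem/ChemVLM-8B')
assert name == 'internvl-internlm2'

# Instantiate the template and render a conversation into a prompt string.
template = MODELS.get(name)()
prompt = template.messages2prompt([{'role': 'user', 'content': 'who are you'}])
print(prompt)  # ...<|im_start|>user\nwho are you<|im_end|>...
```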
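The internvl.py change covers the vision side: ChemVLM reuses the InternVL vision tower, so when a checkpoint ships no CLIP preprocessor config, build_model now falls back to the InternVL-Chat-V1-5 dynamic preprocessing instead of crashing. With both pieces in place, ChemVLM should run through the standard LMDeploy VLM pipeline; a usage sketch, assuming the AI4Chem/ChemVLM-8B checkpoint from the test and a hypothetical local image file:

```python
from lmdeploy import pipeline
from lmdeploy.vl import load_image

# Build a VLM pipeline; template and image preprocessing are picked
# automatically via the match rule and build_model fallback above.
pipe = pipeline('AI4Chem/ChemVLM-8B')
image = load_image('reaction.png')  # hypothetical image of a reaction scheme
# (prompt, image) tuples are the standard VLM pipeline input format.
response = pipe(('Describe the chemical structure shown in the image', image))
print(response.text)
```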