From a21def9e7722058689e8a4eeeb1c957c2a948b88 Mon Sep 17 00:00:00 2001
From: RunningLeon
Date: Thu, 14 Nov 2024 11:34:12 +0800
Subject: [PATCH] Support chemvlm (#2738)

* update to support chemvlm

* update docs

* add ut
---
 README.md                                       |  1 +
 README_ja.md                                    |  2 ++
 README_zh-CN.md                                 |  1 +
 docs/en/supported_models/supported_models.md    |  2 ++
 .../supported_models/supported_models.md        |  2 ++
 lmdeploy/model.py                               |  3 +++
 lmdeploy/vl/model/internvl.py                   | 12 +++++++++---
 tests/test_lmdeploy/test_model.py               | 19 +++++++++++++++++++
 8 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index efbb87a22e..5b6ad47bdf 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,7 @@ For detailed inference benchmarks in more devices and more settings, please refe
   <li>InternVL-Chat (v1.1-v1.5)</li>
   <li>InternVL2 (1B-76B)</li>
   <li>Mono-InternVL (2B)</li>
+  <li>ChemVLM (8B-26B)</li>
   <li>MiniGeminiLlama (7B)</li>
   <li>CogVLM-Chat (17B)</li>
   <li>CogVLM2-Chat (19B)</li>
diff --git a/README_ja.md b/README_ja.md
index df4647d868..bdd9ddb02d 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -152,6 +152,8 @@ LMDeploy TurboMindエンジンは卓越した推論能力を持ち、さまざ
   <li>DeepSeek-VL (7B)</li>
   <li>InternVL-Chat (v1.1-v1.5)</li>
   <li>InternVL2 (1B-76B)</li>
+  <li>Mono-InternVL (2B)</li>
+  <li>ChemVLM (8B-26B)</li>
   <li>MiniGeminiLlama (7B)</li>
   <li>CogVLM-Chat (17B)</li>
   <li>CogVLM2-Chat (19B)</li>
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 477fed6f79..550922d081 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -158,6 +158,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力，在各种规模的模型
   <li>InternVL-Chat (v1.1-v1.5)</li>
   <li>InternVL2 (1B-76B)</li>
   <li>Mono-InternVL (2B)</li>
+  <li>ChemVLM (8B-26B)</li>
   <li>MiniGeminiLlama (7B)</li>
   <li>CogVLM-Chat (17B)</li>
   <li>CogVLM2-Chat (19B)</li>
diff --git a/docs/en/supported_models/supported_models.md b/docs/en/supported_models/supported_models.md
index 371e4968e0..90ca90388b 100644
--- a/docs/en/supported_models/supported_models.md
+++ b/docs/en/supported_models/supported_models.md
@@ -30,6 +30,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
 | LLaVA(1.5,1.6) | 7B - 34B | MLLM | Yes | Yes | Yes | Yes |
 | InternVL | v1.1 - v1.5 | MLLM | Yes | Yes | Yes | Yes |
 | InternVL2 | 2B, 8B - 76B | MLLM | Yes | Yes | Yes | Yes |
+| ChemVLM | 8B - 26B | MLLM | Yes | Yes | Yes | Yes |
 | MiniCPM-Llama3-V-2_5 | - | MLLM | Yes | Yes | Yes | Yes |
 | MiniCPM-V-2_6 | - | MLLM | Yes | Yes | Yes | Yes |
 | MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
@@ -81,6 +82,7 @@ The TurboMind engine doesn't support window attention. Therefore, for models tha
 | InternVL(v1.5) | 2B-26B | MLLM | Yes | Yes | Yes | No | Yes |
 | InternVL2 | 1B-40B | MLLM | Yes | Yes | Yes | No | - |
 | Mono-InternVL | 2B | MLLM | Yes\* | Yes | Yes | No | - |
+| ChemVLM | 8B-26B | MLLM | Yes | Yes | No | No | - |
 | Gemma2 | 9B-27B | LLM | Yes | Yes | Yes | No | - |
 | GLM4 | 9B | LLM | Yes | Yes | Yes | No | No |
 | GLM-4V | 9B | MLLM | Yes | Yes | Yes | No | No |
diff --git a/docs/zh_cn/supported_models/supported_models.md b/docs/zh_cn/supported_models/supported_models.md
index 7d59a59899..fecfdee200 100644
--- a/docs/zh_cn/supported_models/supported_models.md
+++ b/docs/zh_cn/supported_models/supported_models.md
@@ -30,6 +30,7 @@
 | LLaVA(1.5,1.6) | 7B - 34B | MLLM | Yes | Yes | Yes | Yes |
 | InternVL | v1.1 - v1.5 | MLLM | Yes | Yes | Yes | Yes |
 | InternVL2 | 2B, 8B - 76B | MLLM | Yes | Yes | Yes | Yes |
+| ChemVLM | 8B - 26B | MLLM | Yes | Yes | Yes | Yes |
 | MiniCPM-Llama3-V-2_5 | - | MLLM | Yes | Yes | Yes | Yes |
 | MiniCPM-V-2_6 | - | MLLM | Yes | Yes | Yes | Yes |
 | MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
@@ -81,6 +82,7 @@ turbomind 引擎不支持 window attention。所以，对于应用了 window att
 | InternVL(v1.5) | 2B-26B | MLLM | Yes | Yes | Yes | No | Yes |
 | InternVL2 | 1B-40B | MLLM | Yes | Yes | Yes | No | - |
 | Mono-InternVL | 2B | MLLM | Yes\* | Yes | Yes | No | - |
+| ChemVLM | 8B-26B | MLLM | Yes | Yes | No | No | - |
 | Gemma2 | 9B-27B | LLM | Yes | Yes | Yes | No | - |
 | GLM4 | 9B | LLM | Yes | Yes | Yes | No | No |
 | GLM-4V | 9B | MLLM | Yes | Yes | Yes | No | No |
diff --git a/lmdeploy/model.py b/lmdeploy/model.py
index 1872502334..db864a8344 100644
--- a/lmdeploy/model.py
+++ b/lmdeploy/model.py
@@ -565,6 +565,9 @@ def match(cls, model_path: str) -> Optional[str]:
                 return None
             return 'internvl-internlm2'
 
+        if 'chemvlm' in path:
+            return 'internvl-internlm2'
+
 
 @MODELS.register_module(name='internvl2-internlm2')
 class InternVL2InternLM2(InternLM2Chat7B):
diff --git a/lmdeploy/vl/model/internvl.py b/lmdeploy/vl/model/internvl.py
index d85fe30939..fa67192f11 100644
--- a/lmdeploy/vl/model/internvl.py
+++ b/lmdeploy/vl/model/internvl.py
@@ -108,8 +108,15 @@ def build_model(self):
         # avoid randomness in inference.
         self.model = model.eval()
         self.config = config
+        dynamic_image_size = getattr(self.config, 'dynamic_image_size', False)
+        image_processor = None
+        try:
+            image_processor = CLIPImageProcessor.from_pretrained(
+                self.model_path)
+        except OSError:
+            pass
 
-        if getattr(self.config, 'dynamic_image_size', False):
+        if dynamic_image_size or image_processor is None:
             logger.info('using InternVL-Chat-V1-5 vision preprocess')
             MEAN = (0.485, 0.456, 0.406)
             STD = (0.229, 0.224, 0.225)
@@ -126,8 +133,7 @@ def build_model(self):
             ])
             self._forward_func = self._forward_v1_5
         else:
-            self.image_processor = CLIPImageProcessor.from_pretrained(
-                self.model_path)
+            self.image_processor = image_processor
             self._forward_func = self._forward
 
     def _preprocess_v1_5(self, images: List[Image], params: List[Dict] = None):
diff --git a/tests/test_lmdeploy/test_model.py b/tests/test_lmdeploy/test_model.py
index a38971e4d0..7e3e71793d 100644
--- a/tests/test_lmdeploy/test_model.py
+++ b/tests/test_lmdeploy/test_model.py
@@ -475,6 +475,25 @@ def test_internvl2():
     assert res == expected
 
 
+def test_chemvlm():
+    deduced_name = best_match_model('AI4Chem/ChemVLM-8B')
+
+    assert deduced_name == 'internvl-internlm2'
+    model = MODELS.get(deduced_name)()
+    messages = [{
+        'role': 'user',
+        'content': 'who are you'
+    }, {
+        'role': 'assistant',
+        'content': 'I am an AI'
+    }]
+    expected = '<|im_start|>system\nYou are an AI assistant whose name is '\
+        'InternLM (书生·浦语).<|im_end|>\n<|im_start|>user\nwho are you'\
+        '<|im_end|>\n<|im_start|>assistant\nI am an AI'
+    res = model.messages2prompt(messages)
+    assert res == expected
+
+
 def test_codegeex4():
     model_path_and_name = 'THUDM/codegeex4-all-9b'
     deduced_name = best_match_model(model_path_and_name)
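
The core of the patch is the new match rule in lmdeploy/model.py: any model path containing 'chemvlm' now resolves to the existing internvl-internlm2 chat template, which is exactly what the new unit test asserts. A minimal sketch of that resolution, reusing the AI4Chem/ChemVLM-8B name and the same APIs the test imports:

```python
from lmdeploy.model import MODELS, best_match_model

# With this patch, any path containing 'chemvlm' maps to the
# InternLM2-style chat template already used by InternVL-Chat models.
name = best_match_model('AI4Chem/ChemVLM-8B')
assert name == 'internvl-internlm2'

# Instantiate the template and render a conversation into a prompt string.
template = MODELS.get(name)()
prompt = template.messages2prompt([{'role': 'user', 'content': 'who are you'}])
print(prompt)  # ...<|im_start|>user\nwho are you<|im_end|>...
```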
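The internvl.py change covers the vision side: ChemVLM reuses the InternVL vision tower, so when a checkpoint ships no CLIP preprocessor config, build_model now falls back to the InternVL-Chat-V1-5 dynamic preprocessing instead of crashing. With both pieces in place, ChemVLM should run through the standard LMDeploy VLM pipeline; a usage sketch, assuming the AI4Chem/ChemVLM-8B checkpoint from the test and a hypothetical local image file:

```python
from lmdeploy import pipeline
from lmdeploy.vl import load_image

# Build a VLM pipeline; template and image preprocessing are picked
# automatically via the match rule and build_model fallback above.
pipe = pipeline('AI4Chem/ChemVLM-8B')
image = load_image('reaction.png')  # hypothetical image of a reaction scheme
# (prompt, image) tuples are the standard VLM pipeline input format.
response = pipe(('Describe the chemical structure shown in the image', image))
print(response.text)
```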