diff --git a/lmdeploy/archs.py b/lmdeploy/archs.py
index 53039bdb53..760a82b1c9 100644
--- a/lmdeploy/archs.py
+++ b/lmdeploy/archs.py
@@ -128,7 +128,8 @@ def check_vl_llm(config: dict) -> bool:
         return True
     elif arch == 'MultiModalityCausalLM' and 'language_config' in config:
         return True
-    elif arch == 'ChatGLMModel' and 'vision_config' in config:
+    elif arch in ['ChatGLMModel', 'ChatGLMForConditionalGeneration'
+                  ] and 'vision_config' in config:
         return True
     elif arch in supported_archs:
         return True
diff --git a/lmdeploy/lite/apis/calibrate.py b/lmdeploy/lite/apis/calibrate.py
index 85467997e3..007f831a70 100644
--- a/lmdeploy/lite/apis/calibrate.py
+++ b/lmdeploy/lite/apis/calibrate.py
@@ -254,9 +254,9 @@ def calibrate(model: str,
     elif model_type == 'vlm':
         vl_model = load_vl_model(model, backend=None, with_llm=True).vl_model
         model = vl_model
-        if hasattr(vl_model, 'language_model'):  # deepseek vl
+        if hasattr(vl_model, 'language_model'):  # deepseek-vl, ...
             model = vl_model.language_model
-        if hasattr(vl_model, 'llm'):  # MiniCPMV
+        if hasattr(vl_model, 'llm'):  # MiniCPMV, ...
             model = vl_model.llm
         model.config.use_cache = False
         if dtype == 'float16':
diff --git a/lmdeploy/vl/model/builder.py b/lmdeploy/vl/model/builder.py
index 59420c9bd5..00e668c034 100644
--- a/lmdeploy/vl/model/builder.py
+++ b/lmdeploy/vl/model/builder.py
@@ -73,8 +73,8 @@ def load_vl_model(model_path: str,
                 if backend == 'turbomind' or with_llm:
                     model.build_model()
                 return model
-        except Exception:
-            logger.error(f'matching vision model: {name} failed')
+        except Exception as e:
+            logger.error(f'build vision model {name} failed, {e}')
             raise

     raise ValueError(f'unsupported vl model with config {hf_config}')
diff --git a/lmdeploy/vl/model/cogvlm.py b/lmdeploy/vl/model/cogvlm.py
index abeeff31ce..07d97153f9 100644
--- a/lmdeploy/vl/model/cogvlm.py
+++ b/lmdeploy/vl/model/cogvlm.py
@@ -27,6 +27,14 @@ def build_preprocessor(self):
         patch_size = self.hf_config.vision_config['patch_size']
         self.n_token_per_image = 2 + (image_size // patch_size // 2)**2

+    def build_model(self):
+        if self.with_llm:
+            from transformers import AutoModelForCausalLM
+            self.vl_model = AutoModelForCausalLM.from_pretrained(
+                self.model_path, device_map='cpu', trust_remote_code=True)
+        else:
+            raise NotImplementedError('turbomind has not supported cogvlm yet')
+
     def preprocess(self, messages: List[Dict]) -> List[Dict]:
         """refer to the spec of `super().preprocess`"""
         images = self.collect_images(messages)
diff --git a/lmdeploy/vl/model/glm_4v.py b/lmdeploy/vl/model/glm_4v.py
index 7cdd96d5dc..813813bf09 100644
--- a/lmdeploy/vl/model/glm_4v.py
+++ b/lmdeploy/vl/model/glm_4v.py
@@ -13,13 +13,13 @@
 class GLM4VisionModel(VisonModel):
     """glm-4v-9b vision model."""

-    _arch = 'ChatGLMModel'
+    _arch = ['ChatGLMModel', 'ChatGLMForConditionalGeneration']

     @classmethod
     def match(cls, config: AutoConfig):
         """check whether the config match the model."""
         arch = config.architectures[0]
-        if arch == cls._arch and hasattr(config, 'vision_config'):
+        if arch in cls._arch and hasattr(config, 'vision_config'):
             return True
         return False
@@ -37,6 +37,14 @@ def build_preprocessor(self):
         patch_size = self.hf_config.vision_config['patch_size']
         self.n_token_per_image = 2 + (image_size // patch_size // 2)**2

+    def build_model(self):
+        if self.with_llm:
+            from transformers import AutoModelForCausalLM
+            self.vl_model = AutoModelForCausalLM.from_pretrained(
+                self.model_path, device_map='cpu', trust_remote_code=True)
+        else:
+            raise NotImplementedError('turbomind has not supported glm4v yet')
+
     def preprocess(self, messages: List[Dict]) -> List[Dict]:
         """refers to the spec of `super.preprocess()"""
         outputs = []
diff --git a/lmdeploy/vl/model/mllama.py b/lmdeploy/vl/model/mllama.py
index 13a6b3a480..0cae71cd6c 100644
--- a/lmdeploy/vl/model/mllama.py
+++ b/lmdeploy/vl/model/mllama.py
@@ -5,6 +5,15 @@
 from lmdeploy.vl.model.base import VISION_MODELS, VisonModel


+def check_transformers():
+    try:
+        from transformers import MllamaForConditionalGeneration  # noqa: F401
+    except ImportError:
+        raise ImportError(
+            'please install latest transformers by '
+            'pip install git+https://github.com/huggingface/transformers.git')
+
+
 @VISION_MODELS.register_module()
 class MllamaVLModel(VisonModel):
     """llama3.2 model."""
@@ -31,6 +40,16 @@ def preprocess(self, messages: List[Dict]) -> List[Dict]:
         messages.append(dict(role='preprocess', content=outputs))
         return messages

+    def build_model(self):
+        check_transformers()
+        if self.with_llm:
+            from transformers import MllamaForConditionalGeneration
+            model = MllamaForConditionalGeneration.from_pretrained(
+                self.model_path, device_map='cpu')
+            self.vl_model = model
+        else:
+            raise NotImplementedError('turbomind has not supported mllama yet')
+
     @staticmethod
     def proc_messages(messages, chat_template, sequence_start):
         """apply chat template to get the prompt."""
diff --git a/lmdeploy/vl/model/phi3_vision.py b/lmdeploy/vl/model/phi3_vision.py
index 80204a2dee..ff00b5d1d9 100644
--- a/lmdeploy/vl/model/phi3_vision.py
+++ b/lmdeploy/vl/model/phi3_vision.py
@@ -21,6 +21,14 @@ def build_preprocessor(self):
         processor.tokenizer = None
         self.processor = processor

+    def build_model(self):
+        if self.with_llm:
+            from transformers import AutoModelForCausalLM
+            self.vl_model = AutoModelForCausalLM.from_pretrained(
+                self.model_path, device_map='cpu', trust_remote_code=True)
+        else:
+            raise NotImplementedError('turbomind has not supported phi3v yet')
+
     def preprocess(self, messages: List[Dict]) -> List[Dict]:
         """refers to `super.preprocess() for spec."""
         images = self.collect_images(messages)
diff --git a/lmdeploy/vl/model/qwen2.py b/lmdeploy/vl/model/qwen2.py
index 51bb1f8ccb..ed9da332e0 100644
--- a/lmdeploy/vl/model/qwen2.py
+++ b/lmdeploy/vl/model/qwen2.py
@@ -64,6 +64,16 @@ def preprocess(self, messages: List[Dict]) -> List[Dict]:
         messages.append(dict(role='preprocess', content=outputs))
         return messages

+    def build_model(self):
+        check_qwen_vl_deps_install()
+        from transformers import Qwen2VLForConditionalGeneration
+        if self.with_llm:
+            self.vl_model = Qwen2VLForConditionalGeneration.from_pretrained(
+                self.model_path, device_map='cpu')
+        else:
+            raise NotImplementedError(
+                'turbomind has not supported qwen2-vl yet')
+
     @torch.no_grad()
     def forward(self,
                 messages: List[Dict],