[side-effect] bring back quantization of qwen2-vl, glm4v and etc. (#2954)

* bring back quantization of qwen2-vl, glm4v and etc.

* fix typo

* update log
lvhan028 authored Dec 26, 2024
1 parent d9b8372 commit 4e5cc16
Showing 8 changed files with 61 additions and 7 deletions.
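
The theme across the files below is the same: each affected vision-model class regains a `build_model()` that, when `with_llm=True` (the path taken during calibration/quantization), loads the full Hugging Face checkpoint onto CPU and keeps it as `self.vl_model`, and otherwise raises `NotImplementedError` because turbomind does not serve that model's vision part itself. A condensed, hedged sketch of that shared pattern (illustration only, not code from the commit; `ExampleVLModel` is a made-up stand-in):

# Hedged sketch (not code from the commit) of the build_model() pattern the
# files below restore; `ExampleVLModel` stands in for the real VisonModel
# subclasses.
class ExampleVLModel:

    def __init__(self, model_path: str, with_llm: bool = False):
        self.model_path = model_path
        self.with_llm = with_llm  # True on the calibration/quantization path
        self.vl_model = None

    def build_model(self):
        if self.with_llm:
            # quantization needs the complete HF model, built on CPU
            from transformers import AutoModelForCausalLM
            self.vl_model = AutoModelForCausalLM.from_pretrained(
                self.model_path, device_map='cpu', trust_remote_code=True)
        else:
            # the turbomind backend does not serve this model's vision part
            raise NotImplementedError('not supported by turbomind yet')
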
3 changes: 2 additions & 1 deletion lmdeploy/archs.py
@@ -128,7 +128,8 @@ def check_vl_llm(config: dict) -> bool:
         return True
     elif arch == 'MultiModalityCausalLM' and 'language_config' in config:
         return True
-    elif arch == 'ChatGLMModel' and 'vision_config' in config:
+    elif arch in ['ChatGLMModel', 'ChatGLMForConditionalGeneration'
+                  ] and 'vision_config' in config:
         return True
     elif arch in supported_archs:
         return True
4 changes: 2 additions & 2 deletions lmdeploy/lite/apis/calibrate.py
@@ -254,9 +254,9 @@ def calibrate(model: str,
     elif model_type == 'vlm':
         vl_model = load_vl_model(model, backend=None, with_llm=True).vl_model
         model = vl_model
-        if hasattr(vl_model, 'language_model'):  # deepseek vl
+        if hasattr(vl_model, 'language_model'):  # deepseek-vl, ...
             model = vl_model.language_model
-        if hasattr(vl_model, 'llm'):  # MiniCPMV
+        if hasattr(vl_model, 'llm'):  # MiniCPMV, ...
             model = vl_model.llm
         model.config.use_cache = False
         if dtype == 'float16':
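
For context, the hunk above sits on the path that `calibrate()` takes for vision-language models; a minimal, hedged sketch of that flow using only names visible in the diff (the model path is a hypothetical example):

# Hedged sketch of calibrate()'s VLM branch; mirrors the hunk above.
from lmdeploy.vl.model.builder import load_vl_model

model_path = 'THUDM/glm-4v-9b'  # hypothetical example path
vl_model = load_vl_model(model_path, backend=None, with_llm=True).vl_model
model = vl_model
if hasattr(vl_model, 'language_model'):  # deepseek-vl, ...
    model = vl_model.language_model
if hasattr(vl_model, 'llm'):  # MiniCPMV, ...
    model = vl_model.llm
model.config.use_cache = False  # the LLM part is what gets calibrated
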
4 changes: 2 additions & 2 deletions lmdeploy/vl/model/builder.py
@@ -73,8 +73,8 @@ def load_vl_model(model_path: str,
                 if backend == 'turbomind' or with_llm:
                     model.build_model()
                 return model
-        except Exception:
-            logger.error(f'matching vision model: {name} failed')
+        except Exception as e:
+            logger.error(f'build vision model {name} failed, {e}')
             raise

     raise ValueError(f'unsupported vl model with config {hf_config}')
8 changes: 8 additions & 0 deletions lmdeploy/vl/model/cogvlm.py
@@ -27,6 +27,14 @@ def build_preprocessor(self):
         patch_size = self.hf_config.vision_config['patch_size']
         self.n_token_per_image = 2 + (image_size // patch_size // 2)**2

+    def build_model(self):
+        if self.with_llm:
+            from transformers import AutoModelForCausalLM
+            self.vl_model = AutoModelForCausalLM.from_pretrained(
+                self.model_path, device_map='cpu', trust_remote_code=True)
+        else:
+            raise NotImplementedError('turbomind has not supported cogvlm yet')
+
     def preprocess(self, messages: List[Dict]) -> List[Dict]:
         """refer to the spec of `super().preprocess`"""
         images = self.collect_images(messages)
12 changes: 10 additions & 2 deletions lmdeploy/vl/model/glm_4v.py
@@ -13,13 +13,13 @@
 class GLM4VisionModel(VisonModel):
     """glm-4v-9b vision model."""

-    _arch = 'ChatGLMModel'
+    _arch = ['ChatGLMModel', 'ChatGLMForConditionalGeneration']

     @classmethod
     def match(cls, config: AutoConfig):
         """check whether the config match the model."""
         arch = config.architectures[0]
-        if arch == cls._arch and hasattr(config, 'vision_config'):
+        if arch in cls._arch and hasattr(config, 'vision_config'):
             return True
         return False

@@ -37,6 +37,14 @@ def build_preprocessor(self):
         patch_size = self.hf_config.vision_config['patch_size']
         self.n_token_per_image = 2 + (image_size // patch_size // 2)**2

+    def build_model(self):
+        if self.with_llm:
+            from transformers import AutoModelForCausalLM
+            self.vl_model = AutoModelForCausalLM.from_pretrained(
+                self.model_path, device_map='cpu', trust_remote_code=True)
+        else:
+            raise NotImplementedError('turbomind has not supported glm4v yet')
+
     def preprocess(self, messages: List[Dict]) -> List[Dict]:
         """refers to the spec of `super.preprocess()"""
         outputs = []
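
To see why `_arch` becomes a list: the two GLM-4V checkpoint flavors report different `architectures[0]` values (`ChatGLMModel` vs `ChatGLMForConditionalGeneration`), and `match()` now accepts either. A hedged illustration with a stubbed config object (the stub is made up; real callers pass a transformers `AutoConfig`):

# Hedged illustration of the widened architecture match; the config stub
# below is a made-up stand-in for a real transformers config.
from types import SimpleNamespace

from lmdeploy.vl.model.glm_4v import GLM4VisionModel

cfg = SimpleNamespace(architectures=['ChatGLMForConditionalGeneration'],
                      vision_config={})
print(GLM4VisionModel.match(cfg))  # True after this change
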
19 changes: 19 additions & 0 deletions lmdeploy/vl/model/mllama.py
@@ -5,6 +5,15 @@
 from lmdeploy.vl.model.base import VISION_MODELS, VisonModel


+def check_transformers():
+    try:
+        from transformers import MllamaForConditionalGeneration  # noqa: F401
+    except ImportError:
+        raise ImportError(
+            'please install latest transformers by '
+            'pip install git+https://github.com/huggingface/transformers.git')
+
+
 @VISION_MODELS.register_module()
 class MllamaVLModel(VisonModel):
     """llama3.2 model."""
@@ -31,6 +40,16 @@ def preprocess(self, messages: List[Dict]) -> List[Dict]:
         messages.append(dict(role='preprocess', content=outputs))
         return messages

+    def build_model(self):
+        check_transformers()
+        if self.with_llm:
+            from transformers import MllamaForConditionalGeneration
+            model = MllamaForConditionalGeneration.from_pretrained(
+                self.model_path, device_map='cpu')
+            self.vl_model = model
+        else:
+            raise NotImplementedError('turbomind has not supported mllama yet')
+
     @staticmethod
     def proc_messages(messages, chat_template, sequence_start):
         """apply chat template to get the prompt."""
8 changes: 8 additions & 0 deletions lmdeploy/vl/model/phi3_vision.py
@@ -21,6 +21,14 @@ def build_preprocessor(self):
         processor.tokenizer = None
         self.processor = processor

+    def build_model(self):
+        if self.with_llm:
+            from transformers import AutoModelForCausalLM
+            self.vl_model = AutoModelForCausalLM.from_pretrained(
+                self.model_path, device_map='cpu', trust_remote_code=True)
+        else:
+            raise NotImplementedError('turbomind has not supported phi3v yet')
+
     def preprocess(self, messages: List[Dict]) -> List[Dict]:
         """refers to `super.preprocess() for spec."""
         images = self.collect_images(messages)
10 changes: 10 additions & 0 deletions lmdeploy/vl/model/qwen2.py
@@ -64,6 +64,16 @@ def preprocess(self, messages: List[Dict]) -> List[Dict]:
         messages.append(dict(role='preprocess', content=outputs))
         return messages

+    def build_model(self):
+        check_qwen_vl_deps_install()
+        from transformers import Qwen2VLForConditionalGeneration
+        if self.with_llm:
+            self.vl_model = Qwen2VLForConditionalGeneration.from_pretrained(
+                self.model_path, device_map='cpu')
+        else:
+            raise NotImplementedError(
+                'turbomind has not supported qwen2-vl yet')
+
     @torch.no_grad()
     def forward(self,
                 messages: List[Dict],
