[Bugfix] Set enforce_eager automatically for mllama (#12127)
Signed-off-by: Chen Zhang <[email protected]>
heheda12345 authored Jan 16, 2025
1 parent 62b06ba commit d06e824
Showing 3 changed files with 5 additions and 5 deletions.
examples/offline_inference/vision_language.py (1 change: 0 additions & 1 deletion)
@@ -325,7 +325,6 @@ def run_mllama(question: str, modality: str):
         model=model_name,
         max_model_len=4096,
         max_num_seqs=16,
-        enforce_eager=True,
         disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
     )

examples/offline_inference/vision_language_multi_image.py (1 change: 0 additions & 1 deletion)
@@ -186,7 +186,6 @@ def load_mllama(question, image_urls: List[str]) -> ModelRequestData:
         model=model_name,
         max_model_len=4096,
         max_num_seqs=16,
-        enforce_eager=True,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
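Both example scripts above drop the explicit enforce_eager=True argument because the
config layer now forces eager mode on its own for mllama models. Below is a minimal
usage sketch of the resulting call, assuming vLLM's offline LLM entry point; the
model name is an illustrative assumption, not taken from this diff.

from vllm import LLM

# No enforce_eager=True needed any more: ModelConfig._verify_cuda_graph sets it
# automatically for mllama and logs a warning about the CUDA graph fallback.
llm = LLM(
    model="meta-llama/Llama-3.2-11B-Vision-Instruct",  # illustrative model name
    max_model_len=4096,
    max_num_seqs=16,
)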

vllm/config.py (8 changes: 5 additions & 3 deletions)
@@ -607,10 +607,12 @@ def _verify_cuda_graph(self) -> None:
         self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                           self.max_model_len)
 
-        if (self.hf_config.model_type == 'deepseek_v3'
+        MODEL_NOT_SUPPORT_CUDA_GRAPH = ['deepseek_v3', 'mllama']
+        if (self.hf_config.model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH
                 and not self.enforce_eager):
-            logger.warning("CUDA graph is not supported for Deepseek V3 yet, "
-                           "fallback to the eager mode.")
+            logger.warning(
+                "CUDA graph is not supported for %s yet, fallback to the eager "
+                "mode.", self.hf_config.model_type)
             self.enforce_eager = True
 
     def _verify_bnb_config(self) -> None:
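
To see the new fallback behavior in isolation, here is a small self-contained sketch
of the pattern introduced in _verify_cuda_graph above. The ModelConfigSketch class and
its method name are stand-ins for illustration only; they are not vLLM's actual classes.

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# Mirrors the list added in vllm/config.py above.
MODEL_NOT_SUPPORT_CUDA_GRAPH = ['deepseek_v3', 'mllama']


class ModelConfigSketch:
    """Illustrative stand-in for the relevant part of vLLM's ModelConfig."""

    def __init__(self, model_type: str, enforce_eager: bool = False):
        self.model_type = model_type
        self.enforce_eager = enforce_eager

    def verify_cuda_graph(self) -> None:
        # Force eager execution for model types known to lack CUDA graph
        # support, as the real _verify_cuda_graph now does.
        if (self.model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH
                and not self.enforce_eager):
            logger.warning(
                "CUDA graph is not supported for %s yet, fallback to the "
                "eager mode.", self.model_type)
            self.enforce_eager = True


cfg = ModelConfigSketch(model_type='mllama')
cfg.verify_cuda_graph()
assert cfg.enforce_eager  # set automatically, no user flag required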