From 2f84a509fd1b4e6c7644386eb0a8a9020a7691f9 Mon Sep 17 00:00:00 2001
From: xffxff <1247714429@qq.com>
Date: Tue, 19 Nov 2024 07:54:47 +0000
Subject: [PATCH] upgrade vllm from 0.6.2 to the latest version

---
 aria/vllm/aria.py | 29 ++++++++++++++++++-----------
 pyproject.toml    |  8 ++++----
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/aria/vllm/aria.py b/aria/vllm/aria.py
index 1da24b6..7c0c31e 100644
--- a/aria/vllm/aria.py
+++ b/aria/vllm/aria.py
@@ -27,13 +27,14 @@ from transformers.utils import logging
 
 from vllm.attention import AttentionMetadata
 from vllm.config import CacheConfig, LoRAConfig, MultiModalConfig
+from vllm.config import VllmConfig
 from vllm.distributed import (
     get_pp_group,
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
 )
-from vllm.inputs import INPUT_REGISTRY, LLMInputs
+from vllm.inputs import INPUT_REGISTRY, LLMInputs, token_inputs
 from vllm.model_executor.layers.fused_moe import fused_moe
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
@@ -337,6 +338,10 @@ def __init__(
         prefix: str = "",
     ) -> None:
         nn.Module.__init__(self)
+
+        # FIXME(zhoufan): this is a hack to avoid the error: AttributeError: 'AriaMoELMModel' object has no attribute 'do_not_compile'.
+        self.do_not_compile = True
+
         self.config = config
         self.padding_idx = config.pad_token_id
         lora_vocab = (
@@ -679,7 +684,7 @@ def input_processor(ctx, llm_inputs):
     # TODO: Supports dynamic image size support
     setattr(model_config.multimodal_config, "max_image_size", max(max_image_size))
 
-    new_prompt, new_token_ids = repeat_and_pad_placeholder_tokens(
+    new_prompt, new_token_ids, ranges = repeat_and_pad_placeholder_tokens(
         tokenizer,
         llm_inputs.get("prompt"),
         llm_inputs["prompt_token_ids"],
@@ -687,11 +692,7 @@ def input_processor(ctx, llm_inputs):
         repeat_count=image_feature_sizes,
     )
 
-    return LLMInputs(
-        prompt=new_prompt,
-        prompt_token_ids=new_token_ids,
-        multi_modal_data=multi_modal_data,
-    )
+    return token_inputs(prompt_token_ids=new_token_ids, prompt=new_prompt, multi_modal_data=multi_modal_data, multi_modal_placeholders={"image": ranges})
 
 
 # adapted from transformers.models.llava.modeling_llava.LlavaForConditionalGeneration
@@ -708,12 +709,18 @@ class AriaForConditionalGeneration(nn.Module, SupportsMultiModal):
 
     def __init__(
         self,
-        config: AriaConfig,
-        multimodal_config: MultiModalConfig,
-        cache_config: Optional[CacheConfig] = None,
-        quant_config: Optional[QuantizationConfig] = None,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+        # config: AriaConfig,
+        # multimodal_config: MultiModalConfig,
+        # cache_config: Optional[CacheConfig] = None,
+        # quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
+        config = vllm_config.model_config.hf_config
+        cache_config = vllm_config.cache_config
+        quant_config = vllm_config.quant_config
+
         # prepare the image_size to tokens mapping for the image preprocess, see input_processor
         setattr(
             config,
diff --git a/pyproject.toml b/pyproject.toml
index 5d2bdea..f556c17 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,13 +7,13 @@ authors = [
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
-    "torch==2.4.0",
-    "torchvision==0.19.0",
+    "torch==2.5.1",
+    "torchvision==0.20.1",
     "accelerate==0.34.1",
     "deepspeed==0.15.0",
     "peft==0.12.0",
     "sentencepiece==0.2.0",
-    "transformers==4.45.0",
+    "transformers==4.46.3",
     "trl==0.9.6",
     "pillow==10.4.0",
     "wandb==0.18.1",
@@ -32,7 +32,7 @@ dev = [
     "pytest==8.3.3",
 ]
 vllm = [
-    "vllm==0.6.2"
+    "vllm==0.6.4.post1"
 ]
 grouped_gemm = [
     "grouped_gemm==0.1.6"