From 3d56fa92dad30292f38537997c6fb8769d71914f Mon Sep 17 00:00:00 2001 From: Daniel Levi-Minzi <51272568+dleviminzi@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:46:05 -0500 Subject: [PATCH] Fix: Pin latest versions and update engine args for vllm (#756) I'm pinning the vllm and huggingface_hub versions in this PR so that updating these deps triggers a new build. I also added some of the new engine args introduced in the latest release of vllm. --- sdk/src/beta9/abstractions/integrations/vllm.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sdk/src/beta9/abstractions/integrations/vllm.py b/sdk/src/beta9/abstractions/integrations/vllm.py index 7bc8625a9..f5b25dd60 100644 --- a/sdk/src/beta9/abstractions/integrations/vllm.py +++ b/sdk/src/beta9/abstractions/integrations/vllm.py @@ -37,6 +37,10 @@ class VLLMArgs: prompt_adapters: Optional[List[str]] = None chat_template: Optional[str] = None chat_template_url: Optional[str] = None + chat_template_text_format: str = "string" + allowed_local_media_path: str = "" + hf_overrides: Optional[Union[Dict[str, Any], Callable[[Any], Any]]] = None + enable_lora_bias: bool = False return_tokens_as_token_ids: bool = False enable_auto_tool_choice: bool = False tool_call_parser: Optional[str] = None @@ -74,7 +78,6 @@ class VLLMArgs: max_num_batched_tokens: Optional[int] = None max_num_seqs: int = 256 max_logprobs: int = 20 - disable_log_stats: bool = False revision: Optional[str] = None code_revision: Optional[str] = None rope_scaling: Optional[dict] = None @@ -209,7 +212,9 @@ def __init__( # Add default vllm cache volume to preserve it if custom volumes are specified for chat templates volumes.append(Volume(name="vllm_cache", mount_path=DEFAULT_VLLM_CACHE_DIR)) - image = image.add_python_packages(["fastapi", "vllm", "huggingface_hub"]) + image = image.add_python_packages( + ["fastapi", "vllm==0.6.4.post1", "huggingface_hub==0.26.3"] + ) super().__init__( cpu=cpu,