Skip to content

Commit

Permalink
Fix: Pin latest versions and update engine args for vllm (#756)
Browse files Browse the repository at this point in the history
I'm pinning the dependency versions in this PR so that new builds are
triggered whenever these dependencies are updated. I also added some of
the new engine args introduced in the latest release of vllm.
  • Loading branch information
dleviminzi authored Dec 4, 2024
1 parent 04c5f62 commit 3d56fa9
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions sdk/src/beta9/abstractions/integrations/vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ class VLLMArgs:
prompt_adapters: Optional[List[str]] = None
chat_template: Optional[str] = None
chat_template_url: Optional[str] = None
chat_template_text_format: str = "string"
allowed_local_media_path: str = ""
hf_overrides: Optional[Union[Dict[str, Any], Callable[[Any], Any]]] = None
enable_lora_bias: bool = False
return_tokens_as_token_ids: bool = False
enable_auto_tool_choice: bool = False
tool_call_parser: Optional[str] = None
Expand Down Expand Up @@ -74,7 +78,6 @@ class VLLMArgs:
max_num_batched_tokens: Optional[int] = None
max_num_seqs: int = 256
max_logprobs: int = 20
disable_log_stats: bool = False
revision: Optional[str] = None
code_revision: Optional[str] = None
rope_scaling: Optional[dict] = None
Expand Down Expand Up @@ -209,7 +212,9 @@ def __init__(
# Add default vllm cache volume to preserve it if custom volumes are specified for chat templates
volumes.append(Volume(name="vllm_cache", mount_path=DEFAULT_VLLM_CACHE_DIR))

image = image.add_python_packages(["fastapi", "vllm", "huggingface_hub"])
image = image.add_python_packages(
["fastapi", "vllm==0.6.4.post1", "huggingface_hub==0.26.3"]
)

super().__init__(
cpu=cpu,
Expand Down

0 comments on commit 3d56fa9

Please sign in to comment.