Manual login for hf token (#688)

Check if a Hugging Face token is present and use it to log in if it is.

A warning is logged from vLLM claiming that this login is redundant, but
without it gated downloads don't work.
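For context, a minimal usage sketch (illustrative, not part of this commit): since the server reads HF_TOKEN from its environment at serve time, exporting the token is all a caller needs to do for gated models. The import path follows the changed file's location; the GPU type and token value are placeholders.

    # Illustrative sketch: HF_TOKEN is read from the environment at serve time,
    # so exporting it enables the huggingface_hub login added in this commit.
    import os

    from beta9.abstractions.integrations.vllm import VLLM

    os.environ["HF_TOKEN"] = "hf_..."  # placeholder; normally injected as a secret

    server = VLLM(
        cpu=2,
        memory=1024,
        gpu="A10G",  # illustrative GPU type
    )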
dleviminzi authored on Nov 4, 2024
1 parent 1064bce · commit 50dddae
Showing 2 changed files with 12 additions and 4 deletions.
sdk/pyproject.toml (1 addition, 1 deletion)

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "beta9"
-version = "0.1.106"
+version = "0.1.107"
 description = ""
 authors = ["beam.cloud <[email protected]>"]
 packages = [
sdk/src/beta9/abstractions/integrations/vllm.py (11 additions, 3 deletions)

@@ -196,8 +196,8 @@ class VLLM(ASGI):
             The type or name of the GPU device to be used for GPU-accelerated tasks. If not
             applicable or no GPU required, leave it empty. Default is [GpuType.NoGPU](#gputype).
         image (Union[Image, dict]):
-            The container image used for the task execution. If you override this, it must include
-            the vllm package and the fastapi package.
+            The container image used for the task execution. Whatever you pass here will have an additional `add_python_packages` call
+            with `["fastapi", "vllm", "huggingface_hub"]` added to it to ensure that we can run vLLM in the container.
         workers (int):
             The number of workers to run in the container. Default is 1.
         concurrent_requests (int):

@@ -243,7 +243,7 @@ def __init__(
         cpu: Union[int, float, str] = 1.0,
         memory: Union[int, str] = 128,
         gpu: Union[GpuTypeAlias, List[GpuTypeAlias]] = GpuType.NoGPU,
-        image: Image = Image(python_version="python3.11").add_python_packages(["fastapi", "vllm"]),
+        image: Image = Image(python_version="python3.11"),
         workers: int = 1,
         concurrent_requests: int = 1,
         keep_warm_seconds: int = 60,

@@ -261,6 +261,8 @@ def __init__(
         # Add default vllm cache volume to preserve it if custom volumes are specified for chat templates
         volumes.append(Volume(name="vllm_cache", mount_path=DEFAULT_VLLM_CACHE_DIR))
 
+        image = image.add_python_packages(["fastapi", "vllm", "huggingface_hub"])
+
         super().__init__(
             cpu=cpu,
             memory=memory,

@@ -330,6 +332,12 @@ def __call__(self, *args: Any, **kwargs: Any):
                 f"{self.engine_config.download_dir}/{chat_template_filename}"
             )
 
+        if "HF_TOKEN" in os.environ:
+            hf_token = os.environ["HF_TOKEN"]
+            import huggingface_hub
+
+            huggingface_hub.login(hf_token)
+
         app = FastAPI()
 
         @app.get("/health")
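As a follow-up note, the login performed in __call__ above is the standard huggingface_hub one; a standalone sketch of the same pattern (assumes a valid token with access to the gated repo; the repo_id is illustrative):

    # Standalone sketch of the login-then-download pattern from the diff above.
    import os

    import huggingface_hub

    token = os.environ.get("HF_TOKEN")
    if token:
        huggingface_hub.login(token)  # stores credentials for subsequent downloads

    # With the token stored, gated files resolve; repo_id is illustrative only.
    path = huggingface_hub.hf_hub_download(
        repo_id="meta-llama/Llama-3.1-8B-Instruct",
        filename="config.json",
    )
    print(path)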
