Manual login for hf token (#688)

Check if a Hugging Face token is present and use it to log in if it is.

A warning is logged from vLLM claiming that this login is redundant, but
without it gated downloads don't work.
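For context, a minimal usage sketch (illustrative, not part of this commit): since the server reads HF_TOKEN from its environment at serve time, exporting the token is all a caller needs to do for gated models. The import path follows the changed file's location; the GPU type and token value are placeholders.

    # Illustrative sketch: HF_TOKEN is read from the environment at serve time,
    # so exporting it enables the huggingface_hub login added in this commit.
    import os

    from beta9.abstractions.integrations.vllm import VLLM

    os.environ["HF_TOKEN"] = "hf_..."  # placeholder; normally injected as a secret

    server = VLLM(
        cpu=2,
        memory=1024,
        gpu="A10G",  # illustrative GPU type
    )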
dleviminzi authored on Nov 4, 2024
1 parent 1064bce · commit 50dddae
Showing 2 changed files with 12 additions and 4 deletions.
sdk/pyproject.toml (1 addition, 1 deletion)

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "beta9"
-version = "0.1.106"
+version = "0.1.107"
 description = ""
 authors = ["beam.cloud <[email protected]>"]
 packages = [
sdk/src/beta9/abstractions/integrations/vllm.py (11 additions, 3 deletions)

@@ -196,8 +196,8 @@ class VLLM(ASGI):
             The type or name of the GPU device to be used for GPU-accelerated tasks. If not
             applicable or no GPU required, leave it empty. Default is [GpuType.NoGPU](#gputype).
         image (Union[Image, dict]):
-            The container image used for the task execution. If you override this, it must include
-            the vllm package and the fastapi package.
+            The container image used for the task execution. Whatever you pass here will have an additional `add_python_packages` call
+            with `["fastapi", "vllm", "huggingface_hub"]` added to it to ensure that we can run vLLM in the container.
         workers (int):
             The number of workers to run in the container. Default is 1.
         concurrent_requests (int):

@@ -243,7 +243,7 @@ def __init__(
         cpu: Union[int, float, str] = 1.0,
         memory: Union[int, str] = 128,
         gpu: Union[GpuTypeAlias, List[GpuTypeAlias]] = GpuType.NoGPU,
-        image: Image = Image(python_version="python3.11").add_python_packages(["fastapi", "vllm"]),
+        image: Image = Image(python_version="python3.11"),
         workers: int = 1,
         concurrent_requests: int = 1,
         keep_warm_seconds: int = 60,

@@ -261,6 +261,8 @@ def __init__(
         # Add default vllm cache volume to preserve it if custom volumes are specified for chat templates
         volumes.append(Volume(name="vllm_cache", mount_path=DEFAULT_VLLM_CACHE_DIR))
 
+        image = image.add_python_packages(["fastapi", "vllm", "huggingface_hub"])
+
         super().__init__(
             cpu=cpu,
             memory=memory,

@@ -330,6 +332,12 @@ def __call__(self, *args: Any, **kwargs: Any):
                 f"{self.engine_config.download_dir}/{chat_template_filename}"
             )
 
+        if "HF_TOKEN" in os.environ:
+            hf_token = os.environ["HF_TOKEN"]
+            import huggingface_hub
+
+            huggingface_hub.login(hf_token)
+
         app = FastAPI()
 
         @app.get("/health")
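As a follow-up note, the login performed in __call__ above is the standard huggingface_hub one; a standalone sketch of the same pattern (assumes a valid token with access to the gated repo; the repo_id is illustrative):

    # Standalone sketch of the login-then-download pattern from the diff above.
    import os

    import huggingface_hub

    token = os.environ.get("HF_TOKEN")
    if token:
        huggingface_hub.login(token)  # stores credentials for subsequent downloads

    # With the token stored, gated files resolve; repo_id is illustrative only.
    path = huggingface_hub.hf_hub_download(
        repo_id="meta-llama/Llama-3.1-8B-Instruct",
        filename="config.json",
    )
    print(path)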
