Commit
fix: moved to required arg after vllm serve
hommayushi3 committed Aug 9, 2024
1 parent ba79708 commit 7d07aa4
Showing 5 changed files with 385 additions and 1 deletion.
2 changes: 1 addition & 1 deletion endpoints-entrypoint.sh
@@ -10,7 +10,7 @@ TRUST_REMOTE_CODE=${TRUST_REMOTE_CODE:-false}
GUIDED_DECODING_BACKEND=${GUIDED_DECODING_BACKEND:-"outlines"}

# Entrypoint for the OpenAI API server
-CMD="vllm serve --host '0.0.0.0' --port 80 --model '$MODEL_PATH' --tensor-parallel-size '$NUM_SHARD' --dtype $DTYPE --guided-decoding-backend $GUIDED_DECODING_BACKEND"
+CMD="vllm serve $MODEL_PATH --host '0.0.0.0' --port 80 --tensor-parallel-size '$NUM_SHARD' --dtype $DTYPE --guided-decoding-backend $GUIDED_DECODING_BACKEND"

# Append --max-model-len if its value is not -1
if [ "$MAX_MODEL_LEN" -ne -1 ]; then
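
In recent vLLM releases, `vllm serve` takes the model as a required positional argument rather than via `--model`, which is what this change accounts for. Below is a minimal Python sketch of an equivalent launch; the environment-variable names mirror endpoints-entrypoint.sh, but the fallback values are illustrative assumptions, not the script's actual defaults.

import os
import subprocess

# Environment variable names mirror endpoints-entrypoint.sh; the fallback
# values here are illustrative assumptions.
model_path = os.environ.get("MODEL_PATH", "/repository")
num_shard = os.environ.get("NUM_SHARD", "1")
dtype = os.environ.get("DTYPE", "auto")
backend = os.environ.get("GUIDED_DECODING_BACKEND", "outlines")
max_model_len = int(os.environ.get("MAX_MODEL_LEN", "-1"))

# The model path is now a positional argument right after "serve",
# no longer passed via --model.
cmd = [
    "vllm", "serve", model_path,
    "--host", "0.0.0.0",
    "--port", "80",
    "--tensor-parallel-size", num_shard,
    "--dtype", dtype,
    "--guided-decoding-backend", backend,
]
if max_model_len != -1:
    cmd += ["--max-model-len", str(max_model_len)]

subprocess.run(cmd, check=True)
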
Empty file added examples/deploy.py
1 change: 1 addition & 0 deletions examples/inference.py
@@ -0,0 +1 @@
+from huggingface_hub import InferenceClient
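
The new examples/inference.py pulls in huggingface_hub's InferenceClient. Below is a minimal sketch of how such a client could query the OpenAI-compatible server launched by endpoints-entrypoint.sh, assuming a recent huggingface_hub release with OpenAI-compatible base_url support; the endpoint URL, token, and prompt are placeholders, not part of this commit.

from huggingface_hub import InferenceClient

# The endpoint URL and token are placeholders for a deployed instance of
# the vLLM OpenAI-compatible server started by endpoints-entrypoint.sh.
client = InferenceClient(
    base_url="https://your-endpoint.example.com",
    api_key="hf_xxx",
)

# vLLM exposes /v1/chat/completions, which chat_completion targets.
response = client.chat_completion(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=64,
)
print(response.choices[0].message.content)
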
(Diffs for the remaining 2 changed files are not shown.)
