feat: add kv-cache-dtype to list of environment variables
hommayushi3 committed Aug 9, 2024
1 parent 6d4060c commit 8729cfb
Showing 2 changed files with 4 additions and 2 deletions.
endpoints-entrypoint.sh (3 additions, 1 deletion)

@@ -8,9 +8,11 @@ QUANTIZATION=${QUANTIZATION:-}
 DTYPE=${DTYPE:-"auto"}
 TRUST_REMOTE_CODE=${TRUST_REMOTE_CODE:-false}
 GUIDED_DECODING_BACKEND=${GUIDED_DECODING_BACKEND:-"outlines"}
+KV_CACHE_DTYPE=${KV_CACHE_DTYPE:-"auto"}
 
 # Entrypoint for the OpenAI API server
-CMD="vllm serve $MODEL_PATH --host '0.0.0.0' --port 80 --tensor-parallel-size '$NUM_SHARD' --dtype $DTYPE --guided-decoding-backend $GUIDED_DECODING_BACKEND"
+CMD="vllm serve $MODEL_PATH --host '0.0.0.0' --port 80 --tensor-parallel-size '$NUM_SHARD'"
+CMD="$CMD --dtype $DTYPE --guided-decoding-backend $GUIDED_DECODING_BACKEND --kv-cache-dtype $KV_CACHE_DTYPE"
 
 # Append --max-model-len if its value is not -1
 if [ "$MAX_MODEL_LEN" -ne -1 ]; then
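
For context, a minimal sketch of how the new variable reaches the vllm serve command, assuming MODEL_PATH and NUM_SHARD are set elsewhere in the script or environment. The values below are placeholders, not part of this commit; "fp8" is one value recent vLLM releases accept for --kv-cache-dtype (check your vLLM version for the full list).

export MODEL_PATH=/repository    # placeholder model path
export NUM_SHARD=1               # placeholder shard count
export KV_CACHE_DTYPE=fp8        # falls back to "auto" when unset
./endpoints-entrypoint.sh
# With the other defaults shown above, the script builds roughly:
#   vllm serve /repository --host '0.0.0.0' --port 80 --tensor-parallel-size '1' \
#     --dtype auto --guided-decoding-backend outlines --kv-cache-dtype fp8
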
examples/inference.py (1 addition, 1 deletion)

@@ -30,7 +30,7 @@
     ],
     max_tokens=500,
     temperature=0.0,
-    stream=STREAM
+    stream=STREAM,
 )
 
 if STREAM:
