From db8cdc1c9c2df1d8b9cac319a759554d77a7a7f1 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Thu, 23 Jan 2025 00:43:09 +0800
Subject: [PATCH] add env var

Signed-off-by: youkaichao
---
 vllm/entrypoints/openai/api_server.py | 21 +++++++++++----------
 vllm/envs.py                          |  7 +++++++
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 7015c610ad854..9bb11907f7402 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -518,16 +518,17 @@ async def create_score_v1(request: ScoreRequest, raw_request: Request):
     },
 }
 
-
-@router.post("/reset_prefix_cache")
-async def reset_prefix_cache(raw_request: Request):
-    """
-    Reset the prefix cache. Note that we currently do not check if the
-    prefix cache is successfully reset in the API server.
-    """
-    logger.info("Resetting prefix cache...")
-    await engine_client(raw_request).reset_prefix_cache()
-    return Response(status_code=200)
+if envs.VLLM_SERVER_DEV_MODE:
+
+    @router.post("/reset_prefix_cache")
+    async def reset_prefix_cache(raw_request: Request):
+        """
+        Reset the prefix cache. Note that we currently do not check if the
+        prefix cache is successfully reset in the API server.
+        """
+        logger.info("Resetting prefix cache...")
+        await engine_client(raw_request).reset_prefix_cache()
+        return Response(status_code=200)
 
 
 @router.post("/invocations")
diff --git a/vllm/envs.py b/vllm/envs.py
index b7b597ea15af3..1e68326b2d908 100644
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -72,6 +72,7 @@
     VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
     VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
     VLLM_DISABLE_COMPILE_CACHE: bool = False
+    VLLM_SERVER_DEV_MODE: bool = False
 
 
 def get_default_cache_root():
@@ -467,6 +468,12 @@ def get_default_config_root():
     lambda: float(os.getenv("VLLM_LOG_BATCHSIZE_INTERVAL", "-1")),
     "VLLM_DISABLE_COMPILE_CACHE":
     lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),
+
+    # If set, vllm will run in development mode, which will enable
+    # some additional endpoints for developing and debugging,
+    # e.g. `/reset_prefix_cache`
+    "VLLM_SERVER_DEV_MODE":
+    lambda: bool(int(os.getenv("VLLM_SERVER_DEV_MODE", "0"))),
 }
 
 # end-env-vars-definition