diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 158705769b5e0..9521acddc5f36 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -27,7 +27,7 @@ class RequestFuncInput:
     output_len: int
     model: str
     model_name: Optional[str] = None
-    best_of: int = 1
+    best_of: Optional[int] = None if os.environ.get("VLLM_USE_V1", "0") == "1" else 1
     logprobs: Optional[int] = None
     extra_body: Optional[dict] = None
     multi_modal_content: Optional[dict] = None
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 16ec0a4817a26..f68e2fd0b6297 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -1084,7 +1084,7 @@ def main(args: argparse.Namespace):
     parser.add_argument(
         "--best-of",
         type=int,
-        default=1,
+        default=None if os.environ.get("VLLM_USE_V1", "0") == "1" else 1,
         help="Generates `best_of` sequences per prompt and "
         "returns the best one.",
     )