From 3c78e5d7ff9544a146129f66b5522ec3c687ec81 Mon Sep 17 00:00:00 2001
From: Huy Do
Date: Wed, 5 Mar 2025 10:42:39 -0800
Subject: [PATCH 1/5] [Benchmark] Unset best_of when running v1 benchmark

The check was introduced by https://github.com/vllm-project/vllm/pull/14159
and fails the v1 benchmark with a `VLLM V1 does not yet support best_of`
error because the parameter is not yet supported.

Signed-off-by: Huy Do
---
 benchmarks/backend_request_func.py | 34 +++++++++++++++++-------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 158705769b5e0..eed4668786431 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -5,7 +5,7 @@
 import sys
 import time
 import traceback
-from dataclasses import dataclass, field
+from dataclasses import dataclass, field, make_dataclass
 from typing import Optional, Union
 
 import aiohttp
@@ -18,20 +18,24 @@
 
 AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
 
-
-@dataclass
-class RequestFuncInput:
-    prompt: str
-    api_url: str
-    prompt_len: int
-    output_len: int
-    model: str
-    model_name: Optional[str] = None
-    best_of: int = 1
-    logprobs: Optional[int] = None
-    extra_body: Optional[dict] = None
-    multi_modal_content: Optional[dict] = None
-    ignore_eos: bool = False
+RequestFuncInput = make_dataclass(
+    "RequestFuncInput",
+    [
+        ("prompt", str),
+        ("api_url", str),
+        ("prompt_len", int),
+        ("output_len", int),
+        ("model", str),
+        ("model_name", Optional[str], field(default=None)),
+        ("logprobs", Optional[int], field(default=None)),
+        ("extra_body", Optional[dict], field(default=None)),
+        ("multi_modal_content", Optional[dict], field(default=None)),
+        ("ignore_eos", bool, field(default=False)),
+    ].extend(
+        # From https://github.com/vllm-project/vllm/pull/14159, v1 doesn't yet
+        # support best_of parameter
+        [] if os.environ.get("VLLM_USE_V1", 0) else [("best_of", int,
+                                                      field(default=1))]))
 
 
 @dataclass

From 39ba163abc3d56133e7f618c97c68cff63dd9769 Mon Sep 17 00:00:00 2001
From: Huy Do
Date: Wed, 5 Mar 2025 10:56:52 -0800
Subject: [PATCH 2/5] Fix lint

Signed-off-by: Huy Do
---
 benchmarks/backend_request_func.py | 37 +++++++++++++++++-----------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index eed4668786431..643a0b13bb16e 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -18,24 +18,25 @@
 
 AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
 
-RequestFuncInput = make_dataclass(
-    "RequestFuncInput",
-    [
-        ("prompt", str),
-        ("api_url", str),
-        ("prompt_len", int),
-        ("output_len", int),
-        ("model", str),
-        ("model_name", Optional[str], field(default=None)),
-        ("logprobs", Optional[int], field(default=None)),
-        ("extra_body", Optional[dict], field(default=None)),
-        ("multi_modal_content", Optional[dict], field(default=None)),
-        ("ignore_eos", bool, field(default=False)),
-    ].extend(
-        # From https://github.com/vllm-project/vllm/pull/14159, v1 doesn't yet
-        # support best_of parameter
-        [] if os.environ.get("VLLM_USE_V1", 0) else [("best_of", int,
-                                                      field(default=1))]))
+REQUEST_FUNC_INPUT_FIELDS = [
+    ("prompt", str),
+    ("api_url", str),
+    ("prompt_len", int),
+    ("output_len", int),
+    ("model", str),
+    ("model_name", Optional[str], field(default=None)),
+    ("logprobs", Optional[int], field(default=None)),
+    ("extra_body", Optional[dict], field(default=None)),
+    ("multi_modal_content", Optional[dict], field(default=None)),
+    ("ignore_eos", bool, field(default=False)),
+]
+if not os.environ.get("VLLM_USE_V1", 0):
+    # From https://github.com/vllm-project/vllm/pull/14159, v1 doesn't yet
+    # support best_of parameter
+    REQUEST_FUNC_INPUT_FIELDS.extend([("best_of", int, field(default=1))])
+
+RequestFuncInput = make_dataclass("RequestFuncInput",
+                                  REQUEST_FUNC_INPUT_FIELDS)
 
 
 @dataclass
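
Note on patches 1-3 (an illustration, not part of the series): list.extend()
and list.append() both return None, so the inline `[...].extend(...)` in
patch 1 hands None to make_dataclass(); patches 2 and 3 restructure this
into a module-level list that is mutated before make_dataclass() is called.
A minimal, self-contained sketch of that pattern, with the field list
trimmed down:

import os
from dataclasses import field, make_dataclass
from typing import Optional

input_fields = [
    ("prompt", str),
    ("model", str),
    ("logprobs", Optional[int], field(default=None)),
]
if not os.environ.get("VLLM_USE_V1", 0):
    # v1 does not support best_of yet, so only v0 gets the field.
    input_fields.append(("best_of", int, field(default=1)))

RequestFuncInput = make_dataclass("RequestFuncInput", input_fields)
print(RequestFuncInput(prompt="hi", model="m"))
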
From 4da9ebc5855ea688d59d3bfb62bd9386850a0999 Mon Sep 17 00:00:00 2001
From: Huy Do
Date: Wed, 5 Mar 2025 10:59:56 -0800
Subject: [PATCH 3/5] Use append

Signed-off-by: Huy Do
---
 benchmarks/backend_request_func.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 643a0b13bb16e..5cbfbfaf5b6cd 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -33,7 +33,7 @@
 if not os.environ.get("VLLM_USE_V1", 0):
     # From https://github.com/vllm-project/vllm/pull/14159, v1 doesn't yet
     # support best_of parameter
-    REQUEST_FUNC_INPUT_FIELDS.extend([("best_of", int, field(default=1))])
+    REQUEST_FUNC_INPUT_FIELDS.append(("best_of", int, field(default=1)))
 
 RequestFuncInput = make_dataclass("RequestFuncInput",
                                   REQUEST_FUNC_INPUT_FIELDS)

From 2683e6f58497c4f88f805755c902ebc2d1594fb3 Mon Sep 17 00:00:00 2001
From: Huy Do
Date: Wed, 5 Mar 2025 11:08:41 -0800
Subject: [PATCH 4/5] Use the simpler approach

Signed-off-by: Huy Do
---
 benchmarks/backend_request_func.py | 35 +++++++++++++-----------------
 benchmarks/benchmark_serving.py    |  2 +-
 2 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 5cbfbfaf5b6cd..9521acddc5f36 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -5,7 +5,7 @@
 import sys
 import time
 import traceback
-from dataclasses import dataclass, field, make_dataclass
+from dataclasses import dataclass, field
 from typing import Optional, Union
 
 import aiohttp
@@ -18,25 +18,20 @@
 
 AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
 
-REQUEST_FUNC_INPUT_FIELDS = [
-    ("prompt", str),
-    ("api_url", str),
-    ("prompt_len", int),
-    ("output_len", int),
-    ("model", str),
-    ("model_name", Optional[str], field(default=None)),
-    ("logprobs", Optional[int], field(default=None)),
-    ("extra_body", Optional[dict], field(default=None)),
-    ("multi_modal_content", Optional[dict], field(default=None)),
-    ("ignore_eos", bool, field(default=False)),
-]
-if not os.environ.get("VLLM_USE_V1", 0):
-    # From https://github.com/vllm-project/vllm/pull/14159, v1 doesn't yet
-    # support best_of parameter
-    REQUEST_FUNC_INPUT_FIELDS.append(("best_of", int, field(default=1)))
-
-RequestFuncInput = make_dataclass("RequestFuncInput",
-                                  REQUEST_FUNC_INPUT_FIELDS)
+
+@dataclass
+class RequestFuncInput:
+    prompt: str
+    api_url: str
+    prompt_len: int
+    output_len: int
+    model: str
+    model_name: Optional[str] = None
+    best_of: Optional[int] = None if os.environ.get("VLLM_USE_V1", 0) else 1
+    logprobs: Optional[int] = None
+    extra_body: Optional[dict] = None
+    multi_modal_content: Optional[dict] = None
+    ignore_eos: bool = False
 
 
 @dataclass
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 16ec0a4817a26..b398c4d859d81 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -1084,7 +1084,7 @@ def main(args: argparse.Namespace):
     parser.add_argument(
         "--best-of",
         type=int,
-        default=1,
+        default=0 if os.environ.get("VLLM_USE_V1", 0) else 1,
         help="Generates `best_of` sequences per prompt and "
         "returns the best one.",
     )
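
Note on patch 4 (an illustration, not part of the series): the conditional
dataclass default is evaluated once, at class-definition time, and
os.environ.get() returns a string whenever the variable is set, so any
non-empty value (even "0") selects the v1 branch. The same pattern in
isolation, with the field list trimmed down:

import os
from dataclasses import dataclass
from typing import Optional


@dataclass
class RequestFuncInput:
    prompt: str
    model: str
    # None under v1, where best_of is unsupported; 1 under v0.
    best_of: Optional[int] = None if os.environ.get("VLLM_USE_V1", 0) else 1


print(RequestFuncInput(prompt="hi", model="m").best_of)
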
help="Generates `best_of` sequences per prompt and " "returns the best one.", ) From c900571040ee4278f9ba7aba7730796f13b3b27c Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 5 Mar 2025 11:25:04 -0800 Subject: [PATCH 5/5] It's working now Signed-off-by: Huy Do --- benchmarks/benchmark_serving.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index b398c4d859d81..f68e2fd0b6297 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -1084,7 +1084,7 @@ def main(args: argparse.Namespace): parser.add_argument( "--best-of", type=int, - default=0 if os.environ.get("VLLM_USE_V1", 0) else 1, + default=None if os.environ.get("VLLM_USE_V1", 0) else 1, help="Generates `best_of` sequences per prompt and " "returns the best one.", )