diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 9fe0d0bb0a3..04b9e8032c0 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -56,7 +56,7 @@ class EngineArgs:
     quantization: Optional[str] = None
     enforce_eager: bool = False
     max_context_len_to_capture: Optional[int] = None
-    max_seq_len_to_capture: int = 8192
+    max_seq_len_to_capture: int = 32768
     disable_custom_all_reduce: bool = False
     tokenizer_pool_size: int = 0
     tokenizer_pool_type: str = "ray"
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 6e971ae73f5..bbb5d31f060 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -115,7 +115,7 @@ def __init__(
         swap_space: int = 4,
         enforce_eager: bool = False,
         max_context_len_to_capture: Optional[int] = None,
-        max_seq_len_to_capture: int = 8192,
+        max_seq_len_to_capture: int = 32768,
         disable_custom_all_reduce: bool = False,
         **kwargs,
     ) -> None: