diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index 8bd23939dd58..60acf6569cdc 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -146,6 +146,7 @@ def get_config( api_key=os.environ.get('ALLHANDS_API_KEY', None), remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'), keep_remote_runtime_alive=False, + remote_runtime_init_timeout=1800, ), # do not mount workspace workspace_base=None, diff --git a/openhands/core/config/sandbox_config.py b/openhands/core/config/sandbox_config.py index ff0b7bb5f375..6dd7f00dd2cf 100644 --- a/openhands/core/config/sandbox_config.py +++ b/openhands/core/config/sandbox_config.py @@ -14,7 +14,8 @@ class SandboxConfig: base_container_image: The base container image from which to build the runtime image. runtime_container_image: The runtime container image to use. user_id: The user ID for the sandbox. - timeout: The timeout for the sandbox. + timeout: The timeout for the default sandbox action execution. + remote_runtime_init_timeout: The timeout for the remote runtime to start. enable_auto_lint: Whether to enable auto-lint. use_host_network: Whether to use the host network. initialize_plugins: Whether to initialize plugins. @@ -41,6 +42,7 @@ class SandboxConfig: runtime_container_image: str | None = None user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000 timeout: int = 120 + remote_runtime_init_timeout: int = 180 enable_auto_lint: bool = ( False # once enabled, OpenHands would lint files after editing ) diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index 597aed7b2fbf..33b64917dfc6 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -89,6 +89,7 @@ def __init__( ) self.runtime_id: str | None = None self.runtime_url: str | None = None + self.runtime_init_timeout = self.config.sandbox.remote_runtime_init_timeout async def connect(self): try: @@ -260,13 +261,17 @@ def _parse_runtime_response(self, response: requests.Response): {'X-Session-API-Key': start_response['session_api_key']} ) - @tenacity.retry( - stop=tenacity.stop_after_delay(180) | stop_if_should_exit(), - reraise=True, - retry=tenacity.retry_if_exception_type(RuntimeNotReadyError), - wait=tenacity.wait_fixed(2), - ) def _wait_until_alive(self): + retry_decorator = tenacity.retry( + stop=tenacity.stop_after_delay(self.runtime_init_timeout) + | stop_if_should_exit(), + reraise=True, + retry=tenacity.retry_if_exception_type(RuntimeNotReadyError), + wait=tenacity.wait_fixed(2), + ) + return retry_decorator(self._wait_until_alive_impl)() + + def _wait_until_alive_impl(self): self.log('debug', f'Waiting for runtime to be alive at url: {self.runtime_url}') runtime_info_response = self._send_request( 'GET',