From f14f75b064933f9f2405b5ed9237e0081e28e284 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Fri, 3 Jan 2025 10:36:19 -0500 Subject: [PATCH] feat: runtime improvements for rate-limit and 502/503/404 error (#5975) --- evaluation/utils/shared.py | 7 ++++++- openhands/runtime/impl/remote/remote_runtime.py | 11 +++++++---- openhands/runtime/utils/request.py | 15 +++++++++++++++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py index 1177d99760aa..514e90519724 100644 --- a/evaluation/utils/shared.py +++ b/evaluation/utils/shared.py @@ -376,7 +376,12 @@ def _process_instance_wrapper( + '\n' ) if isinstance( - e, (AgentRuntimeDisconnectedError, AgentRuntimeUnavailableError) + e, + ( + AgentRuntimeDisconnectedError, + AgentRuntimeUnavailableError, + AgentRuntimeNotFoundError, + ), ): runtime_failure_count += 1 msg += f'Runtime disconnected error detected for instance {instance.instance_id}, runtime failure count: {runtime_failure_count}' diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index a4d268c3d3a4..d146e71d198a 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -21,7 +21,6 @@ from openhands.runtime.plugins import PluginRequirement from openhands.runtime.utils.command import get_remote_startup_command from openhands.runtime.utils.request import ( - RequestHTTPError, send_request, ) from openhands.runtime.utils.runtime_build import build_runtime_image @@ -367,10 +366,14 @@ def _send_action_server_request(self, method, url, **kwargs): except requests.Timeout: self.log('error', 'No response received within the timeout period.') raise - except RequestHTTPError as e: - if e.response.status_code in (404, 502): + except requests.HTTPError as e: + if e.response.status_code == 404: + raise AgentRuntimeNotFoundError( + 'Runtime unavailable: System resources may be exhausted due to running commands. This may be fixed by retrying.' + ) from e + elif e.response.status_code == 502: raise AgentRuntimeDisconnectedError( - f'{e.response.status_code} error while connecting to {self.runtime_url}' + 'Runtime disconnected: System resources may be exhausted due to running commands. This may be fixed by retrying.' ) from e elif e.response.status_code == 503: self.log('warning', 'Runtime appears to be paused. Resuming...') diff --git a/openhands/runtime/utils/request.py b/openhands/runtime/utils/request.py index 32566596eed2..a145bd27f4e5 100644 --- a/openhands/runtime/utils/request.py +++ b/openhands/runtime/utils/request.py @@ -2,6 +2,9 @@ from typing import Any import requests +from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential + +from openhands.utils.tenacity_stop import stop_if_should_exit class RequestHTTPError(requests.HTTPError): @@ -18,6 +21,18 @@ def __str__(self) -> str: return s +def is_rate_limit_error(exception): + return ( + isinstance(exception, requests.HTTPError) + and exception.response.status_code == 429 + ) + + +@retry( + retry=retry_if_exception(is_rate_limit_error), + stop=stop_after_attempt(3) | stop_if_should_exit(), + wait=wait_exponential(multiplier=1, min=4, max=60), +) def send_request( session: requests.Session, method: str,