Skip to content

Commit

Permalink
feat(eval): reliability improvement for SWE-Bench eval_infer (#6347)
Browse files Browse the repository at this point in the history
  • Loading branch information
xingyaoww authored Jan 18, 2025
1 parent 4383be1 commit 2b04ee2
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 10 deletions.
5 changes: 4 additions & 1 deletion evaluation/utils/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,9 @@ def _process_instance_wrapper(
)
# e is likely an EvalException, so we can't identify the failure from its type;
# instead, check whether its message indicates a fatal runtime error
if is_fatal_runtime_error(str(e)):
# But e can also be an AgentRuntime*Error (e.g., from swe_bench/eval_infer.py),
# so prefix the message with the exception type name before checking it
_error_str = type(e).__name__ + ': ' + str(e)
if is_fatal_runtime_error(_error_str):
runtime_failure_count += 1
msg += f'Runtime disconnected error detected for instance {instance.instance_id}, runtime failure count: {runtime_failure_count}'
msg += '\n' + '-' * 10 + '\n'
Expand Down Expand Up @@ -531,6 +533,7 @@ def is_fatal_runtime_error(error: str | None) -> bool:
return False

FATAL_RUNTIME_ERRORS = [
AgentRuntimeTimeoutError,
AgentRuntimeUnavailableError,
AgentRuntimeDisconnectedError,
AgentRuntimeNotFoundError,
Expand Down
20 changes: 11 additions & 9 deletions openhands/runtime/impl/remote/remote_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ def __init__(
)
self.session.headers.update({'X-API-Key': self.config.sandbox.api_key})

if self.config.workspace_base is not None:
self.log(
'debug',
'Setting workspace_base is not supported in the remote runtime.',
)

self.runtime_builder = RemoteRuntimeBuilder(
self.config.sandbox.remote_runtime_api_url,
self.config.sandbox.api_key,
Expand All @@ -70,12 +76,6 @@ def __init__(
self.available_hosts: dict[str, int] = {}
self._runtime_initialized: bool = False

if self.config.workspace_base is not None:
self.log(
'debug',
'Setting workspace_base is not supported in the remote runtime.',
)

def log(self, level: str, message: str) -> None:
message = f'[runtime session_id={self.sid} runtime_id={self.runtime_id or "unknown"}] {message}'
getattr(logger, level)(message, stacklevel=2)
Expand Down Expand Up @@ -230,7 +230,7 @@ def _start_runtime(self):
f'Runtime started. URL: {self.runtime_url}',
)
except requests.HTTPError as e:
self.log('error', f'Unable to start runtime: {e}')
self.log('error', f'Unable to start runtime: {str(e)}')
raise AgentRuntimeUnavailableError() from e

def _resume_runtime(self):
Expand Down Expand Up @@ -315,10 +315,11 @@ def _wait_until_alive_impl(self):
self.check_if_alive()
except requests.HTTPError as e:
self.log(
'warning', f"Runtime /alive failed, but pod says it's ready: {e}"
'warning',
f"Runtime /alive failed, but pod says it's ready: {str(e)}",
)
raise AgentRuntimeNotReadyError(
f'Runtime /alive failed to respond with 200: {e}'
f'Runtime /alive failed to respond with 200: {str(e)}'
)
return
elif (
Expand Down Expand Up @@ -363,6 +364,7 @@ def close(self):
):
self.log('debug', 'Runtime stopped.')
except Exception as e:
self.log('error', f'Unable to stop runtime: {str(e)}')
raise e
finally:
super().close()
Expand Down

0 comments on commit 2b04ee2

Please sign in to comment.