From 4eea1286d49b3aad55ca502a37e66ce475559a87 Mon Sep 17 00:00:00 2001 From: Faraz Shamim Date: Tue, 12 Nov 2024 15:43:09 +0545 Subject: [PATCH 01/16] Issue #4399 : Replaced all occurences (#4878) Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com> --- frontend/src/mocks/handlers.ws.ts | 2 +- frontend/src/services/chatService.ts | 4 ++-- frontend/src/types/core/actions.ts | 4 ++-- frontend/src/types/core/variances.ts | 2 +- openhands/agenthub/codeact_agent/codeact_agent.py | 4 ++-- .../codeact_swe_agent/codeact_swe_agent.py | 4 ++-- openhands/controller/state/state.py | 2 +- openhands/events/action/message.py | 14 +++++++++++--- openhands/events/serialization/action.py | 4 ++++ openhands/events/serialization/event.py | 2 +- openhands/server/listen.py | 2 +- openhands/server/session/session.py | 2 +- tests/unit/test_action_serialization.py | 4 ++-- tests/unit/test_json.py | 4 ++-- 14 files changed, 33 insertions(+), 21 deletions(-) diff --git a/frontend/src/mocks/handlers.ws.ts b/frontend/src/mocks/handlers.ws.ts index da57339733f2..04ebeb6cea28 100644 --- a/frontend/src/mocks/handlers.ws.ts +++ b/frontend/src/mocks/handlers.ws.ts @@ -29,7 +29,7 @@ const generateAgentResponse = (message: string): AssistantMessageAction => ({ action: "message", args: { content: message, - images_urls: [], + image_urls: [], wait_for_response: false, }, }); diff --git a/frontend/src/services/chatService.ts b/frontend/src/services/chatService.ts index a19240f8121f..a3bd5ca2991a 100644 --- a/frontend/src/services/chatService.ts +++ b/frontend/src/services/chatService.ts @@ -2,12 +2,12 @@ import ActionType from "#/types/ActionType"; export function createChatMessage( message: string, - images_urls: string[], + image_urls: string[], timestamp: string, ) { const event = { action: ActionType.MESSAGE, - args: { content: message, images_urls, timestamp }, + args: { content: message, image_urls, timestamp }, }; return event; } diff --git a/frontend/src/types/core/actions.ts b/frontend/src/types/core/actions.ts index a94c2da3ef5c..066e4e6e2ac9 100644 --- a/frontend/src/types/core/actions.ts +++ b/frontend/src/types/core/actions.ts @@ -4,7 +4,7 @@ export interface UserMessageAction extends OpenHandsActionEvent<"message"> { source: "user"; args: { content: string; - images_urls: string[]; + image_urls: string[]; }; } @@ -23,7 +23,7 @@ export interface AssistantMessageAction source: "agent"; args: { content: string; - images_urls: string[] | null; + image_urls: string[] | null; wait_for_response: boolean; }; } diff --git a/frontend/src/types/core/variances.ts b/frontend/src/types/core/variances.ts index f8bc9c211b59..2a0b5d4e1d9b 100644 --- a/frontend/src/types/core/variances.ts +++ b/frontend/src/types/core/variances.ts @@ -27,7 +27,7 @@ interface LocalUserMessageAction { action: "message"; args: { content: string; - images_urls: string[]; + image_urls: string[]; }; } diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index c0761c8f6a00..a9cc6e2d95ff 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -196,8 +196,8 @@ def get_action_message( elif isinstance(action, MessageAction): role = 'user' if action.source == 'user' else 'assistant' content = [TextContent(text=action.content or '')] - if self.llm.vision_is_active() and action.images_urls: - content.append(ImageContent(image_urls=action.images_urls)) + if self.llm.vision_is_active() and action.image_urls: + content.append(ImageContent(image_urls=action.image_urls)) return [ Message( role=role, diff --git a/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py b/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py index d5b9a658a920..8d403d357e03 100644 --- a/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py +++ b/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py @@ -95,9 +95,9 @@ def get_action_message(self, action: Action) -> Message | None: if ( self.llm.vision_is_active() and isinstance(action, MessageAction) - and action.images_urls + and action.image_urls ): - content.append(ImageContent(image_urls=action.images_urls)) + content.append(ImageContent(image_urls=action.image_urls)) return Message( role='user' if action.source == 'user' else 'assistant', content=content diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py index 93b5d6ff4cd7..d52844d418b4 100644 --- a/openhands/controller/state/state.py +++ b/openhands/controller/state/state.py @@ -149,7 +149,7 @@ def get_current_user_intent(self) -> tuple[str | None, list[str] | None]: for event in reversed(self.history): if isinstance(event, MessageAction) and event.source == 'user': last_user_message = event.content - last_user_message_image_urls = event.images_urls + last_user_message_image_urls = event.image_urls elif isinstance(event, AgentFinishAction): if last_user_message is not None: return last_user_message, None diff --git a/openhands/events/action/message.py b/openhands/events/action/message.py index 0e3bb26a1cc2..86d7c439e936 100644 --- a/openhands/events/action/message.py +++ b/openhands/events/action/message.py @@ -7,7 +7,7 @@ @dataclass class MessageAction(Action): content: str - images_urls: list[str] | None = None + image_urls: list[str] | None = None wait_for_response: bool = False action: str = ActionType.MESSAGE security_risk: ActionSecurityRisk | None = None @@ -16,10 +16,18 @@ class MessageAction(Action): def message(self) -> str: return self.content + @property + def images_urls(self): + # Deprecated alias for backward compatibility + return self.image_urls + + @images_urls.setter + def images_urls(self, value): + self.image_urls = value def __str__(self) -> str: ret = f'**MessageAction** (source={self.source})\n' ret += f'CONTENT: {self.content}' - if self.images_urls: - for url in self.images_urls: + if self.image_urls: + for url in self.image_urls: ret += f'\nIMAGE_URL: {url}' return ret diff --git a/openhands/events/serialization/action.py b/openhands/events/serialization/action.py index 212295cf1799..defac3b5dda6 100644 --- a/openhands/events/serialization/action.py +++ b/openhands/events/serialization/action.py @@ -66,6 +66,10 @@ def action_from_dict(action: dict) -> Action: if is_confirmed is not None: args['confirmation_state'] = is_confirmed + # images_urls has been renamed to image_urls + if 'images_urls' in args: + args['image_urls'] = args.pop('images_urls') + try: decoded_action = action_class(**args) if 'timeout' in action: diff --git a/openhands/events/serialization/event.py b/openhands/events/serialization/event.py index f0430b4cdd36..78f7940626d4 100644 --- a/openhands/events/serialization/event.py +++ b/openhands/events/serialization/event.py @@ -101,7 +101,7 @@ def event_to_memory(event: 'Event', max_message_chars: int) -> dict: d.pop('cause', None) d.pop('timestamp', None) d.pop('message', None) - d.pop('images_urls', None) + d.pop('image_urls', None) # runnable actions have some extra fields used in the BE/FE, which should not be sent to the LLM if 'args' in d: diff --git a/openhands/server/listen.py b/openhands/server/listen.py index 3e6be135dba2..8b03d88149d0 100644 --- a/openhands/server/listen.py +++ b/openhands/server/listen.py @@ -276,7 +276,7 @@ async def websocket_endpoint(websocket: WebSocket): ``` - Send a message: ```json - {"action": "message", "args": {"content": "Hello, how are you?", "images_urls": ["base64_url1", "base64_url2"]}} + {"action": "message", "args": {"content": "Hello, how are you?", "image_urls": ["base64_url1", "base64_url2"]}} ``` - Write contents to a file: ```json diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py index 293806d0e12e..25f707f15f53 100644 --- a/openhands/server/session/session.py +++ b/openhands/server/session/session.py @@ -163,7 +163,7 @@ async def dispatch(self, data: dict): return event = event_from_dict(data.copy()) # This checks if the model supports images - if isinstance(event, MessageAction) and event.images_urls: + if isinstance(event, MessageAction) and event.image_urls: controller = self.agent_session.controller if controller: if controller.agent.llm.config.disable_vision: diff --git a/tests/unit/test_action_serialization.py b/tests/unit/test_action_serialization.py index 87ea93531ccd..20b484638dbc 100644 --- a/tests/unit/test_action_serialization.py +++ b/tests/unit/test_action_serialization.py @@ -65,7 +65,7 @@ def test_event_props_serialization_deserialization(): 'action': 'message', 'args': { 'content': 'This is a test.', - 'images_urls': None, + 'image_urls': None, 'wait_for_response': False, }, } @@ -77,7 +77,7 @@ def test_message_action_serialization_deserialization(): 'action': 'message', 'args': { 'content': 'This is a test.', - 'images_urls': None, + 'image_urls': None, 'wait_for_response': False, }, } diff --git a/tests/unit/test_json.py b/tests/unit/test_json.py index 91d773ba3f30..883efdfe4cfb 100644 --- a/tests/unit/test_json.py +++ b/tests/unit/test_json.py @@ -17,7 +17,7 @@ def test_event_serialization_deserialization(): 'message': 'This is a test.', 'args': { 'content': 'This is a test.', - 'images_urls': None, + 'image_urls': None, 'wait_for_response': False, }, } @@ -38,7 +38,7 @@ def test_array_serialization_deserialization(): 'message': 'This is a test.', 'args': { 'content': 'This is a test.', - 'images_urls': None, + 'image_urls': None, 'wait_for_response': False, }, } From 04aeccfb69a73f47d4e28a32ff46b13365739b30 Mon Sep 17 00:00:00 2001 From: "sp.wack" <83104063+amanape@users.noreply.github.com> Date: Tue, 12 Nov 2024 12:30:43 +0200 Subject: [PATCH 02/16] fix(frontend): Remove quotes from suggestion (#4921) --- frontend/src/utils/suggestions/repo-suggestions.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/src/utils/suggestions/repo-suggestions.ts b/frontend/src/utils/suggestions/repo-suggestions.ts index ce1680ffcbb5..156931948d85 100644 --- a/frontend/src/utils/suggestions/repo-suggestions.ts +++ b/frontend/src/utils/suggestions/repo-suggestions.ts @@ -13,14 +13,14 @@ const KEY_2 = "Auto-merge Dependabot PRs"; const VALUE_2 = `Please add a GitHub action to this repository which automatically merges pull requests from Dependabot so long as the tests are passing.`; const KEY_3 = "Fix up my README"; -const VALUE_3 = `"Please look at the README and make the following improvements, if they make sense: +const VALUE_3 = `Please look at the README and make the following improvements, if they make sense: * correct any typos that you find * add missing language annotations on codeblocks * if there are references to other files or other sections of the README, turn them into links * make sure the readme has an h1 title towards the top * make sure any existing sections in the readme are appropriately separated with headings -If there are no obvious ways to improve the README, make at least one small change to make the wording clearer or friendlier"`; +If there are no obvious ways to improve the README, make at least one small change to make the wording clearer or friendlier`; const KEY_4 = "Clean up my dependencies"; const VALUE_4 = `Examine the dependencies of the current codebase. Make sure you can run the code and any tests. From de71b7cdb8147741c756cdee249cd5896eeae18e Mon Sep 17 00:00:00 2001 From: "sp.wack" <83104063+amanape@users.noreply.github.com> Date: Tue, 12 Nov 2024 12:50:38 +0200 Subject: [PATCH 03/16] test(frontend): Fix failing e2e test due to mock delay (#4923) --- frontend/src/mocks/handlers.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/frontend/src/mocks/handlers.ts b/frontend/src/mocks/handlers.ts index 16f2f6972d04..5926dd8ac52e 100644 --- a/frontend/src/mocks/handlers.ts +++ b/frontend/src/mocks/handlers.ts @@ -71,8 +71,6 @@ const openHandsHandlers = [ export const handlers = [ ...openHandsHandlers, http.get("https://api.github.com/user/repos", async ({ request }) => { - if (import.meta.env.MODE !== "test") await delay(3500); - const token = request.headers .get("Authorization") ?.replace("Bearer", "") From 32fdcd58e5115974608bc4f4c40b3e16b6a4944f Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 12 Nov 2024 12:24:19 +0100 Subject: [PATCH 04/16] Update litellm (#4927) --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index b32d2c7753db..85e3fe9e55a7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3934,13 +3934,13 @@ types-tqdm = "*" [[package]] name = "litellm" -version = "1.52.3" +version = "1.52.5" description = "Library to easily interface with LLM API providers" optional = false python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ - {file = "litellm-1.52.3-py3-none-any.whl", hash = "sha256:fc8d5d53ba184cd570ae50d9acefa53c521225b62244adedea129794e98828b6"}, - {file = "litellm-1.52.3.tar.gz", hash = "sha256:4718235cbd6dea8db99b08e884a07f7ac7fad4a4b12597e20d8ff622295e1e05"}, + {file = "litellm-1.52.5-py3-none-any.whl", hash = "sha256:38c0f30a849b80c99cfc56f96c4c7563d5ced83f08fd7fc2129011ddc4414ac5"}, + {file = "litellm-1.52.5.tar.gz", hash = "sha256:9708c02983c7ed22fc18c96e167bf1c4ed9672de397d413e7957c216dfc911e6"}, ] [package.dependencies] From d9c5f11046d2783b63a9ac112acfd6c75de1f036 Mon Sep 17 00:00:00 2001 From: "Ryan H. Tran" Date: Tue, 12 Nov 2024 20:26:33 +0700 Subject: [PATCH 05/16] Replace file editor with openhands-aci (#4782) --- .../agent_skills/file_editor/__init__.py | 62 +--- .../plugins/agent_skills/file_editor/base.py | 50 ---- .../plugins/agent_skills/file_editor/impl.py | 279 ------------------ .../plugins/agent_skills/file_editor/run.py | 44 --- poetry.lock | 35 ++- pyproject.toml | 1 + tests/unit/test_agent_skill.py | 218 +------------- 7 files changed, 41 insertions(+), 648 deletions(-) delete mode 100644 openhands/runtime/plugins/agent_skills/file_editor/base.py delete mode 100644 openhands/runtime/plugins/agent_skills/file_editor/impl.py delete mode 100644 openhands/runtime/plugins/agent_skills/file_editor/run.py diff --git a/openhands/runtime/plugins/agent_skills/file_editor/__init__.py b/openhands/runtime/plugins/agent_skills/file_editor/__init__.py index f6d3eb39a019..06d5bcca6325 100644 --- a/openhands/runtime/plugins/agent_skills/file_editor/__init__.py +++ b/openhands/runtime/plugins/agent_skills/file_editor/__init__.py @@ -1,60 +1,8 @@ -"""This file contains a global singleton of the `EditTool` class as well as raw functions that expose its __call__.""" - -from .base import CLIResult, ToolError, ToolResult -from .impl import Command, EditTool - -_GLOBAL_EDITOR = EditTool() - - -def _make_api_tool_result( - result: ToolResult, -) -> str: - """Convert an agent ToolResult to an API ToolResultBlockParam.""" - tool_result_content: str = '' - is_error = False - if result.error: - is_error = True - tool_result_content = _maybe_prepend_system_tool_result(result, result.error) - else: - assert result.output, 'Expecting output in file_editor' - tool_result_content = _maybe_prepend_system_tool_result(result, result.output) - assert ( - not result.base64_image - ), 'Not expecting base64_image as output in file_editor' - if is_error: - return f'ERROR:\n{tool_result_content}' - else: - return tool_result_content - - -def _maybe_prepend_system_tool_result(result: ToolResult, result_text: str) -> str: - if result.system: - result_text = f'{result.system}\n{result_text}' - return result_text - - -def file_editor( - command: Command, - path: str, - file_text: str | None = None, - view_range: list[int] | None = None, - old_str: str | None = None, - new_str: str | None = None, - insert_line: int | None = None, -) -> str: - try: - result: CLIResult = _GLOBAL_EDITOR( - command=command, - path=path, - file_text=file_text, - view_range=view_range, - old_str=old_str, - new_str=new_str, - insert_line=insert_line, - ) - except ToolError as e: - return _make_api_tool_result(ToolResult(error=e.message)) - return _make_api_tool_result(result) +"""This file imports a global singleton of the `EditTool` class as well as raw functions that expose +its __call__. +The implementation of the `EditTool` class can be found at: https://github.com/All-Hands-AI/openhands-aci/. +""" +from openhands_aci.editor import file_editor __all__ = ['file_editor'] diff --git a/openhands/runtime/plugins/agent_skills/file_editor/base.py b/openhands/runtime/plugins/agent_skills/file_editor/base.py deleted file mode 100644 index 6ad2a4b5b69c..000000000000 --- a/openhands/runtime/plugins/agent_skills/file_editor/base.py +++ /dev/null @@ -1,50 +0,0 @@ -from dataclasses import dataclass, fields, replace - - -@dataclass(kw_only=True, frozen=True) -class ToolResult: - """Represents the result of a tool execution.""" - - output: str | None = None - error: str | None = None - base64_image: str | None = None - system: str | None = None - - def __bool__(self): - return any(getattr(self, field.name) for field in fields(self)) - - def __add__(self, other: 'ToolResult'): - def combine_fields( - field: str | None, other_field: str | None, concatenate: bool = True - ): - if field and other_field: - if concatenate: - return field + other_field - raise ValueError('Cannot combine tool results') - return field or other_field - - return ToolResult( - output=combine_fields(self.output, other.output), - error=combine_fields(self.error, other.error), - base64_image=combine_fields(self.base64_image, other.base64_image, False), - system=combine_fields(self.system, other.system), - ) - - def replace(self, **kwargs): - """Returns a new ToolResult with the given fields replaced.""" - return replace(self, **kwargs) - - -class CLIResult(ToolResult): - """A ToolResult that can be rendered as a CLI output.""" - - -class ToolFailure(ToolResult): - """A ToolResult that represents a failure.""" - - -class ToolError(Exception): - """Raised when a tool encounters an error.""" - - def __init__(self, message): - self.message = message diff --git a/openhands/runtime/plugins/agent_skills/file_editor/impl.py b/openhands/runtime/plugins/agent_skills/file_editor/impl.py deleted file mode 100644 index 613c550e7a80..000000000000 --- a/openhands/runtime/plugins/agent_skills/file_editor/impl.py +++ /dev/null @@ -1,279 +0,0 @@ -from collections import defaultdict -from pathlib import Path -from typing import Literal, get_args - -from .base import CLIResult, ToolError, ToolResult -from .run import maybe_truncate, run - -Command = Literal[ - 'view', - 'create', - 'str_replace', - 'insert', - 'undo_edit', -] -SNIPPET_LINES: int = 4 - - -class EditTool: - """ - An filesystem editor tool that allows the agent to view, create, and edit files. - The tool parameters are defined by Anthropic and are not editable. - - Original implementation: https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/computer_use_demo/tools/edit.py - """ - - _file_history: dict[Path, list[str]] - - def __init__(self): - self._file_history = defaultdict(list) - super().__init__() - - def __call__( - self, - *, - command: Command, - path: str, - file_text: str | None = None, - view_range: list[int] | None = None, - old_str: str | None = None, - new_str: str | None = None, - insert_line: int | None = None, - **kwargs, - ): - _path = Path(path) - self.validate_path(command, _path) - if command == 'view': - return self.view(_path, view_range) - elif command == 'create': - if file_text is None: - raise ToolError('Parameter `file_text` is required for command: create') - self.write_file(_path, file_text) - self._file_history[_path].append(file_text) - return ToolResult(output=f'File created successfully at: {_path}') - elif command == 'str_replace': - if old_str is None: - raise ToolError( - 'Parameter `old_str` is required for command: str_replace' - ) - return self.str_replace(_path, old_str, new_str) - elif command == 'insert': - if insert_line is None: - raise ToolError( - 'Parameter `insert_line` is required for command: insert' - ) - if new_str is None: - raise ToolError('Parameter `new_str` is required for command: insert') - return self.insert(_path, insert_line, new_str) - elif command == 'undo_edit': - return self.undo_edit(_path) - raise ToolError( - f'Unrecognized command {command}. The allowed commands for the {self.name} tool are: {", ".join(get_args(Command))}' - ) - - def validate_path(self, command: str, path: Path): - """ - Check that the path/command combination is valid. - """ - # Check if its an absolute path - if not path.is_absolute(): - suggested_path = Path('') / path - raise ToolError( - f'The path {path} is not an absolute path, it should start with `/`. Maybe you meant {suggested_path}?' - ) - # Check if path exists - if not path.exists() and command != 'create': - raise ToolError( - f'The path {path} does not exist. Please provide a valid path.' - ) - if path.exists() and command == 'create': - raise ToolError( - f'File already exists at: {path}. Cannot overwrite files using command `create`.' - ) - # Check if the path points to a directory - if path.is_dir(): - if command != 'view': - raise ToolError( - f'The path {path} is a directory and only the `view` command can be used on directories' - ) - - def view(self, path: Path, view_range: list[int] | None = None): - """Implement the view command""" - if path.is_dir(): - if view_range: - raise ToolError( - 'The `view_range` parameter is not allowed when `path` points to a directory.' - ) - - _, stdout, stderr = run(rf"find {path} -maxdepth 2 -not -path '*/\.*'") - if not stderr: - stdout = f"Here's the files and directories up to 2 levels deep in {path}, excluding hidden items:\n{stdout}\n" - return CLIResult(output=stdout, error=stderr) - - file_content = self.read_file(path) - init_line = 1 - if view_range: - if len(view_range) != 2 or not all(isinstance(i, int) for i in view_range): - raise ToolError( - 'Invalid `view_range`. It should be a list of two integers.' - ) - file_lines = file_content.split('\n') - n_lines_file = len(file_lines) - init_line, final_line = view_range - if init_line < 1 or init_line > n_lines_file: - raise ToolError( - f"Invalid `view_range`: {view_range}. It's first element `{init_line}` should be within the range of lines of the file: {[1, n_lines_file]}" - ) - if final_line > n_lines_file: - raise ToolError( - f"Invalid `view_range`: {view_range}. It's second element `{final_line}` should be smaller than the number of lines in the file: `{n_lines_file}`" - ) - if final_line != -1 and final_line < init_line: - raise ToolError( - f"Invalid `view_range`: {view_range}. It's second element `{final_line}` should be larger or equal than its first `{init_line}`" - ) - - if final_line == -1: - file_content = '\n'.join(file_lines[init_line - 1 :]) - else: - file_content = '\n'.join(file_lines[init_line - 1 : final_line]) - - return CLIResult( - output=self._make_output(file_content, str(path), init_line=init_line) - ) - - def str_replace(self, path: Path, old_str: str, new_str: str | None): - """Implement the str_replace command, which replaces old_str with new_str in the file content""" - # Read the file content - file_content = self.read_file(path).expandtabs() - old_str = old_str.expandtabs() - new_str = new_str.expandtabs() if new_str is not None else '' - - # Check if old_str is unique in the file - occurrences = file_content.count(old_str) - if occurrences == 0: - raise ToolError( - f'No replacement was performed, old_str `{old_str}` did not appear verbatim in {path}.' - ) - elif occurrences > 1: - file_content_lines = file_content.split('\n') - lines = [ - idx + 1 - for idx, line in enumerate(file_content_lines) - if old_str in line - ] - raise ToolError( - f'No replacement was performed. Multiple occurrences of old_str `{old_str}` in lines {lines}. Please ensure it is unique' - ) - - # Replace old_str with new_str - new_file_content = file_content.replace(old_str, new_str) - - # Write the new content to the file - self.write_file(path, new_file_content) - - # Save the content to history - self._file_history[path].append(file_content) - - # Create a snippet of the edited section - replacement_line = file_content.split(old_str)[0].count('\n') - start_line = max(0, replacement_line - SNIPPET_LINES) - end_line = replacement_line + SNIPPET_LINES + new_str.count('\n') - snippet = '\n'.join(new_file_content.split('\n')[start_line : end_line + 1]) - - # Prepare the success message - success_msg = f'The file {path} has been edited. ' - success_msg += self._make_output( - snippet, f'a snippet of {path}', start_line + 1 - ) - success_msg += 'Review the changes and make sure they are as expected. Edit the file again if necessary.' - - return CLIResult(output=success_msg) - - def insert(self, path: Path, insert_line: int, new_str: str): - """Implement the insert command, which inserts new_str at the specified line in the file content.""" - file_text = self.read_file(path).expandtabs() - new_str = new_str.expandtabs() - file_text_lines = file_text.split('\n') - n_lines_file = len(file_text_lines) - - if insert_line < 0 or insert_line > n_lines_file: - raise ToolError( - f'Invalid `insert_line` parameter: {insert_line}. It should be within the range of lines of the file: {[0, n_lines_file]}' - ) - - new_str_lines = new_str.split('\n') - new_file_text_lines = ( - file_text_lines[:insert_line] - + new_str_lines - + file_text_lines[insert_line:] - ) - snippet_lines = ( - file_text_lines[max(0, insert_line - SNIPPET_LINES) : insert_line] - + new_str_lines - + file_text_lines[insert_line : insert_line + SNIPPET_LINES] - ) - - new_file_text = '\n'.join(new_file_text_lines) - snippet = '\n'.join(snippet_lines) - - self.write_file(path, new_file_text) - self._file_history[path].append(file_text) - - success_msg = f'The file {path} has been edited. ' - success_msg += self._make_output( - snippet, - 'a snippet of the edited file', - max(1, insert_line - SNIPPET_LINES + 1), - ) - success_msg += 'Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the file again if necessary.' - return CLIResult(output=success_msg) - - def undo_edit(self, path: Path): - """Implement the undo_edit command.""" - if not self._file_history[path]: - raise ToolError(f'No edit history found for {path}.') - - old_text = self._file_history[path].pop() - self.write_file(path, old_text) - - return CLIResult( - output=f'Last edit to {path} undone successfully. {self._make_output(old_text, str(path))}' - ) - - def read_file(self, path: Path): - """Read the content of a file from a given path; raise a ToolError if an error occurs.""" - try: - return path.read_text() - except Exception as e: - raise ToolError(f'Ran into {e} while trying to read {path}') from None - - def write_file(self, path: Path, file: str): - """Write the content of a file to a given path; raise a ToolError if an error occurs.""" - try: - path.write_text(file) - except Exception as e: - raise ToolError(f'Ran into {e} while trying to write to {path}') from None - - def _make_output( - self, - file_content: str, - file_descriptor: str, - init_line: int = 1, - expand_tabs: bool = True, - ): - """Generate output for the CLI based on the content of a file.""" - file_content = maybe_truncate(file_content) - if expand_tabs: - file_content = file_content.expandtabs() - file_content = '\n'.join( - [ - f'{i + init_line:6}\t{line}' - for i, line in enumerate(file_content.split('\n')) - ] - ) - return ( - f"Here's the result of running `cat -n` on {file_descriptor}:\n" - + file_content - + '\n' - ) diff --git a/openhands/runtime/plugins/agent_skills/file_editor/run.py b/openhands/runtime/plugins/agent_skills/file_editor/run.py deleted file mode 100644 index 29c604256f80..000000000000 --- a/openhands/runtime/plugins/agent_skills/file_editor/run.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Utility to run shell commands asynchronously with a timeout.""" - -import subprocess -import time - -TRUNCATED_MESSAGE: str = 'To save on context only part of this file has been shown to you. You should retry this tool after you have searched inside the file with `grep -n` in order to find the line numbers of what you are looking for.' -MAX_RESPONSE_LEN: int = 16000 - - -def maybe_truncate(content: str, truncate_after: int | None = MAX_RESPONSE_LEN): - """Truncate content and append a notice if content exceeds the specified length.""" - return ( - content - if not truncate_after or len(content) <= truncate_after - else content[:truncate_after] + TRUNCATED_MESSAGE - ) - - -def run( - cmd: str, - timeout: float | None = 120.0, # seconds - truncate_after: int | None = MAX_RESPONSE_LEN, -): - """Run a shell command synchronously with a timeout.""" - start_time = time.time() - - try: - process = subprocess.Popen( - cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True - ) - - stdout, stderr = process.communicate(timeout=timeout) - - return ( - process.returncode or 0, - maybe_truncate(stdout, truncate_after=truncate_after), - maybe_truncate(stderr, truncate_after=truncate_after), - ) - except subprocess.TimeoutExpired: - process.kill() - elapsed_time = time.time() - start_time - raise TimeoutError( - f"Command '{cmd}' timed out after {elapsed_time:.2f} seconds" - ) diff --git a/poetry.lock b/poetry.lock index 85e3fe9e55a7..7b45e1b62efc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1562,6 +1562,17 @@ files = [ {file = "dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd"}, ] +[[package]] +name = "diskcache" +version = "5.6.3" +description = "Disk Cache -- Disk and file backed persistent cache." +optional = false +python-versions = ">=3" +files = [ + {file = "diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19"}, + {file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"}, +] + [[package]] name = "distlib" version = "0.3.9" @@ -5629,6 +5640,28 @@ files = [ [package.dependencies] numpy = {version = ">=1.26.0", markers = "python_version >= \"3.12\""} +[[package]] +name = "openhands-aci" +version = "0.1.0" +description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands." +optional = false +python-versions = "<4.0,>=3.12" +files = [ + {file = "openhands_aci-0.1.0-py3-none-any.whl", hash = "sha256:f28e5a32e394d1e643f79bf8af27fe44d039cb71729d590f9f3ee0c23c075f00"}, + {file = "openhands_aci-0.1.0.tar.gz", hash = "sha256:babc55f516efbb27eb7e528662e14b75c902965c48a110408fda824b83ea4461"}, +] + +[package.dependencies] +diskcache = ">=5.6.3,<6.0.0" +gitpython = "*" +grep-ast = "0.3.3" +litellm = "*" +networkx = "*" +numpy = "*" +pandas = "*" +scipy = "*" +tree-sitter = "0.21.3" + [[package]] name = "opentelemetry-api" version = "1.25.0" @@ -10178,4 +10211,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "245fd4cd56a3c95b2dd4f3a06251f7de82ad0300de7349f0710aac1f92a151b7" +content-hash = "a552f630dfdb9221eda6932e71e67a935c52ebfe4388ec9ef4b3245e7df2f82b" diff --git a/pyproject.toml b/pyproject.toml index 6430f70d720d..a121ec388e23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ opentelemetry-exporter-otlp-proto-grpc = "1.25.0" modal = "^0.64.145" runloop-api-client = "0.7.0" pygithub = "^2.5.0" +openhands-aci = "^0.1.0" [tool.poetry.group.llama-index.dependencies] llama-index = "*" diff --git a/tests/unit/test_agent_skill.py b/tests/unit/test_agent_skill.py index f619bc00bff0..6079eb659aea 100644 --- a/tests/unit/test_agent_skill.py +++ b/tests/unit/test_agent_skill.py @@ -5,7 +5,6 @@ import docx import pytest -from openhands.runtime.plugins.agent_skills.agentskills import file_editor from openhands.runtime.plugins.agent_skills.file_ops.file_ops import ( WINDOW, _print_window, @@ -781,7 +780,7 @@ def test_file_editor_create(tmp_path): assert result is not None assert ( result - == f'ERROR:\nThe path {random_file} does not exist. Please provide a valid path.' + == f'ERROR:\nInvalid `path` parameter: {random_file}. The path {random_file} does not exist. Please provide a valid path.' ) # create a file @@ -800,218 +799,3 @@ def test_file_editor_create(tmp_path): 1\tLine 6 """.strip().split('\n') ) - - -@pytest.fixture -def setup_file(tmp_path): - random_dir = tmp_path / 'dir_1' - random_dir.mkdir() - random_file = random_dir / 'a.txt' - return random_file - - -def test_file_editor_create_and_view(setup_file): - random_file = setup_file - - # Test create command - result = file_editor( - command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3' - ) - print(result) - assert result == f'File created successfully at: {random_file}' - - # Test view command for file - result = file_editor(command='view', path=str(random_file)) - print(result) - assert ( - result.strip().split('\n') - == f"""Here's the result of running `cat -n` on {random_file}: - 1\tLine 1 - 2\tLine 2 - 3\tLine 3 -""".strip().split('\n') - ) - - # Test view command for directory - result = file_editor(command='view', path=str(random_file.parent)) - assert f'{random_file.parent}' in result - assert f'{random_file.name}' in result - - -def test_file_editor_view_nonexistent(setup_file): - random_file = setup_file - - # Test view command for non-existent file - result = file_editor(command='view', path=str(random_file)) - assert ( - result - == f'ERROR:\nThe path {random_file} does not exist. Please provide a valid path.' - ) - - -def test_file_editor_str_replace(setup_file): - random_file = setup_file - file_editor( - command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3' - ) - - # Test str_replace command - result = file_editor( - command='str_replace', - path=str(random_file), - old_str='Line 2', - new_str='New Line 2', - ) - print(result) - assert ( - result - == f"""The file {random_file} has been edited. Here's the result of running `cat -n` on a snippet of {random_file}: - 1\tLine 1 - 2\tNew Line 2 - 3\tLine 3 -Review the changes and make sure they are as expected. Edit the file again if necessary.""" - ) - - # View the file after str_replace - result = file_editor(command='view', path=str(random_file)) - print(result) - assert ( - result.strip().split('\n') - == f"""Here's the result of running `cat -n` on {random_file}: - 1\tLine 1 - 2\tNew Line 2 - 3\tLine 3 -""".strip().split('\n') - ) - - -def test_file_editor_str_replace_non_existent(setup_file): - random_file = setup_file - file_editor( - command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3' - ) - - # Test str_replace with non-existent string - result = file_editor( - command='str_replace', - path=str(random_file), - old_str='Non-existent Line', - new_str='New Line', - ) - print(result) - assert ( - result - == f'ERROR:\nNo replacement was performed, old_str `Non-existent Line` did not appear verbatim in {random_file}.' - ) - - -def test_file_editor_insert(setup_file): - random_file = setup_file - file_editor( - command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3' - ) - - # Test insert command - result = file_editor( - command='insert', path=str(random_file), insert_line=2, new_str='Inserted Line' - ) - print(result) - assert ( - result - == f"""The file {random_file} has been edited. Here's the result of running `cat -n` on a snippet of the edited file: - 1\tLine 1 - 2\tLine 2 - 3\tInserted Line - 4\tLine 3 -Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the file again if necessary.""" - ) - - # View the file after insert - result = file_editor(command='view', path=str(random_file)) - assert ( - result.strip().split('\n') - == f"""Here's the result of running `cat -n` on {random_file}: - 1\tLine 1 - 2\tLine 2 - 3\tInserted Line - 4\tLine 3 -""".strip().split('\n') - ) - - -def test_file_editor_insert_invalid_line(setup_file): - random_file = setup_file - file_editor( - command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3' - ) - - # Test insert with invalid line number - result = file_editor( - command='insert', - path=str(random_file), - insert_line=10, - new_str='Invalid Insert', - ) - assert ( - result - == 'ERROR:\nInvalid `insert_line` parameter: 10. It should be within the range of lines of the file: [0, 3]' - ) - - -def test_file_editor_undo_edit(setup_file): - random_file = setup_file - result = file_editor( - command='create', path=str(random_file), file_text='Line 1\nLine 2\nLine 3' - ) - print(result) - assert result == f"""File created successfully at: {random_file}""" - - # Make an edit - result = file_editor( - command='str_replace', - path=str(random_file), - old_str='Line 2', - new_str='New Line 2', - ) - print(result) - assert ( - result - == f"""The file {random_file} has been edited. Here's the result of running `cat -n` on a snippet of {random_file}: - 1\tLine 1 - 2\tNew Line 2 - 3\tLine 3 -Review the changes and make sure they are as expected. Edit the file again if necessary.""" - ) - - # Test undo_edit command - result = file_editor(command='undo_edit', path=str(random_file)) - print(result) - assert ( - result - == f"""Last edit to {random_file} undone successfully. Here's the result of running `cat -n` on {random_file}: - 1\tLine 1 - 2\tLine 2 - 3\tLine 3 -""" - ) - - # View the file after undo_edit - result = file_editor(command='view', path=str(random_file)) - assert ( - result.strip().split('\n') - == f"""Here's the result of running `cat -n` on {random_file}: - 1\tLine 1 - 2\tLine 2 - 3\tLine 3 -""".strip().split('\n') - ) - - -def test_file_editor_undo_edit_no_edits(tmp_path): - random_file = tmp_path / 'a.txt' - random_file.touch() - - # Test undo_edit when no edits have been made - result = file_editor(command='undo_edit', path=str(random_file)) - print(result) - assert result == f'ERROR:\nNo edit history found for {random_file}.' From 0633a9929876c4fc9964b8566edb3ae6076f24d8 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 12 Nov 2024 09:03:02 -0500 Subject: [PATCH 06/16] Fix resume runtime after a pause (#4904) --- .../runtime/impl/remote/remote_runtime.py | 42 +++++++++++++++---- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index 6552da7c22d5..f0a020358a76 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -138,6 +138,7 @@ def _check_existing_runtime(self) -> bool: response = self._send_request( 'GET', f'{self.config.sandbox.remote_runtime_api_url}/sessions/{self.sid}', + is_retry=False, timeout=5, ) except requests.HTTPError as e: @@ -168,6 +169,7 @@ def _build_runtime(self): response = self._send_request( 'GET', f'{self.config.sandbox.remote_runtime_api_url}/registry_prefix', + is_retry=False, timeout=10, ) response_json = response.json() @@ -198,6 +200,7 @@ def _build_runtime(self): response = self._send_request( 'GET', f'{self.config.sandbox.remote_runtime_api_url}/image_exists', + is_retry=False, params={'image': self.container_image}, timeout=10, ) @@ -234,6 +237,7 @@ def _start_runtime(self): response = self._send_request( 'POST', f'{self.config.sandbox.remote_runtime_api_url}/start', + is_retry=False, json=start_request, ) self._parse_runtime_response(response) @@ -246,6 +250,7 @@ def _resume_runtime(self): self._send_request( 'POST', f'{self.config.sandbox.remote_runtime_api_url}/resume', + is_retry=False, json={'runtime_id': self.runtime_id}, timeout=30, ) @@ -283,14 +288,12 @@ def _wait_until_alive_impl(self): assert runtime_data['runtime_id'] == self.runtime_id assert 'pod_status' in runtime_data pod_status = runtime_data['pod_status'] + self.log('debug', runtime_data) + self.log('debug', f'Pod status: {pod_status}') # FIXME: We should fix it at the backend of /start endpoint, make sure # the pod is created before returning the response. # Retry a period of time to give the cluster time to start the pod - if pod_status == 'Not Found': - raise RuntimeNotReadyError( - f'Runtime (ID={self.runtime_id}) is not yet ready. Status: {pod_status}' - ) if pod_status == 'Ready': try: self._send_request( @@ -305,12 +308,23 @@ def _wait_until_alive_impl(self): f'Runtime /alive failed to respond with 200: {e}' ) return - if pod_status in ('Failed', 'Unknown'): + elif ( + pod_status == 'Not Found' + or pod_status == 'Pending' + or pod_status == 'Running' + ): # nb: Running is not yet Ready + raise RuntimeNotReadyError( + f'Runtime (ID={self.runtime_id}) is not yet ready. Status: {pod_status}' + ) + elif pod_status in ('Failed', 'Unknown'): # clean up the runtime self.close() raise RuntimeError( f'Runtime (ID={self.runtime_id}) failed to start. Current status: {pod_status}' ) + else: + # Maybe this should be a hard failure, but passing through in case the API changes + self.log('warning', f'Unknown pod status: {pod_status}') self.log( 'debug', @@ -327,6 +341,7 @@ def close(self, timeout: int = 10): response = self._send_request( 'POST', f'{self.config.sandbox.remote_runtime_api_url}/stop', + is_retry=False, json={'runtime_id': self.runtime_id}, timeout=timeout, ) @@ -342,7 +357,7 @@ def close(self, timeout: int = 10): finally: self.session.close() - def run_action(self, action: Action) -> Observation: + def run_action(self, action: Action, is_retry: bool = False) -> Observation: if action.timeout is None: action.timeout = self.config.sandbox.timeout if isinstance(action, FileEditAction): @@ -367,6 +382,7 @@ def run_action(self, action: Action) -> Observation: response = self._send_request( 'POST', f'{self.runtime_url}/execute_action', + is_retry=False, json=request_body, # wait a few more seconds to get the timeout error from client side timeout=action.timeout + 5, @@ -380,7 +396,7 @@ def run_action(self, action: Action) -> Observation: ) return obs - def _send_request(self, method, url, **kwargs): + def _send_request(self, method, url, is_retry=False, **kwargs): is_runtime_request = self.runtime_url and self.runtime_url in url try: return send_request(self.session, method, url, **kwargs) @@ -392,6 +408,15 @@ def _send_request(self, method, url, **kwargs): raise RuntimeDisconnectedError( f'404 error while connecting to {self.runtime_url}' ) + elif is_runtime_request and e.response.status_code == 503: + if not is_retry: + self.log('warning', 'Runtime appears to be paused. Resuming...') + self._resume_runtime() + self._wait_until_alive() + return self._send_request(method, url, True, **kwargs) + else: + raise e + else: raise e @@ -444,6 +469,7 @@ def copy_to( response = self._send_request( 'POST', f'{self.runtime_url}/upload_file', + is_retry=False, files=upload_data, params=params, timeout=300, @@ -467,6 +493,7 @@ def list_files(self, path: str | None = None) -> list[str]: response = self._send_request( 'POST', f'{self.runtime_url}/list_files', + is_retry=False, json=data, timeout=30, ) @@ -480,6 +507,7 @@ def copy_from(self, path: str) -> Path: response = self._send_request( 'GET', f'{self.runtime_url}/download_files', + is_retry=False, params=params, stream=True, timeout=30, From b54724ac3fed21a6a4022b64718c423a29e3aca5 Mon Sep 17 00:00:00 2001 From: OpenHands Date: Tue, 12 Nov 2024 10:42:13 -0500 Subject: [PATCH 07/16] Fix issue #4931: Make use of microagents configurable in `codeact_agent` (#4932) Co-authored-by: Graham Neubig --- .../agenthub/codeact_agent/codeact_agent.py | 6 +- openhands/core/config/agent_config.py | 4 ++ openhands/utils/prompt.py | 13 +++- tests/unit/test_prompt_manager.py | 60 +++++++++++++++++++ 4 files changed, 79 insertions(+), 4 deletions(-) diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index a9cc6e2d95ff..629c6edfb18b 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -103,15 +103,17 @@ def __init__( f'TOOLS loaded for CodeActAgent: {json.dumps(self.tools, indent=2)}' ) self.prompt_manager = PromptManager( - microagent_dir=os.path.join(os.path.dirname(__file__), 'micro'), + microagent_dir=os.path.join(os.path.dirname(__file__), 'micro') if self.config.use_microagents else None, prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts', 'tools'), + disabled_microagents=self.config.disabled_microagents, ) else: self.action_parser = CodeActResponseParser() self.prompt_manager = PromptManager( - microagent_dir=os.path.join(os.path.dirname(__file__), 'micro'), + microagent_dir=os.path.join(os.path.dirname(__file__), 'micro') if self.config.use_microagents else None, prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts', 'default'), agent_skills_docs=AgentSkillsRequirement.documentation, + disabled_microagents=self.config.disabled_microagents, ) self.pending_actions: deque[Action] = deque() diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py index bfaf978e5703..c9704b32a610 100644 --- a/openhands/core/config/agent_config.py +++ b/openhands/core/config/agent_config.py @@ -16,6 +16,8 @@ class AgentConfig: memory_enabled: Whether long-term memory (embeddings) is enabled. memory_max_threads: The maximum number of threads indexing at the same time for embeddings. llm_config: The name of the llm config to use. If specified, this will override global llm config. + use_microagents: Whether to use microagents at all. Default is True. + disabled_microagents: A list of microagents to disable. Default is None. """ function_calling: bool = True @@ -26,6 +28,8 @@ class AgentConfig: memory_enabled: bool = False memory_max_threads: int = 3 llm_config: str | None = None + use_microagents: bool = True + disabled_microagents: list[str] | None = None def defaults_to_dict(self) -> dict: """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional.""" diff --git a/openhands/utils/prompt.py b/openhands/utils/prompt.py index 6fb3ef8f63ee..5d0d92968d35 100644 --- a/openhands/utils/prompt.py +++ b/openhands/utils/prompt.py @@ -19,13 +19,16 @@ class PromptManager: Attributes: prompt_dir (str): Directory containing prompt templates. agent_skills_docs (str): Documentation of agent skills. + microagent_dir (str): Directory containing microagent specifications. + disabled_microagents (list[str] | None): List of microagents to disable. If None, all microagents are enabled. """ def __init__( self, prompt_dir: str, - microagent_dir: str = '', + microagent_dir: str | None = None, agent_skills_docs: str = '', + disabled_microagents: list[str] | None = None, ): self.prompt_dir: str = prompt_dir self.agent_skills_docs: str = agent_skills_docs @@ -43,9 +46,15 @@ def __init__( ] for microagent_file in microagent_files: microagent = MicroAgent(microagent_file) - self.microagents[microagent.name] = microagent + if ( + disabled_microagents is None + or microagent.name not in disabled_microagents + ): + self.microagents[microagent.name] = microagent def _load_template(self, template_name: str) -> Template: + if self.prompt_dir is None: + raise ValueError('Prompt directory is not set') template_path = os.path.join(self.prompt_dir, f'{template_name}.j2') if not os.path.exists(template_path): raise FileNotFoundError(f'Prompt file {template_path} not found') diff --git a/tests/unit/test_prompt_manager.py b/tests/unit/test_prompt_manager.py index 1cabb6ca6a1a..cb555e6506ae 100644 --- a/tests/unit/test_prompt_manager.py +++ b/tests/unit/test_prompt_manager.py @@ -119,3 +119,63 @@ def test_prompt_manager_template_rendering(prompt_dir, agent_skills_docs): # Clean up temporary files os.remove(os.path.join(prompt_dir, 'system_prompt.j2')) os.remove(os.path.join(prompt_dir, 'user_prompt.j2')) + + +def test_prompt_manager_disabled_microagents(prompt_dir, agent_skills_docs): + # Create test microagent files + microagent1_name = 'test_microagent1' + microagent2_name = 'test_microagent2' + microagent1_content = """ +--- +name: Test Microagent 1 +agent: CodeActAgent +triggers: +- test1 +--- + +Test microagent 1 content +""" + microagent2_content = """ +--- +name: Test Microagent 2 +agent: CodeActAgent +triggers: +- test2 +--- + +Test microagent 2 content +""" + + # Create temporary micro agent files + os.makedirs(os.path.join(prompt_dir, 'micro'), exist_ok=True) + with open(os.path.join(prompt_dir, 'micro', f'{microagent1_name}.md'), 'w') as f: + f.write(microagent1_content) + with open(os.path.join(prompt_dir, 'micro', f'{microagent2_name}.md'), 'w') as f: + f.write(microagent2_content) + + # Test that specific microagents can be disabled + manager = PromptManager( + prompt_dir=prompt_dir, + microagent_dir=os.path.join(prompt_dir, 'micro'), + agent_skills_docs=agent_skills_docs, + disabled_microagents=['Test Microagent 1'], + ) + + assert len(manager.microagents) == 1 + assert 'Test Microagent 2' in manager.microagents + assert 'Test Microagent 1' not in manager.microagents + + # Test that all microagents are enabled by default + manager = PromptManager( + prompt_dir=prompt_dir, + microagent_dir=os.path.join(prompt_dir, 'micro'), + agent_skills_docs=agent_skills_docs, + ) + + assert len(manager.microagents) == 2 + assert 'Test Microagent 1' in manager.microagents + assert 'Test Microagent 2' in manager.microagents + + # Clean up temporary files + os.remove(os.path.join(prompt_dir, 'micro', f'{microagent1_name}.md')) + os.remove(os.path.join(prompt_dir, 'micro', f'{microagent2_name}.md')) From 910b283ac2f6b3896e174cb77377c5ab6900da22 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Tue, 12 Nov 2024 09:53:22 -0600 Subject: [PATCH 08/16] fix(llm): bedrock throw errors if content contains empty string (#4935) --- openhands/core/message.py | 7 +++++++ openhands/llm/debug_mixin.py | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/openhands/core/message.py b/openhands/core/message.py index e538bec44bbe..06efb7d57bbf 100644 --- a/openhands/core/message.py +++ b/openhands/core/message.py @@ -98,6 +98,13 @@ def _list_serializer(self): content.extend(d) ret: dict = {'content': content, 'role': self.role} + # pop content if it's empty + if not content or ( + len(content) == 1 + and content[0]['type'] == 'text' + and content[0]['text'] == '' + ): + ret.pop('content') if role_tool_with_prompt_caching: ret['cache_control'] = {'type': 'ephemeral'} diff --git a/openhands/llm/debug_mixin.py b/openhands/llm/debug_mixin.py index 1eecc301df7d..f386613d6fba 100644 --- a/openhands/llm/debug_mixin.py +++ b/openhands/llm/debug_mixin.py @@ -14,7 +14,9 @@ def log_prompt(self, messages: list[dict[str, Any]] | dict[str, Any]): messages = messages if isinstance(messages, list) else [messages] debug_message = MESSAGE_SEPARATOR.join( - self._format_message_content(msg) for msg in messages if msg['content'] + self._format_message_content(msg) + for msg in messages + if msg.get('content', None) ) if debug_message: From 17f4c6e1a941cb6ea1374daa7f39398212d2eae9 Mon Sep 17 00:00:00 2001 From: Robert Brennan Date: Tue, 12 Nov 2024 11:20:36 -0500 Subject: [PATCH 09/16] Refactor sessions a bit, and fix issue where runtimes get killed (#4900) --- .github/workflows/ghcr-build.yml | 2 - docs/modules/usage/runtimes.md | 2 +- evaluation/miniwob/run_infer.py | 2 +- evaluation/scienceagentbench/run_infer.py | 2 +- evaluation/swe_bench/run_infer.py | 2 +- openhands/core/config/sandbox_config.py | 2 +- .../runtime/impl/eventstream/containers.py | 18 +++++ .../impl/eventstream/eventstream_runtime.py | 78 ++++++++----------- .../runtime/impl/remote/remote_runtime.py | 3 +- .../runtime/impl/runloop/runloop_runtime.py | 6 +- openhands/server/listen.py | 10 +-- openhands/server/session/manager.py | 72 ++--------------- tests/runtime/conftest.py | 1 + tests/runtime/test_stress_remote_runtime.py | 2 +- 14 files changed, 71 insertions(+), 131 deletions(-) create mode 100644 openhands/runtime/impl/eventstream/containers.py diff --git a/.github/workflows/ghcr-build.yml b/.github/workflows/ghcr-build.yml index a7398961da3c..5c2e6d111d76 100644 --- a/.github/workflows/ghcr-build.yml +++ b/.github/workflows/ghcr-build.yml @@ -286,7 +286,6 @@ jobs: image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }} image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]') - SKIP_CONTAINER_LOGS=true \ TEST_RUNTIME=eventstream \ SANDBOX_USER_ID=$(id -u) \ SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \ @@ -364,7 +363,6 @@ jobs: image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }} image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]') - SKIP_CONTAINER_LOGS=true \ TEST_RUNTIME=eventstream \ SANDBOX_USER_ID=$(id -u) \ SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \ diff --git a/docs/modules/usage/runtimes.md b/docs/modules/usage/runtimes.md index 3c227ffaf74d..0ea88442c7d9 100644 --- a/docs/modules/usage/runtimes.md +++ b/docs/modules/usage/runtimes.md @@ -59,7 +59,7 @@ docker run # ... -e RUNTIME=remote \ -e SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.app.all-hands.dev" \ -e SANDBOX_API_KEY="your-all-hands-api-key" \ - -e SANDBOX_KEEP_REMOTE_RUNTIME_ALIVE="true" \ + -e SANDBOX_KEEP_RUNTIME_ALIVE="true" \ # ... ``` diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py index 715bdaa470ae..b1b0f48e66f1 100644 --- a/evaluation/miniwob/run_infer.py +++ b/evaluation/miniwob/run_infer.py @@ -66,7 +66,7 @@ def get_config( browsergym_eval_env=env_id, api_key=os.environ.get('ALLHANDS_API_KEY', None), remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'), - keep_remote_runtime_alive=False, + keep_runtime_alive=False, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/scienceagentbench/run_infer.py b/evaluation/scienceagentbench/run_infer.py index 93a82855452e..efa6c9e42c57 100644 --- a/evaluation/scienceagentbench/run_infer.py +++ b/evaluation/scienceagentbench/run_infer.py @@ -72,7 +72,7 @@ def get_config( timeout=300, api_key=os.environ.get('ALLHANDS_API_KEY', None), remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'), - keep_remote_runtime_alive=False, + keep_runtime_alive=False, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index 60acf6569cdc..9f0603eaf2fd 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -145,7 +145,7 @@ def get_config( platform='linux/amd64', api_key=os.environ.get('ALLHANDS_API_KEY', None), remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'), - keep_remote_runtime_alive=False, + keep_runtime_alive=False, remote_runtime_init_timeout=1800, ), # do not mount workspace diff --git a/openhands/core/config/sandbox_config.py b/openhands/core/config/sandbox_config.py index 6dd7f00dd2cf..57f4b189b182 100644 --- a/openhands/core/config/sandbox_config.py +++ b/openhands/core/config/sandbox_config.py @@ -36,7 +36,7 @@ class SandboxConfig: remote_runtime_api_url: str = 'http://localhost:8000' local_runtime_url: str = 'http://localhost' - keep_remote_runtime_alive: bool = True + keep_runtime_alive: bool = True api_key: str | None = None base_container_image: str = 'nikolaik/python-nodejs:python3.12-nodejs22' # default to nikolaik/python-nodejs:python3.12-nodejs22 for eventstream runtime runtime_container_image: str | None = None diff --git a/openhands/runtime/impl/eventstream/containers.py b/openhands/runtime/impl/eventstream/containers.py new file mode 100644 index 000000000000..e162b029c265 --- /dev/null +++ b/openhands/runtime/impl/eventstream/containers.py @@ -0,0 +1,18 @@ +import docker + + +def remove_all_containers(prefix: str): + docker_client = docker.from_env() + + try: + containers = docker_client.containers.list(all=True) + for container in containers: + try: + if container.name.startswith(prefix): + container.remove(force=True) + except docker.errors.APIError: + pass + except docker.errors.NotFound: + pass + except docker.errors.NotFound: # yes, this can happen! + pass diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py index be05c767f544..dbf6599ea66a 100644 --- a/openhands/runtime/impl/eventstream/eventstream_runtime.py +++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py @@ -1,8 +1,9 @@ +import atexit import os -from pathlib import Path import tempfile import threading from functools import lru_cache +from pathlib import Path from typing import Callable from zipfile import ZipFile @@ -35,6 +36,7 @@ from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS from openhands.runtime.base import Runtime from openhands.runtime.builder import DockerRuntimeBuilder +from openhands.runtime.impl.eventstream.containers import remove_all_containers from openhands.runtime.plugins import PluginRequirement from openhands.runtime.utils import find_available_tcp_port from openhands.runtime.utils.request import send_request @@ -42,6 +44,15 @@ from openhands.utils.async_utils import call_sync_from_async from openhands.utils.tenacity_stop import stop_if_should_exit +CONTAINER_NAME_PREFIX = 'openhands-runtime-' + + +def remove_all_runtime_containers(): + remove_all_containers(CONTAINER_NAME_PREFIX) + + +atexit.register(remove_all_runtime_containers) + class LogBuffer: """Synchronous buffer for Docker container logs. @@ -114,8 +125,6 @@ class EventStreamRuntime(Runtime): env_vars (dict[str, str] | None, optional): Environment variables to set. Defaults to None. """ - container_name_prefix = 'openhands-runtime-' - # Need to provide this method to allow inheritors to init the Runtime # without initting the EventStreamRuntime. def init_base_runtime( @@ -158,7 +167,7 @@ def __init__( self.docker_client: docker.DockerClient = self._init_docker_client() self.base_container_image = self.config.sandbox.base_container_image self.runtime_container_image = self.config.sandbox.runtime_container_image - self.container_name = self.container_name_prefix + sid + self.container_name = CONTAINER_NAME_PREFIX + sid self.container = None self.action_semaphore = threading.Semaphore(1) # Ensure one action at a time @@ -173,10 +182,6 @@ def __init__( f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}', ) - self.skip_container_logs = ( - os.environ.get('SKIP_CONTAINER_LOGS', 'false').lower() == 'true' - ) - self.init_base_runtime( config, event_stream, @@ -189,7 +194,15 @@ def __init__( async def connect(self): self.send_status_message('STATUS$STARTING_RUNTIME') - if not self.attach_to_existing: + try: + await call_sync_from_async(self._attach_to_container) + except docker.errors.NotFound as e: + if self.attach_to_existing: + self.log( + 'error', + f'Container {self.container_name} not found.', + ) + raise e if self.runtime_container_image is None: if self.base_container_image is None: raise ValueError( @@ -210,13 +223,12 @@ async def connect(self): await call_sync_from_async(self._init_container) self.log('info', f'Container started: {self.container_name}') - else: - await call_sync_from_async(self._attach_to_container) - if not self.attach_to_existing: self.log('info', f'Waiting for client to become ready at {self.api_url}...') - self.send_status_message('STATUS$WAITING_FOR_CLIENT') + self.send_status_message('STATUS$WAITING_FOR_CLIENT') + await call_sync_from_async(self._wait_until_alive) + if not self.attach_to_existing: self.log('info', 'Runtime is ready.') @@ -227,7 +239,8 @@ async def connect(self): 'debug', f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}', ) - self.send_status_message(' ') + if not self.attach_to_existing: + self.send_status_message(' ') @staticmethod @lru_cache(maxsize=1) @@ -332,13 +345,12 @@ def _init_container(self): self.log('debug', f'Container started. Server url: {self.api_url}') self.send_status_message('STATUS$CONTAINER_STARTED') except docker.errors.APIError as e: - # check 409 error if '409' in str(e): self.log( 'warning', f'Container {self.container_name} already exists. Removing...', ) - self._close_containers(rm_all_containers=True) + remove_all_containers(self.container_name) return self._init_container() else: @@ -414,42 +426,18 @@ def close(self, rm_all_containers: bool = True): Parameters: - rm_all_containers (bool): Whether to remove all containers with the 'openhands-sandbox-' prefix """ - if self.log_buffer: self.log_buffer.close() if self.session: self.session.close() - if self.attach_to_existing: + if self.config.sandbox.keep_runtime_alive or self.attach_to_existing: return - self._close_containers(rm_all_containers) - - def _close_containers(self, rm_all_containers: bool = True): - try: - containers = self.docker_client.containers.list(all=True) - for container in containers: - try: - # If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures - # that all 'openhands-sandbox-' containers are removed as well. - if rm_all_containers and container.name.startswith( - self.container_name_prefix - ): - container.remove(force=True) - elif container.name == self.container_name: - if not self.skip_container_logs: - logs = container.logs(tail=1000).decode('utf-8') - self.log( - 'debug', - f'==== Container logs on close ====\n{logs}\n==== End of container logs ====', - ) - container.remove(force=True) - except docker.errors.APIError: - pass - except docker.errors.NotFound: - pass - except docker.errors.NotFound: # yes, this can happen! - pass + close_prefix = ( + CONTAINER_NAME_PREFIX if rm_all_containers else self.container_name + ) + remove_all_containers(close_prefix) def run_action(self, action: Action) -> Observation: if isinstance(action, FileEditAction): diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index f0a020358a76..97b16c1c83fa 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -288,7 +288,6 @@ def _wait_until_alive_impl(self): assert runtime_data['runtime_id'] == self.runtime_id assert 'pod_status' in runtime_data pod_status = runtime_data['pod_status'] - self.log('debug', runtime_data) self.log('debug', f'Pod status: {pod_status}') # FIXME: We should fix it at the backend of /start endpoint, make sure @@ -333,7 +332,7 @@ def _wait_until_alive_impl(self): raise RuntimeNotReadyError() def close(self, timeout: int = 10): - if self.config.sandbox.keep_remote_runtime_alive or self.attach_to_existing: + if self.config.sandbox.keep_runtime_alive or self.attach_to_existing: self.session.close() return if self.runtime_id and self.session: diff --git a/openhands/runtime/impl/runloop/runloop_runtime.py b/openhands/runtime/impl/runloop/runloop_runtime.py index ac5e0b098d1b..36ad4590b7a5 100644 --- a/openhands/runtime/impl/runloop/runloop_runtime.py +++ b/openhands/runtime/impl/runloop/runloop_runtime.py @@ -21,6 +21,8 @@ from openhands.runtime.utils.request import send_request from openhands.utils.tenacity_stop import stop_if_should_exit +CONTAINER_NAME_PREFIX = 'openhands-runtime-' + class RunloopLogBuffer(LogBuffer): """Synchronous buffer for Runloop devbox logs. @@ -115,7 +117,7 @@ def __init__( bearer_token=config.runloop_api_key, ) self.session = requests.Session() - self.container_name = self.container_name_prefix + sid + self.container_name = CONTAINER_NAME_PREFIX + sid self.action_semaphore = threading.Semaphore(1) # Ensure one action at a time self.init_base_runtime( config, @@ -190,7 +192,7 @@ def _create_new_devbox(self) -> DevboxView: prebuilt='openhands', launch_parameters=LaunchParameters( available_ports=[self._sandbox_port], - resource_size_request="LARGE", + resource_size_request='LARGE', ), metadata={'container-name': self.container_name}, ) diff --git a/openhands/server/listen.py b/openhands/server/listen.py index 8b03d88149d0..3b4db2daddad 100644 --- a/openhands/server/listen.py +++ b/openhands/server/listen.py @@ -5,7 +5,6 @@ import time import uuid import warnings -from contextlib import asynccontextmanager import jwt import requests @@ -74,14 +73,7 @@ session_manager = SessionManager(config, file_store) -@asynccontextmanager -async def lifespan(app: FastAPI): - global session_manager - async with session_manager: - yield - - -app = FastAPI(lifespan=lifespan) +app = FastAPI() app.add_middleware( LocalhostCORSMiddleware, allow_credentials=True, diff --git a/openhands/server/session/manager.py b/openhands/server/session/manager.py index 15f7fbde4402..f746b3676e29 100644 --- a/openhands/server/session/manager.py +++ b/openhands/server/session/manager.py @@ -1,14 +1,11 @@ -import asyncio import time -from dataclasses import dataclass, field -from typing import Optional +from dataclasses import dataclass from fastapi import WebSocket from openhands.core.config import AppConfig from openhands.core.logger import openhands_logger as logger from openhands.events.stream import session_exists -from openhands.runtime.utils.shutdown_listener import should_continue from openhands.server.session.conversation import Conversation from openhands.server.session.session import Session from openhands.storage.files import FileStore @@ -18,78 +15,23 @@ class SessionManager: config: AppConfig file_store: FileStore - cleanup_interval: int = 300 - session_timeout: int = 600 - _sessions: dict[str, Session] = field(default_factory=dict) - _session_cleanup_task: Optional[asyncio.Task] = None - - async def __aenter__(self): - if not self._session_cleanup_task: - self._session_cleanup_task = asyncio.create_task(self._cleanup_sessions()) - return self - - async def __aexit__(self, exc_type, exc_value, traceback): - if self._session_cleanup_task: - self._session_cleanup_task.cancel() - self._session_cleanup_task = None def add_or_restart_session(self, sid: str, ws_conn: WebSocket) -> Session: - if sid in self._sessions: - self._sessions[sid].close() - self._sessions[sid] = Session( + return Session( sid=sid, file_store=self.file_store, ws=ws_conn, config=self.config ) - return self._sessions[sid] - - def get_session(self, sid: str) -> Session | None: - if sid not in self._sessions: - return None - return self._sessions.get(sid) async def attach_to_conversation(self, sid: str) -> Conversation | None: + start_time = time.time() if not await session_exists(sid, self.file_store): return None c = Conversation(sid, file_store=self.file_store, config=self.config) await c.connect() + end_time = time.time() + logger.info( + f'Conversation {c.sid} connected in {end_time - start_time} seconds' + ) return c async def detach_from_conversation(self, conversation: Conversation): await conversation.disconnect() - - async def send(self, sid: str, data: dict[str, object]) -> bool: - """Sends data to the client.""" - session = self.get_session(sid) - if session is None: - logger.error(f'*** No session found for {sid}, skipping message ***') - return False - return await session.send(data) - - async def send_error(self, sid: str, message: str) -> bool: - """Sends an error message to the client.""" - return await self.send(sid, {'error': True, 'message': message}) - - async def send_message(self, sid: str, message: str) -> bool: - """Sends a message to the client.""" - return await self.send(sid, {'message': message}) - - async def _cleanup_sessions(self): - while should_continue(): - current_time = time.time() - session_ids_to_remove = [] - for sid, session in list(self._sessions.items()): - # if session inactive for a long time, remove it - if ( - not session.is_alive - and current_time - session.last_active_ts > self.session_timeout - ): - session_ids_to_remove.append(sid) - - for sid in session_ids_to_remove: - to_del_session: Session | None = self._sessions.pop(sid, None) - if to_del_session is not None: - to_del_session.close() - logger.debug( - f'Session {sid} and related resource have been removed due to inactivity.' - ) - - await asyncio.sleep(self.cleanup_interval) diff --git a/tests/runtime/conftest.py b/tests/runtime/conftest.py index 36d75a15d63f..2a57a9c820b7 100644 --- a/tests/runtime/conftest.py +++ b/tests/runtime/conftest.py @@ -224,6 +224,7 @@ def _load_runtime( config = load_app_config() config.run_as_openhands = run_as_openhands config.sandbox.force_rebuild_runtime = force_rebuild_runtime + config.sandbox.keep_runtime_alive = False # Folder where all tests create their own folder global test_mount_path if use_workspace: diff --git a/tests/runtime/test_stress_remote_runtime.py b/tests/runtime/test_stress_remote_runtime.py index 3a5d6d280726..a38b5c5dbe24 100644 --- a/tests/runtime/test_stress_remote_runtime.py +++ b/tests/runtime/test_stress_remote_runtime.py @@ -64,7 +64,7 @@ def get_config( timeout=300, api_key=os.environ.get('ALLHANDS_API_KEY', None), remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'), - keep_remote_runtime_alive=False, + keep_runtime_alive=False, ), # do not mount workspace workspace_base=None, From 0cfb132ab7cf5befa08564b7bb8127321b5baa2f Mon Sep 17 00:00:00 2001 From: "sp.wack" <83104063+amanape@users.noreply.github.com> Date: Tue, 12 Nov 2024 18:27:06 +0200 Subject: [PATCH 10/16] fix(frontend): Remove dotted outline on focus (#4926) --- frontend/src/components/interactive-chat-box.tsx | 5 ----- frontend/src/routes/_oh._index/task-form.tsx | 5 ----- 2 files changed, 10 deletions(-) diff --git a/frontend/src/components/interactive-chat-box.tsx b/frontend/src/components/interactive-chat-box.tsx index a8b7f51f159c..3cb166cd7a4f 100644 --- a/frontend/src/components/interactive-chat-box.tsx +++ b/frontend/src/components/interactive-chat-box.tsx @@ -59,11 +59,6 @@ export function InteractiveChatBox({ "bg-neutral-700 border border-neutral-600 rounded-lg px-2 py-[10px]", "transition-colors duration-200", "hover:border-neutral-500 focus-within:border-neutral-500", - "group relative", - "before:pointer-events-none before:absolute before:inset-0 before:rounded-lg before:transition-colors", - "before:border-2 before:border-dashed before:border-transparent", - "[&:has(*:focus-within)]:before:border-neutral-500/50", - "[&:has(*[data-dragging-over='true'])]:before:border-neutral-500/50", )} > diff --git a/frontend/src/routes/_oh._index/task-form.tsx b/frontend/src/routes/_oh._index/task-form.tsx index 2a2eaae158ab..57b03f5374bc 100644 --- a/frontend/src/routes/_oh._index/task-form.tsx +++ b/frontend/src/routes/_oh._index/task-form.tsx @@ -70,11 +70,6 @@ export function TaskForm() { "border border-neutral-600 px-4 py-[17px] rounded-lg text-[17px] leading-5 w-full transition-colors duration-200", inputIsFocused ? "bg-neutral-600" : "bg-neutral-700", "hover:border-neutral-500 focus-within:border-neutral-500", - "group relative", - "before:pointer-events-none before:absolute before:inset-0 before:rounded-lg before:transition-colors", - "before:border-2 before:border-dashed before:border-transparent", - "[&:has(*:focus-within)]:before:border-neutral-500/50", - "[&:has(*[data-dragging-over='true'])]:before:border-neutral-500/50", )} > Date: Tue, 12 Nov 2024 12:19:57 -0700 Subject: [PATCH 11/16] fix(evaluation): SWE-bench evaluation script supports multiprocessing (#4943) --- evaluation/swe_bench/eval_infer.py | 43 ++++++++++++++++++++++++++---- evaluation/utils/shared.py | 1 + 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/evaluation/swe_bench/eval_infer.py b/evaluation/swe_bench/eval_infer.py index b1fd15fe5140..81eadeb33f10 100644 --- a/evaluation/swe_bench/eval_infer.py +++ b/evaluation/swe_bench/eval_infer.py @@ -1,6 +1,7 @@ import os import tempfile import time +from functools import partial import pandas as pd from swebench.harness.grading import get_eval_report @@ -94,13 +95,28 @@ def get_config(instance: pd.Series) -> AppConfig: def process_instance( instance: pd.Series, - metadata: EvalMetadata | None = None, + metadata: EvalMetadata, reset_logger: bool = True, + log_dir: str | None = None, ) -> EvalOutput: + """ + Evaluate agent performance on a SWE-bench problem instance. + + Note that this signature differs from the expected input to `run_evaluation`. Use + `functools.partial` to provide optional arguments before passing to the evaluation harness. + + Args: + log_dir (str | None, default=None): Path to directory where log files will be written. Must + be provided if `reset_logger` is set. + + Raises: + AssertionError: if the `reset_logger` flag is set without a provided log directory. + """ # Setup the logger properly, so you can run multi-processing to parallelize the evaluation if reset_logger: - global output_file - log_dir = output_file.replace('.jsonl', '.logs') + assert ( + log_dir is not None + ), "Can't reset logger without a provided log directory." os.makedirs(log_dir, exist_ok=True) reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir) else: @@ -127,6 +143,7 @@ def process_instance( return EvalOutput( instance_id=instance_id, test_result=instance['test_result'], + metadata=metadata, ) runtime = create_runtime(config) @@ -176,6 +193,7 @@ def process_instance( return EvalOutput( instance_id=instance_id, test_result=instance['test_result'], + metadata=metadata, ) elif 'APPLY_PATCH_PASS' in apply_patch_output: logger.info(f'[{instance_id}] {APPLY_PATCH_PASS}:\n{apply_patch_output}') @@ -269,6 +287,7 @@ def process_instance( return EvalOutput( instance_id=instance_id, test_result=instance['test_result'], + metadata=metadata, ) else: logger.info( @@ -355,12 +374,26 @@ def process_instance( output_file = args.input_file.replace('.jsonl', '.swebench_eval.jsonl') instances = prepare_dataset(predictions, output_file, args.eval_n_limit) + # If possible, load the relevant metadata to avoid issues with `run_evaluation`. + metadata: EvalMetadata | None = None + metadata_filepath = os.path.join(os.path.dirname(args.input_file), 'metadata.json') + if os.path.exists(metadata_filepath): + with open(metadata_filepath, 'r') as metadata_file: + data = metadata_file.read() + metadata = EvalMetadata.model_validate_json(data) + + # The evaluation harness constrains the signature of `process_instance_func` but we need to + # pass extra information. Build a new function object to avoid issues with multiprocessing. + process_instance_func = partial( + process_instance, log_dir=output_file.replace('.jsonl', '.logs') + ) + run_evaluation( instances, - metadata=None, + metadata=metadata, output_file=output_file, num_workers=args.eval_num_workers, - process_instance_func=process_instance, + process_instance_func=process_instance_func, ) # Load evaluated predictions & print number of resolved predictions diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py index d5a6d6d89de8..aac54b6b0972 100644 --- a/evaluation/utils/shared.py +++ b/evaluation/utils/shared.py @@ -346,6 +346,7 @@ def run_evaluation( f'model {metadata.llm_config.model}, max iterations {metadata.max_iterations}.\n' ) else: + logger.warning('Running evaluation without metadata.') logger.info(f'Evaluation started with {num_workers} workers.') total_instances = len(dataset) From c555611d58e6d279807b8cf8934797e98a6df9ee Mon Sep 17 00:00:00 2001 From: OpenHands Date: Tue, 12 Nov 2024 14:40:12 -0500 Subject: [PATCH 12/16] Fix issue #4941: [Bug]: Browser tab does not reset after starting a new session (#4945) Co-authored-by: amanape <83104063+amanape@users.noreply.github.com> --- frontend/__tests__/clear-session.test.ts | 40 ++++++++++++++++++++++++ frontend/src/utils/clear-session.ts | 16 +++++++++- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 frontend/__tests__/clear-session.test.ts diff --git a/frontend/__tests__/clear-session.test.ts b/frontend/__tests__/clear-session.test.ts new file mode 100644 index 000000000000..4a172608497f --- /dev/null +++ b/frontend/__tests__/clear-session.test.ts @@ -0,0 +1,40 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import { clearSession } from "../src/utils/clear-session"; +import store from "../src/store"; +import { initialState as browserInitialState } from "../src/state/browserSlice"; + +describe("clearSession", () => { + beforeEach(() => { + // Mock localStorage + const localStorageMock = { + getItem: vi.fn(), + setItem: vi.fn(), + removeItem: vi.fn(), + clear: vi.fn(), + }; + vi.stubGlobal("localStorage", localStorageMock); + + // Set initial browser state to non-default values + store.dispatch({ + type: "browser/setUrl", + payload: "https://example.com", + }); + store.dispatch({ + type: "browser/setScreenshotSrc", + payload: "base64screenshot", + }); + }); + + it("should clear localStorage and reset browser state", () => { + clearSession(); + + // Verify localStorage items were removed + expect(localStorage.removeItem).toHaveBeenCalledWith("token"); + expect(localStorage.removeItem).toHaveBeenCalledWith("repo"); + + // Verify browser state was reset + const state = store.getState(); + expect(state.browser.url).toBe(browserInitialState.url); + expect(state.browser.screenshotSrc).toBe(browserInitialState.screenshotSrc); + }); +}); diff --git a/frontend/src/utils/clear-session.ts b/frontend/src/utils/clear-session.ts index 27013c31b550..e0a7beed5c54 100644 --- a/frontend/src/utils/clear-session.ts +++ b/frontend/src/utils/clear-session.ts @@ -1,7 +1,21 @@ +import store from "#/store"; +import { initialState as browserInitialState } from "#/state/browserSlice"; + /** - * Clear the session data from the local storage. This will remove the token and repo + * Clear the session data from the local storage and reset relevant Redux state */ export const clearSession = () => { + // Clear local storage localStorage.removeItem("token"); localStorage.removeItem("repo"); + + // Reset browser state to initial values + store.dispatch({ + type: "browser/setUrl", + payload: browserInitialState.url, + }); + store.dispatch({ + type: "browser/setScreenshotSrc", + payload: browserInitialState.screenshotSrc, + }); }; From 40e2d28e872c69458f8a3bacca65a8010d2647ff Mon Sep 17 00:00:00 2001 From: mamoodi Date: Tue, 12 Nov 2024 15:08:10 -0500 Subject: [PATCH 13/16] Release 0.13.1 (#4947) --- frontend/package-lock.json | 4 ++-- frontend/package.json | 2 +- pyproject.toml | 4 +--- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index a18c2cf24bc8..bb18dc8b4668 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1,12 +1,12 @@ { "name": "openhands-frontend", - "version": "0.13.0", + "version": "0.13.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "openhands-frontend", - "version": "0.13.0", + "version": "0.13.1", "dependencies": { "@monaco-editor/react": "^4.6.0", "@nextui-org/react": "^2.4.8", diff --git a/frontend/package.json b/frontend/package.json index 6376ab7c2565..9d09d967fb90 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "openhands-frontend", - "version": "0.13.0", + "version": "0.13.1", "private": true, "type": "module", "engines": { diff --git a/pyproject.toml b/pyproject.toml index a121ec388e23..f1e145fa3cd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openhands-ai" -version = "0.13.0" +version = "0.13.1" description = "OpenHands: Code Less, Make More" authors = ["OpenHands"] license = "MIT" @@ -95,7 +95,6 @@ reportlab = "*" [tool.coverage.run] concurrency = ["gevent"] - [tool.poetry.group.runtime.dependencies] jupyterlab = "*" notebook = "*" @@ -126,7 +125,6 @@ ignore = ["D1"] [tool.ruff.lint.pydocstyle] convention = "google" - [tool.poetry.group.evaluation.dependencies] streamlit = "*" whatthepatch = "*" From 123fb4b75d57dca6e8645d7febce44b2d7af4986 Mon Sep 17 00:00:00 2001 From: "sp.wack" <83104063+amanape@users.noreply.github.com> Date: Tue, 12 Nov 2024 22:37:59 +0200 Subject: [PATCH 14/16] feat(posthog): Add saas login event (#4948) --- frontend/src/routes/_oh.tsx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/frontend/src/routes/_oh.tsx b/frontend/src/routes/_oh.tsx index 585bbe2437b1..9f0d57d5661f 100644 --- a/frontend/src/routes/_oh.tsx +++ b/frontend/src/routes/_oh.tsx @@ -171,6 +171,8 @@ export default function MainApp() { company: user.company, name: user.name, email: user.email, + user: user.login, + mode: window.__APP_MODE__ || "oss", }); } }, [user]); From 59f7093428b917049a7e1df97a4e88268355f94a Mon Sep 17 00:00:00 2001 From: tofarr Date: Tue, 12 Nov 2024 14:09:43 -0700 Subject: [PATCH 15/16] Fix max iterations (#4949) --- frontend/src/components/event-handler.tsx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/frontend/src/components/event-handler.tsx b/frontend/src/components/event-handler.tsx index 75eef4116476..a9e0085ae694 100644 --- a/frontend/src/components/event-handler.tsx +++ b/frontend/src/components/event-handler.tsx @@ -34,6 +34,7 @@ import { base64ToBlob } from "#/utils/base64-to-blob"; import { setCurrentAgentState } from "#/state/agentSlice"; import AgentState from "#/types/AgentState"; import { getSettings } from "#/services/settings"; +import { generateAgentStateChangeEvent } from "#/services/agentStateService"; interface ServerError { error: boolean | string; @@ -96,6 +97,14 @@ export function EventHandler({ children }: React.PropsWithChildren) { return; } + if (event.type === "error") { + const message: string = `${event.message}`; + if (message.startsWith("Agent reached maximum")) { + // We set the agent state to paused here - if the user clicks resume, it auto updates the max iterations + send(generateAgentStateChangeEvent(AgentState.PAUSED)); + } + } + if (isErrorObservation(event)) { dispatch( addErrorMessage({ From 207df9dd309b37bb43964ccaf84fce599fc663e5 Mon Sep 17 00:00:00 2001 From: OpenHands Date: Tue, 12 Nov 2024 17:23:11 -0500 Subject: [PATCH 16/16] Fix issue #4912: [Bug]: BedrockException: "The number of toolResult blocks at messages.2.content exceeds the number of toolUse blocks of previous turn.". (#4937) Co-authored-by: Xingyao Wang Co-authored-by: Graham Neubig Co-authored-by: mamoodi --- docs/modules/usage/llms/litellm-proxy.md | 20 ++++++++++++++++++++ docs/modules/usage/llms/llms.md | 1 + docs/sidebars.ts | 5 +++++ 3 files changed, 26 insertions(+) create mode 100644 docs/modules/usage/llms/litellm-proxy.md diff --git a/docs/modules/usage/llms/litellm-proxy.md b/docs/modules/usage/llms/litellm-proxy.md new file mode 100644 index 000000000000..9178bc5c33ea --- /dev/null +++ b/docs/modules/usage/llms/litellm-proxy.md @@ -0,0 +1,20 @@ +# LiteLLM Proxy + +OpenHands supports using the [LiteLLM proxy](https://docs.litellm.ai/docs/proxy/quick_start) to access various LLM providers. + +## Configuration + +To use LiteLLM proxy with OpenHands, you need to: + +1. Set up a LiteLLM proxy server (see [LiteLLM documentation](https://docs.litellm.ai/docs/proxy/quick_start)) +2. When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings: + * Enable `Advanced Options` + * `Custom Model` to the prefix `litellm_proxy/` + the model you will be using (e.g. `litellm_proxy/anthropic.claude-3-5-sonnet-20241022-v2:0`) + * `Base URL` to your LiteLLM proxy URL (e.g. `https://your-litellm-proxy.com`) + * `API Key` to your LiteLLM proxy API key + +## Supported Models + +The supported models depend on your LiteLLM proxy configuration. OpenHands supports any model that your LiteLLM proxy is configured to handle. + +Refer to your LiteLLM proxy configuration for the list of available models and their names. diff --git a/docs/modules/usage/llms/llms.md b/docs/modules/usage/llms/llms.md index 3ce773fcc15e..d9254b2070a4 100644 --- a/docs/modules/usage/llms/llms.md +++ b/docs/modules/usage/llms/llms.md @@ -63,6 +63,7 @@ We have a few guides for running OpenHands with specific model providers: - [Azure](llms/azure-llms) - [Google](llms/google-llms) - [Groq](llms/groq) +- [LiteLLM Proxy](llms/litellm-proxy) - [OpenAI](llms/openai-llms) - [OpenRouter](llms/openrouter) diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 19356116f28c..7ce0a1f210c2 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -76,6 +76,11 @@ const sidebars: SidebarsConfig = { label: 'Groq', id: 'usage/llms/groq', }, + { + type: 'doc', + label: 'LiteLLM Proxy', + id: 'usage/llms/litellm-proxy', + }, { type: 'doc', label: 'OpenAI',