Merge branch 'main' into feat-socket-io

All-Hands-AI · Nov 21, 2024 · 8d12295 · 8d12295
2 parents 5424755 + 68e52a9
commit 8d12295
Show file tree

Hide file tree

Showing 6 changed files with 138 additions and 38 deletions.
diff --git a/.github/workflows/openhands-resolver.yml b/.github/workflows/openhands-resolver.yml
@@ -80,11 +80,11 @@ jobs:
             github.event.label.name == 'fix-me-experimental' ||
             (
               (github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment') &&
-              startsWith(github.event.comment.body, inputs.macro || '@openhands-agent-exp')
+              startsWith(github.event.comment.body, '@openhands-agent-exp')
             ) ||
             (
               github.event_name == 'pull_request_review' &&
-              startsWith(github.event.review.body, inputs.macro || '@openhands-agent-exp')
+              startsWith(github.event.review.body, '@openhands-agent-exp')
             )
           )
         uses: actions/cache@v3

diff --git a/docs/modules/usage/how-to/github-action.md b/docs/modules/usage/how-to/github-action.md
@@ -43,3 +43,53 @@ To customize the default macro (`@openhands-agent`):
 
 1. [Create a repository variable](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#creating-configuration-variables-for-a-repository) named `OPENHANDS_MACRO`
 2. Assign the variable a custom value
+
+## Writing Effective .openhands_instructions Files
+
+You can place a `.openhands_instructions` file in the root directory of your repository to guide OpenHands in understanding and working with the repository effectively. Here are key tips for writing high-quality instructions:
+
+### Core Principles
+
+1. **Concise but Informative**: Provide a clear, focused overview of the repository that emphasizes the most common actions OpenHands will need to perform.
+
+2. **Repository Structure**: Explain the key directories and their purposes, especially highlighting where different types of code (e.g., frontend, backend) are located.
+
+3. **Development Workflows**: Document the essential commands for:
+   - Building and setting up the project
+   - Running tests
+   - Linting and code quality checks
+   - Any environment-specific requirements
+
+4. **Testing Guidelines**: Specify:
+   - Where tests are located
+   - How to run specific test suites
+   - Any testing conventions or requirements
+
+### Example Structure
+
+```markdown
+# Repository Overview
+[Brief description of the project]
+
+## General Setup
+- Main build command
+- Development environment setup
+- Pre-commit checks
+
+## Backend
+- Location and structure
+- Testing instructions
+- Environment requirements
+
+## Frontend
+- Setup prerequisites
+- Build and test commands
+- Environment variables
+
+## Additional Guidelines
+- Code style requirements
+- Special considerations
+- Common workflows
+```
+
+For a real-world example, refer to the [OpenHands repository's .openhands_instructions](https://github.com/All-Hands-AI/OpenHands/blob/main/.openhands_instructions).
diff --git a/openhands/agenthub/README.md b/openhands/agenthub/README.md
@@ -7,10 +7,10 @@ Contributors from different backgrounds and interests can choose to contribute t
 
 ## Constructing an Agent
 
-The abstraction for an agent can be found [here](../openhands/controller/agent.py).
+The abstraction for an agent can be found [here](../controller/agent.py).
 
 Agents are run inside of a loop. At each iteration, `agent.step()` is called with a
-[State](../openhands/controller/state/state.py) input, and the agent must output an [Action](../openhands/events/action).
+[State](../controller/state/state.py) input, and the agent must output an [Action](../events/action).
 
 Every agent also has a `self.llm` which it can use to interact with the LLM configured by the user.
 See the [LiteLLM docs for `self.llm.completion`](https://docs.litellm.ai/docs/completion).
@@ -46,17 +46,17 @@ The agent can add and modify subtasks through the `AddTaskAction` and `ModifyTas
 
 Here is a list of available Actions, which can be returned by `agent.step()`:
 
-- [`CmdRunAction`](../openhands/events/action/commands.py) - Runs a command inside a sandboxed terminal
-- [`IPythonRunCellAction`](../openhands/events/action/commands.py) - Execute a block of Python code interactively (in Jupyter notebook) and receives `CmdOutputObservation`. Requires setting up `jupyter` [plugin](../openhands/runtime/plugins) as a requirement.
-- [`FileReadAction`](../openhands/events/action/files.py) - Reads the content of a file
-- [`FileWriteAction`](../openhands/events/action/files.py) - Writes new content to a file
-- [`BrowseURLAction`](../openhands/events/action/browse.py) - Gets the content of a URL
-- [`AddTaskAction`](../openhands/events/action/tasks.py) - Adds a subtask to the plan
-- [`ModifyTaskAction`](../openhands/events/action/tasks.py) - Changes the state of a subtask.
-- [`AgentFinishAction`](../openhands/events/action/agent.py) - Stops the control loop, allowing the user/delegator agent to enter a new task
-- [`AgentRejectAction`](../openhands/events/action/agent.py) - Stops the control loop, allowing the user/delegator agent to enter a new task
-- [`AgentFinishAction`](../openhands/events/action/agent.py) - Stops the control loop, allowing the user to enter a new task
-- [`MessageAction`](../openhands/events/action/message.py) - Represents a message from an agent or the user
+- [`CmdRunAction`](../events/action/commands.py) - Runs a command inside a sandboxed terminal
+- [`IPythonRunCellAction`](../events/action/commands.py) - Executes a block of Python code interactively (in a Jupyter notebook) and receives a `CmdOutputObservation`. Requires the `jupyter` [plugin](../runtime/plugins) to be set up.
+- [`FileReadAction`](../events/action/files.py) - Reads the content of a file
+- [`FileWriteAction`](../events/action/files.py) - Writes new content to a file
+- [`BrowseURLAction`](../events/action/browse.py) - Gets the content of a URL
+- [`AddTaskAction`](../events/action/tasks.py) - Adds a subtask to the plan
+- [`ModifyTaskAction`](../events/action/tasks.py) - Changes the state of a subtask.
+- [`AgentFinishAction`](../events/action/agent.py) - Stops the control loop, allowing the user/delegator agent to enter a new task
+- [`AgentRejectAction`](../events/action/agent.py) - Stops the control loop, allowing the user/delegator agent to enter a new task
+- [`AgentFinishAction`](../events/action/agent.py) - Stops the control loop, allowing the user to enter a new task
+- [`MessageAction`](../events/action/message.py) - Represents a message from an agent or the user
 
 To serialize and deserialize an action, you can use:
 - `action.to_dict()` to serialize the action to a dictionary to be sent to the UI, including a user-friendly string representation of the message
@@ -70,12 +70,12 @@ But they may also appear as a result of asynchronous events (e.g. a message from
 
 Here is a list of available Observations:
 
-- [`CmdOutputObservation`](../openhands/events/observation/commands.py)
-- [`BrowserOutputObservation`](../openhands/events/observation/browse.py)
-- [`FileReadObservation`](../openhands/events/observation/files.py)
-- [`FileWriteObservation`](../openhands/events/observation/files.py)
-- [`ErrorObservation`](../openhands/events/observation/error.py)
-- [`SuccessObservation`](../openhands/events/observation/success.py)
+- [`CmdOutputObservation`](../events/observation/commands.py)
+- [`BrowserOutputObservation`](../events/observation/browse.py)
+- [`FileReadObservation`](../events/observation/files.py)
+- [`FileWriteObservation`](../events/observation/files.py)
+- [`ErrorObservation`](../events/observation/error.py)
+- [`SuccessObservation`](../events/observation/success.py)
 
 You can use `observation.to_dict()` and `observation_from_dict` to serialize and deserialize observations.
 

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
@@ -164,7 +164,6 @@ def __init__(
         )
         def wrapper(*args, **kwargs):
             """Wrapper for the litellm completion function. Logs the input and output of the completion function."""
-
             from openhands.core.utils import json
 
             messages: list[dict[str, Any]] | dict[str, Any] = []
@@ -370,16 +369,16 @@ def init_model_info(self):
                 ):
                     self.config.max_output_tokens = self.model_info['max_tokens']
 
-    def vision_is_active(self):
+    def vision_is_active(self) -> bool:
         with warnings.catch_warnings():
             warnings.simplefilter('ignore')
             return not self.config.disable_vision and self._supports_vision()
 
-    def _supports_vision(self):
+    def _supports_vision(self) -> bool:
         """Acquire from litellm if model is vision capable.
 
         Returns:
-            bool: True if model is vision capable. If model is not supported by litellm, it will return False.
+            bool: True if model is vision capable. Return False if model not supported by litellm.
         """
         # litellm.supports_vision currently returns False for 'openai/gpt-...' or 'anthropic/claude-...' (with prefixes)
         # but model_info will have the correct value for some reason.
@@ -477,7 +476,7 @@ def _post_completion(self, response: ModelResponse) -> None:
         if stats:
             logger.debug(stats)
 
-    def get_token_count(self, messages):
+    def get_token_count(self, messages) -> int:
         """Get the number of tokens in a list of messages.
 
         Args:
@@ -492,7 +491,7 @@ def get_token_count(self, messages):
             # TODO: this is to limit logspam in case token count is not supported
             return 0
 
-    def _is_local(self):
+    def _is_local(self) -> bool:
         """Determines if the system is using a locally running LLM.
 
         Returns:
@@ -507,7 +506,7 @@ def _is_local(self):
                 return True
         return False
 
-    def _completion_cost(self, response):
+    def _completion_cost(self, response) -> float:
         """Calculate the cost of a completion response based on the model.  Local models are treated as free.
         Add the current cost into total cost in metrics.
 
@@ -556,7 +555,7 @@ def __str__(self):
     def __repr__(self):
         return str(self)
 
-    def reset(self):
+    def reset(self) -> None:
         self.metrics.reset()
 
     def format_messages_for_llm(self, messages: Message | list[Message]) -> list[dict]:

diff --git a/openhands/resolver/README.md b/openhands/resolver/README.md
@@ -15,6 +15,8 @@ Follow these steps to use this workflow in your own repository:
 
 1. [Create a personal access token](https://github.com/settings/tokens?type=beta) with read/write scope for "contents", "issues", "pull requests", and "workflows"
 
+   Note: If you're working with an organizational repository, you may need to configure the organization's personal access token policy first. See [Setting a personal access token policy for your organization](https://docs.github.com/en/organizations/managing-programmatic-access-to-your-organization/setting-a-personal-access-token-policy-for-your-organization) for details.
+
 2. Create an API key for the [Claude API](https://www.anthropic.com/api) (recommended) or another supported LLM service
 
 3. Copy `examples/openhands-resolver.yml` to your repository's `.github/workflows/` directory
@@ -83,11 +85,14 @@ pip install openhands-ai
 3. Set up environment variables:
 
 ```bash
+
 # GitHub credentials
+
 export GITHUB_TOKEN="your-github-token"
 export GITHUB_USERNAME="your-github-username"  # Optional, defaults to token owner
 
 # LLM configuration
+
 export LLM_MODEL="anthropic/claude-3-5-sonnet-20241022"  # Recommended
 export LLM_API_KEY="your-llm-api-key"
 export LLM_BASE_URL="your-api-url"  # Optional, for API proxies

diff --git a/openhands/server/listen.py b/openhands/server/listen.py
@@ -59,7 +59,7 @@
 from openhands.events.serialization import event_to_dict
 from openhands.events.stream import AsyncEventStreamWrapper
 from openhands.llm import bedrock
-from openhands.runtime.base import Runtime
+from openhands.runtime.base import Runtime, RuntimeUnavailableError
 from openhands.server.auth.auth import get_sid_from_token, sign_token
 from openhands.server.middleware import (
     InMemoryRateLimiter,
@@ -401,7 +401,14 @@ async def list_files(request: Request, path: str | None = None):
         )
 
     runtime: Runtime = request.state.conversation.runtime
-    file_list = await call_sync_from_async(runtime.list_files, path)
+    try:
+        file_list = await call_sync_from_async(runtime.list_files, path)
+    except RuntimeUnavailableError as e:
+        logger.error(f'Error listing files: {e}', exc_info=True)
+        return JSONResponse(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            content={'error': f'Error listing files: {e}'},
+        )
     if path:
         file_list = [os.path.join(path, f) for f in file_list]
 
@@ -421,7 +428,14 @@ async def filter_for_gitignore(file_list, base_path):
         file_list = [entry for entry in file_list if not spec.match_file(entry)]
         return file_list
 
-    file_list = await filter_for_gitignore(file_list, '')
+    try:
+        file_list = await filter_for_gitignore(file_list, '')
+    except RuntimeUnavailableError as e:
+        logger.error(f'Error filtering files: {e}', exc_info=True)
+        return JSONResponse(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            content={'error': f'Error filtering files: {e}'},
+        )
 
     return file_list
 
@@ -450,7 +464,14 @@ async def select_file(file: str, request: Request):
 
     file = os.path.join(runtime.config.workspace_mount_path_in_sandbox, file)
     read_action = FileReadAction(file)
-    observation = await call_sync_from_async(runtime.run_action, read_action)
+    try:
+        observation = await call_sync_from_async(runtime.run_action, read_action)
+    except RuntimeUnavailableError as e:
+        logger.error(f'Error opening file {file}: {e}', exc_info=True)
+        return JSONResponse(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            content={'error': f'Error opening file: {e}'},
+        )
 
     if isinstance(observation, FileReadObservation):
         content = observation.content
@@ -546,9 +567,20 @@ async def upload_file(request: Request, files: list[UploadFile]):
                     tmp_file.flush()
 
                 runtime: Runtime = request.state.conversation.runtime
-                runtime.copy_to(
-                    tmp_file_path, runtime.config.workspace_mount_path_in_sandbox
-                )
+                try:
+                    await call_sync_from_async(
+                        runtime.copy_to,
+                        tmp_file_path,
+                        runtime.config.workspace_mount_path_in_sandbox,
+                    )
+                except RuntimeUnavailableError as e:
+                    logger.error(
+                        f'Error saving file {safe_filename}: {e}', exc_info=True
+                    )
+                    return JSONResponse(
+                        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                        content={'error': f'Error saving file: {e}'},
+                    )
             uploaded_files.append(safe_filename)
 
         response_content = {
@@ -679,7 +711,14 @@ async def save_file(request: Request):
             runtime.config.workspace_mount_path_in_sandbox, file_path
         )
         write_action = FileWriteAction(file_path, content)
-        observation = await call_sync_from_async(runtime.run_action, write_action)
+        try:
+            observation = await call_sync_from_async(runtime.run_action, write_action)
+        except RuntimeUnavailableError as e:
+            logger.error(f'Error saving file: {e}', exc_info=True)
+            return JSONResponse(
+                status_code=500,
+                content={'error': f'Error saving file: {e}'},
+            )
 
         if isinstance(observation, FileWriteObservation):
             return JSONResponse(
@@ -730,7 +769,14 @@ async def zip_current_workspace(request: Request, background_tasks: BackgroundTa
         logger.debug('Zipping workspace')
         runtime: Runtime = request.state.conversation.runtime
         path = runtime.config.workspace_mount_path_in_sandbox
-        zip_file = await call_sync_from_async(runtime.copy_from, path)
+        try:
+            zip_file = await call_sync_from_async(runtime.copy_from, path)
+        except RuntimeUnavailableError as e:
+            logger.error(f'Error zipping workspace: {e}', exc_info=True)
+            return JSONResponse(
+                status_code=500,
+                content={'error': f'Error zipping workspace: {e}'},
+            )
         response = FileResponse(
             path=zip_file,
             filename='workspace.zip',