Merge branch 'main' into mh/test-eval-wf

All-Hands-AI · Nov 19, 2024 · 7424c76 · 7424c76
2 parents 6528eec + f0ca45c
commit 7424c76
Show file tree

Hide file tree

Showing 21 changed files with 801 additions and 591 deletions.
diff --git a/.github/workflows/lint-fix.yml b/.github/workflows/lint-fix.yml
@@ -44,7 +44,11 @@ jobs:
         run: pip install pre-commit==3.7.0
       - name: Fix python lint issues
         run: |
-          pre-commit run --files openhands/**/* evaluation/**/* tests/**/* --config ./dev_config/python/.pre-commit-config.yaml
+          pre-commit run trailing-whitespace --files openhands/**/* evaluation/**/* tests/**/* --config ./dev_config/python/.pre-commit-config.yaml
+          pre-commit run end-of-file-fixer --files openhands/**/* evaluation/**/* tests/**/* --config ./dev_config/python/.pre-commit-config.yaml
+          pre-commit run pyproject-fmt --files openhands/**/* evaluation/**/* tests/**/* --config ./dev_config/python/.pre-commit-config.yaml
+          pre-commit run ruff --files openhands/**/* evaluation/**/* tests/**/* --config ./dev_config/python/.pre-commit-config.yaml
+          pre-commit run ruff-format --files openhands/**/* evaluation/**/* tests/**/* --config ./dev_config/python/.pre-commit-config.yaml
 
       # Commit and push changes if any
       - name: Check for changes

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -54,7 +54,7 @@ The agent needs a place to run code and commands. When you run OpenHands on your
 to do this by default. But there are other ways of creating a sandbox for the agent.
 
 If you work for a company that provides a cloud-based runtime, you could help us add support for that runtime
-by implementing the [interface specified here](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/runtime.py).
+by implementing the [interface specified here](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/base.py).
 
 #### Testing
 When you write code, it is also good to write tests. Please navigate to the `tests` folder to see existing test suites.

diff --git a/docs/modules/usage/how-to/github-action.md b/docs/modules/usage/how-to/github-action.md
@@ -4,13 +4,42 @@ This guide explains how to use the OpenHands GitHub Action, both within the Open
 
 ## Using the Action in the OpenHands Repository
 
-To use the OpenHands GitHub Action in the OpenHands repository, an OpenHands maintainer can:
+To use the OpenHands GitHub Action in a repository, you can:
 
 1. Create an issue in the repository.
-2. Add the `fix-me` label to the issue.
-3. The action will automatically trigger and attempt to resolve the issue.
+2. Add the `fix-me` label to the issue or leave a comment on the issue starting with `@openhands-agent`.
+
+The action will automatically trigger and attempt to resolve the issue.
 
 ## Installing the Action in a New Repository
 
 To install the OpenHands GitHub Action in your own repository, follow
 the [README for the OpenHands Resolver](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/resolver/README.md).
+
+## Usage Tips
+
+### Iterative resolution
+
+1. Create an issue in the repository.
+2. Add the `fix-me` label to the issue, or leave a comment starting with `@openhands-agent`.
+3. Review the attempt to resolve the issue by checking the pull request.
+4. Follow up with feedback through general comments, review comments, or inline thread comments.
+5. Add the `fix-me` label to the pull request, or address a specific comment by starting with `@openhands-agent`.
+
+### Label versus Macro
+
+- Label (`fix-me`): Requests OpenHands to address the **entire** issue or pull request.
+- Macro (`@openhands-agent`): Requests OpenHands to consider only the issue/pull request description and **the specific comment**.
+
+## Advanced Settings
+
+### Add custom repository settings
+
+You can provide custom directions for OpenHands by following the [README for the resolver](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/resolver/README.md#providing-custom-instructions).
+
+### Configure custom macro
+
+To customize the default macro (`@openhands-agent`):
+
+1. [Create a repository variable](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#creating-configuration-variables-for-a-repository) named `OPENHANDS_MACRO`.
+2. Assign the variable a custom value.
diff --git a/evaluation/discoverybench/run_infer.py b/evaluation/discoverybench/run_infer.py
@@ -250,9 +250,6 @@ def process_instance(
 
     config = get_config(metadata)
 
-    # use a session id for concurrent evaluation
-    sid = 'ID_' + str(instance.instance_id)
-
     # Setup the logger properly, so you can run
     # multi-processing to parallelize the evaluation
     if reset_logger:
@@ -284,7 +281,7 @@ def process_instance(
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
-    runtime = create_runtime(config, sid=sid)
+    runtime = create_runtime(config)
     call_async_from_sync(runtime.connect)
     initialize_runtime(runtime, instance.data_files)
 

diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py
@@ -145,7 +145,7 @@ def get_config(
             platform='linux/amd64',
             api_key=os.environ.get('ALLHANDS_API_KEY', None),
             remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
-            keep_remote_runtime_alive=False,
+            keep_runtime_alive=False,
             remote_runtime_init_timeout=3600,
         ),
         # do not mount workspace

diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py
@@ -3,9 +3,11 @@
 import multiprocessing as mp
 import os
 import pathlib
+import signal
 import subprocess
 import time
 import traceback
+from contextlib import contextmanager
 from typing import Any, Awaitable, Callable, TextIO
 
 import pandas as pd
@@ -92,6 +94,27 @@ class EvalException(Exception):
     pass
 
 
+class EvalTimeoutException(Exception):
+    pass
+
+
+@contextmanager
+def timeout(seconds: int):
+    def timeout_handler(signum, frame):
+        raise EvalTimeoutException(f'Function timed out after {seconds} seconds')
+
+    # Set up the signal handler
+    original_handler = signal.signal(signal.SIGALRM, timeout_handler)
+    signal.alarm(seconds)
+
+    try:
+        yield
+    finally:
+        # Restore the original handler and disable the alarm
+        signal.alarm(0)
+        signal.signal(signal.SIGALRM, original_handler)
+
+
 def codeact_user_response(
     state: State,
     encapsulate_solution: bool = False,
@@ -280,15 +303,33 @@ def _process_instance_wrapper(
     metadata: EvalMetadata,
     use_mp: bool,
     max_retries: int = 5,
+    timeout_seconds: int | None = None,
 ) -> EvalOutput:
-    """Wrap the process_instance_func to handle retries and errors.
-
-    Retry an instance up to max_retries times if it fails (e.g., due to transient network/runtime issues).
-    """
+    """Wrap the process_instance_func to handle retries and errors."""
     for attempt in range(max_retries + 1):
         try:
-            result = process_instance_func(instance, metadata, use_mp)
+            if timeout_seconds is not None:
+                with timeout(timeout_seconds):
+                    result = process_instance_func(instance, metadata, use_mp)
+            else:
+                result = process_instance_func(instance, metadata, use_mp)
             return result
+        except EvalTimeoutException as e:
+            error = f'Timeout after {timeout_seconds} seconds'
+            stacktrace = traceback.format_exc()
+            msg = (
+                '-' * 10
+                + '\n'
+                + f'Timeout ({timeout_seconds} seconds) in instance [{instance.instance_id}], Stopped evaluation for this instance.'
+                + '\n'
+                + '-' * 10
+            )
+            logger.exception(e)
+            return EvalOutput(
+                instance_id=instance.instance_id,
+                test_result={},
+                error=error,
+            )
         except Exception as e:
             error = str(e)
             stacktrace = traceback.format_exc()
@@ -337,6 +378,7 @@ def run_evaluation(
         [pd.Series, EvalMetadata, bool], Awaitable[EvalOutput]
     ],
     max_retries: int = 5,  # number of retries for each instance
+    timeout_seconds: int | None = None,
 ):
     use_multiprocessing = num_workers > 1
 
@@ -357,7 +399,14 @@ def run_evaluation(
         if use_multiprocessing:
             with mp.Pool(num_workers) as pool:
                 args_iter = (
-                    (process_instance_func, instance, metadata, True, max_retries)
+                    (
+                        process_instance_func,
+                        instance,
+                        metadata,
+                        True,
+                        max_retries,
+                        timeout_seconds,
+                    )
                     for _, instance in dataset.iterrows()
                 )
                 results = pool.imap_unordered(_process_instance_wrapper_mp, args_iter)

diff --git a/frontend/src/api/open-hands.ts b/frontend/src/api/open-hands.ts
@@ -185,8 +185,7 @@ class OpenHands {
   }
 
   static async getRuntimeId(): Promise<{ runtime_id: string }> {
-    const response = await request("/api/config");
-    const data = await response.json();
+    const data = await request("/api/conversation");
 
     return data;
   }

diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py
@@ -12,6 +12,7 @@
     ModelResponse,
 )
 
+from openhands.core.exceptions import FunctionCallNotExistsError
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import (
     Action,
@@ -484,7 +485,9 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
             elif tool_call.function.name == 'browser':
                 action = BrowseInteractiveAction(browser_actions=arguments['code'])
             else:
-                raise RuntimeError(f'Unknown tool call: {tool_call.function.name}')
+                raise FunctionCallNotExistsError(
+                    f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
+                )
 
             # We only add thought to the first action
             if i == 0:

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
@@ -12,6 +12,7 @@
 from openhands.controller.stuck import StuckDetector
 from openhands.core.config import AgentConfig, LLMConfig
 from openhands.core.exceptions import (
+    FunctionCallNotExistsError,
     FunctionCallValidationError,
     LLMMalformedActionError,
     LLMNoActionError,
@@ -488,6 +489,7 @@ async def _step(self) -> None:
             LLMNoActionError,
             LLMResponseError,
             FunctionCallValidationError,
+            FunctionCallNotExistsError,
         ) as e:
             self.event_stream.add_event(
                 ErrorObservation(

diff --git a/openhands/core/exceptions.py b/openhands/core/exceptions.py
@@ -114,3 +114,10 @@ class FunctionCallValidationError(Exception):
 
     def __init__(self, message):
         super().__init__(message)
+
+
+class FunctionCallNotExistsError(Exception):
+    """Exception raised when an LLM calls a tool that is not registered."""
+
+    def __init__(self, message):
+        super().__init__(message)
diff --git a/openhands/core/main.py b/openhands/core/main.py
@@ -59,7 +59,8 @@ def create_runtime(
     """Create a runtime for the agent to run on.
 
     config: The app config.
-    sid: The session id.
+    sid: (optional) The session id. IMPORTANT: please don't set this unless you know what you're doing.
+        Setting it to an incompatible value will cause unexpected behavior on RemoteRuntime.
     headless_mode: Whether the agent is run in headless mode. `create_runtime` is typically called within evaluation scripts,
         where we don't want to have the VSCode UI open, so it defaults to True.
     """
@@ -105,6 +106,8 @@ async def run_controller(
     Args:
         config: The app config.
         initial_user_action: An Action object containing initial user input
+        sid: (optional) The session id. IMPORTANT: please don't set this unless you know what you're doing.
+            Setting it to an incompatible value will cause unexpected behavior on RemoteRuntime.
         runtime: (optional) A runtime for the agent to run on.
         agent: (optional) A agent to run.
         exit_on_message: quit if agent asks for a message from user (optional)

diff --git a/openhands/resolver/examples/openhands-resolver.yml b/openhands/resolver/examples/openhands-resolver.yml
@@ -7,6 +7,10 @@ on:
     types: [labeled]
   issue_comment:
     types: [created]
+  pull_request_review_comment:
+    types: [created]
+  pull_request_review:
+    types: [submitted]
 
 permissions:
   contents: write
@@ -16,16 +20,24 @@ permissions:
 jobs:
   call-openhands-resolver:
     if: |
-      ${{
-        github.event.label.name == 'fix-me' ||
-        (github.event_name == 'issue_comment' &&
-        startsWith(github.event.comment.body, vars.OPENHANDS_MACRO || '@openhands-agent') &&
-        (github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER'))
-      }}
+      github.event.label.name == 'fix-me' ||
+
+      (
+        ((github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment') &&
+         (startsWith(github.event.comment.body, inputs.macro || '@openhands-agent') || startsWith(github.event.comment.body, inputs.macro || vars.OPENHANDS_MACRO)) &&
+        (github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER')
+        ) ||
+
+        (github.event_name == 'pull_request_review' &&
+        (startsWith(github.event.review.body, inputs.macro || '@openhands-agent') || startsWith(github.event.review.body, inputs.macro || vars.OPENHANDS_MACRO)) &&
+        (github.event.review.author_association == 'OWNER' || github.event.review.author_association == 'COLLABORATOR' || github.event.review.author_association == 'MEMBER')
+        )
+      )
+
     uses: All-Hands-AI/OpenHands/.github/workflows/openhands-resolver.yml@main
     with:
       macro: ${{ vars.OPENHANDS_MACRO || '@openhands-agent' }}
-      max_iterations: 50
+      max_iterations: ${{ vars.OPENHANDS_MAX_ITER || 50 }}
     secrets:
       PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
       PAT_USERNAME: ${{ secrets.PAT_USERNAME }}

diff --git a/openhands/resolver/issue_definitions.py b/openhands/resolver/issue_definitions.py
@@ -18,7 +18,9 @@ class IssueHandlerInterface(ABC):
     issue_type: ClassVar[str]
 
     @abstractmethod
-    def get_converted_issues(self, comment_id: int | None = None) -> list[GithubIssue]:
+    def get_converted_issues(
+        self, issue_numbers: list[int] | None = None, comment_id: int | None = None
+    ) -> list[GithubIssue]:
         """Download issues from GitHub."""
         pass
 
@@ -138,13 +140,29 @@ def _get_issue_comments(
 
         return all_comments if all_comments else None
 
-    def get_converted_issues(self, comment_id: int | None = None) -> list[GithubIssue]:
+    def get_converted_issues(
+        self, issue_numbers: list[int] | None = None, comment_id: int | None = None
+    ) -> list[GithubIssue]:
         """Download issues from Github.
 
         Returns:
             List of Github issues.
         """
+
+        if not issue_numbers:
+            raise ValueError('Unspecified issue number')
+
         all_issues = self._download_issues_from_github()
+        logger.info(f'Limiting resolving to issues {issue_numbers}.')
+        all_issues = [
+            issue
+            for issue in all_issues
+            if issue['number'] in issue_numbers and 'pull_request' not in issue
+        ]
+
+        if len(issue_numbers) == 1 and not all_issues:
+            raise ValueError(f'Issue {issue_numbers[0]} not found')
+
         converted_issues = []
         for issue in all_issues:
             if any([issue.get(key) is None for key in ['number', 'title', 'body']]):
@@ -153,9 +171,6 @@ def get_converted_issues(self, comment_id: int | None = None) -> list[GithubIssu
                 )
                 continue
 
-            if 'pull_request' in issue:
-                continue
-
             # Get issue thread comments
             thread_comments = self._get_issue_comments(
                 issue['number'], comment_id=comment_id
@@ -486,8 +501,16 @@ def __get_context_from_external_issues_references(
 
         return closing_issues
 
-    def get_converted_issues(self, comment_id: int | None = None) -> list[GithubIssue]:
+    def get_converted_issues(
+        self, issue_numbers: list[int] | None = None, comment_id: int | None = None
+    ) -> list[GithubIssue]:
+        if not issue_numbers:
+            raise ValueError('Unspecified issue numbers')
+
         all_issues = self._download_issues_from_github()
+        logger.info(f'Limiting resolving to issues {issue_numbers}.')
+        all_issues = [issue for issue in all_issues if issue['number'] in issue_numbers]
+
         converted_issues = []
         for issue in all_issues:
             # For PRs, body can be None
@@ -576,9 +599,7 @@ def get_instruction(
         # Format thread comments if they exist
         thread_context = ''
         if issue.thread_comments:
-            thread_context = '\n\nPR Thread Comments:\n' + '\n---\n'.join(
-                issue.thread_comments
-            )
+            thread_context = '\n---\n'.join(issue.thread_comments)
             images.extend(self._extract_image_urls(thread_context))
 
         instruction = template.render(