Merge

SmartManoj · Nov 13, 2024 · 34c5db2 · 34c5db2
2 parents 206a94c + 207df9d
commit 34c5db2
Show file tree

Hide file tree

Showing 54 changed files with 427 additions and 831 deletions.
diff --git a/.github/workflows/ghcr-build.yml b/.github/workflows/ghcr-build.yml
@@ -286,7 +286,6 @@ jobs:
           image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
           image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
 
-          SKIP_CONTAINER_LOGS=true \
           TEST_RUNTIME=eventstream \
           SANDBOX_USER_ID=$(id -u) \
           SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
@@ -364,7 +363,6 @@ jobs:
           image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
           image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
 
-          SKIP_CONTAINER_LOGS=true \
           TEST_RUNTIME=eventstream \
           SANDBOX_USER_ID=$(id -u) \
           SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \

diff --git a/docs/modules/usage/llms/litellm-proxy.md b/docs/modules/usage/llms/litellm-proxy.md
@@ -0,0 +1,20 @@
+# LiteLLM Proxy
+
+OpenHands supports using the [LiteLLM proxy](https://docs.litellm.ai/docs/proxy/quick_start) to access various LLM providers.
+
+## Configuration
+
+To use LiteLLM proxy with OpenHands, you need to:
+
+1. Set up a LiteLLM proxy server (see [LiteLLM documentation](https://docs.litellm.ai/docs/proxy/quick_start))
+2. When running OpenHands, set the following in the OpenHands UI under Settings:
+   * Enable `Advanced Options`
+   * Set `Custom Model` to the prefix `litellm_proxy/` followed by the model you will be using (e.g. `litellm_proxy/anthropic.claude-3-5-sonnet-20241022-v2:0`)
+   * Set `Base URL` to your LiteLLM proxy URL (e.g. `https://your-litellm-proxy.com`)
+   * Set `API Key` to your LiteLLM proxy API key
+
+## Supported Models
+
+The supported models depend on your LiteLLM proxy configuration. OpenHands supports any model that your LiteLLM proxy is configured to handle.
+
+Refer to your LiteLLM proxy configuration for the list of available models and their names.
diff --git a/docs/modules/usage/llms/llms.md b/docs/modules/usage/llms/llms.md
@@ -63,6 +63,7 @@ We have a few guides for running OpenHands with specific model providers:
 - [Azure](llms/azure-llms)
 - [Google](llms/google-llms)
 - [Groq](llms/groq)
+- [LiteLLM Proxy](llms/litellm-proxy)
 - [OpenAI](llms/openai-llms)
 - [OpenRouter](llms/openrouter)
 

diff --git a/docs/modules/usage/runtimes.md b/docs/modules/usage/runtimes.md
@@ -59,7 +59,7 @@ docker run # ...
     -e RUNTIME=remote \
     -e SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.app.all-hands.dev" \
     -e SANDBOX_API_KEY="your-all-hands-api-key" \
-    -e SANDBOX_KEEP_REMOTE_RUNTIME_ALIVE="true" \
+    -e SANDBOX_KEEP_RUNTIME_ALIVE="true" \
     # ...
 ```
 

diff --git a/docs/sidebars.ts b/docs/sidebars.ts
@@ -76,6 +76,11 @@ const sidebars: SidebarsConfig = {
                   label: 'Groq',
                   id: 'usage/llms/groq',
                 },
+                {
+                  type: 'doc',
+                  label: 'LiteLLM Proxy',
+                  id: 'usage/llms/litellm-proxy',
+                },
                 {
                   type: 'doc',
                   label: 'OpenAI',

diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py
@@ -66,7 +66,7 @@ def get_config(
             browsergym_eval_env=env_id,
             api_key=os.environ.get('ALLHANDS_API_KEY', None),
             remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
-            keep_remote_runtime_alive=False,
+            keep_runtime_alive=False,
         ),
         # do not mount workspace
         workspace_base=None,

diff --git a/evaluation/scienceagentbench/run_infer.py b/evaluation/scienceagentbench/run_infer.py
@@ -72,7 +72,7 @@ def get_config(
             timeout=300,
             api_key=os.environ.get('ALLHANDS_API_KEY', None),
             remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
-            keep_remote_runtime_alive=False,
+            keep_runtime_alive=False,
         ),
         # do not mount workspace
         workspace_base=None,

diff --git a/evaluation/swe_bench/eval_infer.py b/evaluation/swe_bench/eval_infer.py
@@ -1,6 +1,7 @@
 import os
 import tempfile
 import time
+from functools import partial
 
 import pandas as pd
 from swebench.harness.grading import get_eval_report
@@ -94,13 +95,28 @@ def get_config(instance: pd.Series) -> AppConfig:
 
 def process_instance(
     instance: pd.Series,
-    metadata: EvalMetadata | None = None,
+    metadata: EvalMetadata,
     reset_logger: bool = True,
+    log_dir: str | None = None,
 ) -> EvalOutput:
+    """
+    Evaluate agent performance on a SWE-bench problem instance.
+
+    Note that this signature differs from the expected input to `run_evaluation`. Use
+    `functools.partial` to provide optional arguments before passing to the evaluation harness.
+
+    Args:
+        log_dir (str | None, default=None): Path to directory where log files will be written. Must
+        be provided if `reset_logger` is set.
+
+    Raises:
+        AssertionError: if the `reset_logger` flag is set without a provided log directory.
+    """
     # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
     if reset_logger:
-        global output_file
-        log_dir = output_file.replace('.jsonl', '.logs')
+        assert (
+            log_dir is not None
+        ), "Can't reset logger without a provided log directory."
         os.makedirs(log_dir, exist_ok=True)
         reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
     else:
@@ -127,6 +143,7 @@ def process_instance(
         return EvalOutput(
             instance_id=instance_id,
             test_result=instance['test_result'],
+            metadata=metadata,
         )
 
     runtime = create_runtime(config)
@@ -176,6 +193,7 @@ def process_instance(
             return EvalOutput(
                 instance_id=instance_id,
                 test_result=instance['test_result'],
+                metadata=metadata,
             )
         elif 'APPLY_PATCH_PASS' in apply_patch_output:
             logger.info(f'[{instance_id}] {APPLY_PATCH_PASS}:\n{apply_patch_output}')
@@ -269,6 +287,7 @@ def process_instance(
             return EvalOutput(
                 instance_id=instance_id,
                 test_result=instance['test_result'],
+                metadata=metadata,
             )
         else:
             logger.info(
@@ -355,12 +374,26 @@ def process_instance(
     output_file = args.input_file.replace('.jsonl', '.swebench_eval.jsonl')
     instances = prepare_dataset(predictions, output_file, args.eval_n_limit)
 
+    # If possible, load the relevant metadata to avoid issues with `run_evaluation`.
+    metadata: EvalMetadata | None = None
+    metadata_filepath = os.path.join(os.path.dirname(args.input_file), 'metadata.json')
+    if os.path.exists(metadata_filepath):
+        with open(metadata_filepath, 'r') as metadata_file:
+            data = metadata_file.read()
+            metadata = EvalMetadata.model_validate_json(data)
+
+    # The evaluation harness constrains the signature of `process_instance_func` but we need to
+    # pass extra information. Build a new function object to avoid issues with multiprocessing.
+    process_instance_func = partial(
+        process_instance, log_dir=output_file.replace('.jsonl', '.logs')
+    )
+
     run_evaluation(
         instances,
-        metadata=None,
+        metadata=metadata,
         output_file=output_file,
         num_workers=args.eval_num_workers,
-        process_instance_func=process_instance,
+        process_instance_func=process_instance_func,
     )
 
     # Load evaluated predictions & print number of resolved predictions

diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py
@@ -85,10 +85,10 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
             f'{instance.problem_statement}\n\n'
             'The current working directory is /testbed.\n'
             'Do not provide suggestions or workarounds. Directly fix the issue by modifying the source code.\n'
-            "Plan:\n"
+            'Plan:\n'
             # "*) Reproduce the issue in the test code before fixing it;\n"
             "*) Don't search for the user files in the repo because the user's code is an MRE (Minimal Reproducible Example) and wouldn't be part of the repository. It is verified that there is no issue in the user's code and this issue lies in the source code only. Focus only on modifying the existing repository code relevant to the issue instead. Search for the relevant files to modify using search_class, search_function and open_file agent skills instead of modifying the test files itself;\n"
-            "\n"
+            '\n'
             'Add your valuable thoughts to every action you take.\n'
             # 'Determine the root cause of the issue and implement a direct fix, rather than employing a workaround.\n'
             # 'Think about edgecases and make sure your fix handles them as well\n'
@@ -162,7 +162,7 @@ def get_config(
             platform='linux/amd64',
             api_key=os.environ.get('ALLHANDS_API_KEY', None),
             remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
-            keep_remote_runtime_alive=False,
+            keep_runtime_alive=False,
             remote_runtime_init_timeout=1800,
         ),
         # do not mount workspace

diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py
@@ -345,6 +345,7 @@ def run_evaluation(
             f'model {metadata.llm_config.model}, max iterations {metadata.max_iterations}.\n'
         )
     else:
+        logger.warning('Running evaluation without metadata.')
         logger.info(f'Evaluation started with {num_workers} workers.')
 
     total_instances = len(dataset)

diff --git a/frontend/__tests__/clear-session.test.ts b/frontend/__tests__/clear-session.test.ts
@@ -0,0 +1,40 @@
+import { describe, it, expect, beforeEach, vi } from "vitest";
+import { clearSession } from "../src/utils/clear-session";
+import store from "../src/store";
+import { initialState as browserInitialState } from "../src/state/browserSlice";
+
+describe("clearSession", () => {
+  beforeEach(() => {
+    // Mock localStorage
+    const localStorageMock = {
+      getItem: vi.fn(),
+      setItem: vi.fn(),
+      removeItem: vi.fn(),
+      clear: vi.fn(),
+    };
+    vi.stubGlobal("localStorage", localStorageMock);
+
+    // Set initial browser state to non-default values
+    store.dispatch({
+      type: "browser/setUrl",
+      payload: "https://example.com",
+    });
+    store.dispatch({
+      type: "browser/setScreenshotSrc",
+      payload: "base64screenshot",
+    });
+  });
+
+  it("should clear localStorage and reset browser state", () => {
+    clearSession();
+
+    // Verify localStorage items were removed
+    expect(localStorage.removeItem).toHaveBeenCalledWith("token");
+    expect(localStorage.removeItem).toHaveBeenCalledWith("repo");
+
+    // Verify browser state was reset
+    const state = store.getState();
+    expect(state.browser.url).toBe(browserInitialState.url);
+    expect(state.browser.screenshotSrc).toBe(browserInitialState.screenshotSrc);
+  });
+});
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
diff --git a/frontend/package.json b/frontend/package.json
@@ -1,6 +1,6 @@
 {
   "name": "openhands-frontend",
-  "version": "0.13.0",
+  "version": "0.13.1",
   "private": true,
   "type": "module",
   "engines": {

diff --git a/frontend/src/components/event-handler.tsx b/frontend/src/components/event-handler.tsx
@@ -34,6 +34,7 @@ import { base64ToBlob } from "#/utils/base64-to-blob";
 import { setCurrentAgentState } from "#/state/agentSlice";
 import AgentState from "#/types/AgentState";
 import { getSettings } from "#/services/settings";
+import { generateAgentStateChangeEvent } from "#/services/agentStateService";
 
 interface ServerError {
   error: boolean | string;
@@ -96,6 +97,14 @@ export function EventHandler({ children }: React.PropsWithChildren) {
       return;
     }
 
+    if (event.type === "error") {
+      const message: string = `${event.message}`;
+      if (message.startsWith("Agent reached maximum")) {
+        // We set the agent state to paused here - if the user clicks resume, it auto updates the max iterations
+        send(generateAgentStateChangeEvent(AgentState.PAUSED));
+      }
+    }
+
     if (isErrorObservation(event)) {
       dispatch(
         addErrorMessage({

diff --git a/frontend/src/components/interactive-chat-box.tsx b/frontend/src/components/interactive-chat-box.tsx
@@ -59,11 +59,6 @@ export function InteractiveChatBox({
           "bg-neutral-700 border border-neutral-600 rounded-lg px-2 py-[10px]",
           "transition-colors duration-200",
           "hover:border-neutral-500 focus-within:border-neutral-500",
-          "group relative",
-          "before:pointer-events-none before:absolute before:inset-0 before:rounded-lg before:transition-colors",
-          "before:border-2 before:border-dashed before:border-transparent",
-          "[&:has(*:focus-within)]:before:border-neutral-500/50",
-          "[&:has(*[data-dragging-over='true'])]:before:border-neutral-500/50",
         )}
       >
         <UploadImageInput onUpload={handleUpload} />

diff --git a/frontend/src/mocks/handlers.ts b/frontend/src/mocks/handlers.ts
@@ -71,8 +71,6 @@ const openHandsHandlers = [
 export const handlers = [
   ...openHandsHandlers,
   http.get("https://api.github.com/user/repos", async ({ request }) => {
-    if (import.meta.env.MODE !== "test") await delay(3500);
-
     const token = request.headers
       .get("Authorization")
       ?.replace("Bearer", "")

diff --git a/frontend/src/mocks/handlers.ws.ts b/frontend/src/mocks/handlers.ws.ts
@@ -29,7 +29,7 @@ const generateAgentResponse = (message: string): AssistantMessageAction => ({
   action: "message",
   args: {
     content: message,
-    images_urls: [],
+    image_urls: [],
     wait_for_response: false,
   },
 });

diff --git a/frontend/src/routes/_oh._index/task-form.tsx b/frontend/src/routes/_oh._index/task-form.tsx
@@ -70,11 +70,6 @@ export function TaskForm() {
             "border border-neutral-600 px-4 py-[17px] rounded-lg text-[17px] leading-5 w-full transition-colors duration-200",
             inputIsFocused ? "bg-neutral-600" : "bg-neutral-700",
             "hover:border-neutral-500 focus-within:border-neutral-500",
-            "group relative",
-            "before:pointer-events-none before:absolute before:inset-0 before:rounded-lg before:transition-colors",
-            "before:border-2 before:border-dashed before:border-transparent",
-            "[&:has(*:focus-within)]:before:border-neutral-500/50",
-            "[&:has(*[data-dragging-over='true'])]:before:border-neutral-500/50",
           )}
         >
           <ChatInput

diff --git a/frontend/src/routes/_oh.tsx b/frontend/src/routes/_oh.tsx
@@ -171,6 +171,8 @@ export default function MainApp() {
         company: user.company,
         name: user.name,
         email: user.email,
+        user: user.login,
+        mode: window.__APP_MODE__ || "oss",
       });
     }
   }, [user]);

diff --git a/frontend/src/services/chatService.ts b/frontend/src/services/chatService.ts
@@ -3,12 +3,12 @@ import ActionType from "#/types/ActionType";
 
 export function createChatMessage(
   message: string,
-  images_urls: string[],
+  image_urls: string[],
   timestamp: string,
 ) {
   const event = {
     action: ActionType.MESSAGE,
-    args: { content: message, images_urls, timestamp },
+    args: { content: message, image_urls, timestamp },
   };
   return event;
 }

diff --git a/frontend/src/types/core/actions.ts b/frontend/src/types/core/actions.ts
@@ -4,7 +4,7 @@ export interface UserMessageAction extends OpenHandsActionEvent<"message"> {
   source: "user";
   args: {
     content: string;
-    images_urls: string[];
+    image_urls: string[];
   };
 }
 
@@ -23,7 +23,7 @@ export interface AssistantMessageAction
   source: "agent";
   args: {
     content: string;
-    images_urls: string[] | null;
+    image_urls: string[] | null;
     wait_for_response: boolean;
   };
 }

diff --git a/frontend/src/types/core/variances.ts b/frontend/src/types/core/variances.ts
@@ -27,7 +27,7 @@ interface LocalUserMessageAction {
   action: "message";
   args: {
     content: string;
-    images_urls: string[];
+    image_urls: string[];
   };
 }