Skip to content

Commit

Permalink
Merge
Browse files Browse the repository at this point in the history
  • Loading branch information
SmartManoj committed Nov 13, 2024
2 parents 206a94c + 207df9d commit 34c5db2
Show file tree
Hide file tree
Showing 54 changed files with 427 additions and 831 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/ghcr-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,6 @@ jobs:
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
SKIP_CONTAINER_LOGS=true \
TEST_RUNTIME=eventstream \
SANDBOX_USER_ID=$(id -u) \
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
Expand Down Expand Up @@ -364,7 +363,6 @@ jobs:
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
SKIP_CONTAINER_LOGS=true \
TEST_RUNTIME=eventstream \
SANDBOX_USER_ID=$(id -u) \
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
Expand Down
20 changes: 20 additions & 0 deletions docs/modules/usage/llms/litellm-proxy.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# LiteLLM Proxy

OpenHands supports using the [LiteLLM proxy](https://docs.litellm.ai/docs/proxy/quick_start) to access various LLM providers.

## Configuration

To use LiteLLM proxy with OpenHands, you need to:

1. Set up a LiteLLM proxy server (see [LiteLLM documentation](https://docs.litellm.ai/docs/proxy/quick_start))
2. When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings:
* Enable `Advanced Options`
* `Custom Model` to the prefix `litellm_proxy/` + the model you will be using (e.g. `litellm_proxy/anthropic.claude-3-5-sonnet-20241022-v2:0`)
* `Base URL` to your LiteLLM proxy URL (e.g. `https://your-litellm-proxy.com`)
* `API Key` to your LiteLLM proxy API key

## Supported Models

The supported models depend on your LiteLLM proxy configuration. OpenHands supports any model that your LiteLLM proxy is configured to handle.

Refer to your LiteLLM proxy configuration for the list of available models and their names.
1 change: 1 addition & 0 deletions docs/modules/usage/llms/llms.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ We have a few guides for running OpenHands with specific model providers:
- [Azure](llms/azure-llms)
- [Google](llms/google-llms)
- [Groq](llms/groq)
- [LiteLLM Proxy](llms/litellm-proxy)
- [OpenAI](llms/openai-llms)
- [OpenRouter](llms/openrouter)

Expand Down
2 changes: 1 addition & 1 deletion docs/modules/usage/runtimes.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ docker run # ...
-e RUNTIME=remote \
-e SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.app.all-hands.dev" \
-e SANDBOX_API_KEY="your-all-hands-api-key" \
-e SANDBOX_KEEP_REMOTE_RUNTIME_ALIVE="true" \
-e SANDBOX_KEEP_RUNTIME_ALIVE="true" \
# ...
```

Expand Down
5 changes: 5 additions & 0 deletions docs/sidebars.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ const sidebars: SidebarsConfig = {
label: 'Groq',
id: 'usage/llms/groq',
},
{
type: 'doc',
label: 'LiteLLM Proxy',
id: 'usage/llms/litellm-proxy',
},
{
type: 'doc',
label: 'OpenAI',
Expand Down
2 changes: 1 addition & 1 deletion evaluation/miniwob/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def get_config(
browsergym_eval_env=env_id,
api_key=os.environ.get('ALLHANDS_API_KEY', None),
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
keep_remote_runtime_alive=False,
keep_runtime_alive=False,
),
# do not mount workspace
workspace_base=None,
Expand Down
2 changes: 1 addition & 1 deletion evaluation/scienceagentbench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def get_config(
timeout=300,
api_key=os.environ.get('ALLHANDS_API_KEY', None),
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
keep_remote_runtime_alive=False,
keep_runtime_alive=False,
),
# do not mount workspace
workspace_base=None,
Expand Down
43 changes: 38 additions & 5 deletions evaluation/swe_bench/eval_infer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import tempfile
import time
from functools import partial

import pandas as pd
from swebench.harness.grading import get_eval_report
Expand Down Expand Up @@ -94,13 +95,28 @@ def get_config(instance: pd.Series) -> AppConfig:

def process_instance(
instance: pd.Series,
metadata: EvalMetadata | None = None,
metadata: EvalMetadata,
reset_logger: bool = True,
log_dir: str | None = None,
) -> EvalOutput:
"""
Evaluate agent performance on a SWE-bench problem instance.
Note that this signature differs from the expected input to `run_evaluation`. Use
`functools.partial` to provide optional arguments before passing to the evaluation harness.
Args:
log_dir (str | None, default=None): Path to directory where log files will be written. Must
be provided if `reset_logger` is set.
Raises:
AssertionError: if the `reset_logger` flag is set without a provided log directory.
"""
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
if reset_logger:
global output_file
log_dir = output_file.replace('.jsonl', '.logs')
assert (
log_dir is not None
), "Can't reset logger without a provided log directory."
os.makedirs(log_dir, exist_ok=True)
reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
else:
Expand All @@ -127,6 +143,7 @@ def process_instance(
return EvalOutput(
instance_id=instance_id,
test_result=instance['test_result'],
metadata=metadata,
)

runtime = create_runtime(config)
Expand Down Expand Up @@ -176,6 +193,7 @@ def process_instance(
return EvalOutput(
instance_id=instance_id,
test_result=instance['test_result'],
metadata=metadata,
)
elif 'APPLY_PATCH_PASS' in apply_patch_output:
logger.info(f'[{instance_id}] {APPLY_PATCH_PASS}:\n{apply_patch_output}')
Expand Down Expand Up @@ -269,6 +287,7 @@ def process_instance(
return EvalOutput(
instance_id=instance_id,
test_result=instance['test_result'],
metadata=metadata,
)
else:
logger.info(
Expand Down Expand Up @@ -355,12 +374,26 @@ def process_instance(
output_file = args.input_file.replace('.jsonl', '.swebench_eval.jsonl')
instances = prepare_dataset(predictions, output_file, args.eval_n_limit)

# If possible, load the relevant metadata to avoid issues with `run_evaluation`.
metadata: EvalMetadata | None = None
metadata_filepath = os.path.join(os.path.dirname(args.input_file), 'metadata.json')
if os.path.exists(metadata_filepath):
with open(metadata_filepath, 'r') as metadata_file:
data = metadata_file.read()
metadata = EvalMetadata.model_validate_json(data)

# The evaluation harness constrains the signature of `process_instance_func` but we need to
# pass extra information. Build a new function object to avoid issues with multiprocessing.
process_instance_func = partial(
process_instance, log_dir=output_file.replace('.jsonl', '.logs')
)

run_evaluation(
instances,
metadata=None,
metadata=metadata,
output_file=output_file,
num_workers=args.eval_num_workers,
process_instance_func=process_instance,
process_instance_func=process_instance_func,
)

# Load evaluated predictions & print number of resolved predictions
Expand Down
6 changes: 3 additions & 3 deletions evaluation/swe_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
f'{instance.problem_statement}\n\n'
'The current working directory is /testbed.\n'
'Do not provide suggestions or workarounds. Directly fix the issue by modifying the source code.\n'
"Plan:\n"
'Plan:\n'
# "*) Reproduce the issue in the test code before fixing it;\n"
"*) Don't search for the user files in the repo because the user's code is an MRE (Minimal Reproducible Example) and wouldn't be part of the repository. It is verified that there is no issue in the user's code and this issue lies in the source code only. Focus only on modifying the existing repository code relevant to the issue instead. Search for the relevant files to modify using search_class, search_function and open_file agent skills instead of modifying the test files itself;\n"
"\n"
'\n'
'Add your valuable thoughts to every action you take.\n'
# 'Determine the root cause of the issue and implement a direct fix, rather than employing a workaround.\n'
# 'Think about edgecases and make sure your fix handles them as well\n'
Expand Down Expand Up @@ -162,7 +162,7 @@ def get_config(
platform='linux/amd64',
api_key=os.environ.get('ALLHANDS_API_KEY', None),
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
keep_remote_runtime_alive=False,
keep_runtime_alive=False,
remote_runtime_init_timeout=1800,
),
# do not mount workspace
Expand Down
1 change: 1 addition & 0 deletions evaluation/utils/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ def run_evaluation(
f'model {metadata.llm_config.model}, max iterations {metadata.max_iterations}.\n'
)
else:
logger.warning('Running evaluation without metadata.')
logger.info(f'Evaluation started with {num_workers} workers.')

total_instances = len(dataset)
Expand Down
40 changes: 40 additions & 0 deletions frontend/__tests__/clear-session.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { clearSession } from "../src/utils/clear-session";
import store from "../src/store";
import { initialState as browserInitialState } from "../src/state/browserSlice";

describe("clearSession", () => {
beforeEach(() => {
// Mock localStorage
const localStorageMock = {
getItem: vi.fn(),
setItem: vi.fn(),
removeItem: vi.fn(),
clear: vi.fn(),
};
vi.stubGlobal("localStorage", localStorageMock);

// Set initial browser state to non-default values
store.dispatch({
type: "browser/setUrl",
payload: "https://example.com",
});
store.dispatch({
type: "browser/setScreenshotSrc",
payload: "base64screenshot",
});
});

it("should clear localStorage and reset browser state", () => {
clearSession();

// Verify localStorage items were removed
expect(localStorage.removeItem).toHaveBeenCalledWith("token");
expect(localStorage.removeItem).toHaveBeenCalledWith("repo");

// Verify browser state was reset
const state = store.getState();
expect(state.browser.url).toBe(browserInitialState.url);
expect(state.browser.screenshotSrc).toBe(browserInitialState.screenshotSrc);
});
});
4 changes: 2 additions & 2 deletions frontend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion frontend/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "openhands-frontend",
"version": "0.13.0",
"version": "0.13.1",
"private": true,
"type": "module",
"engines": {
Expand Down
9 changes: 9 additions & 0 deletions frontend/src/components/event-handler.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import { base64ToBlob } from "#/utils/base64-to-blob";
import { setCurrentAgentState } from "#/state/agentSlice";
import AgentState from "#/types/AgentState";
import { getSettings } from "#/services/settings";
import { generateAgentStateChangeEvent } from "#/services/agentStateService";

interface ServerError {
error: boolean | string;
Expand Down Expand Up @@ -96,6 +97,14 @@ export function EventHandler({ children }: React.PropsWithChildren) {
return;
}

if (event.type === "error") {
const message: string = `${event.message}`;
if (message.startsWith("Agent reached maximum")) {
// We set the agent state to paused here - if the user clicks resume, it auto updates the max iterations
send(generateAgentStateChangeEvent(AgentState.PAUSED));
}
}

if (isErrorObservation(event)) {
dispatch(
addErrorMessage({
Expand Down
5 changes: 0 additions & 5 deletions frontend/src/components/interactive-chat-box.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,6 @@ export function InteractiveChatBox({
"bg-neutral-700 border border-neutral-600 rounded-lg px-2 py-[10px]",
"transition-colors duration-200",
"hover:border-neutral-500 focus-within:border-neutral-500",
"group relative",
"before:pointer-events-none before:absolute before:inset-0 before:rounded-lg before:transition-colors",
"before:border-2 before:border-dashed before:border-transparent",
"[&:has(*:focus-within)]:before:border-neutral-500/50",
"[&:has(*[data-dragging-over='true'])]:before:border-neutral-500/50",
)}
>
<UploadImageInput onUpload={handleUpload} />
Expand Down
2 changes: 0 additions & 2 deletions frontend/src/mocks/handlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,6 @@ const openHandsHandlers = [
export const handlers = [
...openHandsHandlers,
http.get("https://api.github.com/user/repos", async ({ request }) => {
if (import.meta.env.MODE !== "test") await delay(3500);

const token = request.headers
.get("Authorization")
?.replace("Bearer", "")
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/mocks/handlers.ws.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ const generateAgentResponse = (message: string): AssistantMessageAction => ({
action: "message",
args: {
content: message,
images_urls: [],
image_urls: [],
wait_for_response: false,
},
});
Expand Down
5 changes: 0 additions & 5 deletions frontend/src/routes/_oh._index/task-form.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,6 @@ export function TaskForm() {
"border border-neutral-600 px-4 py-[17px] rounded-lg text-[17px] leading-5 w-full transition-colors duration-200",
inputIsFocused ? "bg-neutral-600" : "bg-neutral-700",
"hover:border-neutral-500 focus-within:border-neutral-500",
"group relative",
"before:pointer-events-none before:absolute before:inset-0 before:rounded-lg before:transition-colors",
"before:border-2 before:border-dashed before:border-transparent",
"[&:has(*:focus-within)]:before:border-neutral-500/50",
"[&:has(*[data-dragging-over='true'])]:before:border-neutral-500/50",
)}
>
<ChatInput
Expand Down
2 changes: 2 additions & 0 deletions frontend/src/routes/_oh.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ export default function MainApp() {
company: user.company,
name: user.name,
email: user.email,
user: user.login,
mode: window.__APP_MODE__ || "oss",
});
}
}, [user]);
Expand Down
4 changes: 2 additions & 2 deletions frontend/src/services/chatService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ import ActionType from "#/types/ActionType";

export function createChatMessage(
message: string,
images_urls: string[],
image_urls: string[],
timestamp: string,
) {
const event = {
action: ActionType.MESSAGE,
args: { content: message, images_urls, timestamp },
args: { content: message, image_urls, timestamp },
};
return event;
}
Expand Down
4 changes: 2 additions & 2 deletions frontend/src/types/core/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export interface UserMessageAction extends OpenHandsActionEvent<"message"> {
source: "user";
args: {
content: string;
images_urls: string[];
image_urls: string[];
};
}

Expand All @@ -23,7 +23,7 @@ export interface AssistantMessageAction
source: "agent";
args: {
content: string;
images_urls: string[] | null;
image_urls: string[] | null;
wait_for_response: boolean;
};
}
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/types/core/variances.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ interface LocalUserMessageAction {
action: "message";
args: {
content: string;
images_urls: string[];
image_urls: string[];
};
}

Expand Down
Loading

0 comments on commit 34c5db2

Please sign in to comment.