Skip to content

Commit

Permalink
Merge branch 'main' into feat/memory-condenser-switch
Browse files Browse the repository at this point in the history
  • Loading branch information
csmith49 authored Feb 21, 2025
2 parents a694e64 + f093c14 commit dbe21a8
Show file tree
Hide file tree
Showing 28 changed files with 378 additions and 198 deletions.
4 changes: 4 additions & 0 deletions config.template.toml
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,10 @@ codeact_enable_jupyter = true
# List of microagents to disable
#disabled_microagents = []

# Whether history should be truncated to continue the session when hitting the
# LLM context length limit
enable_history_truncation = true

[agent.RepoExplorerAgent]
# Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
# useful when an agent doesn't demand high quality but uses a lot of tokens
Expand Down
5 changes: 5 additions & 0 deletions docs/modules/usage/configuration-options.md
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,11 @@ The agent configuration options are defined in the `[agent]` and `[agent.<agent_
- Default: `false`
- Description: Whether Jupyter is enabled in the action space

- `enable_history_truncation`
- Type: `bool`
- Default: `true`
- Description: Whether history should be truncated to continue the session when hitting the LLM context length limit

### Microagent Usage
- `enable_prompt_extensions`
- Type: `bool`
Expand Down
4 changes: 4 additions & 0 deletions evaluation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ To evaluate an agent, you can provide the agent's name to the `run_infer.py` pro
### Evaluating Different LLMs

OpenHands in development mode uses `config.toml` to keep track of most configuration.
**IMPORTANT: For evaluation, only the LLM section in `config.toml` will be used. Other configurations, such as `save_trajectory_path`, are not applied during evaluation.**

Here's an example configuration file you can use to define and use multiple LLMs:

```toml
Expand All @@ -40,6 +42,8 @@ api_key = "XXX"
temperature = 0.0
```

Other configurations specific to evaluation, such as `save_trajectory_path`, are typically set in the `get_config` function of each benchmark's respective `run_infer.py` file.

## Supported Benchmarks

The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), [miscellaneous assistance](#misc-assistance), and [real-world](#real-world) tasks.
Expand Down
3 changes: 0 additions & 3 deletions frontend/src/api/open-hands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,6 @@ class OpenHands {
body,
);

// TODO: remove this once we have a multi-conversation UI
localStorage.setItem("latest_conversation_id", data.conversation_id);

return data;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
<div
ref={ref}
data-testid="conversation-panel"
className="w-[350px] h-full border border-neutral-700 bg-base-secondary rounded-xl overflow-y-auto"
className="w-[350px] h-full border border-neutral-700 bg-base-secondary rounded-xl overflow-y-auto absolute"
>
<div className="pt-4 px-4 flex items-center justify-between">
<div className="w-full h-full absolute flex justify-center items-center">
{isFetching && <LoadingSpinner size="small" />}
</div>
{error && (
Expand Down
18 changes: 0 additions & 18 deletions frontend/src/routes/_oh._index/route.tsx
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import React from "react";
import { useDispatch } from "react-redux";
import { useTranslation } from "react-i18next";
import posthog from "posthog-js";
import { I18nKey } from "#/i18n/declaration";
import { setImportedProjectZip } from "#/state/initial-query-slice";
import { convertZipToBase64 } from "#/utils/convert-zip-to-base64";
import { useGitHubUser } from "#/hooks/query/use-github-user";
Expand All @@ -14,7 +12,6 @@ import { HeroHeading } from "#/components/shared/hero-heading";
import { TaskForm } from "#/components/shared/task-form";

function Home() {
const { t } = useTranslation();
const dispatch = useDispatch();
const formRef = React.useRef<HTMLFormElement>(null);

Expand All @@ -26,8 +23,6 @@ function Home() {
gitHubClientId: config?.GITHUB_CLIENT_ID || null,
});

const latestConversation = localStorage.getItem("latest_conversation_id");

return (
<div className="bg-base-secondary h-full rounded-xl flex flex-col items-center justify-center relative overflow-y-auto px-2">
<HeroHeading />
Expand Down Expand Up @@ -56,19 +51,6 @@ function Home() {
/>
</div>
</div>
{latestConversation && (
<div className="flex gap-4 w-full text-center mt-8">
<p className="text-center w-full">
{t(I18nKey.LANDING$OR)}&nbsp;
<a
className="underline"
href={`/conversations/${latestConversation}`}
>
{t(I18nKey.LANDING$RECENT_CONVERSATION)}
</a>
</p>
</div>
)}
</div>
);
}
Expand Down
23 changes: 13 additions & 10 deletions openhands/agenthub/browsing_agent/response_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,24 @@


class BrowsingResponseParser(ResponseParser):
def __init__(self):
def __init__(self) -> None:
# Need to pay attention to the item order in self.action_parsers
super().__init__()
self.action_parsers = [BrowsingActionParserMessage()]
self.default_parser = BrowsingActionParserBrowseInteractive()

def parse(self, response: str) -> Action:
action_str = self.parse_response(response)
def parse(
self, response: str | dict[str, list[dict[str, dict[str, str | None]]]]
) -> Action:
if isinstance(response, str):
action_str = response
else:
action_str = self.parse_response(response)
return self.parse_action(action_str)

def parse_response(self, response) -> str:
def parse_response(
self, response: dict[str, list[dict[str, dict[str, str | None]]]]
) -> str:
action_str = response['choices'][0]['message']['content']
if action_str is None:
return ''
Expand All @@ -47,9 +54,7 @@ class BrowsingActionParserMessage(ActionParser):
- BrowseInteractiveAction(browser_actions) - unexpected response format, message back to user
"""

def __init__(
self,
):
def __init__(self) -> None:
pass

def check_condition(self, action_str: str) -> bool:
Expand All @@ -69,9 +74,7 @@ class BrowsingActionParserBrowseInteractive(ActionParser):
- BrowseInteractiveAction(browser_actions) - handle send message to user function call in BrowserGym
"""

def __init__(
self,
):
def __init__(self) -> None:
pass

def check_condition(self, action_str: str) -> bool:
Expand Down
36 changes: 26 additions & 10 deletions openhands/agenthub/browsing_agent/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import yaml


def yaml_parser(message):
def yaml_parser(message: str) -> tuple[dict, bool, str]:
"""Parse a yaml message for the retry function."""
# saves gpt-3.5 from some yaml parsing errors
message = re.sub(r':\s*\n(?=\S|\n)', ': ', message)
Expand All @@ -22,7 +22,9 @@ def yaml_parser(message):
return value, valid, retry_message


def _compress_chunks(text, identifier, skip_list, split_regex='\n\n+'):
def _compress_chunks(
text: str, identifier: str, skip_list: list[str], split_regex: str = '\n\n+'
) -> tuple[dict[str, str], str]:
"""Compress a string by replacing redundant chunks by identifiers. Chunks are defined by the split_regex."""
text_list = re.split(split_regex, text)
text_list = [chunk.strip() for chunk in text_list]
Expand All @@ -44,7 +46,7 @@ def _compress_chunks(text, identifier, skip_list, split_regex='\n\n+'):
return def_dict, compressed_text


def compress_string(text):
def compress_string(text: str) -> str:
"""Compress a string by replacing redundant paragraphs and lines with identifiers."""
# Perform paragraph-level compression
def_dict, compressed_text = _compress_chunks(
Expand All @@ -67,7 +69,7 @@ def compress_string(text):
return definitions + '\n' + compressed_text


def extract_html_tags(text, keys):
def extract_html_tags(text: str, keys: list[str]) -> dict[str, list[str]]:
"""Extract the content within HTML tags for a list of keys.
Parameters
Expand Down Expand Up @@ -102,7 +104,12 @@ class ParseError(Exception):
pass


def parse_html_tags_raise(text, keys=(), optional_keys=(), merge_multiple=False):
def parse_html_tags_raise(
text: str,
keys: list[str] | None = None,
optional_keys: list[str] | None = None,
merge_multiple: bool = False,
) -> dict[str, str]:
"""A version of parse_html_tags that raises an exception if the parsing is not successful."""
content_dict, valid, retry_message = parse_html_tags(
text, keys, optional_keys, merge_multiple=merge_multiple
Expand All @@ -112,7 +119,12 @@ def parse_html_tags_raise(text, keys=(), optional_keys=(), merge_multiple=False)
return content_dict


def parse_html_tags(text, keys=(), optional_keys=(), merge_multiple=False):
def parse_html_tags(
text: str,
keys: list[str] | None = None,
optional_keys: list[str] | None = None,
merge_multiple: bool = False,
) -> tuple[dict[str, str], bool, str]:
"""Satisfy the parse api, extracts 1 match per key and validates that all keys are present
Parameters
Expand All @@ -133,26 +145,30 @@ def parse_html_tags(text, keys=(), optional_keys=(), merge_multiple=False):
str
A message to be displayed to the agent if the parsing was not successful.
"""
all_keys = tuple(keys) + tuple(optional_keys)
keys = keys or []
optional_keys = optional_keys or []
all_keys = list(keys) + list(optional_keys)
content_dict = extract_html_tags(text, all_keys)
retry_messages = []
result_dict: dict[str, str] = {}

for key in all_keys:
if key not in content_dict:
if key not in optional_keys:
retry_messages.append(f'Missing the key <{key}> in the answer.')
else:
val = content_dict[key]
content_dict[key] = val[0]
if len(val) > 1:
if not merge_multiple:
retry_messages.append(
f'Found multiple instances of the key {key}. You should have only one of them.'
)
else:
# merge the multiple instances
content_dict[key] = '\n'.join(val)
result_dict[key] = '\n'.join(val)
else:
result_dict[key] = val[0]

valid = len(retry_messages) == 0
retry_message = '\n'.join(retry_messages)
return content_dict, valid, retry_message
return result_dict, valid, retry_message
10 changes: 7 additions & 3 deletions openhands/agenthub/codeact_agent/function_calling.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,8 +475,9 @@ def combine_thought(action: Action, thought: str) -> Action:
def response_to_actions(response: ModelResponse) -> list[Action]:
actions: list[Action] = []
assert len(response.choices) == 1, 'Only one choice is supported for now'
assistant_msg = response.choices[0].message
if assistant_msg.tool_calls:
choice = response.choices[0]
assistant_msg = choice.message
if hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls:
# Check if there's assistant_msg.content. If so, add it to the thought
thought = ''
if isinstance(assistant_msg.content, str):
Expand Down Expand Up @@ -592,7 +593,10 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
actions.append(action)
else:
actions.append(
MessageAction(content=assistant_msg.content, wait_for_response=True)
MessageAction(
content=str(assistant_msg.content) if assistant_msg.content else '',
wait_for_response=True,
)
)

assert len(actions) >= 1
Expand Down
10 changes: 6 additions & 4 deletions openhands/agenthub/micro/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def parse_response(orig_response: str) -> Action:
return action_from_dict(action_dict)


def to_json(obj: object, **kwargs: dict) -> str:
    """Serialize an object to a JSON string.

    Args:
        obj: Any JSON-serializable object.
        **kwargs: Extra keyword arguments forwarded verbatim to ``json.dumps``
            (e.g. ``sort_keys``, ``indent``, ``separators``).

    Returns:
        The JSON string representation of ``obj``.
    """
    return json.dumps(obj, **kwargs)

Expand All @@ -32,7 +32,9 @@ class MicroAgent(Agent):
prompt = ''
agent_definition: dict = {}

def history_to_json(self, history: list[Event], max_events: int = 20, **kwargs):
def history_to_json(
self, history: list[Event], max_events: int = 20, **kwargs: dict
) -> str:
"""
Serialize and simplify history to str format
"""
Expand Down Expand Up @@ -60,7 +62,7 @@ def __init__(self, llm: LLM, config: AgentConfig):
super().__init__(llm, config)
if 'name' not in self.agent_definition:
raise ValueError('Agent definition must contain a name')
self.prompt_template = Environment(loader=BaseLoader).from_string(self.prompt)
self.prompt_template = Environment(loader=BaseLoader()).from_string(self.prompt)
self.delegates = all_microagents.copy()
del self.delegates[self.agent_definition['name']]

Expand All @@ -74,7 +76,7 @@ def step(self, state: State) -> Action:
delegates=self.delegates,
latest_user_message=last_user_message,
)
content = [TextContent(text=prompt)]
content: list[TextContent | ImageContent] = [TextContent(text=prompt)]
if self.llm.vision_is_active() and last_image_urls:
content.append(ImageContent(image_urls=last_image_urls))
message = Message(role='user', content=content)
Expand Down
10 changes: 7 additions & 3 deletions openhands/agenthub/visualbrowsing_agent/visualbrowsing_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ def get_error_prefix(obs: BrowserOutputObservation) -> str:
return f'## Error from previous action:\n{obs.last_browser_action_error}\n'


def create_goal_prompt(goal: str, image_urls: list[str] | None):
def create_goal_prompt(
goal: str, image_urls: list[str] | None
) -> tuple[str, list[str]]:
goal_txt: str = f"""\
# Instructions
Review the current state of the page and all other information to find the best possible next action to accomplish your goal. Your answer will be interpreted and executed by a program, make sure to follow the formatting instructions.
Expand All @@ -52,7 +54,7 @@ def create_observation_prompt(
focused_element: str,
error_prefix: str,
som_screenshot: str | None,
):
) -> tuple[str, str | None]:
txt_observation = f"""
# Observation of current step:
{tabs}{axtree_txt}{focused_element}{error_prefix}
Expand Down Expand Up @@ -273,7 +275,9 @@ def step(self, state: State) -> Action:
observation_txt, som_screenshot = create_observation_prompt(
cur_axtree_txt, tabs, focused_element, error_prefix, set_of_marks
)
human_prompt = [TextContent(type='text', text=goal_txt)]
human_prompt: list[TextContent | ImageContent] = [
TextContent(type='text', text=goal_txt)
]
if len(goal_images) > 0:
human_prompt.append(ImageContent(image_urls=goal_images))
human_prompt.append(TextContent(type='text', text=observation_txt))
Expand Down
Loading

0 comments on commit dbe21a8

Please sign in to comment.