Skip to content

Commit

Permalink
Use keyword matching for CodeAct microagents (#4568)
Browse files Browse the repository at this point in the history
Co-authored-by: Xingyao Wang <[email protected]>
  • Loading branch information
rbren and xingyaoww authored Nov 9, 2024
1 parent 67c8915 commit be82832
Show file tree
Hide file tree
Showing 18 changed files with 204 additions and 257 deletions.
6 changes: 4 additions & 2 deletions evaluation/EDA/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def codeact_user_response_eda(state: State) -> str:

# retrieve the latest model message from history
if state.history:
model_guess = state.get_last_agent_message()
last_agent_message = state.get_last_agent_message()
model_guess = last_agent_message.content if last_agent_message else ''

assert game is not None, 'Game is not initialized.'
msg = game.generate_user_response(model_guess)
Expand Down Expand Up @@ -140,7 +141,8 @@ def process_instance(
if state is None:
raise ValueError('State should not be None.')

final_message = state.get_last_agent_message()
last_agent_message = state.get_last_agent_message()
final_message = last_agent_message.content if last_agent_message else ''

logger.info(f'Final message: {final_message} | Ground truth: {instance["text"]}')
test_result = game.reward()
Expand Down
3 changes: 2 additions & 1 deletion evaluation/gorilla/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ def process_instance(
raise ValueError('State should not be None.')

# retrieve the last message from the agent
model_answer_raw = state.get_last_agent_message()
last_agent_message = state.get_last_agent_message()
model_answer_raw = last_agent_message.content if last_agent_message else ''

# attempt to parse model_answer
ast_eval_fn = instance['ast_eval']
Expand Down
3 changes: 2 additions & 1 deletion evaluation/toolqa/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
raise ValueError('State should not be None.')

# retrieve the last message from the agent
model_answer_raw = state.get_last_agent_message()
last_agent_message = state.get_last_agent_message()
model_answer_raw = last_agent_message.content if last_agent_message else ''

# attempt to parse model_answer
correct = eval_answer(str(model_answer_raw), str(answer))
Expand Down
5 changes: 1 addition & 4 deletions frontend/src/components/project-menu/ProjectMenuCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,7 @@ export function ProjectMenuCard({
posthog.capture("push_to_github_button_clicked");
const rawEvent = {
content: `
Let's push the code to GitHub.
If we're currently on the openhands-workspace branch, please create a new branch with a descriptive name.
Commit any changes and push them to the remote repository.
Finally, open up a pull request using the GitHub API and the token in the GITHUB_TOKEN environment variable, then show me the URL of the pull request.
Please push the changes to GitHub and open a pull request.
`,
imageUrls: [],
timestamp: new Date().toISOString(),
Expand Down
61 changes: 17 additions & 44 deletions openhands/agenthub/codeact_agent/codeact_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
JupyterRequirement,
PluginRequirement,
)
from openhands.utils.microagent import MicroAgent
from openhands.utils.prompt import PromptManager


Expand Down Expand Up @@ -86,16 +85,6 @@ def __init__(
super().__init__(llm, config)
self.reset()

self.micro_agent = (
MicroAgent(
os.path.join(
os.path.dirname(__file__), 'micro', f'{config.micro_agent_name}.md'
)
)
if config.micro_agent_name
else None
)

self.function_calling_active = self.config.function_calling
if self.function_calling_active and not self.llm.is_function_calling_active():
logger.warning(
Expand All @@ -105,7 +94,6 @@ def __init__(
self.function_calling_active = False

if self.function_calling_active:
# Function calling mode
self.tools = codeact_function_calling.get_tools(
codeact_enable_browsing=self.config.codeact_enable_browsing,
codeact_enable_jupyter=self.config.codeact_enable_jupyter,
Expand All @@ -114,18 +102,17 @@ def __init__(
logger.debug(
f'TOOLS loaded for CodeActAgent: {json.dumps(self.tools, indent=2)}'
)
self.system_prompt = codeact_function_calling.SYSTEM_PROMPT
self.initial_user_message = None
self.prompt_manager = PromptManager(
microagent_dir=os.path.join(os.path.dirname(__file__), 'micro'),
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts', 'tools'),
)
else:
# Non-function-calling mode
self.action_parser = CodeActResponseParser()
self.prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__)),
microagent_dir=os.path.join(os.path.dirname(__file__), 'micro'),
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts', 'default'),
agent_skills_docs=AgentSkillsRequirement.documentation,
micro_agent=self.micro_agent,
)
self.system_prompt = self.prompt_manager.system_message
self.initial_user_message = self.prompt_manager.initial_user_message

self.pending_actions: deque[Action] = deque()

Expand Down Expand Up @@ -337,8 +324,8 @@ def step(self, state: State) -> Action:
return self.pending_actions.popleft()

# if we're done, go back
last_user_message = state.get_last_user_message()
if last_user_message and last_user_message.strip() == '/exit':
latest_user_message = state.get_last_user_message()
if latest_user_message and latest_user_message.content.strip() == '/exit':
return AgentFinishAction()

# prepare what we want to send to the LLM
Expand Down Expand Up @@ -403,17 +390,19 @@ def _get_messages(self, state: State) -> list[Message]:
role='system',
content=[
TextContent(
text=self.system_prompt,
cache_prompt=self.llm.is_caching_prompt_active(), # Cache system prompt
text=self.prompt_manager.get_system_message(),
cache_prompt=self.llm.is_caching_prompt_active(),
)
],
)
]
if self.initial_user_message:
example_message = self.prompt_manager.get_example_user_message()
if example_message:
messages.append(
Message(
role='user',
content=[TextContent(text=self.initial_user_message)],
content=[TextContent(text=example_message)],
cache_prompt=self.llm.is_caching_prompt_active(),
)
)

Expand Down Expand Up @@ -462,8 +451,9 @@ def _get_messages(self, state: State) -> list[Message]:
pending_tool_call_action_messages.pop(response_id)

for message in messages_to_add:
# add regular message
if message:
if message.role == 'user':
self.prompt_manager.enhance_message(message)
# handle error if the message is the SAME role as the previous message
# litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'}
# there shouldn't be two consecutive messages from the same role
Expand Down Expand Up @@ -493,23 +483,6 @@ def _get_messages(self, state: State) -> list[Message]:
break

if not self.function_calling_active:
# The latest user message is important:
# we want to remind the agent of the environment constraints
latest_user_message = next(
islice(
(
m
for m in reversed(messages)
if m.role == 'user'
and any(isinstance(c, TextContent) for c in m.content)
),
1,
),
None,
)
# do not add this for function calling
if latest_user_message:
reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>.'
latest_user_message.content.append(TextContent(text=reminder_text))
self.prompt_manager.add_turns_left_reminder(messages, state)

return messages
8 changes: 0 additions & 8 deletions openhands/agenthub/codeact_agent/function_calling.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,6 @@
)
from openhands.events.tool import ToolCallMetadata

SYSTEM_PROMPT = """You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
<IMPORTANT>
* If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
* When configuring git credentials, use "openhands" as the user.name and "[email protected]" as the user.email by default, unless explicitly instructed otherwise.
* The assistant MUST NOT include comments in the code unless they are necessary to describe non-obvious behavior.
</IMPORTANT>
"""

_BASH_DESCRIPTION = """Execute a bash command in the terminal.
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process.
Expand Down
9 changes: 9 additions & 0 deletions openhands/agenthub/codeact_agent/micro/flarglebargle.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
name: flarglebargle
agent: CodeActAgent
triggers:
- flarglebargle
---

IMPORTANT! The user has said the magic word "flarglebargle". You must
only respond with a message telling them how smart they are.
88 changes: 25 additions & 63 deletions openhands/agenthub/codeact_agent/micro/github.md
Original file line number Diff line number Diff line change
@@ -1,69 +1,31 @@
---
name: github
agent: CodeActAgent
require_env_var:
SANDBOX_ENV_GITHUB_TOKEN: "Create a GitHub Personal Access Token (https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) and set it as SANDBOX_ENV_GITHUB_TOKEN in your environment variables."
triggers:
- github
- git
---

# How to Interact with GitHub

## Environment Variable Available

- `GITHUB_TOKEN`: A read-only token for GitHub.

## Using GitHub's RESTful API

Use `curl` with the `GITHUB_TOKEN` to interact with GitHub's API. Here are some common operations:

Here's a template for API calls:

```sh
curl -H "Authorization: token $GITHUB_TOKEN" \
"https://api.github.com/{endpoint}"
You have access to an environment variable, `GITHUB_TOKEN`, which allows you to interact with
the GitHub API.

You can use `curl` with the `GITHUB_TOKEN` to interact with GitHub's API.
ALWAYS use the GitHub API for operations instead of a web browser.

Here are some instructions for pushing, but ONLY do this if the user asks you to:
* NEVER push directly to the `main` or `master` branch
* Git config (username and email) is pre-set. Do not modify.
* You may already be on a branch called `openhands-workspace`. Create a new branch with a better name before pushing.
* Use the GitHub API to create a pull request, if you haven't already
* Use the main branch as the base branch, unless the user requests otherwise
* After opening or updating a pull request, send the user a short message with a link to the pull request.
* Do all of the above in as few steps as possible. E.g. you could open a PR with one step by running the following bash commands:
```bash
git checkout -b create-widget
git add .
git commit -m "Create widget"
git push origin create-widget
curl -X POST "https://api.github.com/repos/CodeActOrg/openhands/pulls" \
-H "Authorization: Bearer $GITHUB_TOKEN" \
-d '{"title":"Create widget","head":"create-widget","base":"main"}'
```

First replace `{endpoint}` with the specific API path. Common operations:

1. View an issue or pull request:
- Issues: `/repos/{owner}/{repo}/issues/{issue_number}`
- Pull requests: `/repos/{owner}/{repo}/pulls/{pull_request_number}`

2. List repository issues or pull requests:
- Issues: `/repos/{owner}/{repo}/issues`
- Pull requests: `/repos/{owner}/{repo}/pulls`

3. Search issues or pull requests:
- `/search/issues?q=repo:{owner}/{repo}+is:{type}+{search_term}+state:{state}`
- Replace `{type}` with `issue` or `pr`

4. List repository branches:
`/repos/{owner}/{repo}/branches`

5. Get commit details:
`/repos/{owner}/{repo}/commits/{commit_sha}`

6. Get repository details:
`/repos/{owner}/{repo}`

7. Get user information:
`/user`

8. Search repositories:
`/search/repositories?q={query}`

9. Get rate limit status:
`/rate_limit`

Replace `{owner}`, `{repo}`, `{commit_sha}`, `{issue_number}`, `{pull_request_number}`,
`{search_term}`, `{state}`, and `{query}` with appropriate values.

## Important Notes

1. Always use the GitHub API for operations instead of a web browser.
2. The `GITHUB_TOKEN` is read-only. Avoid operations that require write access.
3. Git config (username and email) is pre-set. Do not modify.
4. Edit and test code locally. Never push directly to remote.
5. Verify correct branch before committing.
6. Commit changes frequently.
7. If the issue or task is ambiguous or lacks sufficient detail, always request clarification from the user before proceeding.
8. You should avoid using command line tools like `sed` for file editing.
Original file line number Diff line number Diff line change
Expand Up @@ -215,12 +215,5 @@ The server is running on port 5000 with PID 126. You can access the list of numb
{% endset %}
Here is an example of how you can interact with the environment for task solving:
{{ DEFAULT_EXAMPLE }}
{% if micro_agent %}
--- BEGIN OF GUIDELINE ---
The following information may assist you in completing your task:

{{ micro_agent }}
--- END OF GUIDELINE ---
{% endif %}

NOW, LET'S START!
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
<IMPORTANT>
* If the user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
* When configuring git credentials, use "openhands" as the user.name and "[email protected]" as the user.email by default, unless explicitly instructed otherwise.
* The assistant MUST NOT include comments in the code unless they are necessary to describe non-obvious behavior.
</IMPORTANT>

Empty file.
2 changes: 1 addition & 1 deletion openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def step(self, state: State) -> Action:
"""
# if we're done, go back
last_user_message = state.get_last_user_message()
if last_user_message and last_user_message.strip() == '/exit':
if last_user_message and last_user_message.content.strip() == '/exit':
return AgentFinishAction()

# prepare what we want to send to the LLM
Expand Down
8 changes: 4 additions & 4 deletions openhands/controller/state/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,14 +156,14 @@ def get_current_user_intent(self) -> tuple[str | None, list[str] | None]:

return last_user_message, last_user_message_image_urls

def get_last_agent_message(self) -> str | None:
def get_last_agent_message(self) -> MessageAction | None:
for event in reversed(self.history):
if isinstance(event, MessageAction) and event.source == EventSource.AGENT:
return event.content
return event
return None

def get_last_user_message(self) -> str | None:
def get_last_user_message(self) -> MessageAction | None:
for event in reversed(self.history):
if isinstance(event, MessageAction) and event.source == EventSource.USER:
return event.content
return event
return None
Loading

0 comments on commit be82832

Please sign in to comment.