Skip to content

Commit

Permalink
Fix browsing actions to be more robust (#4226)
Browse files Browse the repository at this point in the history
  • Loading branch information
enyst authored Oct 7, 2024
1 parent 09243eb commit 6b1f23a
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 4 deletions.
27 changes: 23 additions & 4 deletions agenthub/browsing_agent/response_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,36 @@ def check_condition(self, action_str: str) -> bool:
return True

def parse(self, action_str: str) -> Action:
    """Split a raw LLM reply into a thought and a block of browser actions.

    The reply may contain a free-text thought followed by a fenced block of
    actions, or only the actions. According to the notes left with this
    change, ``BrowsingResponseParser.parse_response`` appends ``)```` plus a
    closing fence to the reply, so two shapes normally arrive here.

    Both a thought and actions::

        Based on the current state of the page ... the next action is:
        ```
        goto('https://www.whitehouse.gov/about-the-white-house/presidents/')```

    Actions only::

        goto('https://www.whitehouse.gov/about-the-white-house/presidents/')```

    Args:
        action_str: The reply text to split.

    Returns:
        A ``BrowseInteractiveAction`` carrying the browser actions, the
        thought (empty when the reply held only actions) and the text of
        the last ``send_msg_to_user(...)`` call found (empty when none).

    Raises:
        SyntaxError: If a line containing ``send_msg_to_user(`` is not
            valid Python.
    """
    parts = action_str.split('```')

    # A reply with no fence at all yields a single part; treat it like the
    # "actions only" shape instead of failing on parts[1].
    fenced = parts[1].strip() if len(parts) > 1 else ''
    if fenced:
        # Text before the fence is the thought, text inside is the actions.
        browser_actions = fenced
        thought = parts[0].strip()
    else:
        # Nothing inside the fence: the leading text is the actions.
        browser_actions = parts[0].strip()
        thought = ''

    # If the LLM wants to talk to the user, extract the message. When
    # several lines qualify, the last one wins.
    msg_content = ''
    for sub_action in browser_actions.split('\n'):
        if 'send_msg_to_user(' in sub_action:
            # Strip first so an indented line does not trip the parser.
            tree = ast.parse(sub_action.strip())
            args = tree.body[0].value.args  # type: ignore
            msg_content = args[0].value

    return BrowseInteractiveAction(
        browser_actions=browser_actions,
        thought=thought,
        browsergym_send_msg_to_user=msg_content,
    )
54 changes: 54 additions & 0 deletions tests/unit/test_browsing_agent_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import pytest

from agenthub.browsing_agent.response_parser import (
BrowseInteractiveAction,
BrowsingResponseParser,
)


@pytest.mark.parametrize(
    ('action_str', 'expected'),
    [
        ("click('81'", "click('81')```"),
        (
            '"We need to search the internet\n```goto("google.com")',
            '"We need to search the internet\n```goto("google.com"))```',
        ),
        ("```click('81'", "```click('81')```"),
        ("click('81')", "click('81'))```"),
    ],
)
def test_parse_response(action_str: str, expected: str) -> None:
    """Check ``BrowsingResponseParser.parse_response`` on raw LLM content.

    Each case wraps ``action_str`` in a minimal completion-style payload and
    expects the returned string to be the content with ``)```` and a closing
    fence appended.
    """
    llm_reply = {'choices': [{'message': {'content': action_str}}]}
    assert BrowsingResponseParser().parse_response(llm_reply) == expected


@pytest.mark.parametrize(
    (
        'action_str',
        'expected_browser_actions',
        'expected_thought',
        'expected_msg_content',
    ),
    [
        ("click('81')```", "click('81')", '', ''),
        ("```click('81')```", "click('81')", '', ''),
        (
            "We need to perform a click\n```click('81')",
            "click('81')",
            'We need to perform a click',
            '',
        ),
    ],
)
def test_parse_action(
    action_str: str,
    expected_browser_actions: str,
    expected_thought: str,
    expected_msg_content: str,
) -> None:
    """Check ``BrowsingResponseParser.parse_action`` on fenced action strings.

    Covers actions before the fence, actions inside the fence, and a thought
    followed by fenced actions; none of the cases sends a user message.
    """
    parsed = BrowsingResponseParser().parse_action(action_str)

    # The result must be the interactive-browse action type.
    assert isinstance(parsed, BrowseInteractiveAction)
    # Each field is compared separately against its expectation.
    assert parsed.browser_actions == expected_browser_actions
    assert parsed.thought == expected_thought
    assert parsed.browsergym_send_msg_to_user == expected_msg_content

0 comments on commit 6b1f23a

Please sign in to comment.