Skip to content

Commit

Permalink
Added LLM-check for visual browsing tool usage. (not supported for GPT-…
Browse files Browse the repository at this point in the history
…4o models)
  • Loading branch information
adityasoni9998 committed Jan 26, 2025
1 parent f45e7ec commit 9b742c5
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
8 changes: 6 additions & 2 deletions openhands/agenthub/codeact_agent/codeact_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,10 +307,14 @@ def get_observation_message(
text = obs.get_agent_obs_text()
if (
obs.trigger_by_action == ActionType.BROWSE_INTERACTIVE
and self.config.codeact_enable_visual_browsing
and self.llm.vision_is_active()
and obs.set_of_marks is not None
and len(obs.set_of_marks) > 0
and self.config.codeact_enable_visual_browsing
and self.llm.vision_is_active()
and (
self.mock_function_calling
or self.llm.is_visual_browser_tool_active()
)
):
text += 'Image: Current webpage screenshot (Note that only visible portion of webpage is present in the screenshot. You may need to scroll to view the remaining portion of the web-page.)\n'
message = Message(
Expand Down
19 changes: 19 additions & 0 deletions openhands/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@
'o1-2024-12-17',
]

# Models that support the visual browsing tool.
# This flag is needed since gpt-4o and gpt-4o-mini do not allow passing
# image_urls with role='tool'.
# Consulted by is_visual_browser_tool_active(), which accepts an exact match,
# a match on the part of the model name after the last '/', or any entry
# occurring as a substring of the configured model name.
VISUAL_BROWSING_TOOL_SUPPORTED_MODELS = [
'claude-3-5-sonnet',
'claude-3-5-sonnet-20240620',
'claude-3-5-sonnet-20241022',
'o1-2024-12-17',
]


# NOTE(review): presumably the models that accept a reasoning-effort setting;
# the code that consults this list is not visible here — confirm.
REASONING_EFFORT_SUPPORTED_MODELS = [
'o1-2024-12-17',
]
Expand Down Expand Up @@ -472,6 +482,15 @@ def is_function_calling_active(self) -> bool:
)
return supports_fn_call

def is_visual_browser_tool_active(self) -> bool:
    """Report whether the configured model is a visual-browsing-tool model.

    The configured model name (``self.config.model``) is checked against
    ``VISUAL_BROWSING_TOOL_SUPPORTED_MODELS`` in three ways, in order:

    1. the full name is an entry of the list;
    2. the segment after the last ``'/'`` is an entry of the list;
    3. some entry of the list occurs as a substring of the full name.

    Returns:
        True if any of the checks above succeeds, False otherwise.
    """
    model_name = self.config.model

    # Exact match on the full configured name.
    if model_name in VISUAL_BROWSING_TOOL_SUPPORTED_MODELS:
        return True

    # Match on the last '/'-separated segment of the name.
    base_name = model_name.split('/')[-1]
    if base_name in VISUAL_BROWSING_TOOL_SUPPORTED_MODELS:
        return True

    # Fallback: a supported entry appearing anywhere inside the name.
    for supported in VISUAL_BROWSING_TOOL_SUPPORTED_MODELS:
        if supported in model_name:
            return True
    return False

def _post_completion(self, response: ModelResponse) -> float:
"""Post-process the completion response.
Expand Down

0 comments on commit 9b742c5

Please sign in to comment.