Skip to content

Commit

Permalink
Add Planner Agent again
Browse files Browse the repository at this point in the history
  • Loading branch information
SmartManoj committed Jan 3, 2025
1 parent 2a8df13 commit 282e878
Show file tree
Hide file tree
Showing 17 changed files with 243 additions and 28 deletions.
6 changes: 6 additions & 0 deletions frontend/src/types/action-type.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ enum ActionType {
// Reject a request from user or another agent.
REJECT = "reject",

// Adds a task to the plan.
ADD_TASK = "add_task",

// Updates a task in the plan.
MODIFY_TASK = "modify_task",

// Changes the state of the agent, e.g. to paused or running
CHANGE_AGENT_STATE = "change_agent_state",
}
Expand Down
23 changes: 23 additions & 0 deletions frontend/src/types/core/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,27 @@ export interface BrowseInteractiveAction
};
}

/**
 * Agent-originated event of type "add_task": adds a task to the plan.
 */
export interface AddTaskAction extends OpenHandsActionEvent<"add_task"> {
source: "agent";
timeout: number;
args: {
// Identifier of the task the new task is attached under
// (presumably empty for a root-level task; confirm against the backend).
parent: string;
// Goal text of the task being added.
goal: string;
// Subtasks supplied with the new task; element shape is left untyped here.
subtasks: unknown[];
// NOTE(review): presumably the agent's reasoning for this action; confirm.
thought: string;
};
}

/**
 * Agent-originated event of type "modify_task": updates a task in the plan.
 */
export interface ModifyTaskAction extends OpenHandsActionEvent<"modify_task"> {
source: "agent";
timeout: number;
args: {
// Identifier of the task whose state is being changed.
task_id: string;
// New state for the task (e.g. "in_progress"); the full set of valid
// values is not declared here, so confirm against the backend.
state: string;
// NOTE(review): presumably the agent's reasoning for this action; confirm.
thought: string;
};
}

export interface FileReadAction extends OpenHandsActionEvent<"read"> {
source: "agent";
args: {
Expand Down Expand Up @@ -123,4 +144,6 @@ export type OpenHandsAction =
| FileReadAction
| FileEditAction
| FileWriteAction
| AddTaskAction
| ModifyTaskAction
| RejectAction;
2 changes: 2 additions & 0 deletions frontend/src/types/core/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ export type OpenHandsEventType =
| "browse"
| "browse_interactive"
| "reject"
| "add_task"
| "modify_task"
| "finish"
| "error";

Expand Down
2 changes: 2 additions & 0 deletions openhands/agenthub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
codeact_agent,
delegator_agent,
dummy_agent,
planner_agent,
)

__all__ = [
'codeact_agent',
'planner_agent',
'delegator_agent',
'dummy_agent',
'browsing_agent',
Expand Down
76 changes: 62 additions & 14 deletions openhands/agenthub/dummy_agent/agent.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from typing import TypedDict
from typing import TypedDict, Union

from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.schema import AgentState
from openhands.events.action import (
Action,
AddTaskAction,
AgentFinishAction,
AgentRejectAction,
BrowseInteractiveAction,
Expand All @@ -14,10 +15,10 @@
FileReadAction,
FileWriteAction,
MessageAction,
ModifyTaskAction,
)
from openhands.events.observation import (
AgentStateChangedObservation,
BrowserOutputObservation,
CmdOutputObservation,
FileReadObservation,
FileWriteObservation,
Expand Down Expand Up @@ -48,6 +49,20 @@ class DummyAgent(Agent):
def __init__(self, llm: LLM, config: AgentConfig):
super().__init__(llm, config)
self.steps: list[ActionObs] = [
{
'action': AddTaskAction(
parent='None', goal='check the current directory'
),
'observations': [],
},
{
'action': AddTaskAction(parent='0', goal='run ls'),
'observations': [],
},
{
'action': ModifyTaskAction(task_id='0', state='in_progress'),
'observations': [],
},
{
'action': MessageAction('Time to get started!'),
'observations': [],
Expand Down Expand Up @@ -90,25 +105,15 @@ def __init__(self, llm: LLM, config: AgentConfig):
{
'action': BrowseURLAction(url='https://google.com'),
'observations': [
BrowserOutputObservation(
'<html><body>Simulated Google page</body></html>',
url='https://google.com',
screenshot='',
trigger_by_action='',
),
# BrowserOutputObservation('<html><body>Simulated Google page</body></html>',url='https://google.com',screenshot=''),
],
},
{
'action': BrowseInteractiveAction(
browser_actions='goto("https://google.com")'
),
'observations': [
BrowserOutputObservation(
'<html><body>Simulated Google page after interaction</body></html>',
url='https://google.com',
screenshot='',
trigger_by_action='',
),
# BrowserOutputObservation('<html><body>Simulated Google page after interaction</body></html>',url='https://google.com',screenshot=''),
],
},
{
Expand All @@ -130,6 +135,30 @@ def step(self, state: State) -> Action:
current_step = self.steps[state.iteration]
action = current_step['action']

# If the action is AddTaskAction or ModifyTaskAction, update the parent ID or task_id
if isinstance(action, AddTaskAction):
if action.parent == 'None':
action.parent = '' # Root task has no parent
elif action.parent == '0':
action.parent = state.root_task.id
elif action.parent.startswith('0.'):
action.parent = f'{state.root_task.id}{action.parent[1:]}'
elif isinstance(action, ModifyTaskAction):
if action.task_id == '0':
action.task_id = state.root_task.id
elif action.task_id.startswith('0.'):
action.task_id = f'{state.root_task.id}{action.task_id[1:]}'
# Ensure the task_id doesn't start with a dot
if action.task_id.startswith('.'):
action.task_id = action.task_id[1:]
elif isinstance(action, (BrowseURLAction, BrowseInteractiveAction)):
try:
return self.simulate_browser_action(action)
except (
Exception
): # This could be a specific exception for browser unavailability
return self.handle_browser_unavailable(action)

if state.iteration > 0:
prev_step = self.steps[state.iteration - 1]

Expand Down Expand Up @@ -161,3 +190,22 @@ def step(self, state: State) -> Action:
)

return action

def simulate_browser_action(
    self, action: Union[BrowseURLAction, BrowseInteractiveAction]
) -> Action:
    """Stand in for a browser action without touching a real browser.

    No simulation is performed: the action is handed straight to
    ``handle_browser_unavailable`` and that method's result is returned.

    Parameters:
    - action: The browse action that was requested

    Returns:
    - Action: Whatever ``handle_browser_unavailable`` produces for ``action``
    """
    fallback = self.handle_browser_unavailable(action)
    return fallback

def handle_browser_unavailable(
    self, action: Union[BrowseURLAction, BrowseInteractiveAction]
) -> Action:
    """Build a message telling the user that browsing is not available.

    The message always starts with a fixed notice; a suffix naming the
    URL or the interactive browser commands is appended depending on the
    concrete type of ``action``.

    Parameters:
    - action: The browse action that could not be carried out

    Returns:
    - MessageAction: A message whose content describes the refusal
    """
    notice = 'Browser actions are not available in the DummyAgent environment.'

    # Pick the suffix matching the kind of browse action; any other type
    # gets the bare notice.
    if isinstance(action, BrowseURLAction):
        detail = f' Unable to browse URL: {action.url}'
    elif isinstance(action, BrowseInteractiveAction):
        detail = (
            f' Unable to perform interactive browsing: {action.browser_actions}'
        )
    else:
        detail = ''

    return MessageAction(content=notice + detail)
4 changes: 4 additions & 0 deletions openhands/agenthub/planner_agent/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from openhands.agenthub.planner_agent.agent import PlannerAgent
from openhands.controller.agent import Agent

# Register the PlannerAgent class with the Agent base class under the name
# 'PlannerAgent' (runs as an import-time side effect of this package).
Agent.register('PlannerAgent', PlannerAgent)
53 changes: 53 additions & 0 deletions openhands/agenthub/planner_agent/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from openhands.agenthub.planner_agent.prompt import get_prompt_and_images
from openhands.agenthub.planner_agent.response_parser import PlannerResponseParser
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.message import ImageContent, Message, TextContent
from openhands.events.action import Action, AgentFinishAction
from openhands.llm.llm import LLM


class PlannerAgent(Agent):
    """Agent that prompts an LLM to create and follow a long-term plan.

    The planner agent utilizes a special prompting strategy to create long term plans for solving problems.
    The agent is given its previous action-observation pairs, current task, and hint based on last action taken at every step.
    """

    # The docstring must be the first statement in the class body; placed
    # after this assignment it would be a discarded string expression and
    # ``PlannerAgent.__doc__`` would be ``None``.
    VERSION = '1.0'
    response_parser = PlannerResponseParser()

    def __init__(self, llm: LLM, config: AgentConfig):
        """Initialize the Planner Agent with an LLM.

        Parameters:
        - llm (LLM): The llm to be used by this agent
        - config (AgentConfig): Agent configuration, forwarded unchanged to the base class
        """
        super().__init__(llm, config)

    def step(self, state: State) -> Action:
        """Return the next action for the current state.

        If the root task has reached a terminal state the agent finishes.
        Otherwise a plan prompt is built from the state, sent to the model,
        and the model's reply is parsed into the next action.

        Parameters:
        - state (State): The current state given the previous actions and observations

        Returns:
        - AgentFinishAction: If the root task state is 'completed', 'verified', or 'abandoned'
        - Action: The next action to take based on llm response
        """
        if state.root_task.state in ('completed', 'verified', 'abandoned'):
            return AgentFinishAction()

        prompt, image_urls = get_prompt_and_images(
            state, self.llm.config.max_message_chars
        )
        content = [TextContent(text=prompt)]
        # Images are attached only when the LLM reports vision support and
        # the prompt builder actually returned some.
        if self.llm.vision_is_active() and image_urls:
            content.append(ImageContent(image_urls=image_urls))
        message = Message(role='user', content=content)
        resp = self.llm.completion(messages=self.llm.format_messages_for_llm(message))
        return self.response_parser.parse(resp)
37 changes: 37 additions & 0 deletions openhands/agenthub/planner_agent/response_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from openhands.controller.action_parser import ResponseParser
from openhands.core.utils import json
from openhands.events.action import (
Action,
)
from openhands.events.serialization.action import action_from_dict


class PlannerResponseParser(ResponseParser):
    """Turns a planner LLM completion into an ``Action``."""

    def __init__(self):
        super().__init__()

    def parse(self, response: str) -> Action:
        """Extract the action text from ``response`` and parse it.

        Parameters:
        - response: The completion returned by the LLM
        Returns:
        - Action: The action described by the completion
        """
        return self.parse_action(self.parse_response(response))

    def parse_response(self, response) -> str:
        """Return the message content of the first choice in ``response``."""
        first_choice = response['choices'][0]
        return first_choice['message']['content']

    def parse_action(self, action_str: str) -> Action:
        """Parses a JSON string into an action

        Parameters:
        - action_str (str): The JSON text to be parsed
        Returns:
        - Action: The action built from the decoded JSON
        """
        payload = json.loads(action_str)

        # The model sometimes emits a 'content' key; move its value under
        # 'contents' before building the action.
        if 'content' in payload:
            payload['contents'] = payload.pop('content')

        return action_from_dict(payload)
9 changes: 8 additions & 1 deletion openhands/controller/agent_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from openhands.events.action import (
Action,
ActionConfirmationStatus,
AddTaskAction,
AgentDelegateAction,
AgentFinishAction,
AgentRejectAction,
Expand All @@ -33,6 +34,7 @@
CmdRunAction,
IPythonRunCellAction,
MessageAction,
ModifyTaskAction,
NullAction,
RegenerateAction,
)
Expand Down Expand Up @@ -278,7 +280,12 @@ async def _handle_action(self, action: Action) -> None:
await self._handle_message_action(action)
elif isinstance(action, AgentDelegateAction):
await self.start_delegate(action)

elif isinstance(action, AddTaskAction):
self.state.root_task.add_subtask(
action.parent, action.goal, action.subtasks
)
elif isinstance(action, ModifyTaskAction):
self.state.root_task.set_subtask_state(action.task_id, action.state)
elif isinstance(action, AgentFinishAction):
self.state.outputs = action.outputs
self.state.metrics.merge(self.state.local_metrics)
Expand Down
4 changes: 4 additions & 0 deletions openhands/core/schema/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ class ActionTypeSchema(BaseModel):

SUMMARIZE: str = Field(default='summarize')

ADD_TASK: str = Field(default='add_task')

MODIFY_TASK: str = Field(default='modify_task')

PAUSE: str = Field(default='pause')
"""Pauses the task.
"""
Expand Down
2 changes: 2 additions & 0 deletions openhands/events/action/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
'AgentRejectAction',
'AgentDelegateAction',
'AgentSummarizeAction',
'AddTaskAction',
'ModifyTaskAction',
'ChangeAgentStateAction',
'IPythonRunCellAction',
'MessageAction',
Expand Down
2 changes: 2 additions & 0 deletions openhands/events/serialization/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
AgentFinishAction,
AgentRejectAction,
AgentDelegateAction,
AddTaskAction,
ModifyTaskAction,
ChangeAgentStateAction,
MessageAction,
RegenerateAction,
Expand Down
1 change: 1 addition & 0 deletions openhands/server/mock/listen.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def read_llm_models():
def read_llm_agents():
    """Return the list of agent names offered by the mock server."""
    agent_names = ['CodeActAgent', 'PlannerAgent']
    return agent_names


Expand Down
Loading

0 comments on commit 282e878

Please sign in to comment.