All-Hands-AI · AlexCuadron · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024
diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py
@@ -42,11 +42,13 @@
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
     'CodeActAgent': codeact_user_response,
     'CodeActSWEAgent': codeact_user_response,
+    'SupervisorAgent': codeact_user_response,  # same fake-user responder as the CodeAct agents
 }
 
 AGENT_CLS_TO_INST_SUFFIX = {
     'CodeActAgent': 'When you think you have fixed the issue through code changes, please run the following command: <execute_bash> exit </execute_bash>.\n',
     'CodeActSWEAgent': 'When you think you have fixed the issue through code changes, please run the following command: <execute_bash> exit </execute_bash>.\n',
+    'SupervisorAgent': 'When you think you have fixed the issue, please run the following command: <execute_bash> exit </execute_bash>.\n',  # CodeAct suffix without "through code changes"
 }
 
 

diff --git a/openhands/agenthub/__init__.py b/openhands/agenthub/__init__.py
@@ -14,6 +14,7 @@
     delegator_agent,
     dummy_agent,
     planner_agent,
+    supervisor_agent,
 )
 
 __all__ = [
@@ -23,6 +24,7 @@
     'delegator_agent',
     'dummy_agent',
     'browsing_agent',
+    'supervisor_agent',
 ]
 
 for agent in all_microagents.values():

diff --git a/openhands/agenthub/supervisor_agent/__init__.py b/openhands/agenthub/supervisor_agent/__init__.py
@@ -0,0 +1,4 @@
+from openhands.agenthub.supervisor_agent.agent import SupervisorAgent
+from openhands.controller.agent import Agent
+
+Agent.register('SupervisorAgent', SupervisorAgent)  # registered under its own class name
diff --git a/openhands/agenthub/supervisor_agent/agent.py b/openhands/agenthub/supervisor_agent/agent.py
@@ -0,0 +1,187 @@
+import copy
+import logging
+from typing import Dict, List
+
+from openhands.agenthub.supervisor_agent.prompt import (
+    adjust_milestones,
+    get_initial_prompt,
+)
+from openhands.controller.agent import Agent
+from openhands.controller.state.state import State
+from openhands.core.config import AgentConfig
+from openhands.core.message import Message, TextContent
+from openhands.core.utils import json
+from openhands.events.action import Action, AgentDelegateAction, AgentFinishAction
+from openhands.events.action.agent import AgentRejectAction
+from openhands.events.observation.delegate import AgentDelegateObservation
+from openhands.llm.llm import LLM
+
+
+class SupervisorAgent(Agent):
+    """Break a task into sub-goals and delegate them one at a time.
+
+    The LLM turns the user task into an ordered plan; each sub-goal is handed
+    to a 'ManagerAgent' delegate. When the delegate rejects a sub-goal, the LLM
+    is asked to adjust the plan and the sub-goal at the same index is retried.
+    """
+
+    VERSION = '1.0'
+
+    # '' until the first delegation, 'manager' afterwards.
+    current_delegate: str = ''
+    # Plan produced by the LLM; rebound per instance in __init__.
+    sub_goals: List[Dict[str, str]] = []
+    # Index into sub_goals of the sub-goal currently being worked on.
+    current_goal_index: int = 0
+    # Accumulated 'Milestone/Summary' text of the finished sub-goals.
+    summary: str = ''
+    # Original user task, forwarded to the adjustment prompt.
+    task: str = ''
+
+    def __init__(self, llm: LLM, config: AgentConfig):
+        """Initialize the Supervisor Agent with an LLM
+
+        Parameters:
+        - llm (LLM): The llm to be used by this agent
+        - config (AgentConfig): The configuration forwarded to the base Agent
+        """
+        super().__init__(llm, config)
+        # Only obtain a logger here. Configuring logging is the application's
+        # job: logging.basicConfig() would touch the root logger for everyone.
+        self.logger = logging.getLogger(__name__)
+        # Rebind so instances never share the mutable class-level default list.
+        self.sub_goals = []
+
+    def step(self, state: State) -> Action:
+        """Plans on the first call, then advances or repairs the plan.
+
+        Parameters:
+        - state (State): The current state given the previous actions and observations
+
+        Returns:
+        - AgentRejectAction: If the LLM produced no usable plan
+        - AgentDelegateAction: The next sub-goal, delegated to 'ManagerAgent'
+        - AgentFinishAction: Once every sub-goal has been processed
+
+        Raises:
+        - Exception: If the last observation is not an AgentDelegateObservation
+        """
+        self.logger.debug('Starting step with state: %s', state)
+        if not self.sub_goals:
+            self.logger.debug('No sub-goals found, breaking down task.')
+            task, _ = state.get_current_user_intent()
+            # Remember the task: create_adjustment_prompt() reads self.task.
+            self.task = task or ''
+            self.sub_goals = self.break_down_task(task)
+            self.logger.debug('Sub-goals: %s', self.sub_goals)
+            # If the LLM returns an empty list, reject the action
+            if not self.sub_goals:
+                return AgentRejectAction()
+
+        if self.current_delegate == '':
+            self.logger.debug("Current delegate is empty, assigning 'manager'.")
+            # First subgoal as the current delegate is empty
+            self.current_delegate = 'manager'
+            return AgentDelegateAction(
+                agent='ManagerAgent',
+                inputs={'task': json.dumps(self.sub_goals[self.current_goal_index])},
+            )
+
+        if self.current_delegate == 'manager':
+            self.logger.debug("Current delegate is 'manager'.")
+            last_observation = state.history.get_last_observation()
+
+            if not isinstance(last_observation, AgentDelegateObservation):
+                raise Exception('Last observation is not an AgentDelegateObservation')
+
+            if last_observation.outputs.get('action', '') == 'reject':
+                return self._replan_after_rejection(last_observation)
+
+            # Append the current milestone and summary to the agent's summary
+            summary = last_observation.outputs.get('summary', '')
+            self.append_to_summary(
+                self.sub_goals[self.current_goal_index].get('task', ''), summary
+            )
+            self.current_goal_index += 1
+            if self.current_goal_index < len(self.sub_goals):
+                return self._delegate_current_goal()
+
+        return AgentFinishAction()
+
+    def _replan_after_rejection(self, observation: AgentDelegateObservation) -> Action:
+        """Asks the LLM to adjust the plan and retries the current sub-goal."""
+        self.logger.debug('No summary found, creating adjustment prompt.')
+        # NOTE(review): the reason is assumed to arrive either as an attribute
+        # or inside `outputs` -- confirm against what ManagerAgent emits.
+        reason = getattr(observation, 'reason', '')
+        if not reason:
+            reason = observation.outputs.get('reason', '')
+        prompt = self.create_adjustment_prompt(str(reason or ''))
+        # Get the sub-goals from the language model using the generated prompt
+        self.sub_goals = self.get_sub_goals_from_llm(prompt)
+        # The adjusted plan can be shorter than the index being retried.
+        if self.current_goal_index >= len(self.sub_goals):
+            return AgentRejectAction()
+        return self._delegate_current_goal()
+
+    def _delegate_current_goal(self) -> Action:
+        """Delegates the current sub-goal together with the summary so far."""
+        # Work on a copy so the summary is not written back into the plan.
+        current_task = copy.deepcopy(self.sub_goals[self.current_goal_index])
+        current_task['summary'] = f'Summary from previous milestones: {self.summary}'
+        return AgentDelegateAction(
+            agent='ManagerAgent', inputs={'task': json.dumps(current_task)}
+        )
+
+    def break_down_task(self, task: str) -> List[Dict[str, str]]:
+        """Asks the LLM for the initial list of sub-goals for `task`."""
+        # Generate the initial prompt for breaking down the task
+        prompt = get_initial_prompt(task)
+        # Get the sub-goals from the language model using the generated prompt
+        return self.get_sub_goals_from_llm(prompt)
+
+    def should_interrupt(self, observation) -> bool:
+        """Placeholder: interruption is not implemented, always returns False."""
+        return False  # Placeholder
+
+    def summarize_history(self, history) -> str:
+        """Placeholder: summarization is not implemented, returns a constant."""
+        return 'summary'  # Placeholder
+
+    def provide_guidance(self, state: State) -> Action:
+        """Placeholder: guidance is not implemented, always finishes."""
+        return AgentFinishAction()  # Placeholder
+
+    def create_adjustment_prompt(self, reason: str) -> str:
+        """Builds the prompt asking the LLM to adjust the plan after a rejection."""
+        return adjust_milestones(
+            self.sub_goals,
+            self.sub_goals[self.current_goal_index],
+            reason,
+            self.summary,
+            self.task,
+        )
+
+    def get_sub_goals_from_llm(self, prompt: str) -> List[Dict[str, str]]:
+        """Sends `prompt` to the LLM and parses the reply as a list of sub-goals.
+
+        Returns an empty list when the reply does not decode to a list of
+        dicts, so callers can treat every unusable reply as "no plan".
+        """
+        content = [TextContent(text=prompt)]
+        message = Message(role='user', content=content)
+        response = self.llm.completion(
+            messages=self.llm.format_messages_for_llm(message)
+        )
+        sub_goals = json.loads(response['choices'][0]['message']['content'])
+        # LLM output is untrusted: step() indexes the result and reads dict keys.
+        if not isinstance(sub_goals, list) or not all(
+            isinstance(goal, dict) for goal in sub_goals
+        ):
+            self.logger.warning('LLM did not return a list of sub-goals.')
+            return []
+        return sub_goals
+
+    def append_to_summary(self, milestone_name: str, summary: str):
+        """Appends the milestone name and summary to the agent's summary state."""
+        self.summary += f'Milestone: {milestone_name}\nSummary: {summary}\n\n'
diff --git a/openhands/agenthub/supervisor_agent/prompt.py b/openhands/agenthub/supervisor_agent/prompt.py
@@ -0,0 +1,179 @@
+from typing import Dict, List
+
+from openhands.core.utils import json
+
+HISTORY_SIZE = 20  # NOTE(review): not referenced anywhere in this module -- confirm it is needed
+
+# Preamble shared by both prompts below; %-formatted with a 'task' mapping key.
+general_description = """
+You are a strategic manager AI in a software development team. You MUST think CAREFULLY how to complete the task assigned to you.
+You MUST think on a HIGHER LEVEL view always.
+
+You've been given the following task:
+%(task)s
+
+As a strategic manager, you create a plan with different sub-tasks and delegate the tasks to your team.
+At your disposal, you have a team of agents who will complete tasks for you. However, those agents only focus on the details.
+They CANNOT see the big picture.
+They need you to define self-contained tasks, that are easy for them to understand and complete.
+
+"""
+
+# Planning prompt appended to general_description; the format template below must stay valid JSON.
+initial_prompt = """
+## Plan
+Your goal is to create a high-level plan, a list of subtasks that will bring you closer to the completion of the task. Remember to think
+CAREFULLY about how to complete the task. With each subtask, you MUST provide a "suggested approach".
+Think, step by step, how you would complete the subtask. Then provide that as the suggested approach.
+Try to be as detailed as possible, your goal is to HELP the agent finish the subtask as soon as possible.
+
+You MAY provide a list of "important details" for each subtask. These are details that the agent MUST consider when completing the subtask.
+
+ONLY generate tasks that are necessary to complete the task.
+
+You MUST ONLY generate a list of JSONs:
+
+[
+    {
+      "task": "<Task 1 name>",
+      "suggested_approach": "<suggested approach>",
+      "important_details": "<important details>"
+    },
+    {
+      "task": "<Task 2 name>",
+      "suggested_approach": "<suggested approach>",
+      "important_details": "<important details>"
+    },
+    {
+      "task": "<Task 3 name>",
+      "suggested_approach": "<suggested approach>",
+      "important_details": "<important details>"
+    }
+]
+
+The tasks MUST be generated in order, they MUST NOT depend on future tasks or previous tasks. They MUST be independent.
+You MUST generate at least 1 task.
+
+For example:
+User prompt:
+
+"
+Enable quiet mode/no-verbose in CLI for use in pre-commit hook There seems to be only an option to increase the level of verbosity when using
+SQLFluff [CLI](https://docs.sqlfluff.com/en/stable/cli.html), not to limit it further. It would be great to have an option to further limit the amount of prints when running
+`sqlfluff fix`, especially in combination with deployment using a pre-commit hook. For example, only print the return status and the number of fixes applied, similar to how it
+is when using `black` in a pre-commit hook: ![image](https://user-images.githubusercontent.com/10177212/140480676-dc98d00b-4383-44f2-bb90-3301a6eedec2.png) This hides the potentially
+long list of fixes that are being applied to the SQL files, which can get quite verbose.
+"
+
+Your response:
+
+[
+    {
+        "task": "Research SQLFluff CLI verbosity options",
+        "suggested_approach": "Investigate the current SQLFluff CLI documentation and source code to understand how verbosity levels are currently implemented. Identify if there are any existing flags or settings that can be adjusted to reduce verbosity.",
+        "important_details": "Focus on the 'fix' command and any related verbosity settings. Document any findings that could be useful for implementing a quiet mode."
+    },
+    {
+        "task": "Design a quiet mode feature for SQLFluff CLI",
+        "suggested_approach": "Based on the research findings, design a new feature that allows users to enable a quiet mode. This mode should minimize output to only essential information such as return status and number of fixes applied.",
+        "important_details": "Ensure the design is compatible with existing CLI options and does not interfere with other functionalities."
+    },
+    {
+        "task": "Implement the quiet mode feature",
+        "suggested_approach": "Modify the SQLFluff CLI codebase to add the new quiet mode feature. Implement the necessary changes in the code to support this feature and ensure it can be activated via a command-line flag.",
+        "important_details": "Write unit tests to verify that the quiet mode works as expected and does not affect other CLI functionalities."
+    },
+    {
+        "task": "Test the quiet mode feature",
+        "suggested_approach": "Conduct thorough testing of the new quiet mode feature in various scenarios, including its use in a pre-commit hook. Ensure that it behaves as expected and provides the desired level of output reduction.",
+        "important_details": "Test with different verbosity levels to ensure compatibility and check for any edge cases that might cause unexpected behavior."
+    },
+    {
+        "task": "Document the new feature",
+        "suggested_approach": "Update the SQLFluff CLI documentation to include information about the new quiet mode feature. Provide examples of how to use it and explain its benefits.",
+        "important_details": "Ensure the documentation is clear and easy to understand for users who may not be familiar with the technical details."
+    }
+]
+"""
+
+adjustment_prompt = """
+
+    This is the current active plan that your subordinates are working on:
+    %(milestones)s
+
+    And this is the current subtask that your subordinates are working on:
+    ## Current subtask
+    subtask: %(milestone_task)s
+    Suggested Approach: %(milestone_suggested_approach)s
+    Important Details: %(milestone_important_details)s
+
+    However, it seems that the current subtask is not being completed successfully.
+    Because of the following reason: %(reason)s
+
+    You have the following contextual information that has been gathered up to this point.
+    This information MIGHT help you adjust the plan:
+    %(summary)s
+
+    ## Task
+    As a strategic manager, you must reflect on the failed subtask and decide on the necessary adjustments. Consider the following:
+
+    1. Analyze the reason for failure and determine if the suggested approach or important details need modification.
+    2. Decide if the failed subtask should be split into smaller, more manageable tasks.
+    3. Consider if new subtasks need to be added to address any gaps in the plan.
+    4. Update the remaining plan to ensure the overall plan remains feasible and effective.
+
+    You MUST NOT change the task you were given.
+
+    You MUST make changes to the current subtask or to the ones AFTER. In NO case you can change the ones BEFORE.
+    Generate ONLY a list of JSONs containing the COMPLETE plan (the subtasks BEFORE included, unchanged), each JSON with the keys "task", "suggested_approach" and "important_details". Do NOT generate any markdown or comments.
+    """
+
+
+def get_initial_prompt(task: str) -> str:
+    """Builds the prompt that asks the LLM for the initial plan.
+
+    Concatenates the general description and the initial prompt, then fills
+    in the task via %-formatting.
+
+    Parameters:
+    - task (str): The task to be broken down into subtasks
+
+    Returns: The formatted prompt
+    """
+    template = general_description + initial_prompt
+    return template % {'task': task}
+
+
+def adjust_milestones(
+    milestones: List[Dict],
+    subtask: Dict[str, str],
+    reason: str,
+    summary: str,
+    task: str,
+) -> str:
+    """Adjusts the milestones based on a failed subtask and its reason.
+
+    Parameters:
+    - milestones (List[Dict]): The current list of milestones.
+    - subtask (Dict): The subtask that was not completed successfully.
+    - reason (str): The reason provided for the failure.
+    - summary (str): A summary of everything up to this point.
+    - task (str): The user's task.
+
+    Returns: A prompt for the strategic manager agent to self-reflect and adjust the milestones.
+    """
+    # The planning prompt makes "important details" optional, so a subtask may
+    # lack keys; fall back to '' instead of raising KeyError while formatting.
+    milestone_task = subtask.get('task', '')
+    milestone_suggested_approach = subtask.get('suggested_approach', '')
+    milestone_important_details = subtask.get('important_details', '')
+
+    return (general_description + adjustment_prompt) % {
+        'milestones': json.dumps(milestones),
+        'reason': reason,
+        'summary': summary,
+        'task': task,
+        'milestone_task': milestone_task,
+        'milestone_suggested_approach': milestone_suggested_approach,
+        'milestone_important_details': milestone_important_details,
+    }