Output handler max iterations (#61)

* add max_iterations parameter for output_handler
* add tests for output handler max iterations
* add max_iterations param for CoderOutputHandler in "Advanced output handling.ipynb"
* add MockTool for agent tests
* replace DuckDuckGoSearchRun with MockTool
* update OutputHandlerMaxIterationsExceeded
* minor fixes
* fix tests

---------

Co-authored-by: User <[email protected]>
Co-authored-by: whimo <[email protected]>
ShoggothAI · Jul 7, 2024 · a4c2fdd · a4c2fdd
1 parent 40cbf99
commit a4c2fdd
Show file tree

Hide file tree

Showing 43 changed files with 173 additions and 60 deletions.
diff --git a/examples/Advanced output handling.ipynb b/examples/Advanced output handling.ipynb
@@ -104,7 +104,7 @@
     "coder = ReActToolCallingAgent(\n",
     "    name=\"coder\",\n",
     "    tools=[PythonREPLTool()],\n",
-    "    output_handler=CoderOutputHandler(),\n",
+    "    output_handler=CoderOutputHandler(max_iterations=3),\n",
     "    verbose=True,\n",
     ")\n",
     "\n",

diff --git a/motleycrew/agents/output_handler.py b/motleycrew/agents/output_handler.py
@@ -5,6 +5,7 @@
 
 from motleycrew.agents.abstract_parent import MotleyAgentAbstractParent
 from motleycrew.common.exceptions import InvalidOutput
+from motleycrew.common import Defaults
 from motleycrew.tools import MotleyTool
 
 
@@ -22,7 +23,16 @@ class MotleyOutputHandler(MotleyTool, ABC):
     _exceptions_to_handle: tuple[Exception] = (InvalidOutput,)
     """Exceptions that should be returned to the agent when raised in the `handle_output` method."""
 
-    def __init__(self):
+    def __init__(self, max_iterations: int = Defaults.DEFAULT_OUTPUT_HANDLER_MAX_ITERATIONS):
+        """Initialize the output handler tool.
+
+        Args:
+            max_iterations (int): Maximum number of iterations to run the output handler.
+                If an exception is raised in the `handle_output` method, the output handler will return
+                the exception to the agent unless the number of iterations exceeds `max_iterations`,
+                in which case the output handler will raise OutputHandlerMaxIterationsExceeded.
+        """
+        self.max_iterations = max_iterations
         langchain_tool = self._create_langchain_tool()
         super().__init__(langchain_tool)
 

diff --git a/motleycrew/agents/parent.py b/motleycrew/agents/parent.py
@@ -12,11 +12,12 @@
 
 from motleycrew.agents.abstract_parent import MotleyAgentAbstractParent
 from motleycrew.common import MotleyAgentFactory, MotleySupportedTool
-from motleycrew.common import logger
+from motleycrew.common import logger, Defaults
 from motleycrew.common.exceptions import (
     AgentNotMaterialized,
     CannotModifyMaterializedAgent,
     InvalidOutput,
+    OutputHandlerMaxIterationsExceeded,
 )
 from motleycrew.tools import MotleyTool
 
@@ -131,18 +132,32 @@ def _prepare_output_handler(self) -> Optional[MotleyTool]:
         if isinstance(self.output_handler, MotleyOutputHandler):
             exceptions_to_handle = self.output_handler.exceptions_to_handle
             description = self.output_handler.description
+            max_iterations = self.output_handler.max_iterations
+
         else:
             exceptions_to_handle = (InvalidOutput,)
             description = self.output_handler.description or f"Output handler"
             assert isinstance(description, str)
             description += "\n ONLY RETURN THE FINAL RESULT USING THIS TOOL!"
+            max_iterations = Defaults.DEFAULT_OUTPUT_HANDLER_MAX_ITERATIONS
+
+        iteration = 0
 
         def handle_agent_output(*args, **kwargs):
             assert self.output_handler
+            nonlocal iteration
+
             try:
+                iteration += 1
                 output = self.output_handler._run(*args, **kwargs)
             except exceptions_to_handle as exc:
-                return f"{exc.__class__.__name__}: {str(exc)}"
+                if iteration <= max_iterations:
+                    return f"{exc.__class__.__name__}: {str(exc)}"
+                raise OutputHandlerMaxIterationsExceeded(
+                    last_call_args=args,
+                    last_call_kwargs=kwargs,
+                    last_exception=exc,
+                )
 
             raise DirectOutput(output)
 

diff --git a/motleycrew/common/defaults.py b/motleycrew/common/defaults.py
@@ -1,10 +1,11 @@
 """ Module description """
+
 from motleycrew.common import LLMFamily
 from motleycrew.common import GraphStoreType
 
 
 class Defaults:
-    """ Description
+    """Description
 
     Attributes:
         DEFAULT_LLM_FAMILY (str):
@@ -15,8 +16,10 @@ class Defaults:
         MODULE_INSTALL_COMMANDS (dict):
         DEFAULT_NUM_THREADS (int):
         DEFAULT_EVENT_LOOP_SLEEP (int):
+        DEFAULT_OUTPUT_HANDLER_MAX_ITERATIONS (int):
 
     """
+
     DEFAULT_LLM_FAMILY = LLMFamily.OPENAI
     DEFAULT_LLM_NAME = "gpt-4o"
     DEFAULT_LLM_TEMPERATURE = 0.0
@@ -35,3 +38,4 @@ class Defaults:
 
     DEFAULT_NUM_THREADS = 4
     DEFAULT_EVENT_LOOP_SLEEP = 1
+    DEFAULT_OUTPUT_HANDLER_MAX_ITERATIONS = 5
diff --git a/motleycrew/common/exceptions.py b/motleycrew/common/exceptions.py
@@ -1,6 +1,6 @@
 """ Module description"""
 
-from typing import Any, Optional
+from typing import Any, Dict, Optional
 
 from motleycrew.common import Defaults
 
@@ -142,3 +142,20 @@ class InvalidOutput(Exception):
     """Raised in output handlers when an agent's output is not accepted"""
 
     pass
+
+
+class OutputHandlerMaxIterationsExceeded(BaseException):
+    """Raised when the output handler's iteration limit is exceeded.
+
+    The exception stores the three values it is constructed with; the raise site
+    in `motleycrew/agents/parent.py` passes the arguments of the failing output
+    handler call and the exception that call raised.
+
+    Attributes:
+        last_call_args (tuple): Value supplied as `last_call_args`.
+        last_call_kwargs (Dict[str, Any]): Value supplied as `last_call_kwargs`.
+        last_exception (Exception): Value supplied as `last_exception`.
+    """
+
+    # NOTE(review): this derives from BaseException, so `except Exception` blocks
+    # will not catch it -- confirm that this is intentional.
+    def __init__(
+        self,
+        last_call_args: tuple,
+        last_call_kwargs: Dict[str, Any],
+        last_exception: Exception,
+    ):
+        # NOTE(review): super().__init__() is not called here -- confirm that is intended.
+        self.last_call_args = last_call_args
+        self.last_call_kwargs = last_call_kwargs
+        self.last_exception = last_exception
+
+    def __str__(self):
+        """Return a fixed message; the stored call details are not included."""
+        return "Maximum number of output handler iterations exceeded"
diff --git a/.../v1_chat_completions/0e875e7397179704e9c0f59a201287a3d0c56d7b1a692ad20f775c55b97a94f6.pkl b/.../v1_chat_completions/0e875e7397179704e9c0f59a201287a3d0c56d7b1a692ad20f775c55b97a94f6.pkl
diff --git a/.../v1_chat_completions/142f514247583600c43a67e4328369554bc34f77f60fed42015d2bf2aa25bfa8.pkl b/.../v1_chat_completions/142f514247583600c43a67e4328369554bc34f77f60fed42015d2bf2aa25bfa8.pkl
diff --git a/.../v1_chat_completions/194738103260685fcfe14b080193c65baceb5fd00bb449694913f6f0762b0291.pkl b/.../v1_chat_completions/194738103260685fcfe14b080193c65baceb5fd00bb449694913f6f0762b0291.pkl
diff --git a/.../v1_chat_completions/38c587c596ba4a7320df077e92bb9a303734251dd6391f31a0b66c2dbeb0ce44.pkl b/.../v1_chat_completions/38c587c596ba4a7320df077e92bb9a303734251dd6391f31a0b66c2dbeb0ce44.pkl
diff --git a/.../v1_chat_completions/48ad067e4cacaf445989352111199526d44d510296a19e1d7244c43506147de3.pkl b/.../v1_chat_completions/48ad067e4cacaf445989352111199526d44d510296a19e1d7244c43506147de3.pkl
diff --git a/.../v1_chat_completions/49aefe482900e5529f22f21ffea3a217532d894d30ad60d5b902317f841507c7.pkl b/.../v1_chat_completions/49aefe482900e5529f22f21ffea3a217532d894d30ad60d5b902317f841507c7.pkl
diff --git a/.../v1_chat_completions/4e48419bce78705da5cb3ab1ca474e97b76ebf2327c1c3a612b665abda772ddd.pkl b/.../v1_chat_completions/4e48419bce78705da5cb3ab1ca474e97b76ebf2327c1c3a612b665abda772ddd.pkl
diff --git a/.../v1_chat_completions/50a29bae1049bc8ceee0c057f6e4f24bfa07c6c2b35ab4fa5f74d04ca879e6d8.pkl b/.../v1_chat_completions/50a29bae1049bc8ceee0c057f6e4f24bfa07c6c2b35ab4fa5f74d04ca879e6d8.pkl
diff --git a/.../v1_chat_completions/5e047e70f7f9302ccb2769f571a821ea9a897de0ec73acf2bbe540c63ef5d262.pkl b/.../v1_chat_completions/5e047e70f7f9302ccb2769f571a821ea9a897de0ec73acf2bbe540c63ef5d262.pkl
diff --git a/.../v1_chat_completions/a0cc27bc02e7df8d60981c2aacf3d49dbf534a6f999d02431db8e8de8145e52b.pkl b/.../v1_chat_completions/a0cc27bc02e7df8d60981c2aacf3d49dbf534a6f999d02431db8e8de8145e52b.pkl
diff --git a/.../v1_chat_completions/b3c179a00e76ca4c0fcf3b0d09bd2be9dc164849323b77cb39acc6f39e9ebae2.pkl b/.../v1_chat_completions/b3c179a00e76ca4c0fcf3b0d09bd2be9dc164849323b77cb39acc6f39e9ebae2.pkl
diff --git a/.../v1_chat_completions/d522c1c5bf62810f60ab6905c9d6ee28c4ae2af7f318ed0d99052f68ae0bba56.pkl b/.../v1_chat_completions/d522c1c5bf62810f60ab6905c9d6ee28c4ae2af7f318ed0d99052f68ae0bba56.pkl
diff --git a/...dd12711def9925eefe6d7163d44e2c052b2bf.pkl → ...2cd68029ac10a7a00045e8c336a82d12dc58a.pkl b/...dd12711def9925eefe6d7163d44e2c052b2bf.pkl → ...2cd68029ac10a7a00045e8c336a82d12dc58a.pkl
diff --git a/.../v1_chat_completions/10b915da79b3af5c79f0fb31f8fbf8ced61cf74463499dd891514cb7e5325e59.pkl b/.../v1_chat_completions/10b915da79b3af5c79f0fb31f8fbf8ced61cf74463499dd891514cb7e5325e59.pkl
diff --git a/.../v1_chat_completions/11b4c38f6b58f022f99691bfb7763a6a727df2b3b42cb6fc9a288a3e0ea4e8ae.pkl b/.../v1_chat_completions/11b4c38f6b58f022f99691bfb7763a6a727df2b3b42cb6fc9a288a3e0ea4e8ae.pkl
diff --git a/.../v1_chat_completions/29b0d013feff29c37c8a2aa14aaa6b8863d0d3feec1899dfa8deb79d2c486df5.pkl b/.../v1_chat_completions/29b0d013feff29c37c8a2aa14aaa6b8863d0d3feec1899dfa8deb79d2c486df5.pkl
diff --git a/.../v1_chat_completions/2af6eae4f7a1596e28cf4bfdf6549c5eb275a3b83e3fac88d02aace6feec4cc9.pkl b/.../v1_chat_completions/2af6eae4f7a1596e28cf4bfdf6549c5eb275a3b83e3fac88d02aace6feec4cc9.pkl
diff --git a/.../v1_chat_completions/2ff106841dfa3e905032e73975a6a34938bb2668d3d76ba7a5801760b4a6eb51.pkl b/.../v1_chat_completions/2ff106841dfa3e905032e73975a6a34938bb2668d3d76ba7a5801760b4a6eb51.pkl
diff --git a/.../v1_chat_completions/3c6f8ea143fba69a0da636c750ccb0d4f9bb21acec233ffda049f5019996c013.pkl b/.../v1_chat_completions/3c6f8ea143fba69a0da636c750ccb0d4f9bb21acec233ffda049f5019996c013.pkl
diff --git a/.../v1_chat_completions/44b802b8c97658a162e84c2a129d622340c7eac1df3843b586011c3e5676ae36.pkl b/.../v1_chat_completions/44b802b8c97658a162e84c2a129d622340c7eac1df3843b586011c3e5676ae36.pkl
diff --git a/.../v1_chat_completions/584d9a8ef900f68c4a9296d9ed420b1fe415b9ee1f7eeb0df2a01ef07254926d.pkl b/.../v1_chat_completions/584d9a8ef900f68c4a9296d9ed420b1fe415b9ee1f7eeb0df2a01ef07254926d.pkl
diff --git a/.../v1_chat_completions/6b29fb659ef95585653a113e3b8f957a0778f471e34b2a5169f154f0b15c20c6.pkl b/.../v1_chat_completions/6b29fb659ef95585653a113e3b8f957a0778f471e34b2a5169f154f0b15c20c6.pkl
diff --git a/.../v1_chat_completions/6d277e5cfca7e0db1e89ac67cd19d71bf7feb77558543cde88b6ee134971b5ad.pkl b/.../v1_chat_completions/6d277e5cfca7e0db1e89ac67cd19d71bf7feb77558543cde88b6ee134971b5ad.pkl
diff --git a/.../v1_chat_completions/7a5fc4da7dfb19a4cd5529f827086d6c119802428bd4e10e2177079d42bc527f.pkl b/.../v1_chat_completions/7a5fc4da7dfb19a4cd5529f827086d6c119802428bd4e10e2177079d42bc527f.pkl
diff --git a/.../v1_chat_completions/89aaf10840541c1f22dca15a2d3bb7af8d5562a929fdaddef8b17c20a427c41e.pkl b/.../v1_chat_completions/89aaf10840541c1f22dca15a2d3bb7af8d5562a929fdaddef8b17c20a427c41e.pkl
diff --git a/.../v1_chat_completions/951b765d6b645e5a18b04c9cda8bf73d8f8c409bd447374df6769a33652c72c3.pkl b/.../v1_chat_completions/951b765d6b645e5a18b04c9cda8bf73d8f8c409bd447374df6769a33652c72c3.pkl
diff --git a/.../v1_chat_completions/9fff94ceca00c1c34ec18766e4cb1551e247e59d6da133f54baccaac90bcf022.pkl b/.../v1_chat_completions/9fff94ceca00c1c34ec18766e4cb1551e247e59d6da133f54baccaac90bcf022.pkl
diff --git a/.../v1_chat_completions/bd0a0eb768ecda41e7ae1fb6fd7d574c05c61fd143a9059573815c38d2c9b1e7.pkl b/.../v1_chat_completions/bd0a0eb768ecda41e7ae1fb6fd7d574c05c61fd143a9059573815c38d2c9b1e7.pkl
diff --git a/.../v1_chat_completions/ccb7751e1332467bf3d3b6a720d72a347b8777184ddce0c28e4a7afff04003cb.pkl b/.../v1_chat_completions/ccb7751e1332467bf3d3b6a720d72a347b8777184ddce0c28e4a7afff04003cb.pkl
diff --git a/.../v1_chat_completions/e6390e10ab4a8252870620496b378e00862205c5ee5a29acebfdc33db34a988e.pkl b/.../v1_chat_completions/e6390e10ab4a8252870620496b378e00862205c5ee5a29acebfdc33db34a988e.pkl
diff --git a/.../v1_chat_completions/f03da7ec0c7359e684e8a9fc9c46d335c0d11100ea9402706ab0bf75bfca3eb0.pkl b/.../v1_chat_completions/f03da7ec0c7359e684e8a9fc9c46d335c0d11100ea9402706ab0bf75bfca3eb0.pkl
diff --git a/.../v1_chat_completions/f14562827ad1e3aa08d2cf536c24c17e25412399554b0555d3aa0b35f08eab3d.pkl b/.../v1_chat_completions/f14562827ad1e3aa08d2cf536c24c17e25412399554b0555d3aa0b35f08eab3d.pkl
diff --git a/tests/itest_golden_data/advanced_output_handling_ipynb.json b/tests/itest_golden_data/advanced_output_handling_ipynb.json
@@ -1 +1 @@
-"def bubble_sort(arr):\n    n = len(arr)\n    for i in range(n):\n        swapped = False\n        for j in range(0, n-i-1):\n            if arr[j] > arr[j+1]:\n                arr[j], arr[j+1] = arr[j+1], arr[j]\n                swapped = True\n        if not swapped:\n            break\n    return arr\n\n# Test the bubble sort function\nsample_array = [64, 34, 25, 12, 22, 11, 90]\nsorted_array = bubble_sort(sample_array)\nprint(sorted_array)\n\nThe `bubble_sort` function sorts an array using the bubble sort algorithm. It works by repeatedly stepping through the list, comparing adjacent elements and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The outer loop runs `n` times, where `n` is the length of the array, and the inner loop runs `n-i-1` times to avoid re-checking the already sorted elements. An optimization is added by using a `swapped` flag to detect if any swaps were made during an iteration. If no swaps were made, the array is already sorted, and the algorithm can terminate early. The test case demonstrates the function by sorting a sample array."
+"def bubble_sort(arr):\n    n = len(arr)\n    for i in range(n):\n        swapped = False\n        for j in range(0, n-i-1):\n            if arr[j] > arr[j+1]:\n                arr[j], arr[j+1] = arr[j+1], arr[j]\n                swapped = True\n        if not swapped:\n            break\n    return arr\n\n# Test the bubble sort function\nsample_array = [64, 34, 25, 12, 22, 11, 90]\nsorted_array = bubble_sort(sample_array)\nprint(sorted_array)\n\nThe `bubble_sort` function sorts an array using the bubble sort algorithm. It works by repeatedly stepping through the list, comparing adjacent elements and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The outer loop runs `n` times, where `n` is the length of the array. The inner loop runs `n-i-1` times to avoid re-checking the already sorted elements. An optimization is added by using a `swapped` flag to detect if any swaps were made during an iteration. If no swaps were made, the list is already sorted, and the algorithm can terminate early. The test case demonstrates the function by sorting a sample array."
diff --git a/tests/itest_golden_data/math_via_python_code_with_a_single_agent_ipynb.json b/tests/itest_golden_data/math_via_python_code_with_a_single_agent_ipynb.json
@@ -1 +1 @@
-"\\[\n\\begin{aligned}\nx &= \\frac{367}{71} \\\\\ny &= -\\frac{25}{49} \\\\\nx - y &= 2\n\\end{aligned}\n\\]"
+"Agent stopped due to iteration limit or time limit."
diff --git a/tests/test_agents/__init__.py b/tests/test_agents/__init__.py
@@ -0,0 +1,21 @@
+from typing import Type
+from langchain_core.tools import BaseTool
+from langchain_core.pydantic_v1 import BaseModel, Field
+
+
+class MockToolInput(BaseModel):
+    """Argument schema for MockTool: a single string field, `tool_input`."""
+
+    tool_input: str = Field(description="tool_input")
+
+
+class MockTool(BaseTool):
+    """Mock tool for agent tests; `_run` returns its input unchanged."""
+
+    name: str = "mock tool"
+    description: str = "Mock tool for tests"
+
+    # Input schema declared for this tool.
+    args_schema: Type[BaseModel] = MockToolInput
+
+    def _run(self, tool_input: str, *args, **kwargs):
+        """Return `tool_input` as-is; extra positional and keyword arguments are ignored."""
+        return tool_input
diff --git a/tests/test_agents/test_agents.py b/tests/test_agents/test_agents.py
@@ -1,14 +1,14 @@
 import os
 import pytest
 
-from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.prompts.chat import ChatPromptTemplate
 from motleycrew.agents.crewai.crewai_agent import CrewAIMotleyAgent
 from motleycrew.agents.langchain.tool_calling_react import ReActToolCallingAgent
 from motleycrew.agents.llama_index.llama_index_react import ReActLlamaIndexMotleyAgent
 from motleycrew.common.exceptions import AgentNotMaterialized, CannotModifyMaterializedAgent
 from motleycrew.tools.python_repl import create_repl_tool
 from motleycrew.tools.tool import MotleyTool
+from tests.test_agents import MockTool
 
 os.environ["OPENAI_API_KEY"] = "YOUR OPENAI API KEY"
 
@@ -28,7 +28,7 @@ def crewai_agent(self):
             backstory="",
             verbose=True,
             delegation=False,
-            tools=[DuckDuckGoSearchRun()],
+            tools=[MockTool()],
         )
         return agent
 
@@ -38,7 +38,7 @@ def langchain_agent(self):
             name="AI writer agent",
             prompt_prefix="Generate AI-generated content",
             description="AI-generated content",
-            tools=[DuckDuckGoSearchRun()],
+            tools=[MockTool()],
             verbose=True,
         )
         return agent
@@ -48,7 +48,7 @@ def llama_index_agent(self):
         agent = ReActLlamaIndexMotleyAgent(
             prompt_prefix="Uncover cutting-edge developments in AI and data science",
             description="AI researcher",
-            tools=[DuckDuckGoSearchRun()],
+            tools=[MockTool()],
             verbose=True,
         )
         return agent
@@ -65,7 +65,7 @@ def agent(self, request, crewai_agent, langchain_agent, llama_index_agent):
     @pytest.mark.parametrize("agent", test_agents_names, indirect=True)
     def test_add_tools(self, agent):
         assert len(agent.tools) == 1
-        tools = [DuckDuckGoSearchRun()]
+        tools = [MockTool()]
         agent.add_tools(tools)
         assert len(agent.tools) == 1
 

diff --git a/tests/test_agents/test_langchain_output_handler.py b/tests/test_agents/test_langchain_output_handler.py
@@ -1,11 +1,11 @@
 import pytest
-from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.agents import AgentFinish, AgentAction
 
 from motleycrew.agents import MotleyOutputHandler
 from motleycrew.agents.langchain.tool_calling_react import ReActToolCallingAgent
 from motleycrew.agents.parent import DirectOutput
-from motleycrew.common.exceptions import InvalidOutput
+from motleycrew.common.exceptions import InvalidOutput, OutputHandlerMaxIterationsExceeded
+from tests.test_agents import MockTool
 
 invalid_output = "Add more information about AI applications in medicine."
 
@@ -38,10 +38,10 @@ def fake_agent_take_next_step(
 @pytest.fixture
 def agent():
     agent = ReActToolCallingAgent(
-        tools=[DuckDuckGoSearchRun()],
+        tools=[MockTool()],
         verbose=True,
         chat_history=True,
-        output_handler=ReportOutputHandler(),
+        output_handler=ReportOutputHandler(max_iterations=5),
     )
     agent.materialize()
     object.__setattr__(agent._agent, "plan", fake_agent_plan)
@@ -56,6 +56,19 @@ def agent():
     return agent
 
 
+@pytest.fixture
+def run_kwargs(agent):
+    """Build the keyword arguments that the tests pass to `_take_next_step`.
+
+    The tool map is built from the tools of the object reached through
+    `agent.agent.bound.bound.steps[1].bound`; the remaining entries start empty.
+    """
+    # NOTE(review): presumably this attribute chain unwraps to the underlying
+    # agent executor -- confirm against the agent wrapper structure.
+    agent_executor = agent.agent.bound.bound.steps[1].bound
+
+    run_kwargs = {
+        "name_to_tool_map": {tool.name: tool for tool in agent_executor.tools},
+        "color_mapping": {},
+        "inputs": {},
+        "intermediate_steps": [],
+    }
+    return run_kwargs
+
+
 def test_agent_plan(agent):
     agent_executor = agent.agent
     agent_action = AgentAction("tool", "tool_input", "tool_log")
@@ -71,15 +84,7 @@ def test_agent_plan(agent):
     assert step.tool_input == "test_output"
 
 
-def test_agent_take_next_step(agent):
-    agent_executor = agent.agent.bound.bound.steps[1].bound
-
-    run_kwargs = {
-        "name_to_tool_map": {tool.name: tool for tool in agent_executor.tools},
-        "color_mapping": {},
-        "inputs": {},
-        "intermediate_steps": [],
-    }
+def test_agent_take_next_step(agent, run_kwargs):
 
     # test wrong output
     input_data = "Latest advancements in AI in 2024."
@@ -95,3 +100,14 @@ def test_agent_take_next_step(agent):
     assert isinstance(step_result.return_values, dict)
     output_result = step_result.return_values.get("output")
     assert output_result == {"checked_output": input_data}
+
+
+def test_output_handler_max_iteration(agent, run_kwargs):
+    """Check that repeated `_take_next_step` calls end in OutputHandlerMaxIterationsExceeded.
+
+    The step is called up to `max_iterations + 1` times; the final assert checks
+    that the exception was raised on the last of those calls.
+    """
+    input_data = "Latest advancements in AI in 2024."
+    run_kwargs["inputs"] = input_data
+
+    with pytest.raises(OutputHandlerMaxIterationsExceeded):
+        for iteration in range(agent.output_handler.max_iterations + 1):
+            agent.agent._take_next_step(**run_kwargs)
+
+    # `iteration` keeps the index of the call that raised; it equals
+    # max_iterations only when the exception came on the final call.
+    assert iteration == agent.output_handler.max_iterations