diff --git a/examples/Advanced output handling.ipynb b/examples/Advanced output handling.ipynb
index 20068a79..43bb386f 100644
--- a/examples/Advanced output handling.ipynb	
+++ b/examples/Advanced output handling.ipynb	
@@ -33,7 +33,7 @@
     "\n",
     "from motleycrew.tools import PythonREPLTool\n",
     "from motleycrew.tools import LLMTool\n",
-    "from motleycrew.agents import MotleyOutputHandler"
+    "from motleycrew.tools import MotleyTool"
    ]
   },
   {
@@ -42,7 +42,9 @@
    "source": [
     "In the [validating agent output](validating_agent_output.html) example, we used a simple tool to receive and check the output.\n",
     "\n",
-    "Here we want to have access to the agent input, so we subclass a special `MotleyOutputHandler` class and implement its `handle_output` method. Inside it, we'll call an LLM critic that will assess the agent's output."
+    "Here we want to have access to the agent input, so we subclass the `MotleyTool` class and implement its `run` method. Inside it, we'll call an LLM critic that will assess the agent's output.\n",
+    "\n",
+    "`MotleyTool` has an `exceptions_to_reflect` argument, which allows exceptions raised by the tool to be reflected back to the agent. This is useful for giving the agent feedback on why the tool call failed and how to fix it. By default, only `InvalidOutput` exceptions are reflected, but you can pass other exception classes as well, or even `Exception` itself to reflect any exception."
    ]
   },
   {
@@ -51,8 +53,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "class CoderOutputHandler(MotleyOutputHandler):\n",
-    "    def handle_output(self, code: str, comment: str):\n",
+    "class CoderOutputHandler(MotleyTool):\n",
+    "    def __init__(self):\n",
+    "        super().__init__(\n",
+    "            name=\"coder_output_handler\",\n",
+    "            description=\"Output handler. ONLY RETURN THE FINAL RESULT USING THIS TOOL!\",\n",
+    "            return_direct=True,\n",
+    "            exceptions_to_reflect=[InvalidOutput, ValueError],\n",
+    "        )  # args_schema is inferred automatically from the run method signature, but you can specify it explicitly\n",
+    "\n",
+    "    def run(self, code: str, comment: str):\n",
     "        agent_prompt = self.agent_input[\"prompt\"]\n",
     "        critic = LLMTool(\n",
     "            name=\"critic\",\n",
@@ -81,33 +91,25 @@
    "source": [
     "In case the critic is not satisfied, the `InvalidOutput` exception will be returned to the agent, as if a regular tool was called. If everything is OK, we just return the agent's output.\n",
     "\n",
-    "The MotleyOutputHandler constructor has a `max_iterations` argument. It's the maximum number of times the exception will be returned to the agent. If the agent doesn't provide a valid output after `max_iterations`, a `OutputHandlerMaxIterationsExceeded` exception will be raised. It has `last_call_args` and `last_call_kwargs` attributes that store the arguments that the agent used in the last call. Also, it stores the last exception that was raised in the `last_exception` attribute.\n",
+    "The `ReActToolCallingMotleyAgent` constructor, like those of most other agents, has a `max_iterations` argument. It's the maximum number of times the agent will be allowed to call tools. If the agent doesn't provide a valid output within `max_iterations` tool calls, an exception will be raised. To avoid this, you can add an iteration counter to the critic's state, so that it can stop rejecting the output after a certain number of attempts.\n",
     "\n",
-    "The output schema is inferred automatically from the `handle_output` method signature. You can also specify it directly using the `_args_schema` property (see below)."
+    "The output schema is inferred automatically from the `run` method signature. You can also specify it directly using the `args_schema` argument."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 3,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2024-07-19 16:55:50,737 - motleycrew - WARNING - No known Cypher type matching annotation typing.Optional[typing.Any], will use JSON string\n",
-      "2024-07-19 16:55:50,741 - motleycrew - WARNING - No known Cypher type matching annotation typing.Optional[dict[str, typing.Any]], will use JSON string\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "crew = MotleyCrew()\n",
     "\n",
     "coder = ReActToolCallingMotleyAgent(\n",
     "    name=\"coder\",\n",
-    "    tools=[PythonREPLTool()],\n",
-    "    output_handler=CoderOutputHandler(max_iterations=3),\n",
+    "    tools=[PythonREPLTool(), CoderOutputHandler()],\n",
+    "    force_output_handler=True,\n",
     "    verbose=True,\n",
+    "    max_iterations=15,\n",
     ")\n",
     "\n",
     "task = SimpleTask(\n",
@@ -125,14 +127,6 @@
    "execution_count": 4,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2024-07-19 16:55:50,774 - motleycrew - WARNING - No known Cypher type matching annotation typing.Optional[typing.Any], will use JSON string\n",
-      "2024-07-19 16:55:50,775 - motleycrew - WARNING - No known Cypher type matching annotation typing.Optional[dict[str, typing.Any]], will use JSON string\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -155,7 +149,7 @@
      "text": [
       "\u001b[32;1m\u001b[1;3m\n",
       "Invoking: `python_repl` with `{'command': 'def bubble_sort(arr):\\n    n = len(arr)\\n    for i in range(n):\\n        for j in range(0, n-i-1):\\n            if arr[j] > arr[j+1]:\\n                arr[j], arr[j+1] = arr[j+1, arr[j]]\\n    return arr\\n\\n# Test the bubble sort function\\nsample_list = [64, 34, 25, 12, 22, 11, 90]\\nsorted_list = bubble_sort(sample_list)\\nprint(sorted_list)'}`\n",
-      "responded: Thought: The next step is to write the Python code for the bubble sort algorithm. After writing the code, I will test it using the REPL tool to ensure it works correctly. Finally, I will return the code along with an explanation of how it works.\n",
+      "responded: Thought: The next step is to write the Python code for the bubble sort algorithm. After writing the code, I will test it using the REPL tool to ensure it works correctly. Finally, I will return the code along with a comment explaining how it works.\n",
       "\n",
       "Let's start by writing the bubble sort algorithm and testing it with a sample list.\n",
       "\n",
@@ -163,7 +157,7 @@
       "\n",
       "\u001b[0m\u001b[36;1m\u001b[1;3mTypeError('list indices must be integers or slices, not tuple')\u001b[0m\u001b[32;1m\u001b[1;3m\n",
       "Invoking: `python_repl` with `{'command': 'def bubble_sort(arr):\\n    n = len(arr)\\n    for i in range(n):\\n        for j in range(0, n-i-1):\\n            if arr[j] > arr[j+1]:\\n                arr[j], arr[j+1] = arr[j+1], arr[j]\\n    return arr\\n\\n# Test the bubble sort function\\nsample_list = [64, 34, 25, 12, 22, 11, 90]\\nsorted_list = bubble_sort(sample_list)\\nprint(sorted_list)'}`\n",
-      "responded: Thought: There is a syntax error in the code where I mistakenly used a comma instead of a closing parenthesis in the swap operation. I need to correct this error and re-run the code to test the bubble sort implementation.\n",
+      "responded: Thought: There is a syntax error in the code where I mistakenly used a comma instead of a closing parenthesis when swapping elements. I will correct this error and re-run the code to test the bubble sort implementation.\n",
       "\n",
       "Let's fix the code and test it again.\n",
       "\n",
@@ -171,18 +165,24 @@
       "\n",
       "\u001b[0m\u001b[36;1m\u001b[1;3m[11, 12, 22, 25, 34, 64, 90]\n",
       "\u001b[0m\u001b[32;1m\u001b[1;3m\n",
-      "Invoking: `output_handler` with `{'code': 'def bubble_sort(arr):\\n    n = len(arr)\\n    for i in range(n):\\n        for j in range(0, n-i-1):\\n            if arr[j] > arr[j+1]:\\n                arr[j], arr[j+1] = arr[j+1], arr[j]\\n    return arr\\n\\n# Test the bubble sort function\\nsample_list = [64, 34, 25, 12, 22, 11, 90]\\nsorted_list = bubble_sort(sample_list)\\nprint(sorted_list)', 'comment': \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs 'n' times, where 'n' is the length of the list, and the inner loop runs 'n-i-1' times to avoid re-checking the already sorted elements at the end of the list.\"}`\n",
-      "responded: Thought: The bubble sort algorithm has been successfully implemented and tested. The sorted list `[11, 12, 22, 25, 34, 64, 90]` confirms that the algorithm works correctly. Now, I will provide the code along with an explanation of how it works.\n",
+      "Invoking: `output_handler` with `{'code': 'def bubble_sort(arr):\\n    n = len(arr)\\n    for i in range(n):\\n        for j in range(0, n-i-1):\\n            if arr[j] > arr[j+1]:\\n                arr[j], arr[j+1] = arr[j+1], arr[j]\\n    return arr\\n\\n# Test the bubble sort function\\nsample_list = [64, 34, 25, 12, 22, 11, 90]\\nsorted_list = bubble_sort(sample_list)\\nprint(sorted_list)', 'comment': \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs n times, and the inner loop runs n-i-1 times, where n is the length of the list and i is the current iteration of the outer loop.\"}`\n",
+      "responded: Thought: The bubble sort algorithm has been successfully implemented and tested. The code works as expected, sorting the sample list in ascending order. Now, I will return the code along with a comment explaining how it works.\n",
+      "\n",
+      "Let's proceed to return the final result.\n",
       "\n",
       "\n",
       "\n",
       "\u001b[0mCritic response:  The code and the comment are generally well-written and understandable. However, there are a few improvements that can be made for clarity and efficiency:\n",
       "\n",
-      "1. **Early Exit Optimization**: The current implementation always runs `n` iterations of the outer loop, even if the list becomes sorted before that. Adding a flag to detect if any swaps were made in an iteration can help exit early if the list is already sorted.\n",
+      "1. **Code Improvement:**\n",
+      "   - Add a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and we can break out of the loop early.\n",
+      "   - Use more descriptive variable names for better readability.\n",
       "\n",
-      "2. **Comment Clarity**: The comment is clear, but it can be slightly improved by mentioning the early exit optimization if implemented.\n",
+      "2. **Comment Improvement:**\n",
+      "   - Explain the optimization with the flag.\n",
+      "   - Mention the time complexity of the algorithm.\n",
       "\n",
-      "Here is the improved code and comment:\n",
+      "Here is the improved version of the code and comment:\n",
       "\n",
       "```python\n",
       "def bubble_sort(arr):\n",
@@ -204,16 +204,18 @@
       "```\n",
       "\n",
       "Comment:\n",
-      "The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs 'n' times, where 'n' is the length of the list, and the inner loop runs 'n-i-1' times to avoid re-checking the already sorted elements at the end of the list. An optimization is added to exit early if no swaps are made during an iteration, indicating that the list is already sorted.\n",
+      "The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs n times, and the inner loop runs n-i-1 times, where n is the length of the list and i is the current iteration of the outer loop. An optimization is added with a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and the loop can be terminated early. The time complexity of bubble sort is O(n^2) in the worst and average cases, and O(n) in the best case when the list is already sorted.\n",
+      "\u001b[33;1m\u001b[1;3mThe code and the comment are generally well-written and understandable. However, there are a few improvements that can be made for clarity and efficiency:\n",
       "\n",
-      "These changes make the code more efficient and the comment more informative.\n",
-      "\u001b[33;1m\u001b[1;3mInvalidOutput: The code and the comment are generally well-written and understandable. However, there are a few improvements that can be made for clarity and efficiency:\n",
+      "1. **Code Improvement:**\n",
+      "   - Add a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and we can break out of the loop early.\n",
+      "   - Use more descriptive variable names for better readability.\n",
       "\n",
-      "1. **Early Exit Optimization**: The current implementation always runs `n` iterations of the outer loop, even if the list becomes sorted before that. Adding a flag to detect if any swaps were made in an iteration can help exit early if the list is already sorted.\n",
+      "2. **Comment Improvement:**\n",
+      "   - Explain the optimization with the flag.\n",
+      "   - Mention the time complexity of the algorithm.\n",
       "\n",
-      "2. **Comment Clarity**: The comment is clear, but it can be slightly improved by mentioning the early exit optimization if implemented.\n",
-      "\n",
-      "Here is the improved code and comment:\n",
+      "Here is the improved version of the code and comment:\n",
       "\n",
       "```python\n",
       "def bubble_sort(arr):\n",
@@ -235,34 +237,45 @@
       "```\n",
       "\n",
       "Comment:\n",
-      "The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs 'n' times, where 'n' is the length of the list, and the inner loop runs 'n-i-1' times to avoid re-checking the already sorted elements at the end of the list. An optimization is added to exit early if no swaps are made during an iteration, indicating that the list is already sorted.\n",
-      "\n",
-      "These changes make the code more efficient and the comment more informative.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
-      "Invoking: `output_handler` with `{'code': 'def bubble_sort(arr):\\n    n = len(arr)\\n    for i in range(n):\\n        for j in range(0, n-i-1):\\n            if arr[j] > arr[j+1]:\\n                arr[j], arr[j+1] = arr[j+1], arr[j]\\n    return arr\\n\\n# Test the bubble sort function\\nsample_list = [64, 34, 25, 12, 22, 11, 90]\\nsorted_list = bubble_sort(sample_list)\\nprint(sorted_list)', 'comment': \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs 'n' times, where 'n' is the length of the list, and the inner loop runs 'n-i-1' times to avoid re-checking the already sorted elements at the end of the list.\"}`\n",
-      "responded: Thought: The bubble sort algorithm has been successfully implemented and tested. The sorted list `[11, 12, 22, 25, 34, 64, 90]` confirms that the algorithm works correctly. Now, I will provide the code along with an explanation of how it works.\n",
+      "The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs n times, and the inner loop runs n-i-1 times, where n is the length of the list and i is the current iteration of the outer loop. An optimization is added with a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and the loop can be terminated early. The time complexity of bubble sort is O(n^2) in the worst and average cases, and O(n) in the best case when the list is already sorted.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+      "Invoking: `output_handler` with `{'code': 'def bubble_sort(arr):\\n    n = len(arr)\\n    for i in range(n):\\n        swapped = False\\n        for j in range(0, n-i-1):\\n            if arr[j] > arr[j+1]:\\n                arr[j], arr[j+1] = arr[j+1], arr[j]\\n                swapped = True\\n        if not swapped:\\n            break\\n    return arr\\n\\n# Test the bubble sort function\\nsample_list = [64, 34, 25, 12, 22, 11, 90]\\nsorted_list = bubble_sort(sample_list)\\nprint(sorted_list)', 'comment': \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs n times, and the inner loop runs n-i-1 times, where n is the length of the list and i is the current iteration of the outer loop. An optimization is added with a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and the loop can be terminated early. The time complexity of bubble sort is O(n^2) in the worst and average cases, and O(n) in the best case when the list is already sorted.\"}`\n",
+      "responded: Thought: The bubble sort algorithm has been successfully implemented and tested. The code works as expected, sorting the sample list in ascending order. Now, I will return the code along with a comment explaining how it works.\n",
       "\n",
+      "Let's proceed to return the final result.\n",
       "\n",
       "\n",
-      "\u001b[0mCritic response:  The code and the comment are generally well-written and understandable. However, there are a few improvements that can be made for clarity and efficiency:\n",
       "\n",
-      "1. **Early Exit Optimization**: The current implementation always runs `n` iterations of the outer loop, even if the list becomes sorted before that. Adding a flag to detect if any swaps were made in an iteration can help exit early if the list is already sorted.\n",
+      "\u001b[0mCritic response:  The code and the comment are well-written and clear. However, there are a few minor improvements that can be made for better readability and understanding:\n",
       "\n",
-      "2. **Comment Clarity**: The comment is clear, but it can be slightly improved by mentioning the early exit optimization and explaining the time complexity.\n",
+      "1. **Variable Naming**: Use more descriptive variable names.\n",
+      "2. **Commenting**: Add inline comments to explain key parts of the code.\n",
+      "3. **Docstring**: Add a docstring to the function to describe its purpose and parameters.\n",
       "\n",
-      "Here is the improved code and comment:\n",
+      "Here's the improved version:\n",
       "\n",
       "```python\n",
-      "def bubble_sort(arr):\n",
-      "    n = len(arr)\n",
+      "def bubble_sort(array):\n",
+      "    \"\"\"\n",
+      "    Sorts an array using the bubble sort algorithm.\n",
+      "\n",
+      "    Parameters:\n",
+      "    array (list): The list of elements to be sorted.\n",
+      "\n",
+      "    Returns:\n",
+      "    list: The sorted list.\n",
+      "    \"\"\"\n",
+      "    n = len(array)\n",
       "    for i in range(n):\n",
       "        swapped = False\n",
-      "        for j in range(0, n-i-1):\n",
-      "            if arr[j] > arr[j+1]:\n",
-      "                arr[j], arr[j+1] = arr[j+1], arr[j]\n",
+      "        for j in range(0, n - i - 1):\n",
+      "            if array[j] > array[j + 1]:\n",
+      "                # Swap if the element found is greater than the next element\n",
+      "                array[j], array[j + 1] = array[j + 1], array[j]\n",
       "                swapped = True\n",
+      "        # If no elements were swapped, the array is already sorted\n",
       "        if not swapped:\n",
       "            break\n",
-      "    return arr\n",
+      "    return array\n",
       "\n",
       "# Test the bubble sort function\n",
       "sample_list = [64, 34, 25, 12, 22, 11, 90]\n",
@@ -270,32 +283,38 @@
       "print(sorted_list)\n",
       "```\n",
       "\n",
-      "Comment:\n",
-      "```\n",
-      "The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs 'n' times, where 'n' is the length of the list, and the inner loop runs 'n-i-1' times to avoid re-checking the already sorted elements at the end of the list. An optimization is added to exit early if no swaps are made during an iteration, indicating that the list is already sorted. The time complexity of bubble sort is O(n^2) in the worst and average cases, but it can be O(n) if the list is already sorted.\n",
-      "```\n",
+      "The comment explaining the code remains the same as it is already clear and comprehensive.\n",
+      "\u001b[33;1m\u001b[1;3mThe code and the comment are well-written and clear. However, there are a few minor improvements that can be made for better readability and understanding:\n",
       "\n",
-      "These changes make the code more efficient and the comment more informative.\n",
-      "\u001b[33;1m\u001b[1;3mInvalidOutput: The code and the comment are generally well-written and understandable. However, there are a few improvements that can be made for clarity and efficiency:\n",
+      "1. **Variable Naming**: Use more descriptive variable names.\n",
+      "2. **Commenting**: Add inline comments to explain key parts of the code.\n",
+      "3. **Docstring**: Add a docstring to the function to describe its purpose and parameters.\n",
       "\n",
-      "1. **Early Exit Optimization**: The current implementation always runs `n` iterations of the outer loop, even if the list becomes sorted before that. Adding a flag to detect if any swaps were made in an iteration can help exit early if the list is already sorted.\n",
+      "Here's the improved version:\n",
       "\n",
-      "2. **Comment Clarity**: The comment is clear, but it can be slightly improved by mentioning the early exit optimization and explaining the time complexity.\n",
+      "```python\n",
+      "def bubble_sort(array):\n",
+      "    \"\"\"\n",
+      "    Sorts an array using the bubble sort algorithm.\n",
       "\n",
-      "Here is the improved code and comment:\n",
+      "    Parameters:\n",
+      "    array (list): The list of elements to be sorted.\n",
       "\n",
-      "```python\n",
-      "def bubble_sort(arr):\n",
-      "    n = len(arr)\n",
+      "    Returns:\n",
+      "    list: The sorted list.\n",
+      "    \"\"\"\n",
+      "    n = len(array)\n",
       "    for i in range(n):\n",
       "        swapped = False\n",
-      "        for j in range(0, n-i-1):\n",
-      "            if arr[j] > arr[j+1]:\n",
-      "                arr[j], arr[j+1] = arr[j+1], arr[j]\n",
+      "        for j in range(0, n - i - 1):\n",
+      "            if array[j] > array[j + 1]:\n",
+      "                # Swap if the element found is greater than the next element\n",
+      "                array[j], array[j + 1] = array[j + 1], array[j]\n",
       "                swapped = True\n",
+      "        # If no elements were swapped, the array is already sorted\n",
       "        if not swapped:\n",
       "            break\n",
-      "    return arr\n",
+      "    return array\n",
       "\n",
       "# Test the bubble sort function\n",
       "sample_list = [64, 34, 25, 12, 22, 11, 90]\n",
@@ -303,33 +322,548 @@
       "print(sorted_list)\n",
       "```\n",
       "\n",
-      "Comment:\n",
+      "The comment explaining the code remains the same as it is already clear and comprehensive.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+      "Invoking: `output_handler` with `{'code': 'def bubble_sort(array):\\n    \"\"\"\\n    Sorts an array using the bubble sort algorithm.\\n\\n    Parameters:\\n    array (list): The list of elements to be sorted.\\n\\n    Returns:\\n    list: The sorted list.\\n    \"\"\"\\n    n = len(array)\\n    for i in range(n):\\n        swapped = False\\n        for j in range(0, n - i - 1):\\n            if array[j] > array[j + 1]:\\n                # Swap if the element found is greater than the next element\\n                array[j], array[j + 1] = array[j + 1], array[j]\\n                swapped = True\\n        # If no elements were swapped, the array is already sorted\\n        if not swapped:\\n            break\\n    return array\\n\\n# Test the bubble sort function\\nsample_list = [64, 34, 25, 12, 22, 11, 90]\\nsorted_list = bubble_sort(sample_list)\\nprint(sorted_list)', 'comment': \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs n times, and the inner loop runs n-i-1 times, where n is the length of the list and i is the current iteration of the outer loop. An optimization is added with a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and the loop can be terminated early. The time complexity of bubble sort is O(n^2) in the worst and average cases, and O(n) in the best case when the list is already sorted.\"}`\n",
+      "responded: Thought: The bubble sort algorithm has been successfully implemented and tested. The code works as expected, sorting the sample list in ascending order. Now, I will return the code along with a comment explaining how it works.\n",
+      "\n",
+      "Let's proceed to return the final result.\n",
+      "\n",
+      "\n",
+      "\n",
+      "\u001b[0mCritic response:  The code and the comment are well-written and clear. However, there are a few minor improvements that can be made to enhance readability and efficiency:\n",
+      "\n",
+      "1. **Docstring Enhancement**: The docstring can be expanded to include a brief description of the algorithm's complexity.\n",
+      "2. **Variable Naming**: The variable `n` can be renamed to `length` for better readability.\n",
+      "3. **Testing**: The test case can be expanded to include more diverse examples, including edge cases like an empty list and a list with one element.\n",
+      "\n",
+      "Here is the improved version:\n",
+      "\n",
+      "```python\n",
+      "def bubble_sort(array):\n",
+      "    \"\"\"\n",
+      "    Sorts an array using the bubble sort algorithm.\n",
+      "\n",
+      "    Parameters:\n",
+      "    array (list): The list of elements to be sorted.\n",
+      "\n",
+      "    Returns:\n",
+      "    list: The sorted list.\n",
+      "\n",
+      "    Time Complexity:\n",
+      "    - Worst and Average Case: O(n^2)\n",
+      "    - Best Case: O(n) when the list is already sorted\n",
+      "    \"\"\"\n",
+      "    length = len(array)\n",
+      "    for i in range(length):\n",
+      "        swapped = False\n",
+      "        for j in range(0, length - i - 1):\n",
+      "            if array[j] > array[j + 1]:\n",
+      "                # Swap if the element found is greater than the next element\n",
+      "                array[j], array[j + 1] = array[j + 1], array[j]\n",
+      "                swapped = True\n",
+      "        # If no elements were swapped, the array is already sorted\n",
+      "        if not swapped:\n",
+      "            break\n",
+      "    return array\n",
+      "\n",
+      "# Test the bubble sort function with various cases\n",
+      "test_cases = [\n",
+      "    [64, 34, 25, 12, 22, 11, 90],\n",
+      "    [],\n",
+      "    [1],\n",
+      "    [2, 1],\n",
+      "    [3, 3, 3],\n",
+      "    [5, 1, 4, 2, 8]\n",
+      "]\n",
+      "\n",
+      "for i, sample_list in enumerate(test_cases):\n",
+      "    sorted_list = bubble_sort(sample_list.copy())\n",
+      "    print(f\"Test case {i + 1}: {sorted_list}\")\n",
+      "```\n",
+      "\n",
+      "### Explanation of Improvements:\n",
+      "1. **Docstring Enhancement**: Added a brief description of the algorithm's time complexity.\n",
+      "2. **Variable Naming**: Changed `n` to `length` for better readability.\n",
+      "3. **Testing**: Added multiple test cases to cover different scenarios, including an empty list, a single-element list, a list with duplicate elements, and a list that is already sorted.\n",
+      "\n",
+      "These changes make the code more readable and the testing more comprehensive.\n",
+      "\u001b[33;1m\u001b[1;3mThe code and the comment are well-written and clear. However, there are a few minor improvements that can be made to enhance readability and efficiency:\n",
+      "\n",
+      "1. **Docstring Enhancement**: The docstring can be expanded to include a brief description of the algorithm's complexity.\n",
+      "2. **Variable Naming**: The variable `n` can be renamed to `length` for better readability.\n",
+      "3. **Testing**: The test case can be expanded to include more diverse examples, including edge cases like an empty list and a list with one element.\n",
+      "\n",
+      "Here is the improved version:\n",
+      "\n",
+      "```python\n",
+      "def bubble_sort(array):\n",
+      "    \"\"\"\n",
+      "    Sorts an array using the bubble sort algorithm.\n",
+      "\n",
+      "    Parameters:\n",
+      "    array (list): The list of elements to be sorted.\n",
+      "\n",
+      "    Returns:\n",
+      "    list: The sorted list.\n",
+      "\n",
+      "    Time Complexity:\n",
+      "    - Worst and Average Case: O(n^2)\n",
+      "    - Best Case: O(n) when the list is already sorted\n",
+      "    \"\"\"\n",
+      "    length = len(array)\n",
+      "    for i in range(length):\n",
+      "        swapped = False\n",
+      "        for j in range(0, length - i - 1):\n",
+      "            if array[j] > array[j + 1]:\n",
+      "                # Swap if the element found is greater than the next element\n",
+      "                array[j], array[j + 1] = array[j + 1], array[j]\n",
+      "                swapped = True\n",
+      "        # If no elements were swapped, the array is already sorted\n",
+      "        if not swapped:\n",
+      "            break\n",
+      "    return array\n",
+      "\n",
+      "# Test the bubble sort function with various cases\n",
+      "test_cases = [\n",
+      "    [64, 34, 25, 12, 22, 11, 90],\n",
+      "    [],\n",
+      "    [1],\n",
+      "    [2, 1],\n",
+      "    [3, 3, 3],\n",
+      "    [5, 1, 4, 2, 8]\n",
+      "]\n",
+      "\n",
+      "for i, sample_list in enumerate(test_cases):\n",
+      "    sorted_list = bubble_sort(sample_list.copy())\n",
+      "    print(f\"Test case {i + 1}: {sorted_list}\")\n",
       "```\n",
-      "The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs 'n' times, where 'n' is the length of the list, and the inner loop runs 'n-i-1' times to avoid re-checking the already sorted elements at the end of the list. An optimization is added to exit early if no swaps are made during an iteration, indicating that the list is already sorted. The time complexity of bubble sort is O(n^2) in the worst and average cases, but it can be O(n) if the list is already sorted.\n",
+      "\n",
+      "### Explanation of Improvements:\n",
+      "1. **Docstring Enhancement**: Added a brief description of the algorithm's time complexity.\n",
+      "2. **Variable Naming**: Changed `n` to `length` for better readability.\n",
+      "3. **Testing**: Added multiple test cases to cover different scenarios, including an empty list, a single-element list, a list with duplicate elements, and a list that is already sorted.\n",
+      "\n",
+      "These changes make the code more readable and the testing more comprehensive.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+      "Invoking: `output_handler` with `{'code': 'def bubble_sort(array):\\n    \"\"\"\\n    Sorts an array using the bubble sort algorithm.\\n\\n    Parameters:\\n    array (list): The list of elements to be sorted.\\n\\n    Returns:\\n    list: The sorted list.\\n\\n    Time Complexity:\\n    - Worst and Average Case: O(n^2)\\n    - Best Case: O(n) when the list is already sorted\\n    \"\"\"\\n    length = len(array)\\n    for i in range(length):\\n        swapped = False\\n        for j in range(0, length - i - 1):\\n            if array[j] > array[j + 1]:\\n                # Swap if the element found is greater than the next element\\n                array[j], array[j + 1] = array[j + 1], array[j]\\n                swapped = True\\n        # If no elements were swapped, the array is already sorted\\n        if not swapped:\\n            break\\n    return array\\n\\n# Test the bubble sort function with various cases\\ntest_cases = [\\n    [64, 34, 25, 12, 22, 11, 90],\\n    [],\\n    [1],\\n    [2, 1],\\n    [3, 3, 3],\\n    [5, 1, 4, 2, 8]\\n]\\n\\nfor i, sample_list in enumerate(test_cases):\\n    sorted_list = bubble_sort(sample_list.copy())\\n    print(f\"Test case {i + 1}: {sorted_list}\")', 'comment': \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs n times, and the inner loop runs n-i-1 times, where n is the length of the list and i is the current iteration of the outer loop. An optimization is added with a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and the loop can be terminated early. 
The time complexity of bubble sort is O(n^2) in the worst and average cases, and O(n) in the best case when the list is already sorted.\"}`\n",
+      "responded: Thought: The bubble sort algorithm has been successfully implemented and tested with various cases. The code works as expected, sorting the sample lists in ascending order. Now, I will return the final version of the code along with a comment explaining how it works.\n",
+      "\n",
+      "Let's proceed to return the final result.\n",
+      "\n",
+      "\n",
+      "\n",
+      "\u001b[0mCritic response:  The code and the comment are well-written and clear. However, there are a few minor improvements that could be made to enhance readability and efficiency:\n",
+      "\n",
+      "1. **Docstring Enhancement**: The docstring is already good, but adding a brief description of the algorithm at the beginning can make it even more informative.\n",
+      "2. **Variable Naming**: The variable `length` can be renamed to `n` to make it more concise, as it is commonly used to denote the length of a list in algorithms.\n",
+      "3. **Test Cases**: The test cases are good, but adding a few more edge cases, such as a list with negative numbers or a list with all elements in descending order, can make the testing more comprehensive.\n",
+      "4. **Function Return Type**: The return type in the docstring can be specified as `list` to `List[int]` for better type hinting.\n",
+      "\n",
+      "Here is the improved version of the code:\n",
+      "\n",
+      "```python\n",
+      "def bubble_sort(array):\n",
+      "    \"\"\"\n",
+      "    Sorts an array using the bubble sort algorithm.\n",
+      "\n",
+      "    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,\n",
+      "    comparing each pair of adjacent items and swapping them if they are in the wrong order.\n",
+      "    This process is repeated until the list is sorted. The algorithm gets its name because\n",
+      "    smaller elements 'bubble' to the top of the list.\n",
+      "\n",
+      "    Parameters:\n",
+      "    array (list): The list of elements to be sorted.\n",
+      "\n",
+      "    Returns:\n",
+      "    list: The sorted list.\n",
+      "\n",
+      "    Time Complexity:\n",
+      "    - Worst and Average Case: O(n^2)\n",
+      "    - Best Case: O(n) when the list is already sorted\n",
+      "    \"\"\"\n",
+      "    n = len(array)\n",
+      "    for i in range(n):\n",
+      "        swapped = False\n",
+      "        for j in range(0, n - i - 1):\n",
+      "            if array[j] > array[j + 1]:\n",
+      "                # Swap if the element found is greater than the next element\n",
+      "                array[j], array[j + 1] = array[j + 1], array[j]\n",
+      "                swapped = True\n",
+      "        # If no elements were swapped, the array is already sorted\n",
+      "        if not swapped:\n",
+      "            break\n",
+      "    return array\n",
+      "\n",
+      "# Test the bubble sort function with various cases\n",
+      "test_cases = [\n",
+      "    [64, 34, 25, 12, 22, 11, 90],\n",
+      "    [],\n",
+      "    [1],\n",
+      "    [2, 1],\n",
+      "    [3, 3, 3],\n",
+      "    [5, 1, 4, 2, 8],\n",
+      "    [-1, -3, -2, -5, -4],\n",
+      "    [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
+      "]\n",
+      "\n",
+      "for i, sample_list in enumerate(test_cases):\n",
+      "    sorted_list = bubble_sort(sample_list.copy())\n",
+      "    print(f\"Test case {i + 1}: {sorted_list}\")\n",
       "```\n",
       "\n",
-      "These changes make the code more efficient and the comment more informative.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
-      "Invoking: `output_handler` with `{'code': 'def bubble_sort(arr):\\n    n = len(arr)\\n    for i in range(n):\\n        swapped = False\\n        for j in range(0, n-i-1):\\n            if arr[j] > arr[j+1]:\\n                arr[j], arr[j+1] = arr[j+1], arr[j]\\n                swapped = True\\n        if not swapped:\\n            break\\n    return arr\\n\\n# Test the bubble sort function\\nsample_list = [64, 34, 25, 12, 22, 11, 90]\\nsorted_list = bubble_sort(sample_list)\\nprint(sorted_list)', 'comment': \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs 'n' times, where 'n' is the length of the list, and the inner loop runs 'n-i-1' times to avoid re-checking the already sorted elements at the end of the list. An optimization is added to exit early if no swaps are made during an iteration, indicating that the list is already sorted. The time complexity of bubble sort is O(n^2) in the worst and average cases, but it can be O(n) if the list is already sorted.\"}`\n",
-      "responded: Thought: The bubble sort algorithm has been successfully implemented and tested. The sorted list `[11, 12, 22, 25, 34, 64, 90]` confirms that the algorithm works correctly. Now, I will provide the code along with an explanation of how it works, including the early exit optimization and time complexity details.\n",
+      "These changes make the code slightly more readable and the testing more robust.\n",
+      "\u001b[33;1m\u001b[1;3mThe code and the comment are well-written and clear. However, there are a few minor improvements that could be made to enhance readability and efficiency:\n",
       "\n",
+      "1. **Docstring Enhancement**: The docstring is already good, but adding a brief description of the algorithm at the beginning can make it even more informative.\n",
+      "2. **Variable Naming**: The variable `length` can be renamed to `n` to make it more concise, as it is commonly used to denote the length of a list in algorithms.\n",
+      "3. **Test Cases**: The test cases are good, but adding a few more edge cases, such as a list with negative numbers or a list with all elements in descending order, can make the testing more comprehensive.\n",
+      "4. **Function Return Type**: The return type in the docstring can be specified as `list` to `List[int]` for better type hinting.\n",
       "\n",
+      "Here is the improved version of the code:\n",
       "\n",
-      "\u001b[0m"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2024-07-19 16:56:50,731 - motleycrew - WARNING - No known Cypher type matching annotation typing.Optional[typing.Any], will use JSON string\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Critic response:  OK\n",
-      "\u001b[33;1m\u001b[1;3m('def bubble_sort(arr):\\n    n = len(arr)\\n    for i in range(n):\\n        swapped = False\\n        for j in range(0, n-i-1):\\n            if arr[j] > arr[j+1]:\\n                arr[j], arr[j+1] = arr[j+1], arr[j]\\n                swapped = True\\n        if not swapped:\\n            break\\n    return arr\\n\\n# Test the bubble sort function\\nsample_list = [64, 34, 25, 12, 22, 11, 90]\\nsorted_list = bubble_sort(sample_list)\\nprint(sorted_list)', \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs 'n' times, where 'n' is the length of the list, and the inner loop runs 'n-i-1' times to avoid re-checking the already sorted elements at the end of the list. An optimization is added to exit early if no swaps are made during an iteration, indicating that the list is already sorted. The time complexity of bubble sort is O(n^2) in the worst and average cases, but it can be O(n) if the list is already sorted.\")\u001b[0m\u001b[32;1m\u001b[1;3m('def bubble_sort(arr):\\n    n = len(arr)\\n    for i in range(n):\\n        swapped = False\\n        for j in range(0, n-i-1):\\n            if arr[j] > arr[j+1]:\\n                arr[j], arr[j+1] = arr[j+1], arr[j]\\n                swapped = True\\n        if not swapped:\\n            break\\n    return arr\\n\\n# Test the bubble sort function\\nsample_list = [64, 34, 25, 12, 22, 11, 90]\\nsorted_list = bubble_sort(sample_list)\\nprint(sorted_list)', \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. 
The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs 'n' times, where 'n' is the length of the list, and the inner loop runs 'n-i-1' times to avoid re-checking the already sorted elements at the end of the list. An optimization is added to exit early if no swaps are made during an iteration, indicating that the list is already sorted. The time complexity of bubble sort is O(n^2) in the worst and average cases, but it can be O(n) if the list is already sorted.\")\u001b[0m\n",
+      "```python\n",
+      "def bubble_sort(array):\n",
+      "    \"\"\"\n",
+      "    Sorts an array using the bubble sort algorithm.\n",
+      "\n",
+      "    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,\n",
+      "    comparing each pair of adjacent items and swapping them if they are in the wrong order.\n",
+      "    This process is repeated until the list is sorted. The algorithm gets its name because\n",
+      "    smaller elements 'bubble' to the top of the list.\n",
+      "\n",
+      "    Parameters:\n",
+      "    array (list): The list of elements to be sorted.\n",
+      "\n",
+      "    Returns:\n",
+      "    list: The sorted list.\n",
+      "\n",
+      "    Time Complexity:\n",
+      "    - Worst and Average Case: O(n^2)\n",
+      "    - Best Case: O(n) when the list is already sorted\n",
+      "    \"\"\"\n",
+      "    n = len(array)\n",
+      "    for i in range(n):\n",
+      "        swapped = False\n",
+      "        for j in range(0, n - i - 1):\n",
+      "            if array[j] > array[j + 1]:\n",
+      "                # Swap if the element found is greater than the next element\n",
+      "                array[j], array[j + 1] = array[j + 1], array[j]\n",
+      "                swapped = True\n",
+      "        # If no elements were swapped, the array is already sorted\n",
+      "        if not swapped:\n",
+      "            break\n",
+      "    return array\n",
+      "\n",
+      "# Test the bubble sort function with various cases\n",
+      "test_cases = [\n",
+      "    [64, 34, 25, 12, 22, 11, 90],\n",
+      "    [],\n",
+      "    [1],\n",
+      "    [2, 1],\n",
+      "    [3, 3, 3],\n",
+      "    [5, 1, 4, 2, 8],\n",
+      "    [-1, -3, -2, -5, -4],\n",
+      "    [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
+      "]\n",
+      "\n",
+      "for i, sample_list in enumerate(test_cases):\n",
+      "    sorted_list = bubble_sort(sample_list.copy())\n",
+      "    print(f\"Test case {i + 1}: {sorted_list}\")\n",
+      "```\n",
+      "\n",
+      "These changes make the code slightly more readable and the testing more robust.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+      "Invoking: `output_handler` with `{'code': 'def bubble_sort(array):\\n    \"\"\"\\n    Sorts an array using the bubble sort algorithm.\\n\\n    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,\\n    comparing each pair of adjacent items and swapping them if they are in the wrong order.\\n    This process is repeated until the list is sorted. The algorithm gets its name because\\n    smaller elements \\'bubble\\' to the top of the list.\\n\\n    Parameters:\\n    array (list): The list of elements to be sorted.\\n\\n    Returns:\\n    list: The sorted list.\\n\\n    Time Complexity:\\n    - Worst and Average Case: O(n^2)\\n    - Best Case: O(n) when the list is already sorted\\n    \"\"\"\\n    n = len(array)\\n    for i in range(n):\\n        swapped = False\\n        for j in range(0, n - i - 1):\\n            if array[j] > array[j + 1]:\\n                # Swap if the element found is greater than the next element\\n                array[j], array[j + 1] = array[j + 1], array[j]\\n                swapped = True\\n        # If no elements were swapped, the array is already sorted\\n        if not swapped:\\n            break\\n    return array\\n\\n# Test the bubble sort function with various cases\\ntest_cases = [\\n    [64, 34, 25, 12, 22, 11, 90],\\n    [],\\n    [1],\\n    [2, 1],\\n    [3, 3, 3],\\n    [5, 1, 4, 2, 8],\\n    [-1, -3, -2, -5, -4],\\n    [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\\n]\\n\\nfor i, sample_list in enumerate(test_cases):\\n    sorted_list = bubble_sort(sample_list.copy())\\n    print(f\"Test case {i + 1}: {sorted_list}\")', 'comment': \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. 
The outer loop runs n times, and the inner loop runs n-i-1 times, where n is the length of the list and i is the current iteration of the outer loop. An optimization is added with a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and the loop can be terminated early. The time complexity of bubble sort is O(n^2) in the worst and average cases, and O(n) in the best case when the list is already sorted.\"}`\n",
+      "responded: Thought: The bubble sort algorithm has been successfully implemented and tested with various cases. The code works as expected, sorting the sample lists in ascending order. Now, I will return the final version of the code along with a comment explaining how it works.\n",
+      "\n",
+      "Let's proceed to return the final result.\n",
+      "\n",
+      "\n",
+      "\n",
+      "\u001b[0mCritic response:  The code and the comment are well-written and clear. However, there are a few minor improvements that can be made to enhance readability and efficiency:\n",
+      "\n",
+      "1. **Docstring Enhancement**: The docstring is already quite detailed, but it can be slightly improved for clarity.\n",
+      "2. **Variable Naming**: The variable `n` can be renamed to `length` for better readability.\n",
+      "3. **Test Case Output**: The test case output can be formatted to show both the original and sorted lists for better comparison.\n",
+      "\n",
+      "Here is the improved version:\n",
+      "\n",
+      "```python\n",
+      "def bubble_sort(array):\n",
+      "    \"\"\"\n",
+      "    Sorts an array using the bubble sort algorithm.\n",
+      "\n",
+      "    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,\n",
+      "    comparing each pair of adjacent items and swapping them if they are in the wrong order.\n",
+      "    This process is repeated until the list is sorted. The algorithm gets its name because\n",
+      "    smaller elements 'bubble' to the top of the list.\n",
+      "\n",
+      "    Parameters:\n",
+      "    array (list): The list of elements to be sorted.\n",
+      "\n",
+      "    Returns:\n",
+      "    list: The sorted list.\n",
+      "\n",
+      "    Time Complexity:\n",
+      "    - Worst and Average Case: O(n^2)\n",
+      "    - Best Case: O(n) when the list is already sorted\n",
+      "    \"\"\"\n",
+      "    length = len(array)\n",
+      "    for i in range(length):\n",
+      "        swapped = False\n",
+      "        for j in range(0, length - i - 1):\n",
+      "            if array[j] > array[j + 1]:\n",
+      "                # Swap if the element found is greater than the next element\n",
+      "                array[j], array[j + 1] = array[j + 1], array[j]\n",
+      "                swapped = True\n",
+      "        # If no elements were swapped, the array is already sorted\n",
+      "        if not swapped:\n",
+      "            break\n",
+      "    return array\n",
+      "\n",
+      "# Test the bubble sort function with various cases\n",
+      "test_cases = [\n",
+      "    [64, 34, 25, 12, 22, 11, 90],\n",
+      "    [],\n",
+      "    [1],\n",
+      "    [2, 1],\n",
+      "    [3, 3, 3],\n",
+      "    [5, 1, 4, 2, 8],\n",
+      "    [-1, -3, -2, -5, -4],\n",
+      "    [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
+      "]\n",
+      "\n",
+      "for i, sample_list in enumerate(test_cases):\n",
+      "    sorted_list = bubble_sort(sample_list.copy())\n",
+      "    print(f\"Test case {i + 1}: Original: {sample_list}, Sorted: {sorted_list}\")\n",
+      "```\n",
+      "\n",
+      "The comment provided is already clear and concise, so no changes are needed there. The improvements made to the code are minor but enhance readability and the usefulness of the test case output.\n",
+      "\u001b[33;1m\u001b[1;3mThe code and the comment are well-written and clear. However, there are a few minor improvements that can be made to enhance readability and efficiency:\n",
+      "\n",
+      "1. **Docstring Enhancement**: The docstring is already quite detailed, but it can be slightly improved for clarity.\n",
+      "2. **Variable Naming**: The variable `n` can be renamed to `length` for better readability.\n",
+      "3. **Test Case Output**: The test case output can be formatted to show both the original and sorted lists for better comparison.\n",
+      "\n",
+      "Here is the improved version:\n",
+      "\n",
+      "```python\n",
+      "def bubble_sort(array):\n",
+      "    \"\"\"\n",
+      "    Sorts an array using the bubble sort algorithm.\n",
+      "\n",
+      "    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,\n",
+      "    comparing each pair of adjacent items and swapping them if they are in the wrong order.\n",
+      "    This process is repeated until the list is sorted. The algorithm gets its name because\n",
+      "    smaller elements 'bubble' to the top of the list.\n",
+      "\n",
+      "    Parameters:\n",
+      "    array (list): The list of elements to be sorted.\n",
+      "\n",
+      "    Returns:\n",
+      "    list: The sorted list.\n",
+      "\n",
+      "    Time Complexity:\n",
+      "    - Worst and Average Case: O(n^2)\n",
+      "    - Best Case: O(n) when the list is already sorted\n",
+      "    \"\"\"\n",
+      "    length = len(array)\n",
+      "    for i in range(length):\n",
+      "        swapped = False\n",
+      "        for j in range(0, length - i - 1):\n",
+      "            if array[j] > array[j + 1]:\n",
+      "                # Swap if the element found is greater than the next element\n",
+      "                array[j], array[j + 1] = array[j + 1], array[j]\n",
+      "                swapped = True\n",
+      "        # If no elements were swapped, the array is already sorted\n",
+      "        if not swapped:\n",
+      "            break\n",
+      "    return array\n",
+      "\n",
+      "# Test the bubble sort function with various cases\n",
+      "test_cases = [\n",
+      "    [64, 34, 25, 12, 22, 11, 90],\n",
+      "    [],\n",
+      "    [1],\n",
+      "    [2, 1],\n",
+      "    [3, 3, 3],\n",
+      "    [5, 1, 4, 2, 8],\n",
+      "    [-1, -3, -2, -5, -4],\n",
+      "    [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
+      "]\n",
+      "\n",
+      "for i, sample_list in enumerate(test_cases):\n",
+      "    sorted_list = bubble_sort(sample_list.copy())\n",
+      "    print(f\"Test case {i + 1}: Original: {sample_list}, Sorted: {sorted_list}\")\n",
+      "```\n",
+      "\n",
+      "The comment provided is already clear and concise, so no changes are needed there. The improvements made to the code are minor but enhance readability and the usefulness of the test case output.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+      "Invoking: `output_handler` with `{'code': 'def bubble_sort(array):\\n    \"\"\"\\n    Sorts an array using the bubble sort algorithm.\\n\\n    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,\\n    comparing each pair of adjacent items and swapping them if they are in the wrong order.\\n    This process is repeated until the list is sorted. The algorithm gets its name because\\n    smaller elements \\'bubble\\' to the top of the list.\\n\\n    Parameters:\\n    array (list): The list of elements to be sorted.\\n\\n    Returns:\\n    list: The sorted list.\\n\\n    Time Complexity:\\n    - Worst and Average Case: O(n^2)\\n    - Best Case: O(n) when the list is already sorted\\n    \"\"\"\\n    length = len(array)\\n    for i in range(length):\\n        swapped = False\\n        for j in range(0, length - i - 1):\\n            if array[j] > array[j + 1]:\\n                # Swap if the element found is greater than the next element\\n                array[j], array[j + 1] = array[j + 1], array[j]\\n                swapped = True\\n        # If no elements were swapped, the array is already sorted\\n        if not swapped:\\n            break\\n    return array\\n\\n# Test the bubble sort function with various cases\\ntest_cases = [\\n    [64, 34, 25, 12, 22, 11, 90],\\n    [],\\n    [1],\\n    [2, 1],\\n    [3, 3, 3],\\n    [5, 1, 4, 2, 8],\\n    [-1, -3, -2, -5, -4],\\n    [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\\n]\\n\\nfor i, sample_list in enumerate(test_cases):\\n    sorted_list = bubble_sort(sample_list.copy())\\n    print(f\"Test case {i + 1}: Original: {sample_list}, Sorted: {sorted_list}\")', 'comment': \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. 
The outer loop runs n times, and the inner loop runs n-i-1 times, where n is the length of the list and i is the current iteration of the outer loop. An optimization is added with a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and the loop can be terminated early. The time complexity of bubble sort is O(n^2) in the worst and average cases, and O(n) in the best case when the list is already sorted.\"}`\n",
+      "responded: Thought: The bubble sort algorithm has been successfully implemented and tested with various cases. The code works as expected, sorting the sample lists in ascending order. Now, I will return the final version of the code along with a comment explaining how it works.\n",
+      "\n",
+      "Let's proceed to return the final result.\n",
+      "\n",
+      "\n",
+      "\n",
+      "\u001b[0mCritic response:  The code and the comment are well-written and clear. They provide a good explanation of how the bubble sort algorithm works, including the optimization with the swapped flag and the time complexity analysis. The test cases cover a variety of scenarios, which is good for verifying the correctness of the implementation.\n",
+      "\n",
+      "However, there are a few minor improvements that could be made:\n",
+      "\n",
+      "1. **Code Formatting and Readability**:\n",
+      "   - The comment inside the function could be slightly more concise.\n",
+      "   - The test cases could be printed in a more readable format.\n",
+      "\n",
+      "2. **Edge Case Handling**:\n",
+      "   - Although the test cases are comprehensive, it might be useful to include a test case with a list of identical elements to ensure the algorithm handles it correctly.\n",
+      "\n",
+      "3. **Function Documentation**:\n",
+      "   - The function's docstring is already quite detailed, but it could include a brief note on the space complexity as well.\n",
+      "\n",
+      "Here is the improved version of the code:\n",
+      "\n",
+      "```python\n",
+      "def bubble_sort(array):\n",
+      "    \"\"\"\n",
+      "    Sorts an array using the bubble sort algorithm.\n",
+      "\n",
+      "    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,\n",
+      "    comparing each pair of adjacent items and swapping them if they are in the wrong order.\n",
+      "    This process is repeated until the list is sorted. The algorithm gets its name because\n",
+      "    smaller elements 'bubble' to the top of the list.\n",
+      "\n",
+      "    Parameters:\n",
+      "    array (list): The list of elements to be sorted.\n",
+      "\n",
+      "    Returns:\n",
+      "    list: The sorted list.\n",
+      "\n",
+      "    Time Complexity:\n",
+      "    - Worst and Average Case: O(n^2)\n",
+      "    - Best Case: O(n) when the list is already sorted\n",
+      "\n",
+      "    Space Complexity:\n",
+      "    - O(1) as it is an in-place sorting algorithm\n",
+      "    \"\"\"\n",
+      "    length = len(array)\n",
+      "    for i in range(length):\n",
+      "        swapped = False\n",
+      "        for j in range(0, length - i - 1):\n",
+      "            if array[j] > array[j + 1]:\n",
+      "                # Swap if the element found is greater than the next element\n",
+      "                array[j], array[j + 1] = array[j + 1], array[j]\n",
+      "                swapped = True\n",
+      "        # If no elements were swapped, the array is already sorted\n",
+      "        if not swapped:\n",
+      "            break\n",
+      "    return array\n",
+      "\n",
+      "# Test the bubble sort function with various cases\n",
+      "test_cases = [\n",
+      "    [64, 34, 25, 12, 22, 11, 90],\n",
+      "    [],\n",
+      "    [1],\n",
+      "    [2, 1],\n",
+      "    [3, 3, 3],\n",
+      "    [5, 1, 4, 2, 8],\n",
+      "    [-1, -3, -2, -5, -4],\n",
+      "    [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],\n",
+      "    [7, 7, 7, 7, 7]\n",
+      "]\n",
+      "\n",
+      "for i, sample_list in enumerate(test_cases):\n",
+      "    sorted_list = bubble_sort(sample_list.copy())\n",
+      "    print(f\"Test case {i + 1}: Original: {sample_list}, Sorted: {sorted_list}\")\n",
+      "```\n",
+      "\n",
+      "The improvements include:\n",
+      "- Adding space complexity information in the docstring.\n",
+      "- Including an additional test case with identical elements.\n",
+      "- Minor formatting changes for better readability.\n",
+      "\n",
+      "Overall, the original code and comment were already quite good, and these suggestions are just minor enhancements.\n",
+      "\u001b[33;1m\u001b[1;3mThe code and the comment are well-written and clear. They provide a good explanation of how the bubble sort algorithm works, including the optimization with the swapped flag and the time complexity analysis. The test cases cover a variety of scenarios, which is good for verifying the correctness of the implementation.\n",
+      "\n",
+      "However, there are a few minor improvements that could be made:\n",
+      "\n",
+      "1. **Code Formatting and Readability**:\n",
+      "   - The comment inside the function could be slightly more concise.\n",
+      "   - The test cases could be printed in a more readable format.\n",
+      "\n",
+      "2. **Edge Case Handling**:\n",
+      "   - Although the test cases are comprehensive, it might be useful to include a test case with a list of identical elements to ensure the algorithm handles it correctly.\n",
+      "\n",
+      "3. **Function Documentation**:\n",
+      "   - The function's docstring is already quite detailed, but it could include a brief note on the space complexity as well.\n",
+      "\n",
+      "Here is the improved version of the code:\n",
+      "\n",
+      "```python\n",
+      "def bubble_sort(array):\n",
+      "    \"\"\"\n",
+      "    Sorts an array using the bubble sort algorithm.\n",
+      "\n",
+      "    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,\n",
+      "    comparing each pair of adjacent items and swapping them if they are in the wrong order.\n",
+      "    This process is repeated until the list is sorted. The algorithm gets its name because\n",
+      "    smaller elements 'bubble' to the top of the list.\n",
+      "\n",
+      "    Parameters:\n",
+      "    array (list): The list of elements to be sorted.\n",
+      "\n",
+      "    Returns:\n",
+      "    list: The sorted list.\n",
+      "\n",
+      "    Time Complexity:\n",
+      "    - Worst and Average Case: O(n^2)\n",
+      "    - Best Case: O(n) when the list is already sorted\n",
+      "\n",
+      "    Space Complexity:\n",
+      "    - O(1) as it is an in-place sorting algorithm\n",
+      "    \"\"\"\n",
+      "    length = len(array)\n",
+      "    for i in range(length):\n",
+      "        swapped = False\n",
+      "        for j in range(0, length - i - 1):\n",
+      "            if array[j] > array[j + 1]:\n",
+      "                # Swap if the element found is greater than the next element\n",
+      "                array[j], array[j + 1] = array[j + 1], array[j]\n",
+      "                swapped = True\n",
+      "        # If no elements were swapped, the array is already sorted\n",
+      "        if not swapped:\n",
+      "            break\n",
+      "    return array\n",
+      "\n",
+      "# Test the bubble sort function with various cases\n",
+      "test_cases = [\n",
+      "    [64, 34, 25, 12, 22, 11, 90],\n",
+      "    [],\n",
+      "    [1],\n",
+      "    [2, 1],\n",
+      "    [3, 3, 3],\n",
+      "    [5, 1, 4, 2, 8],\n",
+      "    [-1, -3, -2, -5, -4],\n",
+      "    [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],\n",
+      "    [7, 7, 7, 7, 7]\n",
+      "]\n",
+      "\n",
+      "for i, sample_list in enumerate(test_cases):\n",
+      "    sorted_list = bubble_sort(sample_list.copy())\n",
+      "    print(f\"Test case {i + 1}: Original: {sample_list}, Sorted: {sorted_list}\")\n",
+      "```\n",
+      "\n",
+      "The improvements include:\n",
+      "- Adding space complexity information in the docstring.\n",
+      "- Including an additional test case with identical elements.\n",
+      "- Minor formatting changes for better readability.\n",
+      "\n",
+      "Overall, the original code and comment were already quite good, and these suggestions are just minor enhancements.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+      "Invoking: `output_handler` with `{'code': 'def bubble_sort(array):\\n    \"\"\"\\n    Sorts an array using the bubble sort algorithm.\\n\\n    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,\\n    comparing each pair of adjacent items and swapping them if they are in the wrong order.\\n    This process is repeated until the list is sorted. The algorithm gets its name because\\n    smaller elements \\'bubble\\' to the top of the list.\\n\\n    Parameters:\\n    array (list): The list of elements to be sorted.\\n\\n    Returns:\\n    list: The sorted list.\\n\\n    Time Complexity:\\n    - Worst and Average Case: O(n^2)\\n    - Best Case: O(n) when the list is already sorted\\n\\n    Space Complexity:\\n    - O(1) as it is an in-place sorting algorithm\\n    \"\"\"\\n    length = len(array)\\n    for i in range(length):\\n        swapped = False\\n        for j in range(0, length - i - 1):\\n            if array[j] > array[j + 1]:\\n                # Swap if the element found is greater than the next element\\n                array[j], array[j + 1] = array[j + 1], array[j]\\n                swapped = True\\n        # If no elements were swapped, the array is already sorted\\n        if not swapped:\\n            break\\n    return array\\n\\n# Test the bubble sort function with various cases\\ntest_cases = [\\n    [64, 34, 25, 12, 22, 11, 90],\\n    [],\\n    [1],\\n    [2, 1],\\n    [3, 3, 3],\\n    [5, 1, 4, 2, 8],\\n    [-1, -3, -2, -5, -4],\\n    [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],\\n    [7, 7, 7, 7, 7]\\n]\\n\\nfor i, sample_list in enumerate(test_cases):\\n    sorted_list = bubble_sort(sample_list.copy())\\n    print(f\"Test case {i + 1}: Original: {sample_list}, Sorted: {sorted_list}\")', 'comment': \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. 
This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs n times, and the inner loop runs n-i-1 times, where n is the length of the list and i is the current iteration of the outer loop. An optimization is added with a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and the loop can be terminated early. The time complexity of bubble sort is O(n^2) in the worst and average cases, and O(n) in the best case when the list is already sorted. The space complexity is O(1) as it is an in-place sorting algorithm.\"}`\n",
+      "responded: Thought: The bubble sort algorithm has been successfully implemented and tested with various cases. The code works as expected, sorting the sample lists in ascending order. Now, I will return the final version of the code along with a comment explaining how it works.\n",
+      "\n",
+      "Let's proceed to return the final result.\n",
+      "\n",
+      "\n",
+      "\n",
+      "\u001b[0mCritic response:  OK\n",
+      "\u001b[32;1m\u001b[1;3m('def bubble_sort(array):\\n    \"\"\"\\n    Sorts an array using the bubble sort algorithm.\\n\\n    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,\\n    comparing each pair of adjacent items and swapping them if they are in the wrong order.\\n    This process is repeated until the list is sorted. The algorithm gets its name because\\n    smaller elements \\'bubble\\' to the top of the list.\\n\\n    Parameters:\\n    array (list): The list of elements to be sorted.\\n\\n    Returns:\\n    list: The sorted list.\\n\\n    Time Complexity:\\n    - Worst and Average Case: O(n^2)\\n    - Best Case: O(n) when the list is already sorted\\n\\n    Space Complexity:\\n    - O(1) as it is an in-place sorting algorithm\\n    \"\"\"\\n    length = len(array)\\n    for i in range(length):\\n        swapped = False\\n        for j in range(0, length - i - 1):\\n            if array[j] > array[j + 1]:\\n                # Swap if the element found is greater than the next element\\n                array[j], array[j + 1] = array[j + 1], array[j]\\n                swapped = True\\n        # If no elements were swapped, the array is already sorted\\n        if not swapped:\\n            break\\n    return array\\n\\n# Test the bubble sort function with various cases\\ntest_cases = [\\n    [64, 34, 25, 12, 22, 11, 90],\\n    [],\\n    [1],\\n    [2, 1],\\n    [3, 3, 3],\\n    [5, 1, 4, 2, 8],\\n    [-1, -3, -2, -5, -4],\\n    [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],\\n    [7, 7, 7, 7, 7]\\n]\\n\\nfor i, sample_list in enumerate(test_cases):\\n    sorted_list = bubble_sort(sample_list.copy())\\n    print(f\"Test case {i + 1}: Original: {sample_list}, Sorted: {sorted_list}\")', \"The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. 
The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs n times, and the inner loop runs n-i-1 times, where n is the length of the list and i is the current iteration of the outer loop. An optimization is added with a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and the loop can be terminated early. The time complexity of bubble sort is O(n^2) in the worst and average cases, and O(n) in the best case when the list is already sorted. The space complexity is O(1) as it is an in-place sorting algorithm.\")\u001b[0m\n",
       "\n",
       "\u001b[1m> Finished chain.\u001b[0m\n"
      ]
@@ -429,61 +963,166 @@
        ".output_html .vg { color: #19177C } /* Name.Variable.Global */\n",
        ".output_html .vi { color: #19177C } /* Name.Variable.Instance */\n",
        ".output_html .vm { color: #19177C } /* Name.Variable.Magic */\n",
-       ".output_html .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class=\"highlight\"><pre><span></span><span class=\"k\">def</span> <span class=\"nf\">bubble_sort</span><span class=\"p\">(</span><span class=\"n\">arr</span><span class=\"p\">):</span>\n",
-       "    <span class=\"n\">n</span> <span class=\"o\">=</span> <span class=\"nb\">len</span><span class=\"p\">(</span><span class=\"n\">arr</span><span class=\"p\">)</span>\n",
-       "    <span class=\"k\">for</span> <span class=\"n\">i</span> <span class=\"ow\">in</span> <span class=\"nb\">range</span><span class=\"p\">(</span><span class=\"n\">n</span><span class=\"p\">):</span>\n",
+       ".output_html .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class=\"highlight\"><pre><span></span><span class=\"k\">def</span> <span class=\"nf\">bubble_sort</span><span class=\"p\">(</span><span class=\"n\">array</span><span class=\"p\">):</span>\n",
+       "<span class=\"w\">    </span><span class=\"sd\">&quot;&quot;&quot;</span>\n",
+       "<span class=\"sd\">    Sorts an array using the bubble sort algorithm.</span>\n",
+       "\n",
+       "<span class=\"sd\">    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,</span>\n",
+       "<span class=\"sd\">    comparing each pair of adjacent items and swapping them if they are in the wrong order.</span>\n",
+       "<span class=\"sd\">    This process is repeated until the list is sorted. The algorithm gets its name because</span>\n",
+       "<span class=\"sd\">    smaller elements &#39;bubble&#39; to the top of the list.</span>\n",
+       "\n",
+       "<span class=\"sd\">    Parameters:</span>\n",
+       "<span class=\"sd\">    array (list): The list of elements to be sorted.</span>\n",
+       "\n",
+       "<span class=\"sd\">    Returns:</span>\n",
+       "<span class=\"sd\">    list: The sorted list.</span>\n",
+       "\n",
+       "<span class=\"sd\">    Time Complexity:</span>\n",
+       "<span class=\"sd\">    - Worst and Average Case: O(n^2)</span>\n",
+       "<span class=\"sd\">    - Best Case: O(n) when the list is already sorted</span>\n",
+       "\n",
+       "<span class=\"sd\">    Space Complexity:</span>\n",
+       "<span class=\"sd\">    - O(1) as it is an in-place sorting algorithm</span>\n",
+       "<span class=\"sd\">    &quot;&quot;&quot;</span>\n",
+       "    <span class=\"n\">length</span> <span class=\"o\">=</span> <span class=\"nb\">len</span><span class=\"p\">(</span><span class=\"n\">array</span><span class=\"p\">)</span>\n",
+       "    <span class=\"k\">for</span> <span class=\"n\">i</span> <span class=\"ow\">in</span> <span class=\"nb\">range</span><span class=\"p\">(</span><span class=\"n\">length</span><span class=\"p\">):</span>\n",
        "        <span class=\"n\">swapped</span> <span class=\"o\">=</span> <span class=\"kc\">False</span>\n",
-       "        <span class=\"k\">for</span> <span class=\"n\">j</span> <span class=\"ow\">in</span> <span class=\"nb\">range</span><span class=\"p\">(</span><span class=\"mi\">0</span><span class=\"p\">,</span> <span class=\"n\">n</span><span class=\"o\">-</span><span class=\"n\">i</span><span class=\"o\">-</span><span class=\"mi\">1</span><span class=\"p\">):</span>\n",
-       "            <span class=\"k\">if</span> <span class=\"n\">arr</span><span class=\"p\">[</span><span class=\"n\">j</span><span class=\"p\">]</span> <span class=\"o\">&gt;</span> <span class=\"n\">arr</span><span class=\"p\">[</span><span class=\"n\">j</span><span class=\"o\">+</span><span class=\"mi\">1</span><span class=\"p\">]:</span>\n",
-       "                <span class=\"n\">arr</span><span class=\"p\">[</span><span class=\"n\">j</span><span class=\"p\">],</span> <span class=\"n\">arr</span><span class=\"p\">[</span><span class=\"n\">j</span><span class=\"o\">+</span><span class=\"mi\">1</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"n\">arr</span><span class=\"p\">[</span><span class=\"n\">j</span><span class=\"o\">+</span><span class=\"mi\">1</span><span class=\"p\">],</span> <span class=\"n\">arr</span><span class=\"p\">[</span><span class=\"n\">j</span><span class=\"p\">]</span>\n",
+       "        <span class=\"k\">for</span> <span class=\"n\">j</span> <span class=\"ow\">in</span> <span class=\"nb\">range</span><span class=\"p\">(</span><span class=\"mi\">0</span><span class=\"p\">,</span> <span class=\"n\">length</span> <span class=\"o\">-</span> <span class=\"n\">i</span> <span class=\"o\">-</span> <span class=\"mi\">1</span><span class=\"p\">):</span>\n",
+       "            <span class=\"k\">if</span> <span class=\"n\">array</span><span class=\"p\">[</span><span class=\"n\">j</span><span class=\"p\">]</span> <span class=\"o\">&gt;</span> <span class=\"n\">array</span><span class=\"p\">[</span><span class=\"n\">j</span> <span class=\"o\">+</span> <span class=\"mi\">1</span><span class=\"p\">]:</span>\n",
+       "                <span class=\"c1\"># Swap if the element found is greater than the next element</span>\n",
+       "                <span class=\"n\">array</span><span class=\"p\">[</span><span class=\"n\">j</span><span class=\"p\">],</span> <span class=\"n\">array</span><span class=\"p\">[</span><span class=\"n\">j</span> <span class=\"o\">+</span> <span class=\"mi\">1</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"n\">array</span><span class=\"p\">[</span><span class=\"n\">j</span> <span class=\"o\">+</span> <span class=\"mi\">1</span><span class=\"p\">],</span> <span class=\"n\">array</span><span class=\"p\">[</span><span class=\"n\">j</span><span class=\"p\">]</span>\n",
        "                <span class=\"n\">swapped</span> <span class=\"o\">=</span> <span class=\"kc\">True</span>\n",
+       "        <span class=\"c1\"># If no elements were swapped, the array is already sorted</span>\n",
        "        <span class=\"k\">if</span> <span class=\"ow\">not</span> <span class=\"n\">swapped</span><span class=\"p\">:</span>\n",
        "            <span class=\"k\">break</span>\n",
-       "    <span class=\"k\">return</span> <span class=\"n\">arr</span>\n",
+       "    <span class=\"k\">return</span> <span class=\"n\">array</span>\n",
+       "\n",
+       "<span class=\"c1\"># Test the bubble sort function with various cases</span>\n",
+       "<span class=\"n\">test_cases</span> <span class=\"o\">=</span> <span class=\"p\">[</span>\n",
+       "    <span class=\"p\">[</span><span class=\"mi\">64</span><span class=\"p\">,</span> <span class=\"mi\">34</span><span class=\"p\">,</span> <span class=\"mi\">25</span><span class=\"p\">,</span> <span class=\"mi\">12</span><span class=\"p\">,</span> <span class=\"mi\">22</span><span class=\"p\">,</span> <span class=\"mi\">11</span><span class=\"p\">,</span> <span class=\"mi\">90</span><span class=\"p\">],</span>\n",
+       "    <span class=\"p\">[],</span>\n",
+       "    <span class=\"p\">[</span><span class=\"mi\">1</span><span class=\"p\">],</span>\n",
+       "    <span class=\"p\">[</span><span class=\"mi\">2</span><span class=\"p\">,</span> <span class=\"mi\">1</span><span class=\"p\">],</span>\n",
+       "    <span class=\"p\">[</span><span class=\"mi\">3</span><span class=\"p\">,</span> <span class=\"mi\">3</span><span class=\"p\">,</span> <span class=\"mi\">3</span><span class=\"p\">],</span>\n",
+       "    <span class=\"p\">[</span><span class=\"mi\">5</span><span class=\"p\">,</span> <span class=\"mi\">1</span><span class=\"p\">,</span> <span class=\"mi\">4</span><span class=\"p\">,</span> <span class=\"mi\">2</span><span class=\"p\">,</span> <span class=\"mi\">8</span><span class=\"p\">],</span>\n",
+       "    <span class=\"p\">[</span><span class=\"o\">-</span><span class=\"mi\">1</span><span class=\"p\">,</span> <span class=\"o\">-</span><span class=\"mi\">3</span><span class=\"p\">,</span> <span class=\"o\">-</span><span class=\"mi\">2</span><span class=\"p\">,</span> <span class=\"o\">-</span><span class=\"mi\">5</span><span class=\"p\">,</span> <span class=\"o\">-</span><span class=\"mi\">4</span><span class=\"p\">],</span>\n",
+       "    <span class=\"p\">[</span><span class=\"mi\">10</span><span class=\"p\">,</span> <span class=\"mi\">9</span><span class=\"p\">,</span> <span class=\"mi\">8</span><span class=\"p\">,</span> <span class=\"mi\">7</span><span class=\"p\">,</span> <span class=\"mi\">6</span><span class=\"p\">,</span> <span class=\"mi\">5</span><span class=\"p\">,</span> <span class=\"mi\">4</span><span class=\"p\">,</span> <span class=\"mi\">3</span><span class=\"p\">,</span> <span class=\"mi\">2</span><span class=\"p\">,</span> <span class=\"mi\">1</span><span class=\"p\">],</span>\n",
+       "    <span class=\"p\">[</span><span class=\"mi\">7</span><span class=\"p\">,</span> <span class=\"mi\">7</span><span class=\"p\">,</span> <span class=\"mi\">7</span><span class=\"p\">,</span> <span class=\"mi\">7</span><span class=\"p\">,</span> <span class=\"mi\">7</span><span class=\"p\">]</span>\n",
+       "<span class=\"p\">]</span>\n",
        "\n",
-       "<span class=\"c1\"># Test the bubble sort function</span>\n",
-       "<span class=\"n\">sample_list</span> <span class=\"o\">=</span> <span class=\"p\">[</span><span class=\"mi\">64</span><span class=\"p\">,</span> <span class=\"mi\">34</span><span class=\"p\">,</span> <span class=\"mi\">25</span><span class=\"p\">,</span> <span class=\"mi\">12</span><span class=\"p\">,</span> <span class=\"mi\">22</span><span class=\"p\">,</span> <span class=\"mi\">11</span><span class=\"p\">,</span> <span class=\"mi\">90</span><span class=\"p\">]</span>\n",
-       "<span class=\"n\">sorted_list</span> <span class=\"o\">=</span> <span class=\"n\">bubble_sort</span><span class=\"p\">(</span><span class=\"n\">sample_list</span><span class=\"p\">)</span>\n",
-       "<span class=\"nb\">print</span><span class=\"p\">(</span><span class=\"n\">sorted_list</span><span class=\"p\">)</span>\n",
+       "<span class=\"k\">for</span> <span class=\"n\">i</span><span class=\"p\">,</span> <span class=\"n\">sample_list</span> <span class=\"ow\">in</span> <span class=\"nb\">enumerate</span><span class=\"p\">(</span><span class=\"n\">test_cases</span><span class=\"p\">):</span>\n",
+       "    <span class=\"n\">sorted_list</span> <span class=\"o\">=</span> <span class=\"n\">bubble_sort</span><span class=\"p\">(</span><span class=\"n\">sample_list</span><span class=\"o\">.</span><span class=\"n\">copy</span><span class=\"p\">())</span>\n",
+       "    <span class=\"nb\">print</span><span class=\"p\">(</span><span class=\"sa\">f</span><span class=\"s2\">&quot;Test case </span><span class=\"si\">{</span><span class=\"n\">i</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"mi\">1</span><span class=\"si\">}</span><span class=\"s2\">: Original: </span><span class=\"si\">{</span><span class=\"n\">sample_list</span><span class=\"si\">}</span><span class=\"s2\">, Sorted: </span><span class=\"si\">{</span><span class=\"n\">sorted_list</span><span class=\"si\">}</span><span class=\"s2\">&quot;</span><span class=\"p\">)</span>\n",
        "</pre></div>\n"
       ],
       "text/latex": [
        "\\begin{Verbatim}[commandchars=\\\\\\{\\}]\n",
-       "\\PY{k}{def} \\PY{n+nf}{bubble\\PYZus{}sort}\\PY{p}{(}\\PY{n}{arr}\\PY{p}{)}\\PY{p}{:}\n",
-       "    \\PY{n}{n} \\PY{o}{=} \\PY{n+nb}{len}\\PY{p}{(}\\PY{n}{arr}\\PY{p}{)}\n",
-       "    \\PY{k}{for} \\PY{n}{i} \\PY{o+ow}{in} \\PY{n+nb}{range}\\PY{p}{(}\\PY{n}{n}\\PY{p}{)}\\PY{p}{:}\n",
+       "\\PY{k}{def} \\PY{n+nf}{bubble\\PYZus{}sort}\\PY{p}{(}\\PY{n}{array}\\PY{p}{)}\\PY{p}{:}\n",
+       "\\PY{+w}{    }\\PY{l+s+sd}{\\PYZdq{}\\PYZdq{}\\PYZdq{}}\n",
+       "\\PY{l+s+sd}{    Sorts an array using the bubble sort algorithm.}\n",
+       "\n",
+       "\\PY{l+s+sd}{    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,}\n",
+       "\\PY{l+s+sd}{    comparing each pair of adjacent items and swapping them if they are in the wrong order.}\n",
+       "\\PY{l+s+sd}{    This process is repeated until the list is sorted. The algorithm gets its name because}\n",
+       "\\PY{l+s+sd}{    smaller elements \\PYZsq{}bubble\\PYZsq{} to the top of the list.}\n",
+       "\n",
+       "\\PY{l+s+sd}{    Parameters:}\n",
+       "\\PY{l+s+sd}{    array (list): The list of elements to be sorted.}\n",
+       "\n",
+       "\\PY{l+s+sd}{    Returns:}\n",
+       "\\PY{l+s+sd}{    list: The sorted list.}\n",
+       "\n",
+       "\\PY{l+s+sd}{    Time Complexity:}\n",
+       "\\PY{l+s+sd}{    \\PYZhy{} Worst and Average Case: O(n\\PYZca{}2)}\n",
+       "\\PY{l+s+sd}{    \\PYZhy{} Best Case: O(n) when the list is already sorted}\n",
+       "\n",
+       "\\PY{l+s+sd}{    Space Complexity:}\n",
+       "\\PY{l+s+sd}{    \\PYZhy{} O(1) as it is an in\\PYZhy{}place sorting algorithm}\n",
+       "\\PY{l+s+sd}{    \\PYZdq{}\\PYZdq{}\\PYZdq{}}\n",
+       "    \\PY{n}{length} \\PY{o}{=} \\PY{n+nb}{len}\\PY{p}{(}\\PY{n}{array}\\PY{p}{)}\n",
+       "    \\PY{k}{for} \\PY{n}{i} \\PY{o+ow}{in} \\PY{n+nb}{range}\\PY{p}{(}\\PY{n}{length}\\PY{p}{)}\\PY{p}{:}\n",
        "        \\PY{n}{swapped} \\PY{o}{=} \\PY{k+kc}{False}\n",
-       "        \\PY{k}{for} \\PY{n}{j} \\PY{o+ow}{in} \\PY{n+nb}{range}\\PY{p}{(}\\PY{l+m+mi}{0}\\PY{p}{,} \\PY{n}{n}\\PY{o}{\\PYZhy{}}\\PY{n}{i}\\PY{o}{\\PYZhy{}}\\PY{l+m+mi}{1}\\PY{p}{)}\\PY{p}{:}\n",
-       "            \\PY{k}{if} \\PY{n}{arr}\\PY{p}{[}\\PY{n}{j}\\PY{p}{]} \\PY{o}{\\PYZgt{}} \\PY{n}{arr}\\PY{p}{[}\\PY{n}{j}\\PY{o}{+}\\PY{l+m+mi}{1}\\PY{p}{]}\\PY{p}{:}\n",
-       "                \\PY{n}{arr}\\PY{p}{[}\\PY{n}{j}\\PY{p}{]}\\PY{p}{,} \\PY{n}{arr}\\PY{p}{[}\\PY{n}{j}\\PY{o}{+}\\PY{l+m+mi}{1}\\PY{p}{]} \\PY{o}{=} \\PY{n}{arr}\\PY{p}{[}\\PY{n}{j}\\PY{o}{+}\\PY{l+m+mi}{1}\\PY{p}{]}\\PY{p}{,} \\PY{n}{arr}\\PY{p}{[}\\PY{n}{j}\\PY{p}{]}\n",
+       "        \\PY{k}{for} \\PY{n}{j} \\PY{o+ow}{in} \\PY{n+nb}{range}\\PY{p}{(}\\PY{l+m+mi}{0}\\PY{p}{,} \\PY{n}{length} \\PY{o}{\\PYZhy{}} \\PY{n}{i} \\PY{o}{\\PYZhy{}} \\PY{l+m+mi}{1}\\PY{p}{)}\\PY{p}{:}\n",
+       "            \\PY{k}{if} \\PY{n}{array}\\PY{p}{[}\\PY{n}{j}\\PY{p}{]} \\PY{o}{\\PYZgt{}} \\PY{n}{array}\\PY{p}{[}\\PY{n}{j} \\PY{o}{+} \\PY{l+m+mi}{1}\\PY{p}{]}\\PY{p}{:}\n",
+       "                \\PY{c+c1}{\\PYZsh{} Swap if the element found is greater than the next element}\n",
+       "                \\PY{n}{array}\\PY{p}{[}\\PY{n}{j}\\PY{p}{]}\\PY{p}{,} \\PY{n}{array}\\PY{p}{[}\\PY{n}{j} \\PY{o}{+} \\PY{l+m+mi}{1}\\PY{p}{]} \\PY{o}{=} \\PY{n}{array}\\PY{p}{[}\\PY{n}{j} \\PY{o}{+} \\PY{l+m+mi}{1}\\PY{p}{]}\\PY{p}{,} \\PY{n}{array}\\PY{p}{[}\\PY{n}{j}\\PY{p}{]}\n",
        "                \\PY{n}{swapped} \\PY{o}{=} \\PY{k+kc}{True}\n",
+       "        \\PY{c+c1}{\\PYZsh{} If no elements were swapped, the array is already sorted}\n",
        "        \\PY{k}{if} \\PY{o+ow}{not} \\PY{n}{swapped}\\PY{p}{:}\n",
        "            \\PY{k}{break}\n",
-       "    \\PY{k}{return} \\PY{n}{arr}\n",
+       "    \\PY{k}{return} \\PY{n}{array}\n",
+       "\n",
+       "\\PY{c+c1}{\\PYZsh{} Test the bubble sort function with various cases}\n",
+       "\\PY{n}{test\\PYZus{}cases} \\PY{o}{=} \\PY{p}{[}\n",
+       "    \\PY{p}{[}\\PY{l+m+mi}{64}\\PY{p}{,} \\PY{l+m+mi}{34}\\PY{p}{,} \\PY{l+m+mi}{25}\\PY{p}{,} \\PY{l+m+mi}{12}\\PY{p}{,} \\PY{l+m+mi}{22}\\PY{p}{,} \\PY{l+m+mi}{11}\\PY{p}{,} \\PY{l+m+mi}{90}\\PY{p}{]}\\PY{p}{,}\n",
+       "    \\PY{p}{[}\\PY{p}{]}\\PY{p}{,}\n",
+       "    \\PY{p}{[}\\PY{l+m+mi}{1}\\PY{p}{]}\\PY{p}{,}\n",
+       "    \\PY{p}{[}\\PY{l+m+mi}{2}\\PY{p}{,} \\PY{l+m+mi}{1}\\PY{p}{]}\\PY{p}{,}\n",
+       "    \\PY{p}{[}\\PY{l+m+mi}{3}\\PY{p}{,} \\PY{l+m+mi}{3}\\PY{p}{,} \\PY{l+m+mi}{3}\\PY{p}{]}\\PY{p}{,}\n",
+       "    \\PY{p}{[}\\PY{l+m+mi}{5}\\PY{p}{,} \\PY{l+m+mi}{1}\\PY{p}{,} \\PY{l+m+mi}{4}\\PY{p}{,} \\PY{l+m+mi}{2}\\PY{p}{,} \\PY{l+m+mi}{8}\\PY{p}{]}\\PY{p}{,}\n",
+       "    \\PY{p}{[}\\PY{o}{\\PYZhy{}}\\PY{l+m+mi}{1}\\PY{p}{,} \\PY{o}{\\PYZhy{}}\\PY{l+m+mi}{3}\\PY{p}{,} \\PY{o}{\\PYZhy{}}\\PY{l+m+mi}{2}\\PY{p}{,} \\PY{o}{\\PYZhy{}}\\PY{l+m+mi}{5}\\PY{p}{,} \\PY{o}{\\PYZhy{}}\\PY{l+m+mi}{4}\\PY{p}{]}\\PY{p}{,}\n",
+       "    \\PY{p}{[}\\PY{l+m+mi}{10}\\PY{p}{,} \\PY{l+m+mi}{9}\\PY{p}{,} \\PY{l+m+mi}{8}\\PY{p}{,} \\PY{l+m+mi}{7}\\PY{p}{,} \\PY{l+m+mi}{6}\\PY{p}{,} \\PY{l+m+mi}{5}\\PY{p}{,} \\PY{l+m+mi}{4}\\PY{p}{,} \\PY{l+m+mi}{3}\\PY{p}{,} \\PY{l+m+mi}{2}\\PY{p}{,} \\PY{l+m+mi}{1}\\PY{p}{]}\\PY{p}{,}\n",
+       "    \\PY{p}{[}\\PY{l+m+mi}{7}\\PY{p}{,} \\PY{l+m+mi}{7}\\PY{p}{,} \\PY{l+m+mi}{7}\\PY{p}{,} \\PY{l+m+mi}{7}\\PY{p}{,} \\PY{l+m+mi}{7}\\PY{p}{]}\n",
+       "\\PY{p}{]}\n",
        "\n",
-       "\\PY{c+c1}{\\PYZsh{} Test the bubble sort function}\n",
-       "\\PY{n}{sample\\PYZus{}list} \\PY{o}{=} \\PY{p}{[}\\PY{l+m+mi}{64}\\PY{p}{,} \\PY{l+m+mi}{34}\\PY{p}{,} \\PY{l+m+mi}{25}\\PY{p}{,} \\PY{l+m+mi}{12}\\PY{p}{,} \\PY{l+m+mi}{22}\\PY{p}{,} \\PY{l+m+mi}{11}\\PY{p}{,} \\PY{l+m+mi}{90}\\PY{p}{]}\n",
-       "\\PY{n}{sorted\\PYZus{}list} \\PY{o}{=} \\PY{n}{bubble\\PYZus{}sort}\\PY{p}{(}\\PY{n}{sample\\PYZus{}list}\\PY{p}{)}\n",
-       "\\PY{n+nb}{print}\\PY{p}{(}\\PY{n}{sorted\\PYZus{}list}\\PY{p}{)}\n",
+       "\\PY{k}{for} \\PY{n}{i}\\PY{p}{,} \\PY{n}{sample\\PYZus{}list} \\PY{o+ow}{in} \\PY{n+nb}{enumerate}\\PY{p}{(}\\PY{n}{test\\PYZus{}cases}\\PY{p}{)}\\PY{p}{:}\n",
+       "    \\PY{n}{sorted\\PYZus{}list} \\PY{o}{=} \\PY{n}{bubble\\PYZus{}sort}\\PY{p}{(}\\PY{n}{sample\\PYZus{}list}\\PY{o}{.}\\PY{n}{copy}\\PY{p}{(}\\PY{p}{)}\\PY{p}{)}\n",
+       "    \\PY{n+nb}{print}\\PY{p}{(}\\PY{l+s+sa}{f}\\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{Test case }\\PY{l+s+si}{\\PYZob{}}\\PY{n}{i}\\PY{+w}{ }\\PY{o}{+}\\PY{+w}{ }\\PY{l+m+mi}{1}\\PY{l+s+si}{\\PYZcb{}}\\PY{l+s+s2}{: Original: }\\PY{l+s+si}{\\PYZob{}}\\PY{n}{sample\\PYZus{}list}\\PY{l+s+si}{\\PYZcb{}}\\PY{l+s+s2}{, Sorted: }\\PY{l+s+si}{\\PYZob{}}\\PY{n}{sorted\\PYZus{}list}\\PY{l+s+si}{\\PYZcb{}}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{)}\n",
        "\\end{Verbatim}\n"
       ],
       "text/plain": [
-       "def bubble_sort(arr):\n",
-       "    n = len(arr)\n",
-       "    for i in range(n):\n",
+       "def bubble_sort(array):\n",
+       "    \"\"\"\n",
+       "    Sorts an array using the bubble sort algorithm.\n",
+       "\n",
+       "    The bubble sort algorithm works by repeatedly stepping through the list to be sorted,\n",
+       "    comparing each pair of adjacent items and swapping them if they are in the wrong order.\n",
+       "    This process is repeated until the list is sorted. The algorithm gets its name because\n",
+       "    smaller elements 'bubble' to the top of the list.\n",
+       "\n",
+       "    Parameters:\n",
+       "    array (list): The list of elements to be sorted.\n",
+       "\n",
+       "    Returns:\n",
+       "    list: The sorted list.\n",
+       "\n",
+       "    Time Complexity:\n",
+       "    - Worst and Average Case: O(n^2)\n",
+       "    - Best Case: O(n) when the list is already sorted\n",
+       "\n",
+       "    Space Complexity:\n",
+       "    - O(1) as it is an in-place sorting algorithm\n",
+       "    \"\"\"\n",
+       "    length = len(array)\n",
+       "    for i in range(length):\n",
        "        swapped = False\n",
-       "        for j in range(0, n-i-1):\n",
-       "            if arr[j] > arr[j+1]:\n",
-       "                arr[j], arr[j+1] = arr[j+1], arr[j]\n",
+       "        for j in range(0, length - i - 1):\n",
+       "            if array[j] > array[j + 1]:\n",
+       "                # Swap if the element found is greater than the next element\n",
+       "                array[j], array[j + 1] = array[j + 1], array[j]\n",
        "                swapped = True\n",
+       "        # If no elements were swapped, the array is already sorted\n",
        "        if not swapped:\n",
        "            break\n",
-       "    return arr\n",
+       "    return array\n",
+       "\n",
+       "# Test the bubble sort function with various cases\n",
+       "test_cases = [\n",
+       "    [64, 34, 25, 12, 22, 11, 90],\n",
+       "    [],\n",
+       "    [1],\n",
+       "    [2, 1],\n",
+       "    [3, 3, 3],\n",
+       "    [5, 1, 4, 2, 8],\n",
+       "    [-1, -3, -2, -5, -4],\n",
+       "    [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],\n",
+       "    [7, 7, 7, 7, 7]\n",
+       "]\n",
        "\n",
-       "# Test the bubble sort function\n",
-       "sample_list = [64, 34, 25, 12, 22, 11, 90]\n",
-       "sorted_list = bubble_sort(sample_list)\n",
-       "print(sorted_list)"
+       "for i, sample_list in enumerate(test_cases):\n",
+       "    sorted_list = bubble_sort(sample_list.copy())\n",
+       "    print(f\"Test case {i + 1}: Original: {sample_list}, Sorted: {sorted_list}\")"
       ]
      },
      "metadata": {},
@@ -504,7 +1143,7 @@
     {
      "data": {
       "text/markdown": [
-       "The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs 'n' times, where 'n' is the length of the list, and the inner loop runs 'n-i-1' times to avoid re-checking the already sorted elements at the end of the list. An optimization is added to exit early if no swaps are made during an iteration, indicating that the list is already sorted. The time complexity of bubble sort is O(n^2) in the worst and average cases, but it can be O(n) if the list is already sorted."
+       "The bubble sort algorithm works by repeatedly stepping through the list to be sorted, comparing each pair of adjacent items and swapping them if they are in the wrong order. This process is repeated until the list is sorted. The algorithm gets its name because smaller elements 'bubble' to the top of the list. The outer loop runs n times, and the inner loop runs n-i-1 times, where n is the length of the list and i is the current iteration of the outer loop. An optimization is added with a flag to detect if any swapping happened in the inner loop. If no swapping happened, the list is already sorted, and the loop can be terminated early. The time complexity of bubble sort is O(n^2) in the worst and average cases, and O(n) in the best case when the list is already sorted. The space complexity is O(1) as it is an in-place sorting algorithm."
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -526,38 +1165,6 @@
    "source": [
     "final_result = f\"{code}\\n\\n{comment}\""
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Customizing the output handler\n",
-    "\n",
-    "When subclassing `MotleyOutputHandler`, you can override the default name, description and args schema. You can also specify which exceptions will be caught and returned to the agent if raised inside the output handler."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from langchain_core.pydantic_v1 import BaseModel, Field\n",
-    "\n",
-    "class CustomOutputHandlerSchema(BaseModel):\n",
-    "    code = Field(str, description=\"Python code demonstrating bubble sort\")\n",
-    "    comment = Field(str, description=\"Comment explaining how the code works\")\n",
-    "\n",
-    "\n",
-    "class MyCustomOutputHandler(MotleyOutputHandler):\n",
-    "    _name = \"Output handler\"\n",
-    "    _description = \"Output handler. ONLY RETURN THE FINAL RESULT USING THIS TOOL!\"\n",
-    "    _args_schema = CustomOutputHandlerSchema\n",
-    "    _exceptions_to_handle = (InvalidOutput, ValueError)  # ValueErrors will also be returned to the agent\n",
-    "\n",
-    "    def handle_output(self, code: str, comment: str):\n",
-    "        ..."
-   ]
   }
  ],
  "metadata": {
diff --git a/examples/Quickstart.ipynb b/examples/Quickstart.ipynb
index d97db356..f8b5846d 100644
--- a/examples/Quickstart.ipynb
+++ b/examples/Quickstart.ipynb
@@ -30,16 +30,45 @@
     "Motleycrew provides thin wrappers for all the common agent frameworks: Langchain, LlamaIndex, CrewAI, and Autogen (please let us know if you want any others added!).\n",
     "It also provides thin wrappers for Langchain and LlamaIndex tools, allowing you to use any of these tools with any of these agents.\n",
     "\n",
-    "Motleycrew also supports **delegation**: you can simply give any agent as a tool to any other agent. \n",
+    "MotleyCrew also supports **delegation**: you can simply give any agent as a tool to any other agent. \n",
     "\n",
     "All the wrappers for tools and agents implement the Runnable interface, so you can use them as-is in LCEL and Langgraph code.\n",
     "\n",
     "\n",
-    "### Output handlers\n",
+    "### Output handlers (aka return_direct)\n",
     "\n",
-    "An **output handler** is a special tool that the agent uses for submitting the final output instead of returning it raw. Besides defining a schema for the output, the output handler enables you to implement any validation logic inside it, including agent-based. If your agent returns invalid output, you can raise an exception that will be returned to the agent so that it can retry.\n",
+    "An **output handler** is a tool that the agent uses for submitting the final output instead of returning it raw. Besides defining a schema for the output, the output handler enables you to implement any validation logic inside it, including agent-based. If your agent returns invalid output, you can raise an exception that will be returned to the agent so that it can retry.\n",
     "\n",
-    "See our usage examples with a [simple validator](examples/validating_agent_output.html) and an [advanced output handler with multiple fields](examples/advanced_output_handling.html).\n"
+    "Essentially, an output handler is a tool that returns its output directly to the user, thus finishing the agent execution. This behavior is enabled by setting the `return_direct=True` argument for the tool. Unlike other frameworks, MotleyCrew allows you to have multiple output handlers for one agent, from which the agent can choose one.\n",
+    "\n",
+    "MotleyCrew also supports **forced output handlers**. This means that the agent will only be able to return output via an output handler, and not directly. This is useful if you want to ensure that the agent only returns output in a specific format.\n",
+    "\n",
+    "See our usage examples with a [simple validator](examples/validating_agent_output.html) and an [advanced output handler with multiple fields](examples/advanced_output_handling.html).\n",
+    "\n",
+    "\n",
+    "### MotleyTool\n",
+    "\n",
+    "A tool in motleycrew, like in other frameworks, is basically a function that takes an input and returns an output.\n",
+    "It is called a tool in the sense that it is usually used by an agent to perform a specific action.\n",
+    "Besides the function itself, a tool also contains an input schema which describes the input format to the LLM.\n",
+    "\n",
+    "`MotleyTool` is the base class for all tools in motleycrew. It is a subclass of `Runnable` that adds some additional features to the tool, along with necessary adapters and converters.\n",
+    "\n",
+    "If you pass a tool from a supported framework (currently Langchain, LlamaIndex, and CrewAI) to a motleycrew agent, it will be automatically converted. If you want to have control over this, e.g. to customize tool params, you can do it manually.\n",
+    "\n",
+    "```python\n",
+    "motley_tool = MotleyTool.from_supported_tool(my_tool)\n",
+    "```\n",
+    "\n",
+    "It is also possible to define a custom tool using the `MotleyTool` base class, overriding the `run` method. This is especially useful if you want to access context such as the caller agent or its last input, for example, for validation.\n",
+    "\n",
+    "```python\n",
+    "class MyTool(MotleyTool):\n",
+    "    def run(self, some_input: str) -> str:\n",
+    "        return f\"Received {some_input} from agent {self.agent} with last input {self.agent_input}\"\n",
+    "```\n",
+    "\n",
+    "`MotleyTool` can reflect exceptions that are raised inside it back to the agent, which can then retry the tool call. You can pass a list of exception classes to the `exceptions_to_reflect` argument in the constructor (or even pass the `Exception` class to reflect everything)."
    ]
   },
   {
diff --git a/examples/Validating agent output.ipynb b/examples/Validating agent output.ipynb
index a31e73a8..dad852a1 100644
--- a/examples/Validating agent output.ipynb	
+++ b/examples/Validating agent output.ipynb	
@@ -18,6 +18,7 @@
    "outputs": [],
    "source": [
     "from langchain_community.tools import DuckDuckGoSearchRun\n",
+    "from langchain_core.tools import StructuredTool\n",
     "\n",
     "from motleycrew import MotleyCrew\n",
     "from motleycrew.agents.langchain import ReActToolCallingMotleyAgent\n",
@@ -26,7 +27,7 @@
     "\n",
     "from motleycrew.common.exceptions import InvalidOutput\n",
     "\n",
-    "from langchain_core.tools import StructuredTool"
+    "from motleycrew.tools import MotleyTool"
    ]
   },
   {
@@ -55,7 +56,10 @@
     "    name=\"output_handler\",\n",
     "    description=\"Output handler\",\n",
     "    func=check_output,\n",
-    ")  # Schema is inferred from the function signature, but you can also specify it explicitly"
+    ")  # Schema is inferred from the function signature, but you can also specify it explicitly\n",
+    "\n",
+    "# To set return_direct=True, we'll convert it into a MotleyTool\n",
+    "output_handler = MotleyTool.from_supported_tool(output_handler, return_direct=True)"
    ]
   },
   {
@@ -69,23 +73,14 @@
    "cell_type": "code",
    "execution_count": 3,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2024-07-19 17:06:03,116 - motleycrew - WARNING - No known Cypher type matching annotation typing.Optional[typing.Any], will use JSON string\n",
-      "2024-07-19 17:06:03,120 - motleycrew - WARNING - No known Cypher type matching annotation typing.Optional[dict[str, typing.Any]], will use JSON string\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "search_tool = DuckDuckGoSearchRun()\n",
     "researcher = ReActToolCallingMotleyAgent(\n",
-    "    tools=[search_tool],\n",
+    "    tools=[search_tool, output_handler],\n",
     "    verbose=True,\n",
     "    chat_history=True,\n",
-    "    output_handler=output_handler,\n",
+    "    force_output_handler=True,  # This will force the agent to use the output handler instead of regular output\n",
     ")\n",
     "\n",
     "crew = MotleyCrew()\n",
@@ -93,8 +88,8 @@
     "    crew=crew,\n",
     "    name=\"produce comprehensive analysis report on AI advancements in 2024\",\n",
     "    description=\"\"\"Conduct a comprehensive analysis of the latest advancements in AI in 2024.\n",
-    "  Identify key trends, breakthrough technologies, and potential industry impacts.\n",
-    "  Your final answer MUST be a full analysis report\"\"\",\n",
+    "Identify key trends, breakthrough technologies, and potential industry impacts.\n",
+    "Perform no more than 3 search queries. Your final answer MUST be a full analysis report\"\"\",\n",
     "    agent=researcher,\n",
     ")"
    ]
@@ -104,14 +99,6 @@
    "execution_count": 4,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2024-07-19 17:06:03,183 - motleycrew - WARNING - No known Cypher type matching annotation typing.Optional[typing.Any], will use JSON string\n",
-      "2024-07-19 17:06:03,183 - motleycrew - WARNING - No known Cypher type matching annotation typing.Optional[dict[str, typing.Any]], will use JSON string\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -121,69 +108,66 @@
       "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
       "\u001b[32;1m\u001b[1;3m\n",
       "Invoking: `duckduckgo_search` with `{'query': 'latest advancements in AI 2024'}`\n",
-      "responded: Thought: To conduct a comprehensive analysis of the latest advancements in AI in 2024, I need to gather information on key trends, breakthrough technologies, and potential industry impacts. I will start by searching for the latest news and reports on AI advancements in 2024.\n",
+      "responded: Thought: To conduct a comprehensive analysis of the latest advancements in AI in 2024, I need to gather information on key trends, breakthrough technologies, and potential industry impacts. I will start by performing a search query to get an overview of the latest advancements in AI in 2024. This will help identify the key areas to focus on for further detailed searches.\n",
+      "\n",
+      "I will perform the first search query to get an overview of the latest advancements in AI in 2024.\n",
       "\n",
-      "I will use the DuckDuckGo search tool to find relevant information.\n",
+      "Calling the tool to perform the search query.\n",
       "\n",
       "\n",
       "\n",
-      "\u001b[0m\u001b[36;1m\u001b[1;3mHere are some important current AI trends to look out for in the coming year. Reality check: more realistic expectations. Multimodal AI. Small (er) language models and open source advancements. GPU shortages and cloud costs. Model optimization is getting more accessible. Customized local models and data pipelines. More powerful virtual agents. In 2024, generative AI might actually become useful for the regular, non-tech person, and we are going to see more people tinkering with a million little AI models. State-of-the-art AI models ... Now. When OpenAI launched a free web app called ChatGPT in November 2022, nobody knew what was coming. But that low-key release changed everything. By January, ChatGPT had become the fastest ... Adobe Stock. It's been a year since OpenAI released ChatGPT, opening the door to seamlessly weave AI into the fabric of our daily lives, propelling industries into the future and even prompting ... 2. Open source AI models. In 2024, open source pretrained AI models gain significant traction, empowering businesses to accelerate growth by combining these models with private or real-time data. This synergy enhances productivity and cost-efficiency. IBM actively contributes to open source AI models, which are exemplified by its collaboration ...\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+      "\u001b[0m\u001b[36;1m\u001b[1;3mIn 2024, generative AI might actually become useful for the regular, non-tech person, and we are going to see more people tinkering with a million little AI models. State-of-the-art AI models ... The Top 5 Artificial Intelligence (AI) Trends For 2024. Adobe Stock. It's been a year since OpenAI released ChatGPT, opening the door to seamlessly weave AI into the fabric of our daily lives ... Generative AI took the world by storm in 2023. Its future—and ours—will be shaped by what we do next. Microsoft and Google have since moved beyond search to put chatbot-based assistants into ... The most important AI trends in 2024 Tags. Artificial Intelligence 9 February 2024 12 min read. Link copied 2022 was the year that generative artificial intelligence (AI) exploded into the public consciousness, and 2023 was the year it began to take root in the business world. 2024 thus stands to be a pivotal year for the future of AI, as ... Here are the top 10 AI and machine learning trends to prepare for in 2024. 1. Multimodal AI. Multimodal AI goes beyond traditional single-mode data processing to encompass multiple input types, such as text, images and sound -- a step toward mimicking the human ability to process diverse sensory information.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
       "Invoking: `duckduckgo_search` with `{'query': 'key trends in AI 2024'}`\n",
-      "responded: Thought: The initial search has provided some insights into the key trends and advancements in AI for 2024. However, to create a comprehensive analysis report, I need more detailed information on specific breakthrough technologies and their potential industry impacts. I will perform additional searches focusing on these aspects.\n",
+      "responded: Thought: The initial search results provide a general overview of the latest advancements in AI in 2024, highlighting trends such as generative AI, multimodal AI, and the integration of AI into daily life and business. To create a comprehensive analysis, I need to delve deeper into specific key trends, breakthrough technologies, and their potential industry impacts. \n",
       "\n",
-      "I will conduct parallel searches on the following topics:\n",
-      "1. Key trends in AI 2024\n",
-      "2. Breakthrough AI technologies in 2024\n",
-      "3. Potential industry impacts of AI advancements in 2024\n",
+      "Next, I will perform two more focused search queries:\n",
+      "1. Key trends in AI 2024.\n",
+      "2. Breakthrough AI technologies in 2024.\n",
       "\n",
+      "These searches will help gather detailed information to complete the analysis.\n",
       "\n",
+      "Calling the tools to perform the two focused search queries.\n",
       "\n",
-      "\u001b[0m\u001b[36;1m\u001b[1;3mThe most important AI trends in 2024. 2022 was the year that generative artificial intelligence (AI) exploded into the public consciousness, and 2023 was the year it began to take root in the business world. 2024 thus stands to be a pivotal year for the future of AI, as researchers and enterprises seek to establish how this evolutionary leap in ... 5. Data, analytics, and AI leaders are becoming less independent. This past year, we began to notice that increasing numbers of organizations were cutting back on the proliferation of technology and data \"chiefs,\" including chief data and analytics officers (and sometimes chief AI officers). In 2024, generative AI might actually become useful for the regular, non-tech person, and we are going to see more people tinkering with a million little AI models. State-of-the-art AI models ... If 2023 was the year the world discovered generative AI (gen AI), 2024 is the year organizations truly began using—and deriving business value from—this new technology.In the latest McKinsey Global Survey on AI, 65 percent of respondents report that their organizations are regularly using gen AI, nearly double the percentage from our previous survey just ten months ago. This year's trends reflect a deepening sophistication and caution in AI development and deployment strategies, with an eye to ethics, safety and the evolving regulatory landscape. Here are the top 10 AI and machine learning trends to prepare for in 2024. 1. Multimodal AI. Multimodal AI goes beyond traditional single-mode data processing to ...\u001b[0m\u001b[32;1m\u001b[1;3m\n",
-      "Invoking: `duckduckgo_search` with `{'query': 'breakthrough AI technologies 2024'}`\n",
-      "responded: Thought: The initial search has provided some insights into the key trends and advancements in AI for 2024. However, to create a comprehensive analysis report, I need more detailed information on specific breakthrough technologies and their potential industry impacts. I will perform additional searches focusing on these aspects.\n",
       "\n",
-      "I will conduct parallel searches on the following topics:\n",
-      "1. Key trends in AI 2024\n",
-      "2. Breakthrough AI technologies in 2024\n",
-      "3. Potential industry impacts of AI advancements in 2024\n",
       "\n",
+      "\u001b[0m\u001b[36;1m\u001b[1;3m5. Data, analytics, and AI leaders are becoming less independent. This past year, we began to notice that increasing numbers of organizations were cutting back on the proliferation of technology and data \"chiefs,\" including chief data and analytics officers (and sometimes chief AI officers). Here are some important current AI trends to look out for in the coming year. Reality check: more realistic expectations. Multimodal AI. Small (er) language models and open source advancements. GPU shortages and cloud costs. Model optimization is getting more accessible. Customized local models and data pipelines. If 2023 was the year the world discovered generative AI (gen AI), 2024 is the year organizations truly began using—and deriving business value from—this new technology.In the latest McKinsey Global Survey on AI, 65 percent of respondents report that their organizations are regularly using gen AI, nearly double the percentage from our previous survey just ten months ago. In 2024, generative AI might actually become useful for the regular, non-tech person, and we are going to see more people tinkering with a million little AI models. State-of-the-art AI models ... The Top 5 Artificial Intelligence (AI) Trends For 2024. Adobe Stock. It's been a year since OpenAI released ChatGPT, opening the door to seamlessly weave AI into the fabric of our daily lives ...\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+      "Invoking: `duckduckgo_search` with `{'query': 'breakthrough AI technologies 2024'}`\n",
+      "responded: Thought: The initial search results provide a general overview of the latest advancements in AI in 2024, highlighting trends such as generative AI, multimodal AI, and the integration of AI into daily life and business. To create a comprehensive analysis, I need to delve deeper into specific key trends, breakthrough technologies, and their potential industry impacts. \n",
+      "\n",
+      "Next, I will perform two more focused search queries:\n",
+      "1. Key trends in AI 2024.\n",
+      "2. Breakthrough AI technologies in 2024.\n",
       "\n",
+      "These searches will help gather detailed information to complete the analysis.\n",
       "\n",
-      "\u001b[0m\u001b[36;1m\u001b[1;3mGenerative AI tools like ChatGPT reached mass adoption in record time, and reset the course of an entire industry. ... AI for everything: 10 Breakthrough Technologies 2024. 10 Breakthrough Technologies. Every year, the reporters and editors at MIT Technology Review survey the tech landscape and pick 10 technologies that we think have the greatest potential to change ... In 2024, generative AI might actually become useful for the regular, non-tech person, and we are going to see more people tinkering with a million little AI models. State-of-the-art AI models ... 2022 was the year that generative artificial intelligence (AI) exploded into the public consciousness, and 2023 was the year it began to take root in the business world. 2024 thus stands to be a pivotal year for the future of AI, as researchers and enterprises seek to establish how this evolutionary leap in technology can be most practically integrated into our everyday lives. Jun 25, 2024. Top 10 Emerging Technologies of 2024: AI-powered scientific discovery and elastocalorics are among the 10 listed technologies. Image: World Economic Forum. This article is part of: Annual Meeting of the New Champions. The World Economic Forum's Top 10 Emerging Technologies of 2024 report lists this year's most impactful emerging ...\u001b[0m\u001b[32;1m\u001b[1;3m\n",
-      "Invoking: `duckduckgo_search` with `{'query': 'potential industry impacts of AI advancements 2024'}`\n",
-      "responded: Thought: The initial search has provided some insights into the key trends and advancements in AI for 2024. However, to create a comprehensive analysis report, I need more detailed information on specific breakthrough technologies and their potential industry impacts. I will perform additional searches focusing on these aspects.\n",
+      "Calling the tools to perform the two focused search queries.\n",
       "\n",
-      "I will conduct parallel searches on the following topics:\n",
-      "1. Key trends in AI 2024\n",
-      "2. Breakthrough AI technologies in 2024\n",
-      "3. Potential industry impacts of AI advancements in 2024\n",
       "\n",
       "\n",
+      "\u001b[0m\u001b[36;1m\u001b[1;3m10 Breakthrough Technologies. Every year, the reporters and editors at MIT Technology Review survey the tech landscape and pick 10 technologies that we think have the greatest potential to change ... Generative AI took the world by storm in 2023. Its future—and ours—will be shaped by what we do next. Microsoft and Google have since moved beyond search to put chatbot-based assistants into ... In 2024, generative AI might actually become useful for the regular, non-tech person, and we are going to see more people tinkering with a million little AI models. State-of-the-art AI models ... AI and emerging technology at Davos 2024: 5 surprising things to know; Impactful AI: How tech leaders and social innovators are advancing AI for social good; Post breakthrough: How AI can lift climate research out of the lab and into the real world In 2024, AI will foster a breakthrough in precision education. The AI Solution An AI system, equipped with advanced natural language processing and machine learning, will be launched to create ...\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+      "Invoking: `output_handler` with `{'output': '### Comprehensive Analysis of the Latest Advancements in AI in 2024\\n\\n#### Key Trends in AI 2024\\n\\n1. **Generative AI**: Generative AI continues to evolve, becoming more useful for everyday users and businesses. The technology is being integrated into various applications, from creative tools to business solutions, enabling more people to experiment with and benefit from AI models.\\n\\n2. **Multimodal AI**: This trend involves AI systems that can process and integrate multiple types of data inputs, such as text, images, and sound. This advancement mimics human sensory processing and enhances the capabilities of AI in understanding and generating complex data.\\n\\n3. **Smaller Language Models and Open Source Advancements**: There is a shift towards smaller, more efficient language models that are easier to deploy and manage. Open source contributions are accelerating innovation and making advanced AI technologies more accessible.\\n\\n4. **Model Optimization and Customization**: AI models are becoming more optimized and customizable, allowing businesses to tailor AI solutions to their specific needs. This trend is driven by advancements in model optimization techniques and the development of customized local models and data pipelines.\\n\\n5. **Integration into Daily Life and Business**: AI is becoming seamlessly integrated into daily life and business operations. From AI-powered chatbots to advanced analytics, organizations are leveraging AI to drive efficiency, innovation, and value.\\n\\n#### Breakthrough AI Technologies in 2024\\n\\n1. **Generative AI Models**: State-of-the-art generative AI models are being developed and deployed, offering new capabilities in content creation, design, and problem-solving.\\n\\n2. 
**AI for Precision Education**: AI is making significant strides in the field of education, with systems designed to provide personalized learning experiences and improve educational outcomes.\\n\\n3. **AI in Climate Research**: AI technologies are being used to advance climate research, helping scientists analyze vast amounts of data and develop solutions to address climate change.\\n\\n4. **AI for Social Good**: There is a growing focus on using AI for social good, with tech leaders and social innovators working together to develop AI solutions that address societal challenges.\\n\\n#### Potential Industry Impacts\\n\\n1. **Business and Enterprise**: AI is transforming business operations, from automating routine tasks to providing advanced analytics and insights. Companies are using AI to enhance productivity, drive innovation, and gain a competitive edge.\\n\\n2. **Healthcare**: AI is revolutionizing healthcare by improving diagnostics, personalizing treatment plans, and streamlining administrative processes. AI-powered tools are enhancing patient care and outcomes.\\n\\n3. **Education**: AI is reshaping education by offering personalized learning experiences and improving access to quality education. AI-driven tools are helping educators tailor instruction to individual student needs.\\n\\n4. **Climate and Environment**: AI is playing a crucial role in climate research and environmental conservation. AI technologies are helping researchers analyze data, model climate scenarios, and develop sustainable solutions.\\n\\n5. **Social Impact**: AI is being leveraged to address social issues, from improving public safety to enhancing accessibility for individuals with disabilities. AI-driven initiatives are making a positive impact on communities worldwide.\\n\\nIn conclusion, 2024 is a pivotal year for AI, with significant advancements and trends shaping the future of technology and its applications across various industries. 
The integration of AI into daily life and business, coupled with breakthrough technologies, is driving innovation and creating new opportunities for growth and development.'}`\n",
+      "responded: Thought: I now have sufficient information to compile a comprehensive analysis of the latest advancements in AI in 2024. The gathered data includes key trends, breakthrough technologies, and potential industry impacts. I will synthesize this information into a detailed report.\n",
       "\n",
-      "\u001b[0m\u001b[36;1m\u001b[1;3mHere are some important current AI trends to look out for in the coming year. Reality check: more realistic expectations. Multimodal AI. Small (er) language models and open source advancements. GPU shortages and cloud costs. Model optimization is getting more accessible. Customized local models and data pipelines. More powerful virtual agents. If 2023 was the year the world discovered generative AI (gen AI), 2024 is the year organizations truly began using—and deriving business value from—this new technology.In the latest McKinsey Global Survey on AI, 65 percent of respondents report that their organizations are regularly using gen AI, nearly double the percentage from our previous survey just ten months ago. In 2024, generative AI might actually become useful for the regular, non-tech person, and we are going to see more people tinkering with a million little AI models. State-of-the-art AI models ... The manufacturing industry is expected to see the largest financial impact due to AI. An Accenture report forecasts that the manufacturing sector will reap the greatest financial benefit from AI ... getty. 2024 will mark a watershed moment for generative artificial intelligence, triggering a metamorphosis across the global economic landscape as businesses wake up to its broad potential ...\u001b[0m\u001b[32;1m\u001b[1;3m\n",
-      "Detected AgentFinish, blocking it to force output via output handler.\n",
-      "\u001b[0m\u001b[38;5;200m\u001b[1;3mYou must call the `output_handler` tool to return the final output.\n",
-      "\u001b[0m\u001b[32;1m\u001b[1;3m\n",
-      "Invoking: `output_handler` with `{'output': '### Comprehensive Analysis of AI Advancements in 2024\\n\\n#### Key Trends in AI 2024\\n\\n1. **Generative AI Maturation**:\\n   - Generative AI, which gained significant attention in 2023, is expected to become more practical and useful for everyday users in 2024. This includes applications in content creation, customer service, and personalized experiences.\\n   - The adoption of generative AI by businesses is accelerating, with 65% of organizations regularly using it, nearly double the percentage from the previous year.\\n\\n2. **Multimodal AI**:\\n   - Multimodal AI, which processes and integrates data from multiple sources (e.g., text, images, audio), is becoming more prevalent. This allows for more sophisticated and context-aware AI applications.\\n\\n3. **Smaller and Open Source Models**:\\n   - There is a trend towards smaller, more efficient AI models that are easier to deploy and manage. Open source AI models are gaining traction, enabling businesses to customize and optimize AI solutions for their specific needs.\\n\\n4. **Ethics and Regulation**:\\n   - As AI technologies become more integrated into society, there is a growing emphasis on ethical considerations, safety, and regulatory compliance. Organizations are adopting more cautious and responsible AI development and deployment strategies.\\n\\n5. **AI in Data and Analytics**:\\n   - The role of AI in data and analytics is evolving, with a focus on integrating AI capabilities into existing data pipelines and analytics workflows. This trend is driven by the need for more accurate and actionable insights.\\n\\n#### Breakthrough AI Technologies in 2024\\n\\n1. **AI-Powered Scientific Discovery**:\\n   - AI is playing a crucial role in accelerating scientific research and discovery. 
AI algorithms are being used to analyze vast amounts of data, identify patterns, and generate new hypotheses in fields such as medicine, materials science, and climate research.\\n\\n2. **Elastocalorics**:\\n   - Elastocaloric materials, which can change temperature under mechanical stress, are being explored for their potential in energy-efficient cooling systems. AI is being used to optimize the design and performance of these materials.\\n\\n3. **Advanced Virtual Agents**:\\n   - Virtual agents are becoming more powerful and capable of handling complex interactions. These agents are being used in customer service, healthcare, and education to provide personalized and efficient support.\\n\\n4. **Model Optimization**:\\n   - Techniques for optimizing AI models, such as pruning and quantization, are becoming more accessible. This allows for the deployment of AI models on resource-constrained devices, such as smartphones and IoT devices.\\n\\n#### Potential Industry Impacts\\n\\n1. **Manufacturing**:\\n   - The manufacturing industry is expected to see the largest financial impact from AI. AI-driven automation, predictive maintenance, and quality control are enhancing productivity and reducing costs.\\n\\n2. **Healthcare**:\\n   - AI is transforming healthcare by enabling early diagnosis, personalized treatment plans, and efficient management of healthcare resources. AI-powered tools are improving patient outcomes and reducing the burden on healthcare professionals.\\n\\n3. **Finance**:\\n   - In the finance sector, AI is being used for fraud detection, risk management, and personalized financial services. AI algorithms are enhancing the accuracy and efficiency of financial operations.\\n\\n4. **Retail**:\\n   - AI is revolutionizing the retail industry by enabling personalized shopping experiences, optimizing supply chain management, and enhancing customer service. Retailers are using AI to analyze consumer behavior and predict trends.\\n\\n5. 
**Energy**:\\n   - AI is playing a key role in optimizing energy production and consumption. AI algorithms are being used to manage smart grids, predict energy demand, and improve the efficiency of renewable energy sources.'}`\n",
-      "responded: Thought: I have compiled a comprehensive analysis of the latest advancements in AI in 2024, including key trends, breakthrough technologies, and potential industry impacts. The next step is to return this final output using the `output_handler` tool.\n",
+      "Calling the tool to provide the final analysis report.\n",
       "\n",
       "\n",
       "\n",
-      "\u001b[0m\u001b[33;1m\u001b[1;3mInvalidOutput: Add more information about AI applications in physics.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+      "\u001b[0m\u001b[33;1m\u001b[1;3mAdd more information about AI applications in physics.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
       "Invoking: `duckduckgo_search` with `{'query': 'AI applications in physics 2024'}`\n",
-      "responded: Thought: To complete the comprehensive analysis, I need to include more information about AI applications in physics. I will search for the latest advancements and applications of AI in the field of physics in 2024.\n",
+      "responded: Thought: To add more information about AI applications in physics, I need to perform a search query specifically focused on the latest advancements and applications of AI in the field of physics in 2024. This will help gather relevant details to enhance the analysis report.\n",
+      "\n",
+      "Calling the tool to perform the search query.\n",
       "\n",
       "\n",
       "\n",
-      "\u001b[0m\u001b[36;1m\u001b[1;3mResearchers used generative AI to develop a physics-informed technique to classify phase transitions in materials or physical systems that is much more efficient than existing machine-learning approaches. The work was led by researchers at MIT and the University of Basel. More information: Julian Arnold et al, Mapping out phase diagrams with generative classifiers, Physical Review Letters (2024). DOI: 10.1103/PhysRevLett.132.207301 . On arXiv (2023): DOI: 10.48550 ... \"This can lead to new innovations and applications in many industries.\" LCLS and SSRL are DOE Office of Science user facilities. This research was supported by the Office of Science. Citations: S. Chitturi et al., npj Computational Materials, 18 July 2024 (10.1038/s41524-024-01326-2) For pharmaceutical applications, the challenge is even greater—with about 10 60 possible drug-like molecules containing only the most basic building blocks (C, H, O, N and S atoms). Our physics-inspired fast and explainable surrogate models will be of great use in those applications, and play a complementary role to other emerging techniques, like foundation models.\" The research was supported by the MIT-IBM Watson AI Lab and the U.S. Army Research Office through the Institute for Soldier Nanotechnologies.\u001b[0m"
+      "\u001b[0m\u001b[36;1m\u001b[1;3mCalculating faster: Coupling AI with fundamental physics. by Stefanie Terp, Technical University of Berlin. Results overview. Credit: Nature Communications (2024). DOI: 10.1038/s41467-024-50620-6 ... Researchers used generative AI to develop a physics-informed technique to classify phase transitions in materials or physical systems that is much more efficient than existing machine-learning approaches. The work was led by researchers at MIT and the University of Basel. Uncovering the mechanisms of physics is driving a new paradigm in artificial intelligence (AI) discovery. Today, physics has enabled us to understand the AI paradigm in a wide range of matter, energy, and space-time scales through data, knowledge, priors, and laws. At the same time, the AI paradigm also draws on and introduces the knowledge and laws of physics to promote its own development ... More information: Julian Arnold et al, Mapping out phase diagrams with generative classifiers, Physical Review Letters (2024). DOI: 10.1103/PhysRevLett.132.207301 . On arXiv (2023): DOI: 10.48550 ... The past five years have seen a lot of progress in the use of artificial intelligence (AI) in theoretical investigations in pure mathematics and theoretical physics. This domain is markedly ...\u001b[0m"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "WARNING:langchain_core.callbacks.manager:Error in RootListenersTracer.on_chain_end callback: ValueError()\n",
-      "2024-07-19 17:07:33,737 - motleycrew - WARNING - No known Cypher type matching annotation typing.Optional[typing.Any], will use JSON string\n"
+      "WARNING:langchain_core.callbacks.manager:Error in RootListenersTracer.on_chain_end callback: ValueError(\"Expected str, BaseMessage, List[BaseMessage], or Tuple[BaseMessage]. Got {'checked_output': '### Comprehensive Analysis of the Latest Advancements in AI in 2024\\\\n\\\\n#### Key Trends in AI 2024\\\\n\\\\n1. **Generative AI**: Generative AI continues to evolve, becoming more useful for everyday users and businesses. The technology is being integrated into various applications, from creative tools to business solutions, enabling more people to experiment with and benefit from AI models.\\\\n\\\\n2. **Multimodal AI**: This trend involves AI systems that can process and integrate multiple types of data inputs, such as text, images, and sound. This advancement mimics human sensory processing and enhances the capabilities of AI in understanding and generating complex data.\\\\n\\\\n3. **Smaller Language Models and Open Source Advancements**: There is a shift towards smaller, more efficient language models that are easier to deploy and manage. Open source contributions are accelerating innovation and making advanced AI technologies more accessible.\\\\n\\\\n4. **Model Optimization and Customization**: AI models are becoming more optimized and customizable, allowing businesses to tailor AI solutions to their specific needs. This trend is driven by advancements in model optimization techniques and the development of customized local models and data pipelines.\\\\n\\\\n5. **Integration into Daily Life and Business**: AI is becoming seamlessly integrated into daily life and business operations. From AI-powered chatbots to advanced analytics, organizations are leveraging AI to drive efficiency, innovation, and value.\\\\n\\\\n#### Breakthrough AI Technologies in 2024\\\\n\\\\n1. **Generative AI Models**: State-of-the-art generative AI models are being developed and deployed, offering new capabilities in content creation, design, and problem-solving.\\\\n\\\\n2. 
**AI for Precision Education**: AI is making significant strides in the field of education, with systems designed to provide personalized learning experiences and improve educational outcomes.\\\\n\\\\n3. **AI in Climate Research**: AI technologies are being used to advance climate research, helping scientists analyze vast amounts of data and develop solutions to address climate change.\\\\n\\\\n4. **AI for Social Good**: There is a growing focus on using AI for social good, with tech leaders and social innovators working together to develop AI solutions that address societal challenges.\\\\n\\\\n5. **AI in Physics**: AI is being used to uncover mechanisms in physics, driving new paradigms in AI discovery. Researchers are using generative AI to classify phase transitions in materials and physical systems more efficiently. AI is also being coupled with fundamental physics to enhance calculations and theoretical investigations in pure mathematics and theoretical physics.\\\\n\\\\n#### Potential Industry Impacts\\\\n\\\\n1. **Business and Enterprise**: AI is transforming business operations, from automating routine tasks to providing advanced analytics and insights. Companies are using AI to enhance productivity, drive innovation, and gain a competitive edge.\\\\n\\\\n2. **Healthcare**: AI is revolutionizing healthcare by improving diagnostics, personalizing treatment plans, and streamlining administrative processes. AI-powered tools are enhancing patient care and outcomes.\\\\n\\\\n3. **Education**: AI is reshaping education by offering personalized learning experiences and improving access to quality education. AI-driven tools are helping educators tailor instruction to individual student needs.\\\\n\\\\n4. **Climate and Environment**: AI is playing a crucial role in climate research and environmental conservation. AI technologies are helping researchers analyze data, model climate scenarios, and develop sustainable solutions.\\\\n\\\\n5. 
**Social Impact**: AI is being leveraged to address social issues, from improving public safety to enhancing accessibility for individuals with disabilities. AI-driven initiatives are making a positive impact on communities worldwide.\\\\n\\\\n6. **Physics**: AI applications in physics are leading to significant advancements in understanding and modeling physical phenomena. The integration of AI with fundamental physics is enabling faster and more efficient calculations, as well as new insights in theoretical investigations.\\\\n\\\\nIn conclusion, 2024 is a pivotal year for AI, with significant advancements and trends shaping the future of technology and its applications across various industries. The integration of AI into daily life and business, coupled with breakthrough technologies, is driving innovation and creating new opportunities for growth and development.'}.\")\n"
      ]
     },
     {
@@ -191,12 +175,14 @@
      "output_type": "stream",
      "text": [
       "\u001b[32;1m\u001b[1;3m\n",
-      "Invoking: `output_handler` with `{'output': '### Comprehensive Analysis of AI Advancements in 2024\\n\\n#### Key Trends in AI 2024\\n\\n1. **Generative AI Maturation**:\\n   - Generative AI, which gained significant attention in 2023, is expected to become more practical and useful for everyday users in 2024. This includes applications in content creation, customer service, and personalized experiences.\\n   - The adoption of generative AI by businesses is accelerating, with 65% of organizations regularly using it, nearly double the percentage from the previous year.\\n\\n2. **Multimodal AI**:\\n   - Multimodal AI, which processes and integrates data from multiple sources (e.g., text, images, audio), is becoming more prevalent. This allows for more sophisticated and context-aware AI applications.\\n\\n3. **Smaller and Open Source Models**:\\n   - There is a trend towards smaller, more efficient AI models that are easier to deploy and manage. Open source AI models are gaining traction, enabling businesses to customize and optimize AI solutions for their specific needs.\\n\\n4. **Ethics and Regulation**:\\n   - As AI technologies become more integrated into society, there is a growing emphasis on ethical considerations, safety, and regulatory compliance. Organizations are adopting more cautious and responsible AI development and deployment strategies.\\n\\n5. **AI in Data and Analytics**:\\n   - The role of AI in data and analytics is evolving, with a focus on integrating AI capabilities into existing data pipelines and analytics workflows. This trend is driven by the need for more accurate and actionable insights.\\n\\n#### Breakthrough AI Technologies in 2024\\n\\n1. **AI-Powered Scientific Discovery**:\\n   - AI is playing a crucial role in accelerating scientific research and discovery. 
AI algorithms are being used to analyze vast amounts of data, identify patterns, and generate new hypotheses in fields such as medicine, materials science, and climate research.\\n\\n2. **Elastocalorics**:\\n   - Elastocaloric materials, which can change temperature under mechanical stress, are being explored for their potential in energy-efficient cooling systems. AI is being used to optimize the design and performance of these materials.\\n\\n3. **Advanced Virtual Agents**:\\n   - Virtual agents are becoming more powerful and capable of handling complex interactions. These agents are being used in customer service, healthcare, and education to provide personalized and efficient support.\\n\\n4. **Model Optimization**:\\n   - Techniques for optimizing AI models, such as pruning and quantization, are becoming more accessible. This allows for the deployment of AI models on resource-constrained devices, such as smartphones and IoT devices.\\n\\n5. **AI in Physics**:\\n   - Researchers are using generative AI to develop physics-informed techniques for classifying phase transitions in materials and physical systems. This approach is more efficient than existing machine-learning methods and has the potential to drive new innovations in various industries.\\n\\n#### Potential Industry Impacts\\n\\n1. **Manufacturing**:\\n   - The manufacturing industry is expected to see the largest financial impact from AI. AI-driven automation, predictive maintenance, and quality control are enhancing productivity and reducing costs.\\n\\n2. **Healthcare**:\\n   - AI is transforming healthcare by enabling early diagnosis, personalized treatment plans, and efficient management of healthcare resources. AI-powered tools are improving patient outcomes and reducing the burden on healthcare professionals.\\n\\n3. **Finance**:\\n   - In the finance sector, AI is being used for fraud detection, risk management, and personalized financial services. 
AI algorithms are enhancing the accuracy and efficiency of financial operations.\\n\\n4. **Retail**:\\n   - AI is revolutionizing the retail industry by enabling personalized shopping experiences, optimizing supply chain management, and enhancing customer service. Retailers are using AI to analyze consumer behavior and predict trends.\\n\\n5. **Energy**:\\n   - AI is playing a key role in optimizing energy production and consumption. AI algorithms are being used to manage smart grids, predict energy demand, and improve the efficiency of renewable energy sources.'}`\n",
-      "responded: Thought: I have gathered additional information on AI applications in physics, specifically focusing on the use of generative AI to classify phase transitions in materials and physical systems. I will now integrate this information into the comprehensive analysis report and finalize it.\n",
+      "Invoking: `output_handler` with `{'output': '### Comprehensive Analysis of the Latest Advancements in AI in 2024\\n\\n#### Key Trends in AI 2024\\n\\n1. **Generative AI**: Generative AI continues to evolve, becoming more useful for everyday users and businesses. The technology is being integrated into various applications, from creative tools to business solutions, enabling more people to experiment with and benefit from AI models.\\n\\n2. **Multimodal AI**: This trend involves AI systems that can process and integrate multiple types of data inputs, such as text, images, and sound. This advancement mimics human sensory processing and enhances the capabilities of AI in understanding and generating complex data.\\n\\n3. **Smaller Language Models and Open Source Advancements**: There is a shift towards smaller, more efficient language models that are easier to deploy and manage. Open source contributions are accelerating innovation and making advanced AI technologies more accessible.\\n\\n4. **Model Optimization and Customization**: AI models are becoming more optimized and customizable, allowing businesses to tailor AI solutions to their specific needs. This trend is driven by advancements in model optimization techniques and the development of customized local models and data pipelines.\\n\\n5. **Integration into Daily Life and Business**: AI is becoming seamlessly integrated into daily life and business operations. From AI-powered chatbots to advanced analytics, organizations are leveraging AI to drive efficiency, innovation, and value.\\n\\n#### Breakthrough AI Technologies in 2024\\n\\n1. **Generative AI Models**: State-of-the-art generative AI models are being developed and deployed, offering new capabilities in content creation, design, and problem-solving.\\n\\n2. 
**AI for Precision Education**: AI is making significant strides in the field of education, with systems designed to provide personalized learning experiences and improve educational outcomes.\\n\\n3. **AI in Climate Research**: AI technologies are being used to advance climate research, helping scientists analyze vast amounts of data and develop solutions to address climate change.\\n\\n4. **AI for Social Good**: There is a growing focus on using AI for social good, with tech leaders and social innovators working together to develop AI solutions that address societal challenges.\\n\\n5. **AI in Physics**: AI is being used to uncover mechanisms in physics, driving new paradigms in AI discovery. Researchers are using generative AI to classify phase transitions in materials and physical systems more efficiently. AI is also being coupled with fundamental physics to enhance calculations and theoretical investigations in pure mathematics and theoretical physics.\\n\\n#### Potential Industry Impacts\\n\\n1. **Business and Enterprise**: AI is transforming business operations, from automating routine tasks to providing advanced analytics and insights. Companies are using AI to enhance productivity, drive innovation, and gain a competitive edge.\\n\\n2. **Healthcare**: AI is revolutionizing healthcare by improving diagnostics, personalizing treatment plans, and streamlining administrative processes. AI-powered tools are enhancing patient care and outcomes.\\n\\n3. **Education**: AI is reshaping education by offering personalized learning experiences and improving access to quality education. AI-driven tools are helping educators tailor instruction to individual student needs.\\n\\n4. **Climate and Environment**: AI is playing a crucial role in climate research and environmental conservation. AI technologies are helping researchers analyze data, model climate scenarios, and develop sustainable solutions.\\n\\n5. 
**Social Impact**: AI is being leveraged to address social issues, from improving public safety to enhancing accessibility for individuals with disabilities. AI-driven initiatives are making a positive impact on communities worldwide.\\n\\n6. **Physics**: AI applications in physics are leading to significant advancements in understanding and modeling physical phenomena. The integration of AI with fundamental physics is enabling faster and more efficient calculations, as well as new insights in theoretical investigations.\\n\\nIn conclusion, 2024 is a pivotal year for AI, with significant advancements and trends shaping the future of technology and its applications across various industries. The integration of AI into daily life and business, coupled with breakthrough technologies, is driving innovation and creating new opportunities for growth and development.'}`\n",
+      "responded: Thought: I have gathered relevant information on the latest advancements and applications of AI in the field of physics in 2024. This includes the use of generative AI for classifying phase transitions, coupling AI with fundamental physics for faster calculations, and the integration of AI in theoretical investigations in pure mathematics and theoretical physics. I will now incorporate this information into the comprehensive analysis report.\n",
       "\n",
+      "Calling the tool to provide the final analysis report with the added information on AI applications in physics.\n",
       "\n",
       "\n",
-      "\u001b[0m\u001b[33;1m\u001b[1;3m{'checked_output': '### Comprehensive Analysis of AI Advancements in 2024\\n\\n#### Key Trends in AI 2024\\n\\n1. **Generative AI Maturation**:\\n   - Generative AI, which gained significant attention in 2023, is expected to become more practical and useful for everyday users in 2024. This includes applications in content creation, customer service, and personalized experiences.\\n   - The adoption of generative AI by businesses is accelerating, with 65% of organizations regularly using it, nearly double the percentage from the previous year.\\n\\n2. **Multimodal AI**:\\n   - Multimodal AI, which processes and integrates data from multiple sources (e.g., text, images, audio), is becoming more prevalent. This allows for more sophisticated and context-aware AI applications.\\n\\n3. **Smaller and Open Source Models**:\\n   - There is a trend towards smaller, more efficient AI models that are easier to deploy and manage. Open source AI models are gaining traction, enabling businesses to customize and optimize AI solutions for their specific needs.\\n\\n4. **Ethics and Regulation**:\\n   - As AI technologies become more integrated into society, there is a growing emphasis on ethical considerations, safety, and regulatory compliance. Organizations are adopting more cautious and responsible AI development and deployment strategies.\\n\\n5. **AI in Data and Analytics**:\\n   - The role of AI in data and analytics is evolving, with a focus on integrating AI capabilities into existing data pipelines and analytics workflows. This trend is driven by the need for more accurate and actionable insights.\\n\\n#### Breakthrough AI Technologies in 2024\\n\\n1. **AI-Powered Scientific Discovery**:\\n   - AI is playing a crucial role in accelerating scientific research and discovery. 
AI algorithms are being used to analyze vast amounts of data, identify patterns, and generate new hypotheses in fields such as medicine, materials science, and climate research.\\n\\n2. **Elastocalorics**:\\n   - Elastocaloric materials, which can change temperature under mechanical stress, are being explored for their potential in energy-efficient cooling systems. AI is being used to optimize the design and performance of these materials.\\n\\n3. **Advanced Virtual Agents**:\\n   - Virtual agents are becoming more powerful and capable of handling complex interactions. These agents are being used in customer service, healthcare, and education to provide personalized and efficient support.\\n\\n4. **Model Optimization**:\\n   - Techniques for optimizing AI models, such as pruning and quantization, are becoming more accessible. This allows for the deployment of AI models on resource-constrained devices, such as smartphones and IoT devices.\\n\\n5. **AI in Physics**:\\n   - Researchers are using generative AI to develop physics-informed techniques for classifying phase transitions in materials and physical systems. This approach is more efficient than existing machine-learning methods and has the potential to drive new innovations in various industries.\\n\\n#### Potential Industry Impacts\\n\\n1. **Manufacturing**:\\n   - The manufacturing industry is expected to see the largest financial impact from AI. AI-driven automation, predictive maintenance, and quality control are enhancing productivity and reducing costs.\\n\\n2. **Healthcare**:\\n   - AI is transforming healthcare by enabling early diagnosis, personalized treatment plans, and efficient management of healthcare resources. AI-powered tools are improving patient outcomes and reducing the burden on healthcare professionals.\\n\\n3. **Finance**:\\n   - In the finance sector, AI is being used for fraud detection, risk management, and personalized financial services. 
AI algorithms are enhancing the accuracy and efficiency of financial operations.\\n\\n4. **Retail**:\\n   - AI is revolutionizing the retail industry by enabling personalized shopping experiences, optimizing supply chain management, and enhancing customer service. Retailers are using AI to analyze consumer behavior and predict trends.\\n\\n5. **Energy**:\\n   - AI is playing a key role in optimizing energy production and consumption. AI algorithms are being used to manage smart grids, predict energy demand, and improve the efficiency of renewable energy sources.'}\u001b[0m\u001b[32;1m\u001b[1;3m{'checked_output': '### Comprehensive Analysis of AI Advancements in 2024\\n\\n#### Key Trends in AI 2024\\n\\n1. **Generative AI Maturation**:\\n   - Generative AI, which gained significant attention in 2023, is expected to become more practical and useful for everyday users in 2024. This includes applications in content creation, customer service, and personalized experiences.\\n   - The adoption of generative AI by businesses is accelerating, with 65% of organizations regularly using it, nearly double the percentage from the previous year.\\n\\n2. **Multimodal AI**:\\n   - Multimodal AI, which processes and integrates data from multiple sources (e.g., text, images, audio), is becoming more prevalent. This allows for more sophisticated and context-aware AI applications.\\n\\n3. **Smaller and Open Source Models**:\\n   - There is a trend towards smaller, more efficient AI models that are easier to deploy and manage. Open source AI models are gaining traction, enabling businesses to customize and optimize AI solutions for their specific needs.\\n\\n4. **Ethics and Regulation**:\\n   - As AI technologies become more integrated into society, there is a growing emphasis on ethical considerations, safety, and regulatory compliance. Organizations are adopting more cautious and responsible AI development and deployment strategies.\\n\\n5. 
**AI in Data and Analytics**:\\n   - The role of AI in data and analytics is evolving, with a focus on integrating AI capabilities into existing data pipelines and analytics workflows. This trend is driven by the need for more accurate and actionable insights.\\n\\n#### Breakthrough AI Technologies in 2024\\n\\n1. **AI-Powered Scientific Discovery**:\\n   - AI is playing a crucial role in accelerating scientific research and discovery. AI algorithms are being used to analyze vast amounts of data, identify patterns, and generate new hypotheses in fields such as medicine, materials science, and climate research.\\n\\n2. **Elastocalorics**:\\n   - Elastocaloric materials, which can change temperature under mechanical stress, are being explored for their potential in energy-efficient cooling systems. AI is being used to optimize the design and performance of these materials.\\n\\n3. **Advanced Virtual Agents**:\\n   - Virtual agents are becoming more powerful and capable of handling complex interactions. These agents are being used in customer service, healthcare, and education to provide personalized and efficient support.\\n\\n4. **Model Optimization**:\\n   - Techniques for optimizing AI models, such as pruning and quantization, are becoming more accessible. This allows for the deployment of AI models on resource-constrained devices, such as smartphones and IoT devices.\\n\\n5. **AI in Physics**:\\n   - Researchers are using generative AI to develop physics-informed techniques for classifying phase transitions in materials and physical systems. This approach is more efficient than existing machine-learning methods and has the potential to drive new innovations in various industries.\\n\\n#### Potential Industry Impacts\\n\\n1. **Manufacturing**:\\n   - The manufacturing industry is expected to see the largest financial impact from AI. AI-driven automation, predictive maintenance, and quality control are enhancing productivity and reducing costs.\\n\\n2. 
**Healthcare**:\\n   - AI is transforming healthcare by enabling early diagnosis, personalized treatment plans, and efficient management of healthcare resources. AI-powered tools are improving patient outcomes and reducing the burden on healthcare professionals.\\n\\n3. **Finance**:\\n   - In the finance sector, AI is being used for fraud detection, risk management, and personalized financial services. AI algorithms are enhancing the accuracy and efficiency of financial operations.\\n\\n4. **Retail**:\\n   - AI is revolutionizing the retail industry by enabling personalized shopping experiences, optimizing supply chain management, and enhancing customer service. Retailers are using AI to analyze consumer behavior and predict trends.\\n\\n5. **Energy**:\\n   - AI is playing a key role in optimizing energy production and consumption. AI algorithms are being used to manage smart grids, predict energy demand, and improve the efficiency of renewable energy sources.'}\u001b[0m\n",
+      "\n",
+      "\u001b[0m\u001b[32;1m\u001b[1;3m{'checked_output': '### Comprehensive Analysis of the Latest Advancements in AI in 2024\\n\\n#### Key Trends in AI 2024\\n\\n1. **Generative AI**: Generative AI continues to evolve, becoming more useful for everyday users and businesses. The technology is being integrated into various applications, from creative tools to business solutions, enabling more people to experiment with and benefit from AI models.\\n\\n2. **Multimodal AI**: This trend involves AI systems that can process and integrate multiple types of data inputs, such as text, images, and sound. This advancement mimics human sensory processing and enhances the capabilities of AI in understanding and generating complex data.\\n\\n3. **Smaller Language Models and Open Source Advancements**: There is a shift towards smaller, more efficient language models that are easier to deploy and manage. Open source contributions are accelerating innovation and making advanced AI technologies more accessible.\\n\\n4. **Model Optimization and Customization**: AI models are becoming more optimized and customizable, allowing businesses to tailor AI solutions to their specific needs. This trend is driven by advancements in model optimization techniques and the development of customized local models and data pipelines.\\n\\n5. **Integration into Daily Life and Business**: AI is becoming seamlessly integrated into daily life and business operations. From AI-powered chatbots to advanced analytics, organizations are leveraging AI to drive efficiency, innovation, and value.\\n\\n#### Breakthrough AI Technologies in 2024\\n\\n1. **Generative AI Models**: State-of-the-art generative AI models are being developed and deployed, offering new capabilities in content creation, design, and problem-solving.\\n\\n2. 
**AI for Precision Education**: AI is making significant strides in the field of education, with systems designed to provide personalized learning experiences and improve educational outcomes.\\n\\n3. **AI in Climate Research**: AI technologies are being used to advance climate research, helping scientists analyze vast amounts of data and develop solutions to address climate change.\\n\\n4. **AI for Social Good**: There is a growing focus on using AI for social good, with tech leaders and social innovators working together to develop AI solutions that address societal challenges.\\n\\n5. **AI in Physics**: AI is being used to uncover mechanisms in physics, driving new paradigms in AI discovery. Researchers are using generative AI to classify phase transitions in materials and physical systems more efficiently. AI is also being coupled with fundamental physics to enhance calculations and theoretical investigations in pure mathematics and theoretical physics.\\n\\n#### Potential Industry Impacts\\n\\n1. **Business and Enterprise**: AI is transforming business operations, from automating routine tasks to providing advanced analytics and insights. Companies are using AI to enhance productivity, drive innovation, and gain a competitive edge.\\n\\n2. **Healthcare**: AI is revolutionizing healthcare by improving diagnostics, personalizing treatment plans, and streamlining administrative processes. AI-powered tools are enhancing patient care and outcomes.\\n\\n3. **Education**: AI is reshaping education by offering personalized learning experiences and improving access to quality education. AI-driven tools are helping educators tailor instruction to individual student needs.\\n\\n4. **Climate and Environment**: AI is playing a crucial role in climate research and environmental conservation. AI technologies are helping researchers analyze data, model climate scenarios, and develop sustainable solutions.\\n\\n5. 
**Social Impact**: AI is being leveraged to address social issues, from improving public safety to enhancing accessibility for individuals with disabilities. AI-driven initiatives are making a positive impact on communities worldwide.\\n\\n6. **Physics**: AI applications in physics are leading to significant advancements in understanding and modeling physical phenomena. The integration of AI with fundamental physics is enabling faster and more efficient calculations, as well as new insights in theoretical investigations.\\n\\nIn conclusion, 2024 is a pivotal year for AI, with significant advancements and trends shaping the future of technology and its applications across various industries. The integration of AI into daily life and business, coupled with breakthrough technologies, is driving innovation and creating new opportunities for growth and development.'}\u001b[0m\n",
       "\n",
       "\u001b[1m> Finished chain.\u001b[0m\n"
      ]
@@ -231,59 +217,47 @@
     {
      "data": {
       "text/markdown": [
-       "### Comprehensive Analysis of AI Advancements in 2024\n",
+       "### Comprehensive Analysis of the Latest Advancements in AI in 2024\n",
        "\n",
        "#### Key Trends in AI 2024\n",
        "\n",
-       "1. **Generative AI Maturation**:\n",
-       "   - Generative AI, which gained significant attention in 2023, is expected to become more practical and useful for everyday users in 2024. This includes applications in content creation, customer service, and personalized experiences.\n",
-       "   - The adoption of generative AI by businesses is accelerating, with 65% of organizations regularly using it, nearly double the percentage from the previous year.\n",
+       "1. **Generative AI**: Generative AI continues to evolve, becoming more useful for everyday users and businesses. The technology is being integrated into various applications, from creative tools to business solutions, enabling more people to experiment with and benefit from AI models.\n",
        "\n",
-       "2. **Multimodal AI**:\n",
-       "   - Multimodal AI, which processes and integrates data from multiple sources (e.g., text, images, audio), is becoming more prevalent. This allows for more sophisticated and context-aware AI applications.\n",
+       "2. **Multimodal AI**: This trend involves AI systems that can process and integrate multiple types of data inputs, such as text, images, and sound. This advancement mimics human sensory processing and enhances the capabilities of AI in understanding and generating complex data.\n",
        "\n",
-       "3. **Smaller and Open Source Models**:\n",
-       "   - There is a trend towards smaller, more efficient AI models that are easier to deploy and manage. Open source AI models are gaining traction, enabling businesses to customize and optimize AI solutions for their specific needs.\n",
+       "3. **Smaller Language Models and Open Source Advancements**: There is a shift towards smaller, more efficient language models that are easier to deploy and manage. Open source contributions are accelerating innovation and making advanced AI technologies more accessible.\n",
        "\n",
-       "4. **Ethics and Regulation**:\n",
-       "   - As AI technologies become more integrated into society, there is a growing emphasis on ethical considerations, safety, and regulatory compliance. Organizations are adopting more cautious and responsible AI development and deployment strategies.\n",
+       "4. **Model Optimization and Customization**: AI models are becoming more optimized and customizable, allowing businesses to tailor AI solutions to their specific needs. This trend is driven by advancements in model optimization techniques and the development of customized local models and data pipelines.\n",
        "\n",
-       "5. **AI in Data and Analytics**:\n",
-       "   - The role of AI in data and analytics is evolving, with a focus on integrating AI capabilities into existing data pipelines and analytics workflows. This trend is driven by the need for more accurate and actionable insights.\n",
+       "5. **Integration into Daily Life and Business**: AI is becoming seamlessly integrated into daily life and business operations. From AI-powered chatbots to advanced analytics, organizations are leveraging AI to drive efficiency, innovation, and value.\n",
        "\n",
        "#### Breakthrough AI Technologies in 2024\n",
        "\n",
-       "1. **AI-Powered Scientific Discovery**:\n",
-       "   - AI is playing a crucial role in accelerating scientific research and discovery. AI algorithms are being used to analyze vast amounts of data, identify patterns, and generate new hypotheses in fields such as medicine, materials science, and climate research.\n",
+       "1. **Generative AI Models**: State-of-the-art generative AI models are being developed and deployed, offering new capabilities in content creation, design, and problem-solving.\n",
        "\n",
-       "2. **Elastocalorics**:\n",
-       "   - Elastocaloric materials, which can change temperature under mechanical stress, are being explored for their potential in energy-efficient cooling systems. AI is being used to optimize the design and performance of these materials.\n",
+       "2. **AI for Precision Education**: AI is making significant strides in the field of education, with systems designed to provide personalized learning experiences and improve educational outcomes.\n",
        "\n",
-       "3. **Advanced Virtual Agents**:\n",
-       "   - Virtual agents are becoming more powerful and capable of handling complex interactions. These agents are being used in customer service, healthcare, and education to provide personalized and efficient support.\n",
+       "3. **AI in Climate Research**: AI technologies are being used to advance climate research, helping scientists analyze vast amounts of data and develop solutions to address climate change.\n",
        "\n",
-       "4. **Model Optimization**:\n",
-       "   - Techniques for optimizing AI models, such as pruning and quantization, are becoming more accessible. This allows for the deployment of AI models on resource-constrained devices, such as smartphones and IoT devices.\n",
+       "4. **AI for Social Good**: There is a growing focus on using AI for social good, with tech leaders and social innovators working together to develop AI solutions that address societal challenges.\n",
        "\n",
-       "5. **AI in Physics**:\n",
-       "   - Researchers are using generative AI to develop physics-informed techniques for classifying phase transitions in materials and physical systems. This approach is more efficient than existing machine-learning methods and has the potential to drive new innovations in various industries.\n",
+       "5. **AI in Physics**: AI is being used to uncover mechanisms in physics, driving new paradigms in AI discovery. Researchers are using generative AI to classify phase transitions in materials and physical systems more efficiently. AI is also being coupled with fundamental physics to enhance calculations and theoretical investigations in pure mathematics and theoretical physics.\n",
        "\n",
        "#### Potential Industry Impacts\n",
        "\n",
-       "1. **Manufacturing**:\n",
-       "   - The manufacturing industry is expected to see the largest financial impact from AI. AI-driven automation, predictive maintenance, and quality control are enhancing productivity and reducing costs.\n",
+       "1. **Business and Enterprise**: AI is transforming business operations, from automating routine tasks to providing advanced analytics and insights. Companies are using AI to enhance productivity, drive innovation, and gain a competitive edge.\n",
+       "\n",
+       "2. **Healthcare**: AI is revolutionizing healthcare by improving diagnostics, personalizing treatment plans, and streamlining administrative processes. AI-powered tools are enhancing patient care and outcomes.\n",
+       "\n",
+       "3. **Education**: AI is reshaping education by offering personalized learning experiences and improving access to quality education. AI-driven tools are helping educators tailor instruction to individual student needs.\n",
        "\n",
-       "2. **Healthcare**:\n",
-       "   - AI is transforming healthcare by enabling early diagnosis, personalized treatment plans, and efficient management of healthcare resources. AI-powered tools are improving patient outcomes and reducing the burden on healthcare professionals.\n",
+       "4. **Climate and Environment**: AI is playing a crucial role in climate research and environmental conservation. AI technologies are helping researchers analyze data, model climate scenarios, and develop sustainable solutions.\n",
        "\n",
-       "3. **Finance**:\n",
-       "   - In the finance sector, AI is being used for fraud detection, risk management, and personalized financial services. AI algorithms are enhancing the accuracy and efficiency of financial operations.\n",
+       "5. **Social Impact**: AI is being leveraged to address social issues, from improving public safety to enhancing accessibility for individuals with disabilities. AI-driven initiatives are making a positive impact on communities worldwide.\n",
        "\n",
-       "4. **Retail**:\n",
-       "   - AI is revolutionizing the retail industry by enabling personalized shopping experiences, optimizing supply chain management, and enhancing customer service. Retailers are using AI to analyze consumer behavior and predict trends.\n",
+       "6. **Physics**: AI applications in physics are leading to significant advancements in understanding and modeling physical phenomena. The integration of AI with fundamental physics is enabling faster and more efficient calculations, as well as new insights in theoretical investigations.\n",
        "\n",
-       "5. **Energy**:\n",
-       "   - AI is playing a key role in optimizing energy production and consumption. AI algorithms are being used to manage smart grids, predict energy demand, and improve the efficiency of renewable energy sources."
+       "In conclusion, 2024 is a pivotal year for AI, with significant advancements and trends shaping the future of technology and its applications across various industries. The integration of AI into daily life and business, coupled with breakthrough technologies, is driving innovation and creating new opportunities for growth and development."
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
diff --git a/examples/llama_index_output_handler.py b/examples/llama_index_output_handler.py
index 0f243206..b3ea1048 100644
--- a/examples/llama_index_output_handler.py
+++ b/examples/llama_index_output_handler.py
@@ -8,33 +8,29 @@
 from motleycrew.tasks import SimpleTask
 from motleycrew.common.exceptions import InvalidOutput
 from motleycrew.common import AsyncBackend
-
-from langchain_core.tools import StructuredTool
+from motleycrew.tools import MotleyTool
 
 
 def main():
     """Main function of running the example."""
     search_tool = DuckDuckGoSearchRun()
 
-    def check_output(output: str):
-        if "medicine" not in output.lower():
-            raise InvalidOutput(
-                "Add more information about AI applications in medicine."
-            )
+    class OutputHandler(MotleyTool):
+        def run(self, output: str):
+            if "medicine" not in output.lower():
+                raise InvalidOutput("Add more information about AI applications in medicine.")
 
-        return {"checked_output": output}
+            return {"checked_output": output}
 
-    output_handler = StructuredTool.from_function(
-        name="output_handler",
-        description="Output handler",
-        func=check_output,
+    output_handler = OutputHandler(
+        name="output_handler", description="Output handler", return_direct=True
     )
 
     # TODO: add LlamaIndex native tools
     researcher = ReActLlamaIndexMotleyAgent(
         prompt_prefix="Your goal is to uncover cutting-edge developments in AI and data science",
-        tools=[search_tool],
-        output_handler=output_handler,
+        tools=[search_tool, output_handler],
+        force_output_handler=True,
         verbose=True,
         max_iterations=16,  # default is 10, we add more because the output handler may reject the output
     )
diff --git a/examples/output_handler.py b/examples/output_handler.py
deleted file mode 100644
index b1635c4c..00000000
--- a/examples/output_handler.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from dotenv import load_dotenv
-from langchain_community.tools import DuckDuckGoSearchRun
-from motleycache import enable_cache
-
-from motleycrew import MotleyCrew
-from motleycrew.agents import MotleyOutputHandler
-from motleycrew.agents.langchain.tool_calling_react import (
-    ReActToolCallingMotleyAgent,
-)
-from motleycrew.common import configure_logging
-from motleycrew.common.exceptions import InvalidOutput
-from motleycrew.tasks import SimpleTask
-
-
-def main():
-    search_tool = DuckDuckGoSearchRun()
-
-    tools = [search_tool]
-
-    class ReportOutputHandler(MotleyOutputHandler):
-        def handle_output(self, output: str):
-            if "medicine" not in output.lower():
-                raise InvalidOutput("Add more information about AI applications in medicine.")
-
-            if "2024" in self.agent_input["prompt"]:
-                output += "\n\nThis report is up-to-date for 2024."
-
-            output += f"\n\nBrought to you by motleycrew's {self.agent}."
-
-            return {"checked_output": output}
-
-    researcher = ReActToolCallingMotleyAgent(
-        tools=tools,
-        verbose=True,
-        chat_history=True,
-        output_handler=ReportOutputHandler(),
-    )
-
-    crew = MotleyCrew()
-    task = SimpleTask(
-        crew=crew,
-        name="produce comprehensive analysis report on AI advancements in 2024",
-        description="""Conduct a comprehensive analysis of the latest advancements in AI in 2024.
-      Identify key trends, breakthrough technologies, and potential industry impacts.
-      Your final answer MUST be a full analysis report""",
-        agent=researcher,
-    )
-    crew.run()
-    print(task.output)
-
-
-if __name__ == "__main__":
-    enable_cache()
-    configure_logging(verbose=True)
-
-    load_dotenv()
-    main()
diff --git a/motleycrew/agents/__init__.py b/motleycrew/agents/__init__.py
index b5d8f69f..91f3fdc5 100644
--- a/motleycrew/agents/__init__.py
+++ b/motleycrew/agents/__init__.py
@@ -1,13 +1,11 @@
 """Everything agent-related: wrappers, pre-made agents, output handlers etc."""
 
 from .abstract_parent import MotleyAgentAbstractParent
-from .output_handler import MotleyOutputHandler
 from .parent import MotleyAgentParent
 from .langchain import LangchainMotleyAgent
 
 __all__ = [
     "MotleyAgentAbstractParent",
     "MotleyAgentParent",
-    "MotleyOutputHandler",
     "LangchainMotleyAgent",
 ]
diff --git a/motleycrew/agents/abstract_parent.py b/motleycrew/agents/abstract_parent.py
index 50174811..53ab36e9 100644
--- a/motleycrew/agents/abstract_parent.py
+++ b/motleycrew/agents/abstract_parent.py
@@ -23,10 +23,6 @@ def invoke(
         pass
 
     @abstractmethod
-    def as_tool(self) -> "MotleyTool":
-        """Convert the agent to a MotleyTool to be used by other agents via delegation.
-
-        Returns:
-            The tool representation of the agent.
-        """
+    def call_as_tool(self, *args, **kwargs) -> Any:
+        """Method that is called when the agent is used as a tool by another agent."""
         pass
diff --git a/motleycrew/agents/crewai/crewai.py b/motleycrew/agents/crewai/crewai.py
index 03bc0756..cd9e700d 100644
--- a/motleycrew/agents/crewai/crewai.py
+++ b/motleycrew/agents/crewai/crewai.py
@@ -31,7 +31,7 @@ def __init__(
         name: str | None = None,
         agent_factory: MotleyAgentFactory[CrewAIAgentWithConfig] | None = None,
         tools: Sequence[MotleySupportedTool] | None = None,
-        output_handler: MotleySupportedTool | None = None,
+        force_output_handler: bool = False,
         verbose: bool = False,
     ):
         """
@@ -63,14 +63,15 @@ def __init__(
 
             tools: Tools to add to the agent.
 
-            output_handler: Output handler for the agent.
+            force_output_handler: Whether to force the agent to return through an output handler.
+                NOTE: This is currently not supported for CrewAI agents.
 
             verbose: Whether to log verbose output.
         """
 
-        if output_handler:
+        if force_output_handler:
             raise NotImplementedError(
-                "Output handler is not supported for CrewAI agents "
+                "Forced output handlers are not supported for CrewAI agents "
                 "because of the specificity of CrewAI's prompts."
             )
 
@@ -81,7 +82,7 @@ def __init__(
             name=name,
             agent_factory=agent_factory,
             tools=tools,
-            output_handler=output_handler,
+            force_output_handler=False,
             verbose=verbose,
         )
 
diff --git a/motleycrew/agents/crewai/crewai_agent.py b/motleycrew/agents/crewai/crewai_agent.py
index 2c89b8a7..38f61432 100644
--- a/motleycrew/agents/crewai/crewai_agent.py
+++ b/motleycrew/agents/crewai/crewai_agent.py
@@ -26,8 +26,8 @@ def __init__(
         description: str | None = None,
         delegation: bool = False,
         tools: Sequence[MotleySupportedTool] | None = None,
+        force_output_handler: bool = False,
         llm: Optional[Any] = None,
-        output_handler: MotleySupportedTool | None = None,
         verbose: bool = False,
     ):
         """
@@ -55,7 +55,8 @@ def __init__(
 
             llm: LLM instance to use.
 
-            output_handler: Output handler for the agent.
+            force_output_handler: Whether to force the use of an output handler.
+                NOTE: This is currently not supported for CrewAI agents.
 
             verbose: Whether to log verbose output.
         """
@@ -91,6 +92,6 @@ def agent_factory(tools: dict[str, MotleyTool]):
             name=role,
             agent_factory=agent_factory,
             tools=tools,
-            output_handler=output_handler,
+            force_output_handler=force_output_handler,
             verbose=verbose,
         )
diff --git a/motleycrew/agents/langchain/langchain.py b/motleycrew/agents/langchain/langchain.py
index 129812bd..aff03dea 100644
--- a/motleycrew/agents/langchain/langchain.py
+++ b/motleycrew/agents/langchain/langchain.py
@@ -26,7 +26,7 @@ def __init__(
         prompt_prefix: str | ChatPromptTemplate | None = None,
         agent_factory: MotleyAgentFactory[AgentExecutor] | None = None,
         tools: Sequence[MotleySupportedTool] | None = None,
-        output_handler: MotleySupportedTool | None = None,
+        force_output_handler: bool = False,
         chat_history: bool | GetSessionHistoryCallable = True,
         input_as_messages: bool = False,
         runnable_config: RunnableConfig | None = None,
@@ -60,7 +60,8 @@ def __init__(
 
             tools: Tools to add to the agent.
 
-            output_handler: Output handler for the agent.
+            force_output_handler: Whether to force the agent to return through an output handler.
+                If True, at least one tool must have return_direct set to True.
 
             chat_history: Whether to use chat history or not.
                 If `True`, uses `InMemoryChatMessageHistory`.
@@ -82,12 +83,10 @@ def __init__(
             name=name,
             agent_factory=agent_factory,
             tools=tools,
-            output_handler=output_handler,
+            force_output_handler=force_output_handler,
             verbose=verbose,
         )
 
-        self._agent_finish_blocker_tool = self._create_agent_finish_blocker_tool()
-
         if chat_history is True:
             chat_history = InMemoryChatMessageHistory()
             self.get_session_history_callable = lambda _: chat_history
@@ -97,6 +96,8 @@ def __init__(
         self.input_as_messages = input_as_messages
         self.runnable_config = runnable_config
 
+        self._create_agent_error_tool()
+
     def materialize(self):
         """Materialize the agent and wrap it in RunnableWithMessageHistory if needed."""
         if self.is_materialized:
@@ -105,8 +106,9 @@ def materialize(self):
         super().materialize()
         assert isinstance(self._agent, AgentExecutor)
 
-        if self.output_handler:
-            self._agent.tools += [self._agent_finish_blocker_tool]
+        if self.get_output_handlers():
+            assert self._agent_error_tool
+            self._agent.tools += [self._agent_error_tool]
 
             object.__setattr__(
                 self._agent.agent, "plan", self.agent_plan_decorator(self._agent.agent.plan)
@@ -118,22 +120,19 @@ def materialize(self):
                 self.take_next_step_decorator(self._agent._take_next_step),
             )
 
-            prepared_output_handler = None
             for tool in self.agent.tools:
-                if tool.name == self.output_handler.name:
-                    prepared_output_handler = tool
-
-            object.__setattr__(
-                prepared_output_handler,
-                "_run",
-                self._run_tool_direct_decorator(prepared_output_handler._run),
-            )
-
-            object.__setattr__(
-                prepared_output_handler,
-                "run",
-                self.run_tool_direct_decorator(prepared_output_handler.run),
-            )
+                if tool.return_direct:
+                    object.__setattr__(
+                        tool,
+                        "_run",
+                        self._run_tool_direct_decorator(tool._run),
+                    )
+
+                    object.__setattr__(
+                        tool,
+                        "run",
+                        self.run_tool_direct_decorator(tool.run),
+                    )
 
         if self.get_session_history_callable:
             logger.info("Wrapping agent in RunnableWithMessageHistory")
diff --git a/motleycrew/agents/langchain/legacy_react.py b/motleycrew/agents/langchain/legacy_react.py
index abb12f8a..a4cc5591 100644
--- a/motleycrew/agents/langchain/legacy_react.py
+++ b/motleycrew/agents/langchain/legacy_react.py
@@ -14,10 +14,10 @@
 from motleycrew.common.llms import init_llm
 from motleycrew.tools import MotleyTool
 
-OUTPUT_HANDLER_WITH_DEFAULT_PROMPT_MESSAGE = (
+FORCED_OUTPUT_HANDLER_WITH_DEFAULT_PROMPT_MESSAGE = (
     "Langchain's default ReAct prompt tells the agent to include a final answer keyword, "
-    "which later confuses the parser when an output handler is used. "
-    "Please provide a custom prompt if using an output handler."
+    "which later confuses the agent when an output handler is used. "
+    "Please provide a custom prompt if forcing an output handler."
 )
 
 
@@ -34,8 +34,8 @@ def __init__(
         description: str | None = None,
         name: str | None = None,
         prompt_prefix: str | None = None,
-        output_handler: MotleySupportedTool | None = None,
         chat_history: bool | GetSessionHistoryCallable = True,
+        force_output_handler: bool = False,
         prompt: str | None = None,
         handle_parsing_errors: bool = True,
         handle_tool_errors: bool = True,
@@ -51,6 +51,7 @@ def __init__(
             prompt_prefix: Prefix to the agent's prompt.
-            output_handler: Output handler for the agent.
             chat_history: Whether to use chat history or not.
+            force_output_handler: Whether to force the agent to return through an output handler.
+                If True, at least one tool must have return_direct set to True.
             prompt: Custom prompt to use with the agent.
             handle_parsing_errors: Whether to handle parsing errors.
             handle_tool_errors: Whether to handle tool errors.
@@ -60,8 +61,8 @@ def __init__(
             verbose: Whether to log verbose output.
         """
         if prompt is None:
-            if output_handler is not None:
-                raise Exception(OUTPUT_HANDLER_WITH_DEFAULT_PROMPT_MESSAGE)
+            if force_output_handler:
+                raise Exception(FORCED_OUTPUT_HANDLER_WITH_DEFAULT_PROMPT_MESSAGE)
             prompt = hub.pull("hwchase17/react")
 
         if llm is None:
@@ -97,7 +98,6 @@ def agent_factory(
             name=name,
             agent_factory=agent_factory,
             tools=tools,
-            output_handler=output_handler,
             chat_history=chat_history,
             runnable_config=runnable_config,
             verbose=verbose,
diff --git a/motleycrew/agents/langchain/tool_calling_react.py b/motleycrew/agents/langchain/tool_calling_react.py
index a4358bbf..c62589cf 100644
--- a/motleycrew/agents/langchain/tool_calling_react.py
+++ b/motleycrew/agents/langchain/tool_calling_react.py
@@ -47,7 +47,7 @@ def render_text_description(tools: list[BaseTool]) -> str:
 
 
 def get_relevant_default_prompt(
-    llm: BaseChatModel, output_handler: Optional[MotleySupportedTool]
+    llm: BaseChatModel, force_output_handler: bool
 ) -> ChatPromptTemplate:
     if ChatAnthropic is not None and isinstance(llm, ChatAnthropic):
         prompts = ToolCallingReActPromptsForAnthropic()
@@ -55,7 +55,7 @@ def get_relevant_default_prompt(
         # Anthropic prompts are more specific, so we use the OpenAI prompts as the default
         prompts = ToolCallingReActPromptsForOpenAI()
 
-    if output_handler:
+    if force_output_handler:
         return prompts.prompt_template_with_output_handler
     return prompts.prompt_template_without_output_handler
 
@@ -64,19 +64,17 @@ def create_tool_calling_react_agent(
     llm: BaseChatModel,
     tools: Sequence[BaseTool],
     prompt: ChatPromptTemplate,
-    output_handler: BaseTool | None = None,
+    output_handlers: Sequence[BaseTool],
+    force_output_handler: bool,
     intermediate_steps_processor: Callable | None = None,
 ) -> Runnable:
     prompt = prompt.partial(
         tools=render_text_description(list(tools)),
-        output_handler=render_text_description([output_handler]) if output_handler else "",
+        output_handlers=render_text_description(output_handlers) if force_output_handler else "",
     )
     check_variables(prompt)
 
     tools_for_llm = list(tools)
-    if output_handler:
-        tools_for_llm.append(output_handler)
-
     llm_with_tools = llm.bind_tools(tools=tools_for_llm)
 
     if not intermediate_steps_processor:
@@ -113,7 +111,7 @@ def __init__(
         prompt_prefix: str | ChatPromptTemplate | None = None,
         prompt: ChatPromptTemplate | None = None,
         chat_history: bool | GetSessionHistoryCallable = True,
-        output_handler: MotleySupportedTool | None = None,
+        force_output_handler: bool = False,
         handle_parsing_errors: bool = True,
         handle_tool_errors: bool = True,
         llm: BaseChatModel | None = None,
@@ -135,7 +133,8 @@ def __init__(
                 If a callable is passed, it is used to get the chat history by session_id.
                 See :class:`langchain_core.runnables.history.RunnableWithMessageHistory`
                 for more details.
-            output_handler: Output handler for the agent.
+            force_output_handler: Whether to force the agent to return through an output handler.
+                If True, at least one tool must have return_direct set to True.
             handle_parsing_errors: Whether to handle parsing errors.
             handle_tool_errors: Whether to handle tool errors.
                 If True, `handle_tool_error` and `handle_validation_error` in all tools
@@ -163,28 +162,23 @@ def __init__(
             raise ValueError("You must provide at least one tool to the ReActToolCallingAgent")
 
         if prompt is None:
-            prompt = get_relevant_default_prompt(llm=llm, output_handler=output_handler)
+            prompt = get_relevant_default_prompt(llm=llm, force_output_handler=force_output_handler)
 
-        def agent_factory(
-            tools: dict[str, MotleyTool], output_handler: Optional[MotleyTool] = None
-        ) -> AgentExecutor:
+        def agent_factory(tools: dict[str, MotleyTool]) -> AgentExecutor:
+            output_handlers_for_langchain = [
+                t.to_langchain_tool() for t in tools.values() if t.return_direct
+            ]
             tools_for_langchain = [t.to_langchain_tool() for t in tools.values()]
-            if output_handler:
-                output_handler_for_langchain = output_handler.to_langchain_tool()
-            else:
-                output_handler_for_langchain = None
 
             agent = create_tool_calling_react_agent(
                 llm=llm,
                 tools=tools_for_langchain,
                 prompt=prompt,
-                output_handler=output_handler_for_langchain,
+                output_handlers=output_handlers_for_langchain,
+                force_output_handler=force_output_handler,
                 intermediate_steps_processor=intermediate_steps_processor,
             )
 
-            if output_handler_for_langchain:
-                tools_for_langchain.append(output_handler_for_langchain)
-
             if handle_tool_errors:
                 for tool in tools_for_langchain:
                     tool.handle_tool_error = True
@@ -205,7 +199,7 @@ def agent_factory(
             name=name,
             agent_factory=agent_factory,
             tools=tools,
-            output_handler=output_handler,
+            force_output_handler=force_output_handler,
             chat_history=chat_history,
             input_as_messages=True,
             runnable_config=runnable_config,
diff --git a/motleycrew/agents/langchain/tool_calling_react_prompts.py b/motleycrew/agents/langchain/tool_calling_react_prompts.py
index 40be639b..7dddca99 100644
--- a/motleycrew/agents/langchain/tool_calling_react_prompts.py
+++ b/motleycrew/agents/langchain/tool_calling_react_prompts.py
@@ -59,7 +59,7 @@ class ToolCallingReActPromptsForOpenAI(ToolCallingReActPrompts):
 If the information so far is not sufficient to answer the question precisely and completely (rather than sloppily and approximately), don't hesitate to use tools again, until sufficient information is gathered.
 Don't stop this until you are certain that you have enough information to answer the question.
 {output_instruction}
-{output_handler}
+{output_handlers}
 
 Begin!
 """
@@ -72,9 +72,9 @@ class ToolCallingReActPromptsForOpenAI(ToolCallingReActPrompts):
 but without the backticks."""
 
     output_instruction_with_output_handler = """
-If you have sufficient information to answer the question, you must call the output handler tool.
+If you have sufficient information to answer the question, you must call the relevant tool.
 
-NEVER return the final answer directly, but always do it by CALLING this tool:
+NEVER return the final answer directly, but always do it by CALLING the relevant tool:
 """
 
     example_messages = [
@@ -108,8 +108,8 @@ class ToolCallingReActPromptsForOpenAI(ToolCallingReActPrompts):
 The reply must contain the tool call or calls that you described in the thought.
 TOOL CALLS WITHOUT A THOUGHT WILL NOT BE ACCEPTED!
 
-If you have sufficient information to answer the question, call the output handler tool:
-{output_handler}
+If you have sufficient information to answer the question, call the relevant tool:
+{output_handlers}
 
 Write your reply, starting with "Thought:":
 """
@@ -147,15 +147,15 @@ class ToolCallingReActPromptsForAnthropic(ToolCallingReActPrompts):
 If the information so far is not sufficient to answer the question precisely and completely (rather than sloppily and approximately), don't hesitate to use tools again, until sufficient information is gathered.
 Don't stop this until you are certain that you have enough information to answer the question.
 {output_instruction}
-{output_handler}
+{output_handlers}
 
 Begin!
 """
 
     output_instruction_with_output_handler = """
-If you have sufficient information to answer the question, you must call the output handler tool.
+If you have sufficient information to answer the question, you must call the relevant tool.
 
-NEVER return the final answer directly, but always do it by CALLING this tool:
+NEVER return the final answer directly, but always do it by CALLING the relevant tool:
 """
 
     output_instruction_without_output_handler = """
@@ -200,8 +200,8 @@ class ToolCallingReActPromptsForAnthropic(ToolCallingReActPrompts):
 The reply must contain the tool call or calls that you described in the thought.
 TOOL CALLS WITHOUT A THOUGHT WILL NOT BE ACCEPTED!
 
-If you have sufficient information to answer the question, call the output handler tool with the relevant arguments:
-{output_handler}
+If you have sufficient information to answer the question, call the relevant tool:
+{output_handlers}
 
 Write your reply, starting with `<thinking>`:
 """
diff --git a/motleycrew/agents/llama_index/llama_index.py b/motleycrew/agents/llama_index/llama_index.py
index aa1fbc70..7e2fddec 100644
--- a/motleycrew/agents/llama_index/llama_index.py
+++ b/motleycrew/agents/llama_index/llama_index.py
@@ -17,10 +17,10 @@
 
 from langchain_core.runnables import RunnableConfig
 
-from motleycrew.agents.parent import MotleyAgentParent, DirectOutput
-from motleycrew.common import MotleySupportedTool
-from motleycrew.common import MotleyAgentFactory
+from motleycrew.agents.parent import MotleyAgentParent
+from motleycrew.common import MotleySupportedTool, MotleyAgentFactory, AuxPrompts
 from motleycrew.common.utils import ensure_module_is_installed
+from motleycrew.tools import DirectOutput
 
 
 class LlamaIndexMotleyAgent(MotleyAgentParent):
@@ -33,7 +33,7 @@ def __init__(
         name: str | None = None,
         agent_factory: MotleyAgentFactory[AgentRunner] | None = None,
         tools: Sequence[MotleySupportedTool] | None = None,
-        output_handler: MotleySupportedTool | None = None,
+        force_output_handler: bool = False,
         verbose: bool = False,
     ):
         """
@@ -64,6 +64,9 @@ def __init__(
 
             tools: Tools to add to the agent.
 
+            force_output_handler: Whether to force the agent to return through an output handler.
+                If True, at least one tool must have return_direct set to True.
+
             output_handler: Output handler for the agent.
 
             verbose: Whether to log verbose output.
@@ -74,17 +77,35 @@ def __init__(
             name=name,
             agent_factory=agent_factory,
             tools=tools,
-            output_handler=output_handler,
+            force_output_handler=force_output_handler,
             verbose=verbose,
         )
 
+        self.direct_output = None
+
+    def _propagate_error_step(self, task_id: str, message: str):
+        error_step = TaskStep(
+            task_id=task_id,
+            step_id=str(uuid.uuid4()),
+            input=message,
+        )
+
+        step_queue = self._agent.state.get_step_queue(task_id)
+        step_queue.clear()
+        step_queue.extend([error_step])
+
     def _run_step_decorator(self):
         """Decorator for the ``AgentRunner._run_step`` method that catches DirectOutput exceptions.
 
-        It also blocks plain output and forces the use of the output handler tool if it is present.
+        It also blocks plain output and forces the use of the output handler tool if necessary.
+
+        Note that as of now, LlamaIndex agents only allow one tool call per step,
+        so we don't need to worry about ambiguous output handler calls.
         """
         ensure_module_is_installed("llama_index")
 
+        output_handlers = self.get_output_handlers()
+
         def decorator(func):
             def wrapper(
                 task_id: str,
@@ -105,25 +126,18 @@ def wrapper(
                     )
                     return cur_step_output
 
-                if self.output_handler is None:
+                if not output_handlers:
                     return cur_step_output
 
-                if cur_step_output.is_last:
+                if cur_step_output.is_last and self.force_output_handler:
                     cur_step_output.is_last = False
-                    task_id = cur_step_output.task_step.task_id
-                    output_task_step = TaskStep(
-                        task_id=task_id,
-                        step_id=str(uuid.uuid4()),
-                        input="You must call the `{}` tool to return the output.".format(
-                            self.output_handler.name
+                    self._propagate_error_step(
+                        task_id=cur_step_output.task_step.task_id,
+                        message=AuxPrompts.get_direct_output_error_message(
+                            output_handlers=output_handlers
                         ),
                     )
 
-                    cur_step_output.next_steps.append(output_task_step)
-
-                    step_queue = self._agent.state.get_step_queue(task_id)
-                    step_queue.extend(cur_step_output.next_steps)
-
                 return cur_step_output
 
             return wrapper
@@ -144,7 +158,7 @@ def invoke(
 
         output = self.agent.chat(prompt)
 
-        if self.output_handler:
+        if self.direct_output is not None:
             return self.direct_output.output
 
         return output.response
diff --git a/motleycrew/agents/llama_index/llama_index_react.py b/motleycrew/agents/llama_index/llama_index_react.py
index 6142d010..fbf7c211 100644
--- a/motleycrew/agents/llama_index/llama_index_react.py
+++ b/motleycrew/agents/llama_index/llama_index_react.py
@@ -27,8 +27,8 @@ def __init__(
         description: str | None = None,
         name: str | None = None,
         tools: Sequence[MotleySupportedTool] | None = None,
+        force_output_handler: bool = False,
         llm: LLM | None = None,
-        output_handler: MotleySupportedTool | None = None,
         verbose: bool = False,
         max_iterations: int = 10,
     ):
@@ -51,9 +51,10 @@ def __init__(
 
             tools: Tools to add to the agent.
 
-            llm: LLM instance to use.
+            force_output_handler: Whether to force the agent to return through an output handler.
+                If True, at least one tool must have return_direct set to True.
 
-            output_handler: Output handler for the agent.
+            llm: LLM instance to use.
 
             verbose: Whether to log verbose output.
 
@@ -82,6 +83,6 @@ def agent_factory(tools: dict[str, MotleyTool]) -> ReActAgent:
             name=name,
             agent_factory=agent_factory,
             tools=tools,
-            output_handler=output_handler,
+            force_output_handler=force_output_handler,
             verbose=verbose,
         )
diff --git a/motleycrew/agents/mixins.py b/motleycrew/agents/mixins.py
index 1c5c52d8..24012860 100644
--- a/motleycrew/agents/mixins.py
+++ b/motleycrew/agents/mixins.py
@@ -4,42 +4,52 @@
 from langchain_core.callbacks import CallbackManagerForChainRun
 from langchain_core.messages import AIMessage
 from langchain_core.runnables import RunnableConfig
-from langchain_core.tools import BaseTool, Tool
-from motleycrew.agents.parent import DirectOutput
-from motleycrew.tools import MotleyTool
+from langchain_core.tools import BaseTool, StructuredTool
+from motleycrew.tools import MotleyTool, DirectOutput
+from motleycrew.common import AuxPrompts
 
 
 class LangchainOutputHandlingAgentMixin:
     """A mixin for Langchain-based agents that support output handlers."""
 
-    output_handler: Optional[MotleyTool] = None
-    _agent_finish_blocker_tool: Optional[BaseTool] = None
+    _agent_error_tool: Optional[BaseTool] = None
+    get_output_handlers: Callable[[], List[MotleyTool]] = None
+    force_output_handler: bool = False
 
-    def _create_agent_finish_blocker_tool(self) -> BaseTool:
+    def _create_agent_error_tool(self) -> BaseTool:
         """Create a tool that will force the agent to retry if it attempts to return the output
-        bypassing the output handler.
+        bypassing tools.
         """
 
-        def create_agent_finish_blocking_message(input: Any = None) -> str:
-            return (
-                f"You must call the `{self.output_handler.name}` tool to return the final output.\n"
-            )
+        def return_error_message(message: str, error_message: str) -> str:
+            return error_message
 
-        return Tool.from_function(
-            name="agent_finish_blocker",
+        self._agent_error_tool = StructuredTool.from_function(
+            name="agent_error_tool",
             description="",
-            func=create_agent_finish_blocking_message,
+            func=return_error_message,
         )
 
-    def _is_blocker_action(self, action: AgentAction) -> bool:
-        """Checks whether the action of the response blocking tool"""
-        return bool(
-            isinstance(action, AgentAction) and action.tool == self._agent_finish_blocker_tool.name
+    def _create_error_action(self, message: str, error_message: str) -> AgentAction:
+        return AgentAction(
+            tool=self._agent_error_tool.name,
+            tool_input={
+                "message": message,
+                "error_message": error_message,
+            },
+            log=f"\nError in agent behavior, forcing retry: {error_message}\n",
         )
 
+    def _is_error_action(self, action: AgentAction) -> bool:
+        """Check whether the action is a call to the agent error tool."""
+        return bool(isinstance(action, AgentAction) and action.tool == self._agent_error_tool.name)
+
     def agent_plan_decorator(self, func: Callable):
         """Decorator for Agent.plan() method that intercepts AgentFinish events"""
 
+        output_handlers = self.get_output_handlers()
+        output_handler_names = set(handler.name for handler in output_handlers)
+
         def wrapper(
             intermediate_steps: List[Tuple[AgentAction, str]],
             callbacks: "Callbacks" = None,
@@ -47,33 +57,51 @@ def wrapper(
         ) -> Union[AgentAction, AgentFinish]:
             additional_notes = []
 
-            if self.output_handler:
-                to_remove_steps = []
-                for intermediate_step in intermediate_steps:
-                    action, action_output = intermediate_step
-                    if self._is_blocker_action(action):
-                        # Add the interaction telling the LLM that it must use the output handler
-                        additional_notes.append(("ai", action.tool_input))
-                        additional_notes.append(("user", action_output))
-                        to_remove_steps.append(intermediate_step)
+            to_remove_steps = []
+            for intermediate_step in intermediate_steps:
+                action, action_output = intermediate_step
+                if self._is_error_action(action):
+                    # Add the interaction telling the LLM that it errored
+                    additional_notes.append(("ai", action.tool_input["message"]))
+                    additional_notes.append(("user", action_output))
+                    to_remove_steps.append(intermediate_step)
 
-                for to_remove_step in to_remove_steps:
-                    intermediate_steps.remove(to_remove_step)
+            for to_remove_step in to_remove_steps:
+                intermediate_steps.remove(to_remove_step)
 
             if additional_notes:
                 kwargs["additional_notes"] = additional_notes
 
             step = func(intermediate_steps, callbacks, **kwargs)
 
-            if not isinstance(step, AgentFinish):
-                return step
-
-            if self.output_handler is not None:
-                return AgentAction(
-                    tool=self._agent_finish_blocker_tool.name,
-                    tool_input=step.log,
-                    log="\nDetected AgentFinish, blocking it to force output via output handler.\n",
-                )
+            if isinstance(step, AgentAction):
+                step = [step]
+
+            if output_handlers:
+                if isinstance(step, AgentFinish):
+                    if not self.force_output_handler:
+                        return step  # Direct output is allowed; an AgentFinish is not a list of actions
+                    return self._create_error_action(  # Attempted to return output directly, blocking
+                        message=step.log,
+                        error_message=AuxPrompts.get_direct_output_error_message(output_handlers),
+                    )
+                try:
+                    step = list(step)
+                except TypeError:
+                    return step  # Not an iterable, so we can't check for output handlers
+
+                if len(step) <= 1:
+                    return step  # At most one action in the step
+
+                # An output handler call must be the only tool call in the step, otherwise block it
+                for action in step:
+                    if action.tool in output_handler_names:
+                        return self._create_error_action(
+                            message="\n".join(a.log for a in step),  # step is a list, it has no .log
+                            error_message=AuxPrompts.get_ambiguous_output_handler_call_error_message(
+                                next(h for h in output_handlers if h.name == action.tool), output_handlers
+                            ),
+                        )
             return step
 
         return wrapper
diff --git a/motleycrew/agents/output_handler.py b/motleycrew/agents/output_handler.py
deleted file mode 100644
index 26f936b1..00000000
--- a/motleycrew/agents/output_handler.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Optional
-
-from langchain_core.pydantic_v1 import BaseModel
-from langchain_core.tools import StructuredTool
-
-from motleycrew.agents.abstract_parent import MotleyAgentAbstractParent
-from motleycrew.common import Defaults
-from motleycrew.common.exceptions import InvalidOutput
-from motleycrew.tools import MotleyTool
-
-
-class MotleyOutputHandler(MotleyTool, ABC):
-    """Base class for output handler tools.
-
-    Output handler tools are used to process the final output of an agent.
-
-    For creating an output handler tool, inherit from this class and implement
-    the `handle_output` method.
-
-    Attributes:
-        _name: Name of the output handler tool.
-        _description: Description of the output handler tool.
-        _args_schema: Pydantic schema for the arguments of the output handler tool.
-            Inferred from the ``handle_output`` method signature if not provided.
-        _exceptions_to_handle: Exceptions that should be returned to the agent when raised.
-    """
-
-    _name: str = "output_handler"
-    _description: str = "Output handler. ONLY RETURN THE FINAL RESULT USING THIS TOOL!"
-    _args_schema: Optional[BaseModel] = None
-    _exceptions_to_handle: tuple[Exception] = (InvalidOutput,)
-
-    def __init__(self, max_iterations: int = Defaults.DEFAULT_OUTPUT_HANDLER_MAX_ITERATIONS):
-        """
-        Args:
-            max_iterations: Maximum number of iterations to run the output handler.
-                If an exception is raised in the ``handle_output`` method, the output handler
-                will return the exception to the agent unless the number of iterations exceeds
-                ``max_iterations``, in which case the output handler will raise
-                :class:`motleycrew.common.exceptions.OutputHandlerMaxIterationsExceeded`.
-        """
-        self.max_iterations = max_iterations
-        langchain_tool = self._create_langchain_tool()
-        super().__init__(langchain_tool)
-
-        self.agent: Optional[MotleyAgentAbstractParent] = None
-        self.agent_input: Optional[dict] = None
-
-    @property
-    def exceptions_to_handle(self):
-        return self._exceptions_to_handle
-
-    def _create_langchain_tool(self):
-        return StructuredTool.from_function(
-            name=self._name,
-            description=self._description,
-            args_schema=self._args_schema,
-            func=self.handle_output,
-        )
-
-    @abstractmethod
-    def handle_output(self, *args, **kwargs):
-        """Method for processing the final output of an agent.
-
-        Implement this method in your output handler tool.
-        """
-        pass
diff --git a/motleycrew/agents/parent.py b/motleycrew/agents/parent.py
index 4532866c..f32dd13d 100644
--- a/motleycrew/agents/parent.py
+++ b/motleycrew/agents/parent.py
@@ -1,29 +1,17 @@
 from __future__ import annotations
 
-import inspect
 from abc import ABC, abstractmethod
-from typing import (
-    TYPE_CHECKING,
-    Optional,
-    Sequence,
-    Any,
-    Union,
-)
+from typing import TYPE_CHECKING, Any, Optional, Sequence, Union
 
 from langchain_core.messages import BaseMessage
 from langchain_core.prompts.chat import ChatPromptTemplate, HumanMessage, SystemMessage
 from langchain_core.runnables import RunnableConfig
-from langchain_core.tools import StructuredTool
-from langchain_core.tools import Tool
+
 from motleycrew.agents.abstract_parent import MotleyAgentAbstractParent
-from motleycrew.agents.output_handler import MotleyOutputHandler
-from motleycrew.common import MotleyAgentFactory, MotleySupportedTool
-from motleycrew.common import logger, Defaults
+from motleycrew.common import MotleyAgentFactory, MotleySupportedTool, logger
 from motleycrew.common.exceptions import (
     AgentNotMaterialized,
     CannotModifyMaterializedAgent,
-    InvalidOutput,
-    OutputHandlerMaxIterationsExceeded,
 )
 from motleycrew.tools import MotleyTool
 
@@ -31,17 +19,6 @@
     from motleycrew import MotleyCrew
 
 
-class DirectOutput(BaseException):
-    """Auxiliary exception to return the agent output directly through the output handler.
-
-    When the output handler returns an output, this exception is raised with the output.
-    It is then handled by the agent, who should gracefully return the output to the user.
-    """
-
-    def __init__(self, output: Any):
-        self.output = output
-
-
 class MotleyAgentParent(MotleyAgentAbstractParent, ABC):
     """Parent class for all motleycrew agents.
 
@@ -61,9 +38,8 @@ def __init__(
         name: str | None = None,
         agent_factory: MotleyAgentFactory | None = None,
         tools: Sequence[MotleySupportedTool] | None = None,
-        output_handler: MotleySupportedTool | None = None,
+        force_output_handler: bool = False,
         verbose: bool = False,
-        agent_name: str | None = None,
     ):
         """
         Args:
@@ -85,7 +61,8 @@ def __init__(
 
                 See :class:`motleycrew.common.types.MotleyAgentFactory` for more details.
             tools: Tools to add to the agent.
-            output_handler: Output handler for the agent.
+            force_output_handler: Whether to force the agent to return through an output handler.
+                If True, at least one tool must have return_direct set to True.
             verbose: Whether to log verbose output.
         """
         self.name = name or description
@@ -93,7 +70,7 @@ def __init__(
         self.prompt_prefix = prompt_prefix
         self.agent_factory = agent_factory
         self.tools: dict[str, MotleyTool] = {}
-        self.output_handler = output_handler
+        self.force_output_handler = force_output_handler
         self.verbose = verbose
         self.crew: MotleyCrew | None = None
 
@@ -102,6 +79,8 @@ def __init__(
         if tools:
             self.add_tools(tools)
 
+        self._check_force_output_handler()
+
     def __repr__(self):
         return f"{self.__class__.__name__}(name={self.name})"
 
@@ -169,55 +148,16 @@ def is_materialized(self):
         """Whether the agent is materialized."""
         return self._agent is not None
 
-    def _prepare_output_handler(self) -> Optional[MotleyTool]:
-        """
-        Wraps the output handler in one more tool layer,
-        adding the necessary stuff for returning direct output through output handler.
-        """
-        if not self.output_handler:
-            return None
-
-        # TODO: make this neater by constructing MotleyOutputHandler from tools?
-        if isinstance(self.output_handler, MotleyOutputHandler):
-            exceptions_to_handle = self.output_handler.exceptions_to_handle
-            description = self.output_handler.description
-            max_iterations = self.output_handler.max_iterations
-
-        else:
-            exceptions_to_handle = (InvalidOutput,)
-            description = self.output_handler.description or f"Output handler"
-            assert isinstance(description, str)
-            description += "\n ONLY RETURN THE FINAL RESULT USING THIS TOOL!"
-            max_iterations = Defaults.DEFAULT_OUTPUT_HANDLER_MAX_ITERATIONS
-
-        iteration = 0
-
-        def handle_agent_output(*args, **kwargs):
-            assert self.output_handler
-            nonlocal iteration
-
-            try:
-                iteration += 1
-                output = self.output_handler._run(*args, **kwargs, config=RunnableConfig())
-            except exceptions_to_handle as exc:
-                if iteration <= max_iterations:
-                    return f"{exc.__class__.__name__}: {str(exc)}"
-                raise OutputHandlerMaxIterationsExceeded(
-                    last_call_args=args,
-                    last_call_kwargs=kwargs,
-                    last_exception=exc,
-                )
-
-            raise DirectOutput(output)
-
-        prepared_output_handler = StructuredTool(
-            name=self.output_handler.name,
-            description=description,
-            func=handle_agent_output,
-            args_schema=self.output_handler.args_schema,
-        )
-
-        return MotleyTool.from_langchain_tool(prepared_output_handler)
+    def get_output_handlers(self):
+        """Get all output handlers (tools with return_direct set to True)."""
+        return [tool for tool in self.tools.values() if tool.return_direct]
+
+    def _check_force_output_handler(self):
+        """Raise ValueError if force_output_handler is True but no tool has return_direct set to True."""
+        if self.force_output_handler and not self.get_output_handlers():
+            raise ValueError(
+                "force_output_handler is set to True, but no tools have return_direct set to True."
+            )
 
     def materialize(self):
         """Materialize the agent by creating the agent instance using the agent factory.
@@ -229,18 +169,7 @@ def materialize(self):
             return
         assert self.agent_factory, "Cannot materialize agent without a factory provided"
 
-        output_handler = self._prepare_output_handler()
-
-        if inspect.signature(self.agent_factory).parameters.get("output_handler"):
-            logger.info("Agent factory accepts output handler, passing it")
-            self._agent = self.agent_factory(tools=self.tools, output_handler=output_handler)
-        elif output_handler:
-            logger.info("Agent factory does not accept output handler, passing it as a tool")
-            tools_with_output_handler = self.tools.copy()
-            tools_with_output_handler[output_handler.name] = output_handler
-            self._agent = self.agent_factory(tools=tools_with_output_handler)
-        else:
-            self._agent = self.agent_factory(tools=self.tools)
+        self._agent = self.agent_factory(tools=self.tools)
 
     def prepare_for_invocation(self, input: dict, prompt_as_messages: bool = False) -> str:
         """Prepare the agent for invocation by materializing it and composing the prompt.
@@ -257,9 +186,9 @@ def prepare_for_invocation(self, input: dict, prompt_as_messages: bool = False)
         """
         self.materialize()
 
-        if isinstance(self.output_handler, MotleyOutputHandler):
-            self.output_handler.agent = self
-            self.output_handler.agent_input = input
+        for tool in self.tools.values():
+            tool.agent = self
+            tool.agent_input = input
 
         prompt = self.compose_prompt(input, input.get("prompt"), as_messages=prompt_as_messages)
         return prompt
@@ -276,36 +205,21 @@ def add_tools(self, tools: Sequence[MotleySupportedTool]):
         for t in tools:
             motley_tool = MotleyTool.from_supported_tool(t)
             if motley_tool.name not in self.tools:
+                if motley_tool.agent is not None:
+                    raise ValueError(
+                        f"Tool {motley_tool.name} already has an agent assigned to it, please use unique tool instances."
+                    )
                 self.tools[motley_tool.name] = motley_tool
 
-    def as_tool(self) -> MotleyTool:
-        """Convert the agent to a tool to be used by other agents via delegation.
+    def call_as_tool(self, *args, **kwargs):
+        """Method that is called when the agent is used as a tool by another agent."""
 
-        Returns:
-            The tool representation of the agent.
-        """
-
-        if not self.description:
-            raise ValueError("Agent must have a description to be called as a tool")
-
-        def call_agent(*args, **kwargs):
-            # TODO: this thing is hacky, we should have a better way to pass structured input
-            if args:
-                return self.invoke({"prompt": args[0]})
-            if len(kwargs) == 1:
-                return self.invoke({"prompt": list(kwargs.values())[0]})
-            return self.invoke(kwargs)
-
-        # To be specialized if we expect structured input
-        return MotleyTool.from_langchain_tool(
-            Tool(
-                name=self.name.replace(
-                    " ", "_"
-                ).lower(),  # OpenAI doesn't accept spaces in function names
-                description=self.description,
-                func=call_agent,
-            )
-        )
+        # TODO: this thing is hacky, we should have a better way to pass structured input
+        if args:
+            return self.invoke({"prompt": args[0]})
+        if len(kwargs) == 1:
+            return self.invoke({"prompt": list(kwargs.values())[0]})
+        return self.invoke(kwargs)
 
     @abstractmethod
     def invoke(
diff --git a/motleycrew/common/__init__.py b/motleycrew/common/__init__.py
index b1d9f4d8..33812d11 100644
--- a/motleycrew/common/__init__.py
+++ b/motleycrew/common/__init__.py
@@ -1,5 +1,6 @@
 """Common utilities, types, enums, exceptions, loggers etc."""
 
+from .aux_prompts import AuxPrompts
 from .defaults import Defaults
 from .enums import AsyncBackend
 from .enums import GraphStoreType
@@ -13,6 +14,7 @@
 from .types import MotleySupportedTool
 
 __all__ = [
+    "AuxPrompts",
     "Defaults",
     "MotleySupportedTool",
     "MotleyAgentFactory",
diff --git a/motleycrew/common/aux_prompts.py b/motleycrew/common/aux_prompts.py
new file mode 100644
index 00000000..7c638661
--- /dev/null
+++ b/motleycrew/common/aux_prompts.py
@@ -0,0 +1,44 @@
+from typing import List, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from motleycrew.tools import MotleyTool  # type-only import; MotleyTool is exported by motleycrew.tools
+
+
+class AuxPrompts:
+    """Singleton containing miscellaneous auxiliary prompts.
+    In rare cases where you need to customize these, you can modify them before instantiating your agents.
+    """
+
+    DIRECT_OUTPUT_ERROR_WITH_SINGLE_OUTPUT_HANDLER = (
+        "You must call the `{output_handler}` tool to return the final output."
+    )
+    DIRECT_OUTPUT_ERROR_WITH_MULTIPLE_OUTPUT_HANDLERS = (
+        "You must call one of the following tools to return the final output: {output_handlers}"
+    )
+    AMBIGUOUS_OUTPUT_HANDLER_CALL_ERROR = (
+        "You attempted to return output by calling `{current_output_handler}` tool, "
+        "but included other tool calls in your response. "
+        "You must only call one of the following tools to return: {output_handlers}."
+    )
+
+    @staticmethod
+    def get_direct_output_error_message(output_handlers: List["MotleyTool"]) -> str:
+        if len(output_handlers) == 1:
+            message = AuxPrompts.DIRECT_OUTPUT_ERROR_WITH_SINGLE_OUTPUT_HANDLER.format(
+                output_handler=output_handlers[0].name
+            )
+        else:
+            message = AuxPrompts.DIRECT_OUTPUT_ERROR_WITH_MULTIPLE_OUTPUT_HANDLERS.format(
+                output_handlers=", ".join([f"`{handler.name}`" for handler in output_handlers])
+            )
+
+        return message
+
+    @staticmethod
+    def get_ambiguous_output_handler_call_error_message(
+        current_output_handler: "MotleyTool", output_handlers: List["MotleyTool"]
+    ) -> str:
+        return AuxPrompts.AMBIGUOUS_OUTPUT_HANDLER_CALL_ERROR.format(
+            current_output_handler=current_output_handler.name,
+            output_handlers=", ".join([f"`{handler.name}`" for handler in output_handlers]),
+        )
diff --git a/motleycrew/common/defaults.py b/motleycrew/common/defaults.py
index 3618dd23..0e06cb97 100644
--- a/motleycrew/common/defaults.py
+++ b/motleycrew/common/defaults.py
@@ -19,7 +19,7 @@ class Defaults:
         "lunary": "pip install lunary",
         "aider": "pip install aider-chat",
         "pglast": "pip install pglast",
-        "crewai_tools": "pip install 'crewai[tools]'"
+        "crewai_tools": "pip install 'crewai[tools]'",
     }
 
     DEFAULT_NUM_THREADS = 4
diff --git a/motleycrew/common/exceptions.py b/motleycrew/common/exceptions.py
index 5d7dd432..f00d8af0 100644
--- a/motleycrew/common/exceptions.py
+++ b/motleycrew/common/exceptions.py
@@ -122,28 +122,3 @@ class InvalidOutput(Exception):
     """Raised in output handlers when an agent's output is not accepted."""
 
     pass
-
-
-class OutputHandlerMaxIterationsExceeded(BaseException):
-    """Raised when the output handler iterations limit is exceeded."""
-
-    def __init__(
-        self,
-        last_call_args: tuple,
-        last_call_kwargs: Dict[str, Any],
-        last_exception: Exception,
-    ):
-        """
-        Args:
-            last_call_args: Positional arguments with which the output handler was last called.
-            last_call_kwargs: Keyword arguments with which the output handler was last called.
-            last_exception: Exception that occurred during the last output handler iteration.
-        """
-        self.last_call_args = last_call_args
-        self.last_call_kwargs = last_call_kwargs
-        self.last_exception = last_exception
-
-    def __str__(self):
-        return "Maximum number of output handler iterations exceeded. Last exception: {}".format(
-            self.last_exception
-        )
diff --git a/motleycrew/common/types.py b/motleycrew/common/types.py
index 73e8dbe4..55a0c930 100644
--- a/motleycrew/common/types.py
+++ b/motleycrew/common/types.py
@@ -47,5 +47,4 @@ class MotleyAgentFactory(Protocol[AgentType]):
     def __call__(
         self,
         tools: dict[str, MotleyTool],
-        output_handler: Optional[MotleyTool] = None,
     ) -> AgentType: ...
diff --git a/motleycrew/tools/__init__.py b/motleycrew/tools/__init__.py
index ecab3a6c..b30eb206 100644
--- a/motleycrew/tools/__init__.py
+++ b/motleycrew/tools/__init__.py
@@ -1,6 +1,7 @@
 """MotleyTool class and tools library."""
 
 from motleycrew.tools.tool import MotleyTool
+from motleycrew.tools.tool import DirectOutput
 
 from .autogen_chat_tool import AutoGenChatTool
 from .code.postgresql_linter import PostgreSQLLinterTool
diff --git a/motleycrew/tools/autogen_chat_tool.py b/motleycrew/tools/autogen_chat_tool.py
index a551b12b..87af92a8 100644
--- a/motleycrew/tools/autogen_chat_tool.py
+++ b/motleycrew/tools/autogen_chat_tool.py
@@ -1,4 +1,4 @@
-from typing import Optional, Type, Callable, Any
+from typing import Optional, Type, Callable, Any, List
 
 from langchain_core.prompts import PromptTemplate
 from langchain_core.prompts.base import BasePromptTemplate
@@ -33,6 +33,8 @@ def __init__(
         recipient: ConversableAgent,
         result_extractor: Callable[[ChatResult], Any] = get_last_message,
         input_schema: Optional[Type[BaseModel]] = None,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
     ):
         """
         Args:
@@ -58,7 +60,11 @@ def __init__(
             result_extractor=result_extractor,
             input_schema=input_schema,
         )
-        super().__init__(langchain_tool)
+        super().__init__(
+            tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
 
 def create_autogen_chat_tool(
diff --git a/motleycrew/tools/code/aider_tool.py b/motleycrew/tools/code/aider_tool.py
index 9634cc26..6682669e 100644
--- a/motleycrew/tools/code/aider_tool.py
+++ b/motleycrew/tools/code/aider_tool.py
@@ -1,3 +1,5 @@
+from typing import Optional, List
+
 from motleycrew.common.utils import ensure_module_is_installed
 
 try:
@@ -17,7 +19,13 @@
 class AiderTool(MotleyTool):
     """Tool for code generation using Aider."""
 
-    def __init__(self, model: str = None, **kwargs):
+    def __init__(
+        self,
+        model: str = None,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+        **kwargs
+    ):
         ensure_module_is_installed("aider")
 
         model = model or Defaults.DEFAULT_LLM_NAME
@@ -25,7 +33,11 @@ def __init__(self, model: str = None, **kwargs):
         coder = Coder.create(main_model=llm_model, **kwargs)
 
         langchain_tool = create_aider_tool(coder)
-        super(AiderTool, self).__init__(langchain_tool)
+        super().__init__(
+            tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
 
 class AiderToolInput(BaseModel):
diff --git a/motleycrew/tools/code/postgresql_linter.py b/motleycrew/tools/code/postgresql_linter.py
index 4ddfc88d..81e39b5e 100644
--- a/motleycrew/tools/code/postgresql_linter.py
+++ b/motleycrew/tools/code/postgresql_linter.py
@@ -1,3 +1,5 @@
+from typing import Optional, List
+
 from langchain_core.pydantic_v1 import BaseModel, Field
 from langchain_core.tools import Tool
 
@@ -16,11 +18,19 @@
 class PostgreSQLLinterTool(MotleyTool):
     """PostgreSQL code verification tool."""
 
-    def __init__(self):
+    def __init__(
+        self,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+    ):
         ensure_module_is_installed("pglast")
 
         langchain_tool = create_pgsql_linter_tool()
-        super().__init__(langchain_tool)
+        super().__init__(
+            tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
 
 class PostgreSQLLinterInput(BaseModel):
diff --git a/motleycrew/tools/code/python_linter.py b/motleycrew/tools/code/python_linter.py
index 53851982..a48066a4 100644
--- a/motleycrew/tools/code/python_linter.py
+++ b/motleycrew/tools/code/python_linter.py
@@ -1,5 +1,5 @@
 import os
-from typing import Union
+from typing import Union, Optional, List
 
 from langchain_core.pydantic_v1 import BaseModel, Field
 from langchain_core.tools import StructuredTool
@@ -16,11 +16,19 @@
 class PythonLinterTool(MotleyTool):
     """Python code verification tool"""
 
-    def __init__(self):
+    def __init__(
+        self,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+    ):
         ensure_module_is_installed("aider")
 
         langchain_tool = create_python_linter_tool()
-        super().__init__(langchain_tool)
+        super().__init__(
+            tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
 
 class PythonLinterInput(BaseModel):
diff --git a/motleycrew/tools/html_render_tool.py b/motleycrew/tools/html_render_tool.py
index 51da9f5e..ca70f116 100644
--- a/motleycrew/tools/html_render_tool.py
+++ b/motleycrew/tools/html_render_tool.py
@@ -1,6 +1,6 @@
 from datetime import datetime
 from pathlib import Path
-from typing import Tuple, Optional
+from typing import Tuple, Optional, List
 
 from motleycrew.common.utils import ensure_module_is_installed
 
@@ -105,6 +105,8 @@ def __init__(
         chromedriver_path: str | None = None,
         headless: bool = True,
         window_size: Optional[Tuple[int, int]] = None,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
     ):
         """
         Args:
@@ -118,7 +120,11 @@ def __init__(
             window_size=window_size,
         )
         langchain_tool = create_render_tool(renderer)
-        super(HTMLRenderTool, self).__init__(langchain_tool)
+        super().__init__(
+            tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
 
 class HTMLRenderToolInput(BaseModel):
diff --git a/motleycrew/tools/image/dall_e.py b/motleycrew/tools/image/dall_e.py
index 7cd4f41d..9b6cbda3 100644
--- a/motleycrew/tools/image/dall_e.py
+++ b/motleycrew/tools/image/dall_e.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, List
 
 from langchain.agents import Tool
 from langchain.prompts import PromptTemplate
@@ -39,6 +39,8 @@ def __init__(
         quality: str = "standard",
         size: str = "1024x1024",
         style: Optional[str] = None,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
     ):
         """
         Args:
@@ -59,7 +61,11 @@ def __init__(
             size=size,
             style=style,
         )
-        super().__init__(langchain_tool)
+        super().__init__(
+            tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
 
 class DallEToolInput(BaseModel):
diff --git a/motleycrew/tools/image/replicate_tool.py b/motleycrew/tools/image/replicate_tool.py
index 81f9fc46..de0c4b58 100644
--- a/motleycrew/tools/image/replicate_tool.py
+++ b/motleycrew/tools/image/replicate_tool.py
@@ -51,7 +51,14 @@ class ImageToolInput(BaseModel):
 
 
 class ReplicateImageGeneratorTool(MotleyTool):
-    def __init__(self, model_name: str, images_directory: Optional[str] = None, **kwargs):
+    def __init__(
+        self,
+        model_name: str,
+        images_directory: Optional[str] = None,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+        **kwargs,
+    ):
         """
         A tool for generating images from text descriptions using the Replicate API.
         :param model_name: one of "sdxl", "flux-pro", "flux-dev", "flux-schnell", or a full model name supported by replicate
@@ -64,7 +71,11 @@ def __init__(self, model_name: str, images_directory: Optional[str] = None, **kw
             model_name=model_name, images_directory=images_directory, **kwargs
         )
 
-        super().__init__(langchain_tool)
+        super().__init__(
+            tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
 
 def create_replicate_image_generator_langchain_tool(
diff --git a/motleycrew/tools/llm_tool.py b/motleycrew/tools/llm_tool.py
index b208c0e7..8cbf2d17 100644
--- a/motleycrew/tools/llm_tool.py
+++ b/motleycrew/tools/llm_tool.py
@@ -1,4 +1,4 @@
-from typing import Optional, Type
+from typing import Optional, Type, List
 
 from langchain_core.language_models import BaseLanguageModel
 from langchain_core.prompts import PromptTemplate
@@ -21,6 +21,8 @@ def __init__(
         prompt: str | BasePromptTemplate,
         llm: Optional[BaseLanguageModel] = None,
         input_schema: Optional[Type[BaseModel]] = None,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
     ):
         """
         Args:
@@ -40,7 +42,11 @@ def __init__(
             llm=llm,
             input_schema=input_schema,
         )
-        super().__init__(langchain_tool)
+        super().__init__(
+            tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
 
 def create_llm_langchain_tool(
diff --git a/motleycrew/tools/mermaid_evaluator_tool.py b/motleycrew/tools/mermaid_evaluator_tool.py
index 3e8540bd..b3e6e431 100644
--- a/motleycrew/tools/mermaid_evaluator_tool.py
+++ b/motleycrew/tools/mermaid_evaluator_tool.py
@@ -4,7 +4,7 @@
 import os.path
 import subprocess
 import tempfile
-from typing import Optional
+from typing import Optional, List
 
 from langchain_core.pydantic_v1 import create_model, Field
 from langchain_core.tools import Tool
@@ -13,7 +13,12 @@
 
 
 class MermaidEvaluatorTool(MotleyTool):
-    def __init__(self, format: Optional[str] = "svg"):
+    def __init__(
+        self,
+        format: Optional[str] = "svg",
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+    ):
         def eval_mermaid_partial(mermaid_code: str):
             return eval_mermaid(mermaid_code, format)
 
@@ -26,7 +31,11 @@ def eval_mermaid_partial(mermaid_code: str):
                 mermaid_code=(str, Field(description="The Mermaid code to evaluate.")),
             ),
         )
-        super().__init__(langchain_tool)
+        super().__init__(
+            tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
 
 def eval_mermaid(mermaid_code: str, format: Optional[str] = "svg") -> io.BytesIO:
@@ -81,7 +90,7 @@ def eval_mermaid(mermaid_code: str, format: Optional[str] = "svg") -> io.BytesIO
         C --> E[End]
         D --> E
         E[End] --> F[End]
-        
+
         [[
     """
 
diff --git a/motleycrew/tools/python_repl.py b/motleycrew/tools/python_repl.py
index eee7b432..71b648aa 100644
--- a/motleycrew/tools/python_repl.py
+++ b/motleycrew/tools/python_repl.py
@@ -1,6 +1,7 @@
 from langchain.agents import Tool
 from langchain_experimental.utilities import PythonREPL
 from langchain_core.pydantic_v1 import BaseModel, Field
+from typing import Optional, List
 
 from .tool import MotleyTool
 
@@ -12,9 +13,17 @@ class PythonREPLTool(MotleyTool):
     Because of this, any data you want to be in the output should be printed using `print(...)`.
     """
 
-    def __init__(self):
+    def __init__(
+        self,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+    ):
         langchain_tool = create_repl_tool()
-        super().__init__(langchain_tool)
+        super().__init__(
+            tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
 
 class REPLToolInput(BaseModel):
diff --git a/motleycrew/tools/simple_retriever_tool.py b/motleycrew/tools/simple_retriever_tool.py
index cba1be25..b8db5659 100644
--- a/motleycrew/tools/simple_retriever_tool.py
+++ b/motleycrew/tools/simple_retriever_tool.py
@@ -1,11 +1,12 @@
-import os.path
+import os
+from typing import List, Optional
 
 from langchain_core.pydantic_v1 import BaseModel, Field
 from langchain_core.tools import StructuredTool
 from llama_index.core import (
-    VectorStoreIndex,
     SimpleDirectoryReader,
     StorageContext,
+    VectorStoreIndex,
     load_index_from_storage,
 )
 from llama_index.core.node_parser import SentenceSplitter
@@ -18,7 +19,14 @@
 class SimpleRetrieverTool(MotleyTool):
     """A simple retriever tool that retrieves relevant documents from a local knowledge base."""
 
-    def __init__(self, data_dir: str, persist_dir: str, return_strings_only: bool = False):
+    def __init__(
+        self,
+        data_dir: str,
+        persist_dir: str,
+        return_strings_only: bool = False,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+    ):
         """
         Args:
             data_dir: Path to the directory containing the documents.
@@ -28,7 +36,9 @@ def __init__(self, data_dir: str, persist_dir: str, return_strings_only: bool =
         tool = make_retriever_langchain_tool(
             data_dir, persist_dir, return_strings_only=return_strings_only
         )
-        super().__init__(tool)
+        super().__init__(
+            tool=tool, return_direct=return_direct, exceptions_to_reflect=exceptions_to_reflect
+        )
 
 
 class RetrieverToolInput(BaseModel, arbitrary_types_allowed=True):
diff --git a/motleycrew/tools/tool.py b/motleycrew/tools/tool.py
index 7f78d657..a43b72b0 100644
--- a/motleycrew/tools/tool.py
+++ b/motleycrew/tools/tool.py
@@ -1,9 +1,12 @@
 import functools
 import inspect
-from typing import Callable, Union, Optional, Dict, Any
+from typing import Callable, Union, Optional, Dict, Any, List
 
-from langchain.tools import BaseTool
+from langchain.tools import BaseTool, Tool, StructuredTool
 from langchain_core.runnables import Runnable, RunnableConfig
+from langchain_core.pydantic_v1 import BaseModel
+
+from motleycrew.common.exceptions import InvalidOutput
 
 try:
     from llama_index.core.tools import BaseTool as LlamaIndex__BaseTool
@@ -19,24 +22,66 @@
     CrewAI__BaseTool = None
     Crewai__Tool = None
 
+from motleycrew.common import logger
 from motleycrew.common.utils import ensure_module_is_installed
 from motleycrew.common.types import MotleySupportedTool
 from motleycrew.agents.abstract_parent import MotleyAgentAbstractParent
 
 
+class DirectOutput(BaseException):
+    """Auxiliary exception to return a tool's output directly.
+
+    When a tool with `return_direct=True` produces an output, this exception is raised with it.
+    It is then handled by the agent, who should gracefully return the output to the user.
+    """
+
+    def __init__(self, output: Any):
+        self.output = output  # the tool result carried to whoever catches this exception
+
+
 class MotleyTool(Runnable):
     """Base tool class compatible with MotleyAgents.
 
     It is a wrapper for Langchain BaseTool, containing all necessary adapters and converters.
     """
 
-    def __init__(self, tool: BaseTool):
+    def __init__(
+        self,
+        tool: Optional[BaseTool] = None,
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        args_schema: Optional[BaseModel] = None,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+    ):
         """Initialize the MotleyTool.
 
         Args:
+            name: Name of the tool (required if tool is None).
+            description: Description of the tool (required if tool is None).
+            args_schema: Schema of the tool arguments (optional; used only if tool is None).
             tool: Langchain BaseTool to wrap.
+            return_direct: If True, the tool's output will be returned directly to the user.
+            exceptions_to_reflect: Exceptions reflected to the agent (InvalidOutput is always added).
         """
-        self.tool = tool
+        if tool is None:
+            if name is None or description is None:
+                raise ValueError("`name` and `description` are required when `tool` is None")
+            self.tool = self._tool_from_run_method(
+                name=name, description=description, args_schema=args_schema
+            )
+        else:
+            self.tool = tool
+
+        self.return_direct = return_direct
+        self.exceptions_to_reflect = list(exceptions_to_reflect or [])  # copy, never alias
+        if InvalidOutput not in self.exceptions_to_reflect:
+            self.exceptions_to_reflect.insert(0, InvalidOutput)  # InvalidOutput is always reflected
+
+        self._patch_tool_run()
+
+        self.agent: Optional[MotleyAgentAbstractParent] = None
+        self.agent_input: Optional[dict] = None
 
     def __repr__(self):
         return f"MotleyTool(name={self.name})"
@@ -59,6 +104,27 @@ def args_schema(self):
         """Schema of the tool arguments."""
         return self.tool.args_schema
 
+    def _patch_tool_run(self):
+        """Patch the tool run method to reflect exceptions and honor `return_direct`.
+
+        Exceptions listed in `exceptions_to_reflect` are returned to the agent as a
+        "ClassName: message" string; a direct result is raised as `DirectOutput`.
+        """
+        original_run = self.tool._run
+
+        @functools.wraps(original_run)
+        def patched_run(*args, **kwargs):
+            try:
+                result = original_run(*args, **kwargs)
+            except tuple(self.exceptions_to_reflect or []) as e:
+                return f"{e.__class__.__name__}: {e}"
+            if self.return_direct:  # outside the try: DirectOutput must never be reflected
+                raise DirectOutput(result)
+            return result
+
+        patched_run.__signature__ = inspect.signature(original_run)
+        object.__setattr__(self.tool, "_run", patched_run)
+
     def invoke(
         self,
         input: Union[str, Dict],
@@ -67,43 +133,91 @@ def invoke(
     ) -> Any:
         return self.tool.invoke(input=input, config=config, **kwargs)
 
-    def _run(self, *args: tuple, **kwargs: Dict[str, Any]) -> Any:
-        return self.tool._run(*args, **kwargs)
+    def run(self, *args, **kwargs):
+        """Tool logic; subclasses built without a wrapped `tool` must override this."""
+        # Fail loudly: a silent `pass` would make the generated tool return None.
+        raise NotImplementedError("MotleyTool subclasses must override `run`")
+
+    def _tool_from_run_method(self, name: str, description: str, args_schema: BaseModel):
+        """Build the Langchain StructuredTool that exposes `self.run`."""
+        return StructuredTool.from_function(
+            name=name, description=description, args_schema=args_schema, func=self.run
+        )
 
     @staticmethod
-    def from_langchain_tool(langchain_tool: BaseTool) -> "MotleyTool":
+    def from_langchain_tool(
+        langchain_tool: BaseTool,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+    ) -> "MotleyTool":
         """Create a MotleyTool from a Langchain tool.
 
         Args:
             langchain_tool: Langchain tool to convert.
+            return_direct: If True, the tool's output will be returned directly to the user.
+            exceptions_to_reflect: List of exceptions to reflect back to the agent.
 
         Returns:
             MotleyTool instance.
         """
+        if langchain_tool.return_direct:
+            logger.warning(
+                "Please set `return_direct` in MotleyTool instead of the tool you're converting. "
+                "Automatic conversion will be removed in motleycrew v1."
+            )
+            return_direct = True
+            langchain_tool.return_direct = False
 
-        return MotleyTool(tool=langchain_tool)
+        return MotleyTool(
+            tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
     @staticmethod
-    def from_llama_index_tool(llama_index_tool: LlamaIndex__BaseTool) -> "MotleyTool":
+    def from_llama_index_tool(
+        llama_index_tool: LlamaIndex__BaseTool,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+    ) -> "MotleyTool":
         """Create a MotleyTool from a LlamaIndex tool.
 
         Args:
             llama_index_tool: LlamaIndex tool to convert.
+            return_direct: If True, the tool's output will be returned directly to the user.
+            exceptions_to_reflect: List of exceptions to reflect back to the agent.
 
         Returns:
             MotleyTool instance.
         """
-
         ensure_module_is_installed("llama_index")
+        if llama_index_tool.metadata and llama_index_tool.metadata.return_direct:
+            logger.warning(
+                "Please set `return_direct` in MotleyTool instead of the tool you're converting. "
+                "Automatic conversion will be removed in motleycrew v1."
+            )
+            return_direct = True
+            llama_index_tool.metadata.return_direct = False
+
         langchain_tool = llama_index_tool.to_langchain_tool()
-        return MotleyTool.from_langchain_tool(langchain_tool=langchain_tool)
+        return MotleyTool.from_langchain_tool(
+            langchain_tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
 
     @staticmethod
-    def from_crewai_tool(crewai_tool: CrewAI__BaseTool) -> "MotleyTool":
+    def from_crewai_tool(
+        crewai_tool: CrewAI__BaseTool,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+    ) -> "MotleyTool":
         """Create a MotleyTool from a CrewAI tool.
 
         Args:
             crewai_tool: CrewAI tool to convert.
+            return_direct: If True, the tool's output will be returned directly to the user.
+            exceptions_to_reflect: List of exceptions to reflect back to the agent.
 
         Returns:
             MotleyTool instance.
@@ -115,29 +229,83 @@ def from_crewai_tool(crewai_tool: CrewAI__BaseTool) -> "MotleyTool":
         for old_symbol, new_symbol in [(" ", "_"), ("'", "")]:
             langchain_tool.name = langchain_tool.name.replace(old_symbol, new_symbol)
 
-        return MotleyTool.from_langchain_tool(langchain_tool=langchain_tool)
+        return MotleyTool.from_langchain_tool(
+            langchain_tool=langchain_tool,
+            return_direct=return_direct,
+            exceptions_to_reflect=exceptions_to_reflect,
+        )
+
+    @staticmethod
+    def from_motley_agent(
+        agent: MotleyAgentAbstractParent,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+    ) -> "MotleyTool":
+        """Wrap a motleycrew agent as a tool so other agents can delegate to it.
+
+        Args:
+            agent: Agent to expose; it must have a non-empty name and description.
+            return_direct: If True, the tool's output will be returned directly to the user.
+            exceptions_to_reflect: List of exceptions to reflect back to the agent.
+
+        Returns:
+            The tool representation of the agent.
+
+        Raises:
+            ValueError: If the agent lacks a name or a description.
+        """
+        if not (getattr(agent, "name", None) and getattr(agent, "description", None)):
+            raise ValueError("Agent must have a name and description to be called as a tool")
+        tool_name = agent.name.replace(" ", "_").lower()  # no spaces: OpenAI rejects them
+        # To be specialized if we expect structured input
+        lc_tool = Tool(name=tool_name, description=agent.description, func=agent.call_as_tool)
+        return MotleyTool.from_langchain_tool(
+            lc_tool, return_direct=return_direct, exceptions_to_reflect=exceptions_to_reflect
+        )
 
     @staticmethod
-    def from_supported_tool(tool: MotleySupportedTool) -> "MotleyTool":
+    def from_supported_tool(
+        tool: MotleySupportedTool,
+        return_direct: bool = False,
+        exceptions_to_reflect: Optional[List[Exception]] = None,
+    ) -> "MotleyTool":
         """Create a MotleyTool from any supported tool type.
 
         Args:
             tool: Tool of any supported type.
                 Currently, we support tools from Langchain, LlamaIndex,
                 as well as motleycrew agents.
+            return_direct: If True, the tool's output will be returned directly to the user.
+            exceptions_to_reflect: List of exceptions to reflect back to the agent.
         Returns:
             MotleyTool instance.
         """
         if isinstance(tool, MotleyTool):
             return tool
         elif isinstance(tool, BaseTool):
-            return MotleyTool.from_langchain_tool(tool)
+            return MotleyTool.from_langchain_tool(
+                tool,
+                return_direct=return_direct,
+                exceptions_to_reflect=exceptions_to_reflect,
+            )
         elif isinstance(tool, LlamaIndex__BaseTool):
-            return MotleyTool.from_llama_index_tool(tool)
+            return MotleyTool.from_llama_index_tool(
+                tool,
+                return_direct=return_direct,
+                exceptions_to_reflect=exceptions_to_reflect,
+            )
         elif isinstance(tool, MotleyAgentAbstractParent):
-            return tool.as_tool()
+            return MotleyTool.from_motley_agent(
+                tool,
+                return_direct=return_direct,
+                exceptions_to_reflect=exceptions_to_reflect,
+            )
         elif CrewAI__BaseTool is not None and isinstance(tool, CrewAI__BaseTool):
-            return MotleyTool.from_crewai_tool(tool)
+            return MotleyTool.from_crewai_tool(
+                tool,
+                return_direct=return_direct,
+                exceptions_to_reflect=exceptions_to_reflect,
+            )
         else:
             raise Exception(
                 f"Tool type `{type(tool)}` is not supported, please convert to MotleyTool first"
diff --git a/pyproject.toml b/pyproject.toml
index bd2e5c85..221f43c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "motleycrew"
-version = "0.1.15"
+version = "0.2.0"
 description = "A lightweight agent interaction framework."
 authors = ["MotleyCrew <github@motleycrew.ai>"]
 readme = "README.md"
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/014f08100321035f9fc4983f8fb53b83ed047e91d85042aeeb27435860717e4c.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/014f08100321035f9fc4983f8fb53b83ed047e91d85042aeeb27435860717e4c.pkl
deleted file mode 100644
index f4e58225..00000000
Binary files a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/014f08100321035f9fc4983f8fb53b83ed047e91d85042aeeb27435860717e4c.pkl and /dev/null differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/0712062172491cb03ec930fd2382707ef39602e86cb7a0c272154790d7ceae55.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/0712062172491cb03ec930fd2382707ef39602e86cb7a0c272154790d7ceae55.pkl
new file mode 100644
index 00000000..37a504be
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/0712062172491cb03ec930fd2382707ef39602e86cb7a0c272154790d7ceae55.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/136484230e3ced440251c5a7615a8c062eeb3bbc2757337f37ed8c59eb77ae4d.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/136484230e3ced440251c5a7615a8c062eeb3bbc2757337f37ed8c59eb77ae4d.pkl
new file mode 100644
index 00000000..34cfe7a3
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/136484230e3ced440251c5a7615a8c062eeb3bbc2757337f37ed8c59eb77ae4d.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/1df514460900b4dd5e3c7a12acbb867b0f32ddc04df6aaa9b1cda5a91016fc2a.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/1df514460900b4dd5e3c7a12acbb867b0f32ddc04df6aaa9b1cda5a91016fc2a.pkl
new file mode 100644
index 00000000..ce62e3b2
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/1df514460900b4dd5e3c7a12acbb867b0f32ddc04df6aaa9b1cda5a91016fc2a.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/1f5194e558951f9f97da80f424ed48d2ab893d86cb9694475351af10e3ff67ea.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/1f5194e558951f9f97da80f424ed48d2ab893d86cb9694475351af10e3ff67ea.pkl
new file mode 100644
index 00000000..bf17c415
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/1f5194e558951f9f97da80f424ed48d2ab893d86cb9694475351af10e3ff67ea.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/2a0096f2f607c8a8508af610312b17e44958aa7b17f2b23cb8da8d9f2415c430.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/2a0096f2f607c8a8508af610312b17e44958aa7b17f2b23cb8da8d9f2415c430.pkl
new file mode 100644
index 00000000..be17c3b4
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/2a0096f2f607c8a8508af610312b17e44958aa7b17f2b23cb8da8d9f2415c430.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/2f2c6603c6de08874685f1f2ccd958b57f46287a9c2899da4ae80c6c625f19c3.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/2f2c6603c6de08874685f1f2ccd958b57f46287a9c2899da4ae80c6c625f19c3.pkl
new file mode 100644
index 00000000..2f050e28
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/2f2c6603c6de08874685f1f2ccd958b57f46287a9c2899da4ae80c6c625f19c3.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/3bd792002401b967b5bbfaa59e8227524fa3014824deafd5a24f435d4137f8f2.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/3bd792002401b967b5bbfaa59e8227524fa3014824deafd5a24f435d4137f8f2.pkl
new file mode 100644
index 00000000..55c54068
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/3bd792002401b967b5bbfaa59e8227524fa3014824deafd5a24f435d4137f8f2.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/3bdd7a9b20a356a026e58cf86a8a5a12dfe5fa10c23ec4d620f46df59e1d47a4.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/3bdd7a9b20a356a026e58cf86a8a5a12dfe5fa10c23ec4d620f46df59e1d47a4.pkl
new file mode 100644
index 00000000..d38bb68a
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/3bdd7a9b20a356a026e58cf86a8a5a12dfe5fa10c23ec4d620f46df59e1d47a4.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/5248701366e0e9f110aa0778ea9a725b338fd699f0f00c577b5dc02a2ad0227b.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/5248701366e0e9f110aa0778ea9a725b338fd699f0f00c577b5dc02a2ad0227b.pkl
deleted file mode 100644
index f6db6807..00000000
Binary files a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/5248701366e0e9f110aa0778ea9a725b338fd699f0f00c577b5dc02a2ad0227b.pkl and /dev/null differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/5c79d621367285269ce96840ff46e5f2ec65114e6f000cd998f5843eebb500bf.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/5c79d621367285269ce96840ff46e5f2ec65114e6f000cd998f5843eebb500bf.pkl
new file mode 100644
index 00000000..413282ef
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/5c79d621367285269ce96840ff46e5f2ec65114e6f000cd998f5843eebb500bf.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/6070d30adc54f1416f60ffbd439da0ee8f3d3b098b651793f22f7415d2a0259f.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/6070d30adc54f1416f60ffbd439da0ee8f3d3b098b651793f22f7415d2a0259f.pkl
new file mode 100644
index 00000000..8d1ded8f
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/6070d30adc54f1416f60ffbd439da0ee8f3d3b098b651793f22f7415d2a0259f.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/67d7a848b7d177f20f162450c158ee521946677b2a0346a5472d50f23c6116b5.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/67d7a848b7d177f20f162450c158ee521946677b2a0346a5472d50f23c6116b5.pkl
new file mode 100644
index 00000000..e3915fed
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/67d7a848b7d177f20f162450c158ee521946677b2a0346a5472d50f23c6116b5.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/6f37cc5bc8a7536c11c4b15e03fe6798884628de6384b70949514e88732e6f8b.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/6f37cc5bc8a7536c11c4b15e03fe6798884628de6384b70949514e88732e6f8b.pkl
new file mode 100644
index 00000000..2aa2cb39
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/6f37cc5bc8a7536c11c4b15e03fe6798884628de6384b70949514e88732e6f8b.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/8810f1025d35710afece279e37023e91adef962f5f75ef31c6573cc48cbf69e6.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/8810f1025d35710afece279e37023e91adef962f5f75ef31c6573cc48cbf69e6.pkl
new file mode 100644
index 00000000..aba77eb9
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/8810f1025d35710afece279e37023e91adef962f5f75ef31c6573cc48cbf69e6.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/905f34884ba0a5abafeecc7259f07e3c6de7b0eb64ebcb6cf746aefe61911e09.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/905f34884ba0a5abafeecc7259f07e3c6de7b0eb64ebcb6cf746aefe61911e09.pkl
new file mode 100644
index 00000000..ede12bec
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/905f34884ba0a5abafeecc7259f07e3c6de7b0eb64ebcb6cf746aefe61911e09.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/9d00c2f9d3e8552ad9b76505a4f8d077a87abf5709b9561992571584df2d4359.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/9d00c2f9d3e8552ad9b76505a4f8d077a87abf5709b9561992571584df2d4359.pkl
new file mode 100644
index 00000000..7481fb18
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/9d00c2f9d3e8552ad9b76505a4f8d077a87abf5709b9561992571584df2d4359.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/a453be175943e11876fe4cca7d2465c3c359ad979090e79e61f42578b680c6a6.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/a453be175943e11876fe4cca7d2465c3c359ad979090e79e61f42578b680c6a6.pkl
new file mode 100644
index 00000000..c84efdf0
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/a453be175943e11876fe4cca7d2465c3c359ad979090e79e61f42578b680c6a6.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/ad8e7269a5ec0fc96aa55673ca54e874b5fb6db926347deb35d9a52366a1e4fa.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/ad8e7269a5ec0fc96aa55673ca54e874b5fb6db926347deb35d9a52366a1e4fa.pkl
deleted file mode 100644
index e045932d..00000000
Binary files a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/ad8e7269a5ec0fc96aa55673ca54e874b5fb6db926347deb35d9a52366a1e4fa.pkl and /dev/null differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/b5db899d0fb1decdb1a8e9f2ab828d1baf879a7c180947cecab8cc2fe4a4a82d.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/b5db899d0fb1decdb1a8e9f2ab828d1baf879a7c180947cecab8cc2fe4a4a82d.pkl
new file mode 100644
index 00000000..0c1a98a5
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/b5db899d0fb1decdb1a8e9f2ab828d1baf879a7c180947cecab8cc2fe4a4a82d.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/b7eac472c165b5f8c7233d36a27733750fc42a46405cdc2001cd27a3c7913977.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/b7eac472c165b5f8c7233d36a27733750fc42a46405cdc2001cd27a3c7913977.pkl
new file mode 100644
index 00000000..0fc96397
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/b7eac472c165b5f8c7233d36a27733750fc42a46405cdc2001cd27a3c7913977.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/c20ea0fd20a843463c02b54e1f00ffcba546040e37a8ef94b57824ce7e702982.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/c20ea0fd20a843463c02b54e1f00ffcba546040e37a8ef94b57824ce7e702982.pkl
new file mode 100644
index 00000000..5a4646d5
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/c20ea0fd20a843463c02b54e1f00ffcba546040e37a8ef94b57824ce7e702982.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/cd27af3d4d6d4cd6cb10ad4867cfdf8130fc8c2468ebd3b815368e912adb2b4f.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/cd27af3d4d6d4cd6cb10ad4867cfdf8130fc8c2468ebd3b815368e912adb2b4f.pkl
new file mode 100644
index 00000000..aecc3d51
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/cd27af3d4d6d4cd6cb10ad4867cfdf8130fc8c2468ebd3b815368e912adb2b4f.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d094bed3c5949fc49cc184d3e46e2c143832a1513f45eea1017f08a145c88ed6.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d094bed3c5949fc49cc184d3e46e2c143832a1513f45eea1017f08a145c88ed6.pkl
new file mode 100644
index 00000000..ba4c685c
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d094bed3c5949fc49cc184d3e46e2c143832a1513f45eea1017f08a145c88ed6.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d1300e99e32d0d8d912df7dbcf74f8ed509c013980552330f3e4c2989e58937a.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d1300e99e32d0d8d912df7dbcf74f8ed509c013980552330f3e4c2989e58937a.pkl
new file mode 100644
index 00000000..ce099366
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d1300e99e32d0d8d912df7dbcf74f8ed509c013980552330f3e4c2989e58937a.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d35e2c3b76b1ed8ae59b1d00d37438e9ba2f14394529abd412854af4833570c6.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d35e2c3b76b1ed8ae59b1d00d37438e9ba2f14394529abd412854af4833570c6.pkl
new file mode 100644
index 00000000..b3dcc470
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d35e2c3b76b1ed8ae59b1d00d37438e9ba2f14394529abd412854af4833570c6.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d67a3767551bd0eb48d8886ea73c042d5aab06ce80b02a19b9d185cb768f7853.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d67a3767551bd0eb48d8886ea73c042d5aab06ce80b02a19b9d185cb768f7853.pkl
new file mode 100644
index 00000000..57953d75
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/d67a3767551bd0eb48d8886ea73c042d5aab06ce80b02a19b9d185cb768f7853.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/e1a3bb23dc12367a769c2d8842c04caf9f3fe33660e3615c92083846293315f4.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/e1a3bb23dc12367a769c2d8842c04caf9f3fe33660e3615c92083846293315f4.pkl
new file mode 100644
index 00000000..74045639
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/e1a3bb23dc12367a769c2d8842c04caf9f3fe33660e3615c92083846293315f4.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/f765da6c57ffb7a40aff518bc0197ac7b450da3cec20c25d742dec4633276e86.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/f765da6c57ffb7a40aff518bc0197ac7b450da3cec20c25d742dec4633276e86.pkl
new file mode 100644
index 00000000..aec7c7d1
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/f765da6c57ffb7a40aff518bc0197ac7b450da3cec20c25d742dec4633276e86.pkl differ
diff --git a/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/fb926a35c88bec489b9b7f1996704dc265be0ab5ba6186b005786e3d3e86d269.pkl b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/fb926a35c88bec489b9b7f1996704dc265be0ab5ba6186b005786e3d3e86d269.pkl
new file mode 100644
index 00000000..2402c557
Binary files /dev/null and b/tests/itest_cache/advanced_output_handling_ipynb/api.openai.com/v1_chat_completions/fb926a35c88bec489b9b7f1996704dc265be0ab5ba6186b005786e3d3e86d269.pkl differ
diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/47cadb0df545a72626847d363aa11ec5911fb923cd16912ea6e3c5b0f1f4e8ba.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/47cadb0df545a72626847d363aa11ec5911fb923cd16912ea6e3c5b0f1f4e8ba.pkl
new file mode 100644
index 00000000..5a97cb5d
Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/47cadb0df545a72626847d363aa11ec5911fb923cd16912ea6e3c5b0f1f4e8ba.pkl differ
diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/4a150caa95de6680a612999caaa5a4e19dc8b79fc141a408c0656aef8dfaadd4.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/4a150caa95de6680a612999caaa5a4e19dc8b79fc141a408c0656aef8dfaadd4.pkl
deleted file mode 100644
index 3182d0b0..00000000
Binary files a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/4a150caa95de6680a612999caaa5a4e19dc8b79fc141a408c0656aef8dfaadd4.pkl and /dev/null differ
diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/73a30cf3c7dfcb9103d58861f34bc3188f8422ce69f3d5cb7396946e59bf7159.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/73a30cf3c7dfcb9103d58861f34bc3188f8422ce69f3d5cb7396946e59bf7159.pkl
new file mode 100644
index 00000000..427fa418
Binary files /dev/null and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/73a30cf3c7dfcb9103d58861f34bc3188f8422ce69f3d5cb7396946e59bf7159.pkl differ
diff --git a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/d4ca244caaf6952544bda13063a022efbe85314c8a1053edbdd202994ad7d2e1.pkl b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/d4ca244caaf6952544bda13063a022efbe85314c8a1053edbdd202994ad7d2e1.pkl
index e058a1e5..ca65ad36 100644
Binary files a/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/d4ca244caaf6952544bda13063a022efbe85314c8a1053edbdd202994ad7d2e1.pkl and b/tests/itest_cache/math_via_python_code_with_a_single_agent_ipynb/api.openai.com/v1_chat_completions/d4ca244caaf6952544bda13063a022efbe85314c8a1053edbdd202994ad7d2e1.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/0663f699c41bd5deabd7e17fdb9e0f28e261a57e060dbae105103cd04bda5e92.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/0663f699c41bd5deabd7e17fdb9e0f28e261a57e060dbae105103cd04bda5e92.pkl
new file mode 100644
index 00000000..be976b7f
Binary files /dev/null and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/0663f699c41bd5deabd7e17fdb9e0f28e261a57e060dbae105103cd04bda5e92.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/0d2cbb7d4698f271b67726a36ad6888892160387668977dbb95ac0269d70212b.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/0d2cbb7d4698f271b67726a36ad6888892160387668977dbb95ac0269d70212b.pkl
deleted file mode 100644
index 934ba7aa..00000000
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/0d2cbb7d4698f271b67726a36ad6888892160387668977dbb95ac0269d70212b.pkl and /dev/null differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/1208f8c6f73f97c5ef7eae5e47ccd5ee4870301a7569dca0cf1981533b50967a.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/1208f8c6f73f97c5ef7eae5e47ccd5ee4870301a7569dca0cf1981533b50967a.pkl
deleted file mode 100644
index 3e0805c7..00000000
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/1208f8c6f73f97c5ef7eae5e47ccd5ee4870301a7569dca0cf1981533b50967a.pkl and /dev/null differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/1992c5f8659c96a24fbd52e87e2ac2896101c53742f5465305388e0dd992733b.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/1992c5f8659c96a24fbd52e87e2ac2896101c53742f5465305388e0dd992733b.pkl
index 6ba3b485..4380859b 100644
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/1992c5f8659c96a24fbd52e87e2ac2896101c53742f5465305388e0dd992733b.pkl and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/1992c5f8659c96a24fbd52e87e2ac2896101c53742f5465305388e0dd992733b.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/1bddd825359810b3696808da7ffd5b248d119625f4e1b9e1dc2b673f3f2af80a.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/1bddd825359810b3696808da7ffd5b248d119625f4e1b9e1dc2b673f3f2af80a.pkl
new file mode 100644
index 00000000..8d9c28ca
Binary files /dev/null and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/1bddd825359810b3696808da7ffd5b248d119625f4e1b9e1dc2b673f3f2af80a.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/266b1164bbff5e9e0921e47783406ea7d0d9c0bbb23f420a8628881c097bcac9.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/266b1164bbff5e9e0921e47783406ea7d0d9c0bbb23f420a8628881c097bcac9.pkl
index c084b560..9032342a 100644
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/266b1164bbff5e9e0921e47783406ea7d0d9c0bbb23f420a8628881c097bcac9.pkl and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/266b1164bbff5e9e0921e47783406ea7d0d9c0bbb23f420a8628881c097bcac9.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/7efe9c87006ead252b4e89753f6dbd78099ef723fed4c248f1c5cac47ae67fe4.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/7efe9c87006ead252b4e89753f6dbd78099ef723fed4c248f1c5cac47ae67fe4.pkl
deleted file mode 100644
index a4719fe2..00000000
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/7efe9c87006ead252b4e89753f6dbd78099ef723fed4c248f1c5cac47ae67fe4.pkl and /dev/null differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/80ee04d19d127829039a45102509b07a13b95bfbff9067fdd71972f121c3c537.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/80ee04d19d127829039a45102509b07a13b95bfbff9067fdd71972f121c3c537.pkl
new file mode 100644
index 00000000..1856eb8d
Binary files /dev/null and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/80ee04d19d127829039a45102509b07a13b95bfbff9067fdd71972f121c3c537.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/a4e03783c2923004596a616e0437c42912ab9e3096d76c1790379079af068be6.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/a4e03783c2923004596a616e0437c42912ab9e3096d76c1790379079af068be6.pkl
new file mode 100644
index 00000000..909ebfe8
Binary files /dev/null and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/a4e03783c2923004596a616e0437c42912ab9e3096d76c1790379079af068be6.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/5a5c08ebd048b63719e72c710bbb955aeadbc7562fe140de29ed8ec769af74c0.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/aac249bdc8c9fb5aec6c78017a978a7ab648e55ce18706546647d2584aef679d.pkl
similarity index 85%
rename from tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/5a5c08ebd048b63719e72c710bbb955aeadbc7562fe140de29ed8ec769af74c0.pkl
rename to tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/aac249bdc8c9fb5aec6c78017a978a7ab648e55ce18706546647d2584aef679d.pkl
index eb1acba9..968434d4 100644
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/5a5c08ebd048b63719e72c710bbb955aeadbc7562fe140de29ed8ec769af74c0.pkl and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/aac249bdc8c9fb5aec6c78017a978a7ab648e55ce18706546647d2584aef679d.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/9b2a58e95f8ade6fa4269357c71fac229d9061294e10fe0814bde7527c783259.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/ac81edb9262f0d9555b5dc890992350a9e548d76b336da77acd9381c36aced9c.pkl
similarity index 86%
rename from tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/9b2a58e95f8ade6fa4269357c71fac229d9061294e10fe0814bde7527c783259.pkl
rename to tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/ac81edb9262f0d9555b5dc890992350a9e548d76b336da77acd9381c36aced9c.pkl
index 2eb2a7c8..5349a3f6 100644
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/9b2a58e95f8ade6fa4269357c71fac229d9061294e10fe0814bde7527c783259.pkl and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/ac81edb9262f0d9555b5dc890992350a9e548d76b336da77acd9381c36aced9c.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/e63b66ec839298d91654a974a3104914215e5a94c26a87ad1d8dad3881459b11.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/e63b66ec839298d91654a974a3104914215e5a94c26a87ad1d8dad3881459b11.pkl
index 6e1c5ce7..45bdedf7 100644
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/e63b66ec839298d91654a974a3104914215e5a94c26a87ad1d8dad3881459b11.pkl and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/e63b66ec839298d91654a974a3104914215e5a94c26a87ad1d8dad3881459b11.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/fb3c5a9c7279a6e7ac1e197d8308ad7bd6fb92656d073273386ff065b1360b50.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/fb3c5a9c7279a6e7ac1e197d8308ad7bd6fb92656d073273386ff065b1360b50.pkl
deleted file mode 100644
index 040d0a07..00000000
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_chat_completions/fb3c5a9c7279a6e7ac1e197d8308ad7bd6fb92656d073273386ff065b1360b50.pkl and /dev/null differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/1551c2f735715ff74e73ddedff891d47760b8bb08ed97fbfac4279526f47aa37.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/1551c2f735715ff74e73ddedff891d47760b8bb08ed97fbfac4279526f47aa37.pkl
index 520e356b..8c23391b 100644
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/1551c2f735715ff74e73ddedff891d47760b8bb08ed97fbfac4279526f47aa37.pkl and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/1551c2f735715ff74e73ddedff891d47760b8bb08ed97fbfac4279526f47aa37.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/1a96aee6c5f7c33e66ca5205357db8af45ba31b8e81d8697242e10a6c3f38c99.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/1a96aee6c5f7c33e66ca5205357db8af45ba31b8e81d8697242e10a6c3f38c99.pkl
deleted file mode 100644
index 553bbda9..00000000
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/1a96aee6c5f7c33e66ca5205357db8af45ba31b8e81d8697242e10a6c3f38c99.pkl and /dev/null differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/32bdf511a02dbea94543e8b8b06b545e86c78cea55ce7d7f867bf25540899372.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/32bdf511a02dbea94543e8b8b06b545e86c78cea55ce7d7f867bf25540899372.pkl
new file mode 100644
index 00000000..578aaad5
Binary files /dev/null and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/32bdf511a02dbea94543e8b8b06b545e86c78cea55ce7d7f867bf25540899372.pkl differ
diff --git a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/af9a6801a473bb2052a04f58adb99b30be8a24fb8ec999cd1a2e99a5e50ee8f2.pkl b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/af9a6801a473bb2052a04f58adb99b30be8a24fb8ec999cd1a2e99a5e50ee8f2.pkl
index dc332fca..3b553f9b 100644
Binary files a/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/af9a6801a473bb2052a04f58adb99b30be8a24fb8ec999cd1a2e99a5e50ee8f2.pkl and b/tests/itest_cache/multi_step_research_agent_ipynb/api.openai.com/v1_embeddings/af9a6801a473bb2052a04f58adb99b30be8a24fb8ec999cd1a2e99a5e50ee8f2.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/230d818bb0662e39c2a693f0614757941a7f4e7092293e92976b383e08620413.pkl b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/230d818bb0662e39c2a693f0614757941a7f4e7092293e92976b383e08620413.pkl
new file mode 100644
index 00000000..62f591d6
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/230d818bb0662e39c2a693f0614757941a7f4e7092293e92976b383e08620413.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/2525910f9a8be68780cc96d9a3a51be1edb95c6391e94907be3586aa0c80faf5.pkl b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/2525910f9a8be68780cc96d9a3a51be1edb95c6391e94907be3586aa0c80faf5.pkl
deleted file mode 100644
index b36a7ab5..00000000
Binary files a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/2525910f9a8be68780cc96d9a3a51be1edb95c6391e94907be3586aa0c80faf5.pkl and /dev/null differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/810e1ea90e234c3ed4f2177aa9b49333fe1268c5b5af9ca909e54561b295cfff.pkl b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/810e1ea90e234c3ed4f2177aa9b49333fe1268c5b5af9ca909e54561b295cfff.pkl
new file mode 100644
index 00000000..241c453c
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/810e1ea90e234c3ed4f2177aa9b49333fe1268c5b5af9ca909e54561b295cfff.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/8b7f29a38bd51c0f398155303c14b9b101cd681a69b0949cfa9583a7a4615b68.pkl b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/8b7f29a38bd51c0f398155303c14b9b101cd681a69b0949cfa9583a7a4615b68.pkl
deleted file mode 100644
index 392245ba..00000000
Binary files a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/8b7f29a38bd51c0f398155303c14b9b101cd681a69b0949cfa9583a7a4615b68.pkl and /dev/null differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/8d9ae0e43b75e5010f64602f1c838d6ec25adb96334257e8e5ab5641614d141a.pkl b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/8d9ae0e43b75e5010f64602f1c838d6ec25adb96334257e8e5ab5641614d141a.pkl
new file mode 100644
index 00000000..f47a83f6
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/8d9ae0e43b75e5010f64602f1c838d6ec25adb96334257e8e5ab5641614d141a.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/cceea8f2031b1897f884fdda8583530eba45b0f87abcfa26f5fcc921baa7792b.pkl b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/cceea8f2031b1897f884fdda8583530eba45b0f87abcfa26f5fcc921baa7792b.pkl
new file mode 100644
index 00000000..1a39fbe6
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/cceea8f2031b1897f884fdda8583530eba45b0f87abcfa26f5fcc921baa7792b.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/ec502433baf8defc2c6c0cc1ca4268706faf7c0f5bb1fd216066f6d0a39841da.pkl b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/ec502433baf8defc2c6c0cc1ca4268706faf7c0f5bb1fd216066f6d0a39841da.pkl
new file mode 100644
index 00000000..81162850
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/ec502433baf8defc2c6c0cc1ca4268706faf7c0f5bb1fd216066f6d0a39841da.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/ecf3a8246794bf81f15a4d29a60b65e3945b045a51152805e434bc67312ccc5b.pkl b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/ecf3a8246794bf81f15a4d29a60b65e3945b045a51152805e434bc67312ccc5b.pkl
deleted file mode 100644
index a3553ae8..00000000
Binary files a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/ecf3a8246794bf81f15a4d29a60b65e3945b045a51152805e434bc67312ccc5b.pkl and /dev/null differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/fcef4d2f2fcb27097d800ca9c013cf22c2d5809a34b09b4554abf047e7067111.pkl b/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/fcef4d2f2fcb27097d800ca9c013cf22c2d5809a34b09b4554abf047e7067111.pkl
deleted file mode 100644
index 694c82a1..00000000
Binary files a/tests/itest_cache/validating_agent_output_ipynb/api.openai.com/v1_chat_completions/fcef4d2f2fcb27097d800ca9c013cf22c2d5809a34b09b4554abf047e7067111.pkl and /dev/null differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/11b12de29343ad13c7e92e0c7d663865d626ca4d5942e557b06748f3b4e1c6f7.pkl b/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/11b12de29343ad13c7e92e0c7d663865d626ca4d5942e557b06748f3b4e1c6f7.pkl
new file mode 100644
index 00000000..451a5452
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/11b12de29343ad13c7e92e0c7d663865d626ca4d5942e557b06748f3b4e1c6f7.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/23d71aa00691d8ad88207559f45cc39de0462708ffcef3edcfe098a91935baa9.pkl b/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/23d71aa00691d8ad88207559f45cc39de0462708ffcef3edcfe098a91935baa9.pkl
deleted file mode 100644
index 52d2862f..00000000
Binary files a/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/23d71aa00691d8ad88207559f45cc39de0462708ffcef3edcfe098a91935baa9.pkl and /dev/null differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/48b9aea6597b2b544f9d167f8df886317a4fe5996c26ea69a49602e57c287b0d.pkl b/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/48b9aea6597b2b544f9d167f8df886317a4fe5996c26ea69a49602e57c287b0d.pkl
new file mode 100644
index 00000000..8880bd20
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/48b9aea6597b2b544f9d167f8df886317a4fe5996c26ea69a49602e57c287b0d.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/4bf4bfbbfa93b08789016604fbfdfed9ebb824699c8ff1c7c9957d833ac22da1.pkl b/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/4bf4bfbbfa93b08789016604fbfdfed9ebb824699c8ff1c7c9957d833ac22da1.pkl
index 5552575f..b260fd47 100644
Binary files a/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/4bf4bfbbfa93b08789016604fbfdfed9ebb824699c8ff1c7c9957d833ac22da1.pkl and b/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/4bf4bfbbfa93b08789016604fbfdfed9ebb824699c8ff1c7c9957d833ac22da1.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/69eb3c50fefdb30df9933fdb38dfdb83a3a3826cf6a4d52c0669f1e995909e3e.pkl b/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/69eb3c50fefdb30df9933fdb38dfdb83a3a3826cf6a4d52c0669f1e995909e3e.pkl
deleted file mode 100644
index 4ad3ca6e..00000000
Binary files a/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/69eb3c50fefdb30df9933fdb38dfdb83a3a3826cf6a4d52c0669f1e995909e3e.pkl and /dev/null differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/7e65f2bfa0981b94c5e22eb8375d89682a7d53ce46f8e37e9a033804852df486.pkl b/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/7e65f2bfa0981b94c5e22eb8375d89682a7d53ce46f8e37e9a033804852df486.pkl
deleted file mode 100644
index 42a293fa..00000000
Binary files a/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/7e65f2bfa0981b94c5e22eb8375d89682a7d53ce46f8e37e9a033804852df486.pkl and /dev/null differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/bb5cf6cd49399a3ff65e6504e84814fe7ad05388370491f7a283c2aed656b5fd.pkl b/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/bb5cf6cd49399a3ff65e6504e84814fe7ad05388370491f7a283c2aed656b5fd.pkl
new file mode 100644
index 00000000..07c15df9
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/duckduckgo.com/bb5cf6cd49399a3ff65e6504e84814fe7ad05388370491f7a283c2aed656b5fd.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/180f2947df4587bf479b5a2b43ede090ff133904346d594bb64b55c0415a8e3b.pkl b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/180f2947df4587bf479b5a2b43ede090ff133904346d594bb64b55c0415a8e3b.pkl
new file mode 100644
index 00000000..fceb6b69
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/180f2947df4587bf479b5a2b43ede090ff133904346d594bb64b55c0415a8e3b.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/3f5ae169ab31275916f9ace32f21e2b33e283032499b1c3e889871151b26f815.pkl b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/3f5ae169ab31275916f9ace32f21e2b33e283032499b1c3e889871151b26f815.pkl
new file mode 100644
index 00000000..88ce8fa1
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/3f5ae169ab31275916f9ace32f21e2b33e283032499b1c3e889871151b26f815.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/4fc2a13e6ad38494603204ae36b1c8fe8821e1b0f9d731d9938c694e95f1965e.pkl b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/4fc2a13e6ad38494603204ae36b1c8fe8821e1b0f9d731d9938c694e95f1965e.pkl
deleted file mode 100644
index 4b358935..00000000
Binary files a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/4fc2a13e6ad38494603204ae36b1c8fe8821e1b0f9d731d9938c694e95f1965e.pkl and /dev/null differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/79241783639f4441e833e51402a354e850c20633a9fd00222466050d3fdcb3cb.pkl b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/79241783639f4441e833e51402a354e850c20633a9fd00222466050d3fdcb3cb.pkl
new file mode 100644
index 00000000..6c9da007
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/79241783639f4441e833e51402a354e850c20633a9fd00222466050d3fdcb3cb.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/8fef518aaa95949f13c9afd8fe27e18f0e54aec16f3a24b37bc42fd98657582b.pkl b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/8fef518aaa95949f13c9afd8fe27e18f0e54aec16f3a24b37bc42fd98657582b.pkl
deleted file mode 100644
index bf7b16a2..00000000
Binary files a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/8fef518aaa95949f13c9afd8fe27e18f0e54aec16f3a24b37bc42fd98657582b.pkl and /dev/null differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/be7cd8ed66ed983640c378bcde831b94f988a46c5644abcfeadd6b54c8c5144c.pkl b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/be7cd8ed66ed983640c378bcde831b94f988a46c5644abcfeadd6b54c8c5144c.pkl
new file mode 100644
index 00000000..2f7427ec
Binary files /dev/null and b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/be7cd8ed66ed983640c378bcde831b94f988a46c5644abcfeadd6b54c8c5144c.pkl differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/e54bf53ca349b9635f635b40a0e6eb3ed5a367cb21921989221950bb5ae76cbe.pkl b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/e54bf53ca349b9635f635b40a0e6eb3ed5a367cb21921989221950bb5ae76cbe.pkl
deleted file mode 100644
index f9b314b7..00000000
Binary files a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/e54bf53ca349b9635f635b40a0e6eb3ed5a367cb21921989221950bb5ae76cbe.pkl and /dev/null differ
diff --git a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/e88a4aea1a736519ab60414bc4ab5f3876f514f82ee0eadc252d0e2136d8d018.pkl b/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/e88a4aea1a736519ab60414bc4ab5f3876f514f82ee0eadc252d0e2136d8d018.pkl
deleted file mode 100644
index a0d7c8e9..00000000
Binary files a/tests/itest_cache/validating_agent_output_ipynb/links.duckduckgo.com/d.js/e88a4aea1a736519ab60414bc4ab5f3876f514f82ee0eadc252d0e2136d8d018.pkl and /dev/null differ
diff --git a/tests/itest_golden_data/advanced_output_handling_ipynb.json b/tests/itest_golden_data/advanced_output_handling_ipynb.json
index ade25a3f..95569415 100644
--- a/tests/itest_golden_data/advanced_output_handling_ipynb.json
+++ b/tests/itest_golden_data/advanced_output_handling_ipynb.json
@@ -1 +1 @@
-"def bubble_sort(arr):\n    n = len(arr)\n    for i in range(n):\n        for j in range(0, n-i-1):\n            if arr[j] > arr[j+1]:\n                arr[j], arr[j+1] = arr[j+1], arr[j]\n    return arr\n\n# Test the bubble sort implementation\nsample_list = [64, 34, 25, 12, 22, 11, 90]\nsorted_list = bubble_sort(sample_list)\nprint(sorted_list)\n\nThe bubble sort algorithm repeatedly steps through the list, compares adjacent elements, and swaps them if they are in the wrong order. The pass through the list is repeated until the list is sorted.\n\nExplanation:\n1. The `bubble_sort` function takes a list `arr` as input.\n2. The variable `n` is set to the length of the list.\n3. The outer loop runs `n` times, where `i` ranges from 0 to `n-1`.\n4. The inner loop runs from 0 to `n-i-1`, comparing each pair of adjacent elements.\n5. If the element at position `j` is greater than the element at position `j+1`, they are swapped.\n6. This process is repeated until the list is sorted.\n7. The sorted list is returned."
\ No newline at end of file
+"from typing import List\n\ndef bubble_sort(arr: List[int]) -> None:\n    \"\"\"\n    Sorts a list of integers in ascending order using the bubble sort algorithm.\n    \n    Args:\n        arr (List[int]): The list of integers to be sorted.\n    \"\"\"\n    n = len(arr)\n    while n > 1:\n        new_n = 0\n        for i in range(1, n):\n            if arr[i-1] > arr[i]:\n                arr[i-1], arr[i] = arr[i], arr[i-1]\n                new_n = i\n        if new_n == 0:\n            break\n        n = new_n\n\ndef main() -> None:\n    \"\"\"\n    Test the bubble sort implementation with sample lists.\n    \"\"\"\n    test_cases = [\n        ([64, 34, 25, 12, 22, 11, 90], \"Sorted list\"),\n        ([], \"Sorted empty list\"),\n        ([1], \"Sorted single element list\"),\n        ([1, 2, 3, 4, 5], \"Sorted already sorted list\"),\n        ([3, 1, 2, 3, 1], \"Sorted list with duplicates\"),\n        ([3, -1, 2, -3, 1], \"Sorted list with negative numbers\"),\n        ([5, 5, 5, 5], \"Sorted list with identical elements\")\n    ]\n    \n    for lst, description in test_cases:\n        bubble_sort(lst)\n        print(f\"{description}: {lst}\")\n\nif __name__ == \"__main__\":\n    main()\n\nThe bubble sort implementation sorts a list of integers in ascending order by repeatedly comparing and swapping adjacent elements if they are in the wrong order. An optimization is added to detect the last position where a swap was made, allowing the loop to break early if the list is already sorted. The `main` function tests the implementation with various sample lists, including edge cases, and prints the sorted results."
\ No newline at end of file
diff --git a/tests/itest_golden_data/math_via_python_code_with_a_single_agent_ipynb.json b/tests/itest_golden_data/math_via_python_code_with_a_single_agent_ipynb.json
index 7ba567ce..ae8ecb0a 100644
--- a/tests/itest_golden_data/math_via_python_code_with_a_single_agent_ipynb.json
+++ b/tests/itest_golden_data/math_via_python_code_with_a_single_agent_ipynb.json
@@ -1 +1 @@
-"To solve the system of linear equations:\n\n\\[ 725x + 727y = 1500 \\]\n\\[ 729x + 731y = 1508 \\]\n\nwe use the sympy library to find the values of \\( x \\) and \\( y \\).\n\nFirst, we define the equations:\n\n\\[ 725x + 727y = 1500 \\]\n\\[ 729x + 731y = 1508 \\]\n\nUsing sympy to solve these equations, we find:\n\n\\[ x = -23 \\]\n\\[ y = 25 \\]\n\nTo find \\( x - y \\):\n\n\\[ x - y = -23 - 25 = -48 \\]\n\nThus, the values are:\n\n\\[ x = -23 \\]\n\\[ y = 25 \\]\n\\[ x - y = -48 \\]"
\ No newline at end of file
+"The system of linear equations is given by:\n\n\\[ 725x + 727y = 1500 \\]\n\\[ 729x + 731y = 1508 \\]\n\nTo solve for \\( x \\) and \\( y \\), we use the method of solving simultaneous equations. Using sympy, we find:\n\n\\[ x = -23 \\]\n\\[ y = 25 \\]\n\nNext, we calculate \\( x - y \\):\n\n\\[ x - y = -23 - 25 = -48 \\]\n\nThus, the values are:\n\n\\[ x = -23 \\]\n\\[ y = 25 \\]\n\\[ x - y = -48 \\]"
\ No newline at end of file
diff --git a/tests/itest_golden_data/multi_step_research_agent_ipynb.json b/tests/itest_golden_data/multi_step_research_agent_ipynb.json
index 09f281ec..545caa4f 100644
--- a/tests/itest_golden_data/multi_step_research_agent_ipynb.json
+++ b/tests/itest_golden_data/multi_step_research_agent_ipynb.json
@@ -1 +1 @@
-"Why did Arjuna kill Karna, his half-brother?\n\nKarna's past actions, such as the humiliation of Draupadi and the killing of Abhimanyu, played a significant role in Arjuna's decision to kill him, primarily through Krishna's influence. During the battle, when Karna's chariot got stuck in the mud, he appealed to Arjuna and Krishna to honorably allow him to fix it. Krishna responded by questioning the honor in Karna's past deeds, specifically mentioning the humiliation of Draupadi and the butchering of Abhimanyu. This reminder of Karna's dishonorable actions served to justify the killing in the context of the war's moral framework. When Karna attempted to use the Brahmastra but forgot the mantra, Krishna commanded Arjuna to kill him. Despite Arjuna's initial hesitation, seeing Karna defenseless, he ultimately obeyed Krishna's command and killed Karna. Thus, Krishna's invocation of Karna's past misdeeds and his direct order were crucial in Arjuna's decision to kill Karna."
\ No newline at end of file
+"Why did Arjuna kill Karna, his half-brother?\n\nKrishna played a pivotal role in Arjuna's decision to kill Karna. When Karna's chariot got stuck in the mud, he appealed to Arjuna and Krishna for honor, asking them to let him fix his chariot. Krishna responded by questioning Karna's sense of honor, reminding him of his past actions: \"What kind of honor was it to humiliate Draupadi? What honor was there in butchering Abhimanyu?\" These rhetorical questions highlighted Karna's dishonorable deeds, thereby justifying his death. Additionally, Krishna emphasized the importance of dharma (duty) over personal feelings, urging Arjuna to act according to his warrior duty. Ultimately, Krishna's arguments about Karna's past dishonorable actions and the necessity of fulfilling one's duty convinced Arjuna to kill Karna."
\ No newline at end of file
diff --git a/tests/itest_golden_data/validating_agent_output_ipynb.json b/tests/itest_golden_data/validating_agent_output_ipynb.json
index 1ceab2e4..32aee32d 100644
--- a/tests/itest_golden_data/validating_agent_output_ipynb.json
+++ b/tests/itest_golden_data/validating_agent_output_ipynb.json
@@ -1 +1 @@
-"### Comprehensive Analysis of AI Advancements in 2024\n\n#### Key Trends in AI in 2024\n\n1. **Explosive Growth of Generative AI and Multimodal AI**:\n   - Generative AI continues to evolve, becoming more accessible and useful for non-technical users. This trend is marked by the proliferation of small, specialized AI models that individuals and businesses can tinker with.\n   - Multimodal AI, which integrates multiple types of data (e.g., text, images, audio), is gaining traction, enabling more sophisticated and versatile AI applications.\n\n2. **Quantum AI**:\n   - Quantum AI is emerging as a significant trend, promising to revolutionize computational capabilities and solve complex problems that are currently intractable with classical computing.\n\n3. **Explainable AI (XAI)**:\n   - There is a growing emphasis on making AI models more transparent and understandable. Explainable AI aims to demystify the \"black box\" nature of many AI systems, enhancing trust and accountability.\n\n4. **Edge AI**:\n   - The deployment of AI at the edge (i.e., on devices rather than centralized servers) is expanding. This trend is driven by the need for real-time processing and reduced latency in applications such as autonomous vehicles and IoT devices.\n\n5. **AI Governance**:\n   - As AI becomes more integrated into various aspects of society, the importance of AI governance is increasing. This includes developing frameworks for ethical AI use, data privacy, and regulatory compliance.\n\n6. **Intersection of AI and Sustainability**:\n   - AI is being leveraged to address environmental challenges, from optimizing energy use to monitoring climate change. This trend highlights the role of AI in promoting sustainability.\n\n#### Breakthrough Technologies in AI in 2024\n\n1. **Generative AI**:\n   - Generative AI remains a breakthrough technology, with significant investments from enterprises. 
It is being used to create content, design products, and even generate code, demonstrating its versatility and impact.\n\n2. **AI in Automation**:\n   - Automation technologies powered by AI are transforming industries by streamlining processes, reducing costs, and increasing efficiency. This includes robotic process automation (RPA) and AI-driven decision-making systems.\n\n3. **AI in Personalization**:\n   - AI technologies are enhancing personalization in various sectors, particularly in e-commerce and marketing. Advanced AI algorithms analyze user behavior to deliver tailored experiences and recommendations.\n\n4. **AI in Healthcare**:\n   - Breakthroughs in AI are revolutionizing healthcare, from diagnostic tools to personalized treatment plans. AI is improving patient outcomes and operational efficiency in medical facilities.\n\n5. **AI in Cybersecurity**:\n   - AI is playing a critical role in cybersecurity, helping to detect and respond to threats in real-time. Advanced AI models are being developed to identify vulnerabilities and protect against cyberattacks.\n\n6. **AI in Physics**:\n   - AI is making significant strides in the field of physics. Researchers have developed AI techniques to classify phase transitions in materials more efficiently than existing methods. AI is also being used to formulate physical theories by recognizing patterns in complex data sets, simplifying interactions in physical systems.\n\n#### Potential Industry Impacts of AI Advancements in 2024\n\n1. **Manufacturing**:\n   - The manufacturing industry is expected to see the largest financial impact from AI. AI-driven automation and predictive maintenance are enhancing productivity and reducing downtime.\n\n2. **Healthcare**:\n   - AI is transforming healthcare by enabling early diagnosis, personalized treatments, and efficient management of healthcare resources. This leads to improved patient care and reduced healthcare costs.\n\n3. 
**Finance**:\n   - In the finance sector, AI is being used for fraud detection, risk management, and personalized financial services. AI-driven analytics provide deeper insights and enhance decision-making.\n\n4. **Retail and E-commerce**:\n   - AI is revolutionizing retail and e-commerce by enhancing customer experiences through personalized recommendations, inventory management, and supply chain optimization.\n\n5. **Entertainment**:\n   - AI technologies, such as deepfake and generative AI, are being used in the entertainment industry to create realistic visual effects, de-age actors, and generate content, pushing the boundaries of creativity.\n\n6. **Energy**:\n   - AI is contributing to the energy sector by optimizing energy consumption, integrating renewable energy sources, and improving grid management, promoting sustainability and efficiency.\n\n---\n\nThis comprehensive analysis highlights the significant advancements in AI in 2024, the key trends shaping the industry, breakthrough technologies, and their potential impacts across various sectors, including the field of physics."
\ No newline at end of file
+"**Comprehensive Analysis of the Latest Advancements in AI in 2024**\n\n**1. Key Trends in AI:**\n\n- **Generative AI:**\n  Generative AI has seen significant advancements in 2024, becoming more integrated into enterprise applications and daily life. According to the latest McKinsey Global Survey on AI, 65% of organizations are regularly using generative AI, nearly double the percentage from the previous year. Generative AI tools like ChatGPT have reached mass adoption, resetting the course of the industry. By 2024, 40% of enterprise applications are expected to come with conversational AI as an embedded feature, enhancing user interaction and productivity.\n\n- **Multimodal AI:**\n  Multimodal AI is evolving significantly, incorporating multiple input types such as text, images, and sound. This advancement mimics the human ability to process diverse sensory information, leading to more nuanced and holistic AI models. Multimodal AI is expected to enhance generative AI capabilities, allowing AI to support humans in performing more tasks across various environments. This trend is identified in the 2024 Gartner Hype Cycle for Generative AI, highlighting its potential for competitive advantage and time-to-market benefits.\n\n**2. Breakthrough Technologies:**\n\n- **Conversational AI:**\n  Conversational AI is becoming more intuitive, dynamic, and human-like, thanks to advancements in machine learning (ML) and natural language processing (NLP). These technologies are making AI-powered chatbots more sophisticated, offering personalized customer service and support 24/7. The rise of conversational AI is transforming customer service, providing faster and more accurate responses to customer queries.\n\n- **Generative AI-Augmented Apps and Services:**\n  Generative AI is being embedded into various applications and services, enhancing their functionality and user experience. 
This integration is making generative AI more accessible to non-tech users, allowing them to leverage AI capabilities in their daily tasks. The proliferation of generative AI-augmented apps is expected to drive innovation and efficiency across multiple industries.\n\n**3. Potential Industry Impacts:**\n\n- **Customer Service:**\n  The integration of generative AI and conversational AI is revolutionizing customer service. AI-powered chatbots are providing faster, more accurate, and personalized responses to customer queries, improving customer satisfaction and reducing operational costs for businesses.\n\n- **Healthcare:**\n  Multimodal AI is expected to have a significant impact on healthcare, enabling more accurate diagnostics and personalized treatment plans. By processing diverse data types, AI can provide a comprehensive understanding of patient conditions, leading to better healthcare outcomes.\n\n- **Enterprise Applications:**\n  The embedding of generative AI into enterprise applications is enhancing productivity and efficiency. AI-powered tools are automating routine tasks, providing intelligent insights, and improving decision-making processes. This trend is expected to drive significant business value and competitive advantage.\n\n- **Physics:**\n  AI is making substantial contributions to the field of physics. Researchers are using generative AI to develop physics-informed techniques for classifying phase transitions in materials, which are more efficient than existing machine-learning approaches. AI is also aiding theoretical investigations in pure mathematics and theoretical physics, and assisting with complex tasks in particle physics and astrophysics. These applications are accelerating research and providing deeper insights into fundamental physical phenomena.\n\n**Conclusion:**\nThe advancements in AI in 2024 are marked by the rise of generative AI, the evolution of multimodal AI, and their integration into various applications and industries. 
These trends are driving innovation, improving efficiency, and transforming customer experiences. As AI continues to evolve, its impact on industries and daily life is expected to grow, offering new possibilities and opportunities for businesses and individuals alike."
\ No newline at end of file
diff --git a/tests/test_agents/test_agents.py b/tests/test_agents/test_agents.py
index 675898eb..b8bac0d7 100644
--- a/tests/test_agents/test_agents.py
+++ b/tests/test_agents/test_agents.py
@@ -8,7 +8,6 @@
 from motleycrew.agents.llama_index.llama_index_react import ReActLlamaIndexMotleyAgent
 from motleycrew.common.exceptions import AgentNotMaterialized, CannotModifyMaterializedAgent
 from motleycrew.tools.python_repl import create_repl_tool
-from motleycrew.tools.tool import MotleyTool
 from tests.test_agents import MockTool
 
 os.environ["OPENAI_API_KEY"] = "YOUR OPENAI API KEY"
@@ -83,11 +82,6 @@ def test_materialized(self, agent):
             tool = create_repl_tool()
             agent.add_tools([tool])
 
-    @pytest.mark.parametrize("agent", test_agents_names, indirect=True)
-    def test_as_tool(self, agent):
-        tool = agent.as_tool()
-        assert isinstance(tool, MotleyTool)
-
     @pytest.mark.parametrize("agent", test_agents_names, indirect=True)
     def test_compose_prompt(self, agent):
         task_prompt = ChatPromptTemplate.from_template("What are the latest {topic} trends?")
diff --git a/tests/test_agents/test_langchain_output_handler.py b/tests/test_agents/test_langchain_output_handler.py
index 4008bb60..36723654 100644
--- a/tests/test_agents/test_langchain_output_handler.py
+++ b/tests/test_agents/test_langchain_output_handler.py
@@ -1,17 +1,23 @@
 import pytest
 from langchain_core.agents import AgentFinish, AgentAction
 
-from motleycrew.agents import MotleyOutputHandler
 from motleycrew.agents.langchain.tool_calling_react import ReActToolCallingMotleyAgent
-from motleycrew.agents.parent import DirectOutput
-from motleycrew.common.exceptions import InvalidOutput, OutputHandlerMaxIterationsExceeded
+from motleycrew.common.exceptions import InvalidOutput
+from motleycrew.tools import MotleyTool, DirectOutput
 from tests.test_agents import MockTool
 
 invalid_output = "Add more information about AI applications in medicine."
 
 
-class ReportOutputHandler(MotleyOutputHandler):
-    def handle_output(self, output: str):
+class ReportOutputHandler(MotleyTool):
+    def __init__(self):
+        super().__init__(
+            name="output_handler",
+            description="Output handler",
+            return_direct=True,
+        )
+
+    def run(self, output: str):
         if "medical" not in output.lower():
             raise InvalidOutput(invalid_output)
 
@@ -38,10 +44,10 @@ def fake_agent_take_next_step(
 @pytest.fixture
 def agent():
     agent = ReActToolCallingMotleyAgent(
-        tools=[MockTool()],
+        tools=[MockTool(), ReportOutputHandler()],
         verbose=True,
         chat_history=True,
-        output_handler=ReportOutputHandler(max_iterations=5),
+        force_output_handler=True,
     )
     agent.materialize()
     object.__setattr__(agent._agent, "plan", fake_agent_plan)
@@ -71,17 +77,20 @@ def run_kwargs(agent):
 
 def test_agent_plan(agent):
     agent_executor = agent.agent
-    agent_action = AgentAction("tool", "tool_input", "tool_log")
-    step = agent_executor.plan([], agent_action)
-    assert agent_action == step
+    agent_actions = [AgentAction("tool", "tool_input", "tool_log")]
+    step = agent_executor.plan([], agent_actions)
+    assert agent_actions == step
 
     return_values = {"output": "test_output"}
     agent_finish = AgentFinish(return_values=return_values, log="test_output")
 
     step = agent_executor.plan([], agent_finish)
     assert isinstance(step, AgentAction)
-    assert step.tool == agent._agent_finish_blocker_tool.name
-    assert step.tool_input == "test_output"
+    assert step.tool == agent._agent_error_tool.name
+    assert step.tool_input == {
+        "error_message": "You must call the `output_handler` tool to return the final output.",
+        "message": "test_output",
+    }
 
 
 def test_agent_take_next_step(agent, run_kwargs):
@@ -100,14 +109,3 @@ def test_agent_take_next_step(agent, run_kwargs):
     assert isinstance(step_result.return_values, dict)
     output_result = step_result.return_values.get("output")
     assert output_result == {"checked_output": input_data}
-
-
-def test_output_handler_max_iteration(agent, run_kwargs):
-    input_data = "Latest advancements in AI in 2024."
-    run_kwargs["inputs"] = input_data
-
-    with pytest.raises(OutputHandlerMaxIterationsExceeded):
-        for iteration in range(agent.output_handler.max_iterations + 1):
-            agent.agent._take_next_step(**run_kwargs)
-
-    assert iteration == agent.output_handler.max_iterations
diff --git a/tests/test_agents/test_llama_index_output_handler.py b/tests/test_agents/test_llama_index_output_handler.py
index fc98a1c3..f8fa72fb 100644
--- a/tests/test_agents/test_llama_index_output_handler.py
+++ b/tests/test_agents/test_llama_index_output_handler.py
@@ -5,9 +5,9 @@
 from langchain_core.tools import StructuredTool
 
 try:
+    from llama_index.core.agent.runner.base import TaskState
     from llama_index.core.agent.types import Task, TaskStep, TaskStepOutput
     from llama_index.core.chat_engine.types import AgentChatResponse
-    from llama_index.core.agent.runner.base import TaskState
 except ImportError:
     Task = None
     TaskStep = None
@@ -16,19 +16,22 @@
     TaskState = None
 
 from motleycrew.agents.llama_index import ReActLlamaIndexMotleyAgent
-from motleycrew.agents import MotleyOutputHandler
-from motleycrew.common.exceptions import (
-    InvalidOutput,
-    OutputHandlerMaxIterationsExceeded,
-)
+from motleycrew.common.exceptions import InvalidOutput
+from motleycrew.tools import MotleyTool
 from tests.test_agents import MockTool
 
-
 invalid_output = "Add more information about AI applications in medicine."
 
 
-class ReportOutputHandler(MotleyOutputHandler):
-    def handle_output(self, output: str):
+class ReportOutputHandler(MotleyTool):
+    def __init__(self):
+        """Register the handler as a tool named `output_handler` with `return_direct` set."""
+        handler_options = dict(
+            name="output_handler", description="Output handler", return_direct=True
+        )
+        super().__init__(**handler_options)
+
+    def run(self, output: str):
         if "medical" not in output.lower():
             raise InvalidOutput(invalid_output)
 
@@ -40,7 +43,7 @@ def fake_run_step(*args, **kwargs):
     output_handler = kwargs.get("output_handler")
     output_handler_input = kwargs.get("output_handler_input")
     if output_handler:
-        output_handler_result = output_handler._run(output_handler_input, config=None)
+        output_handler_result = output_handler.invoke(output_handler_input)
         task_step_output.output = AgentChatResponse(response=output_handler_result)
 
     return task_step_output
@@ -51,8 +54,8 @@ def agent():
 
     agent = ReActLlamaIndexMotleyAgent(
         description="Your goal is to uncover cutting-edge developments in AI and data science",
-        tools=[MockTool()],
-        output_handler=ReportOutputHandler(max_iterations=5),
+        tools=[MockTool(), ReportOutputHandler()],
+        force_output_handler=True,
         verbose=True,
     )
     agent.materialize()
@@ -112,20 +115,15 @@ def test_run_step(agent, task_data):
 
     assert task_step_output == cur_step_output
     assert not cur_step_output.is_last
-    assert cur_step_output.next_steps
 
     step_queue = agent._agent.state.get_step_queue(task.task_id)
     _task_step = step_queue.pop()
 
     assert _task_step.task_id == task.task_id
-    assert _task_step.input == "You must call the `{}` tool to return the output.".format(
-        agent.output_handler.name
-    )
+    assert _task_step.input == "You must call the `output_handler` tool to return the final output."
 
     # test direct output
-    output_handler = find_output_handler(agent)
-    if output_handler is None:
-        return
+    output_handler = ReportOutputHandler()
 
     # test wrong output
     output_handler_input = "Latest advancements in AI in 2024."
@@ -151,26 +149,3 @@ def test_run_step(agent, task_data):
     )
     assert hasattr(agent, "direct_output")
     assert agent.direct_output.output == {"checked_output": output_handler_input}
-
-
-def test_output_handler_max_iteration(agent, task_data):
-    if agent is None:
-        return
-
-    task, task_step_output = task_data
-
-    output_handler = find_output_handler(agent)
-    if output_handler is None:
-        return
-
-    output_handler_input = "Latest advancements in AI in 2024."
-    with pytest.raises(OutputHandlerMaxIterationsExceeded):
-        for iteration in range(agent.output_handler.max_iterations + 1):
-
-            agent._agent._run_step(
-                "",
-                task_step_output=task_step_output,
-                output_handler=output_handler,
-                output_handler_input=output_handler_input,
-            )
-    assert iteration == agent.output_handler.max_iterations
diff --git a/tests/test_tools/test_tool.py b/tests/test_tools/test_tool.py
index 87251db6..674d9f93 100644
--- a/tests/test_tools/test_tool.py
+++ b/tests/test_tools/test_tool.py
@@ -1,14 +1,13 @@
 import pytest
-
-from langchain_core.pydantic_v1 import BaseModel, Field
 from langchain.tools import BaseTool
+from langchain_core.pydantic_v1 import BaseModel, Field
 
 try:
     from crewai_tools import Tool as CrewAiTool
 except ImportError:
     CrewAiTool = None
 
-from motleycrew.tools import MotleyTool
+from motleycrew.tools import DirectOutput, MotleyTool
 
 
 @pytest.fixture
@@ -20,6 +19,9 @@ class MockToolInput(BaseModel):
 
 
 def mock_tool_function(mock_input: str):
+    if mock_input == "raise":  # sentinel value: lets a test make the tool fail on demand
+        raise ValueError("test")
+
     return mock_input
 
 
@@ -60,11 +62,37 @@ def crewai_tool(mock_tool_args_schema):
         name="mock_tool",
         description="mock_description",
         func=mock_tool_function,
-        args_schema=mock_tool_args_schema
+        args_schema=mock_tool_args_schema,
+    )
+
+
+@pytest.fixture
+def motley_agent(langchain_tool):
+    """Provide a mock ReAct tool-calling agent whose only tool is `langchain_tool`."""
+    from motleycrew.agents.langchain import ReActToolCallingMotleyAgent
+
+    agent_kwargs = {"name": "mock_agent", "description": "mock_description"}
+    return ReActToolCallingMotleyAgent(
+        **agent_kwargs, tools=[langchain_tool]
     )
 
 
 class TestMotleyTool:
+    def test_tool_return_direct(self, langchain_tool, mock_input):
+        """Invoking a `return_direct` tool must raise `DirectOutput` carrying the result."""
+        direct_tool = MotleyTool.from_supported_tool(langchain_tool, return_direct=True)
+        with pytest.raises(DirectOutput) as exc_info:
+            direct_tool.invoke(mock_input)
+        expected_output = mock_input.get("mock_input")
+        assert exc_info.value.output == expected_output
+
+    def test_tool_reflect_exception(self, langchain_tool):
+        """An exception listed in `exceptions_to_reflect` is returned as text, not raised."""
+        motley_tool = MotleyTool.from_supported_tool(
+            langchain_tool, exceptions_to_reflect=[ValueError]
+        )
+        assert motley_tool.invoke({"mock_input": "raise"}) == "ValueError: test"
+
     def test_langchain_tool_conversion(self, langchain_tool, mock_input):
         motley_tool = MotleyTool.from_supported_tool(langchain_tool)
         assert isinstance(motley_tool.tool, BaseTool)
@@ -100,13 +128,21 @@ def test_motley_tool_self_conversion(self, langchain_tool):
 
         assert motley_tool.name == motley_tool_2.name
 
+    def test_motley_agent_conversion(self, motley_agent):
+        """Wrapping an agent yields a tool that keeps the agent's name and description."""
+        motley_tool = MotleyTool.from_supported_tool(motley_agent)
+        assert isinstance(motley_tool.tool, BaseTool)
+        assert motley_tool.name == motley_agent.name
+        assert motley_tool.description == motley_agent.description
+
     def test_autogen_tool_conversion(self, langchain_tool, mock_input):
         motley_tool = MotleyTool.from_supported_tool(langchain_tool)
         assert isinstance(motley_tool.tool, BaseTool)
 
         converted_autogen_tool = motley_tool.to_autogen_tool()
-        assert converted_autogen_tool(mock_input.get("mock_input")) == motley_tool.invoke(mock_input)
-
+        autogen_result = converted_autogen_tool(mock_input.get("mock_input"))
+        direct_result = motley_tool.invoke(mock_input)
+        assert autogen_result == direct_result  # both call paths must agree on the output
 
     def test_crewai_tool_conversion(self, crewai_tool, mock_input):
         if crewai_tool is None:
@@ -119,8 +155,6 @@ def test_crewai_tool_conversion(self, crewai_tool, mock_input):
         assert isinstance(converted_crewai_tool, CrewAiTool)
         assert motley_tool.name == converted_crewai_tool.name
         assert crewai_tool.name == converted_crewai_tool.name
-        assert (
-                crewai_tool.description == converted_crewai_tool.description
-        )
+        assert crewai_tool.description == converted_crewai_tool.description
         assert crewai_tool.args_schema == converted_crewai_tool.args_schema
         assert crewai_tool.run(**mock_input) == converted_crewai_tool.run(**mock_input)
diff --git a/tests/test_tools/test_tool_chain.py b/tests/test_tools/test_tool_chain.py
index 80c2efeb..3d6a8002 100644
--- a/tests/test_tools/test_tool_chain.py
+++ b/tests/test_tools/test_tool_chain.py
@@ -7,6 +7,9 @@ class ToolMock:
     def invoke(self, input: dict, *args, **kwargs):
         return input
 
+    def _run(self, input: dict, *args, **kwargs):
+        return input  # echo the payload unchanged, mirroring invoke()
+
 
 @pytest.fixture
 def tools():