
Commit 9cd4ff0
use copy to split testing and training on crews (#1491)
* use copy to split testing and training on crews

* make tests handle new copy functionality on train and test

* fix last test

* fix test
bhancockio authored Oct 23, 2024
1 parent 4687779 commit 9cd4ff0
Showing 2 changed files with 45 additions and 68 deletions.
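
In broad strokes, the change makes train() and test() operate on a copy of the crew, so per-run mutations (the training-iteration counter, training setup, agent and task state) no longer leak into the original Crew instance. A minimal sketch of the pattern, using a hypothetical stripped-down Crew with deepcopy standing in for crewAI's real Crew.copy():

from copy import deepcopy
from typing import Any, Dict, Optional


class Crew:
    """Hypothetical, stripped-down stand-in for crewai.crew.Crew."""

    def __init__(self) -> None:
        self._train_iteration: Optional[int] = None

    def copy(self) -> "Crew":
        # crewAI's Crew.copy() is more selective; deepcopy is a stand-in here.
        return deepcopy(self)

    def kickoff(self, inputs: Optional[Dict[str, Any]] = None) -> None:
        pass  # would execute the crew's tasks

    def train(
        self, n_iterations: int, filename: str, inputs: Optional[Dict[str, Any]] = None
    ) -> None:
        train_crew = self.copy()  # mutate the copy, never the original
        for n_iteration in range(n_iterations):
            train_crew._train_iteration = n_iteration
            train_crew.kickoff(inputs=inputs)


crew = Crew()
crew.train(n_iterations=2, filename="trained_agents_data.pkl")
assert crew._train_iteration is None  # the caller's crew is untouched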
19 changes: 11 additions & 8 deletions src/crewai/crew.py
@@ -435,15 +435,16 @@ def train(
         self, n_iterations: int, filename: str, inputs: Optional[Dict[str, Any]] = {}
     ) -> None:
         """Trains the crew for a given number of iterations."""
-        self._setup_for_training(filename)
+        train_crew = self.copy()
+        train_crew._setup_for_training(filename)

         for n_iteration in range(n_iterations):
-            self._train_iteration = n_iteration
-            self.kickoff(inputs=inputs)
+            train_crew._train_iteration = n_iteration
+            train_crew.kickoff(inputs=inputs)

         training_data = CrewTrainingHandler(TRAINING_DATA_FILE).load()

-        for agent in self.agents:
+        for agent in train_crew.agents:
             result = TaskEvaluator(agent).evaluate_training_data(
                 training_data=training_data, agent_id=str(agent.id)
             )
@@ -987,17 +988,19 @@ def test(
         inputs: Optional[Dict[str, Any]] = None,
     ) -> None:
         """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures."""
-        self._test_execution_span = self._telemetry.test_execution_span(
-            self,
+        test_crew = self.copy()
+
+        self._test_execution_span = test_crew._telemetry.test_execution_span(
+            test_crew,
             n_iterations,
             inputs,
             openai_model_name,  # type: ignore[arg-type]
         )  # type: ignore[arg-type]
-        evaluator = CrewEvaluator(self, openai_model_name)  # type: ignore[arg-type]
+        evaluator = CrewEvaluator(test_crew, openai_model_name)  # type: ignore[arg-type]

         for i in range(1, n_iterations + 1):
             evaluator.set_iteration(i)
-            self.kickoff(inputs=inputs)
+            test_crew.kickoff(inputs=inputs)

         evaluator.print_crew_evaluation_result()

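Because train() and test() now kick off a copy rather than self, the tests below patch Crew.copy to hand back the very crew the test already holds, so the existing kickoff assertions still line up. A condensed, hypothetical version of that pattern (the real tests patch via the string path "crewai.crew.Crew.copy"):

from unittest import mock


class Crew:
    """Hypothetical stand-in used only for this sketch."""

    def copy(self) -> "Crew":
        return Crew()  # the real method returns an independent copy

    def kickoff(self, inputs=None) -> None:
        pass

    def train(self, n_iterations: int, inputs=None) -> None:
        train_crew = self.copy()
        for _ in range(n_iterations):
            train_crew.kickoff(inputs=inputs)


@mock.patch.object(Crew, "kickoff")
@mock.patch.object(Crew, "copy")
def test_train_kicks_off_the_copy(copy_mock, kickoff_mock):
    crew = Crew()
    copy_mock.return_value = crew  # copy() hands back the crew we hold
    crew.train(n_iterations=2, inputs={"topic": "AI"})
    kickoff_mock.assert_has_calls(
        [mock.call(inputs={"topic": "AI"}), mock.call(inputs={"topic": "AI"})]
    )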
94 changes: 34 additions & 60 deletions tests/crew_test.py
@@ -9,6 +9,7 @@
 import instructor
 import pydantic_core
 import pytest
+
 from crewai.agent import Agent
 from crewai.agents.cache import CacheHandler
 from crewai.crew import Crew
@@ -497,6 +498,7 @@ def multiplier(first_number: int, second_number: int) -> float:
 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_api_calls_throttling(capsys):
     from unittest.mock import patch
+
     from crewai_tools import tool

     @tool
@@ -779,11 +781,14 @@ def test_async_task_execution_call_count():
     list_important_history.output = mock_task_output
     write_article.output = mock_task_output

-    with patch.object(
-        Task, "execute_sync", return_value=mock_task_output
-    ) as mock_execute_sync, patch.object(
-        Task, "execute_async", return_value=mock_future
-    ) as mock_execute_async:
+    with (
+        patch.object(
+            Task, "execute_sync", return_value=mock_task_output
+        ) as mock_execute_sync,
+        patch.object(
+            Task, "execute_async", return_value=mock_future
+        ) as mock_execute_async,
+    ):
         crew.kickoff()

         assert mock_execute_async.call_count == 2
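
As an aside, the rewritten with block in this hunk uses the parenthesized multi-context-manager form, which is official syntax as of Python 3.10. A tiny self-contained illustration:

from contextlib import nullcontext

# Each context manager gets its own line; a trailing comma is allowed.
with (
    nullcontext("a") as first,
    nullcontext("b") as second,
):
    print(first, second)  # prints: a b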
@@ -1105,6 +1110,7 @@ def crew_callback(_):
 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_crew_function_calling_llm():
     from unittest.mock import patch
+
     from crewai_tools import tool

     llm = "gpt-4o"
@@ -1448,52 +1454,6 @@ def test_crew_does_not_interpolate_without_inputs():
     interpolate_task_inputs.assert_not_called()


-# def test_crew_partial_inputs():
-#     agent = Agent(
-#         role="{topic} Researcher",
-#         goal="Express hot takes on {topic}.",
-#         backstory="You have a lot of experience with {topic}.",
-#     )
-
-#     task = Task(
-#         description="Give me an analysis around {topic}.",
-#         expected_output="{points} bullet points about {topic}.",
-#     )
-
-#     crew = Crew(agents=[agent], tasks=[task], inputs={"topic": "AI"})
-#     inputs = {"topic": "AI"}
-#     crew._interpolate_inputs(inputs=inputs)  # Manual call for now
-
-#     assert crew.tasks[0].description == "Give me an analysis around AI."
-#     assert crew.tasks[0].expected_output == "{points} bullet points about AI."
-#     assert crew.agents[0].role == "AI Researcher"
-#     assert crew.agents[0].goal == "Express hot takes on AI."
-#     assert crew.agents[0].backstory == "You have a lot of experience with AI."
-
-
-# def test_crew_invalid_inputs():
-#     agent = Agent(
-#         role="{topic} Researcher",
-#         goal="Express hot takes on {topic}.",
-#         backstory="You have a lot of experience with {topic}.",
-#     )
-
-#     task = Task(
-#         description="Give me an analysis around {topic}.",
-#         expected_output="{points} bullet points about {topic}.",
-#     )
-
-#     crew = Crew(agents=[agent], tasks=[task], inputs={"subject": "AI"})
-#     inputs = {"subject": "AI"}
-#     crew._interpolate_inputs(inputs=inputs)  # Manual call for now
-
-#     assert crew.tasks[0].description == "Give me an analysis around {topic}."
-#     assert crew.tasks[0].expected_output == "{points} bullet points about {topic}."
-#     assert crew.agents[0].role == "{topic} Researcher"
-#     assert crew.agents[0].goal == "Express hot takes on {topic}."
-#     assert crew.agents[0].backstory == "You have a lot of experience with {topic}."
-
-
 def test_task_callback_on_crew():
     from unittest.mock import MagicMock, patch

@@ -1770,7 +1730,10 @@ def testing_tool(first_number: int, second_number: int) -> int:
@patch("crewai.crew.Crew.kickoff")
@patch("crewai.crew.CrewTrainingHandler")
@patch("crewai.crew.TaskEvaluator")
def test_crew_train_success(task_evaluator, crew_training_handler, kickoff):
@patch("crewai.crew.Crew.copy")
def test_crew_train_success(
copy_mock, task_evaluator, crew_training_handler, kickoff_mock
):
task = Task(
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
expected_output="5 bullet points with a paragraph for each idea.",
@@ -1781,9 +1744,19 @@ def test_crew_train_success(task_evaluator, crew_training_handler, kickoff):
         agents=[researcher, writer],
         tasks=[task],
     )
+
+    # Create a mock for the copied crew
+    copy_mock.return_value = crew
+
     crew.train(
         n_iterations=2, inputs={"topic": "AI"}, filename="trained_agents_data.pkl"
     )
+
+    # Ensure kickoff is called on the copied crew
+    kickoff_mock.assert_has_calls(
+        [mock.call(inputs={"topic": "AI"}), mock.call(inputs={"topic": "AI"})]
+    )
+
     task_evaluator.assert_has_calls(
         [
             mock.call(researcher),
@@ -1822,10 +1795,6 @@ def test_crew_train_success(task_evaluator, crew_training_handler, kickoff):
         ]
     )

-    kickoff.assert_has_calls(
-        [mock.call(inputs={"topic": "AI"}), mock.call(inputs={"topic": "AI"})]
-    )
-

 def test_crew_train_error():
     task = Task(
@@ -1840,7 +1809,7 @@ def test_crew_train_error():
     )

     with pytest.raises(TypeError) as e:
-        crew.train()
+        crew.train()  # type: ignore purposefully throwing err
     assert "train() missing 1 required positional argument: 'n_iterations'" in str(
         e
     )
@@ -2536,8 +2505,9 @@ def test_conditional_should_execute():


 @mock.patch("crewai.crew.CrewEvaluator")
+@mock.patch("crewai.crew.Crew.copy")
 @mock.patch("crewai.crew.Crew.kickoff")
-def test_crew_testing_function(mock_kickoff, crew_evaluator):
+def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
     task = Task(
         description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
         expected_output="5 bullet points with a paragraph for each idea.",
@@ -2548,11 +2518,15 @@ def test_crew_testing_function(mock_kickoff, crew_evaluator):
         agents=[researcher],
         tasks=[task],
     )
+
+    # Create a mock for the copied crew
+    copy_mock.return_value = crew
+
     n_iterations = 2
     crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})

-    assert len(mock_kickoff.mock_calls) == n_iterations
-    mock_kickoff.assert_has_calls(
+    # Ensure kickoff is called on the copied crew
+    kickoff_mock.assert_has_calls(
         [mock.call(inputs={"topic": "AI"}), mock.call(inputs={"topic": "AI"})]
     )

