Add integration test

letta-ai · Nov 16, 2024 · 2f8d349 · 2f8d349
1 parent bf13629
commit 2f8d349
Show file tree

Hide file tree

Showing 10 changed files with 114 additions and 44 deletions.
diff --git a/.github/workflows/test_anthropic.yml b/.github/workflows/test_anthropic.yml
@@ -29,7 +29,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_returns_valid_first_message
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_returns_valid_first_message
         echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -38,7 +38,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_returns_keyword
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_returns_keyword
         echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -47,7 +47,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_uses_external_tool
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_uses_external_tool
         echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -56,7 +56,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_recall_chat_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_recall_chat_memory
         echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -65,7 +65,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_archival_memory_retrieval
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_archival_memory_retrieval
         echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -74,7 +74,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_edit_core_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_edit_core_memory
         echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 

diff --git a/.github/workflows/test_azure.yml b/.github/workflows/test_azure.yml
@@ -31,7 +31,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_valid_first_message
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_returns_valid_first_message
         echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -41,7 +41,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_keyword
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_returns_keyword
         echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -51,7 +51,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_uses_external_tool
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_uses_external_tool
         echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -61,7 +61,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_recall_chat_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_recall_chat_memory
         echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -71,7 +71,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_archival_memory_retrieval
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_archival_memory_retrieval
         echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -81,7 +81,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_edit_core_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_edit_core_memory
         echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 

diff --git a/.github/workflows/test_groq.yml b/.github/workflows/test_groq.yml
@@ -29,7 +29,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_valid_first_message
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_returns_valid_first_message
         echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -38,7 +38,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_keyword
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_returns_keyword
         echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -47,7 +47,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_uses_external_tool
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_uses_external_tool
         echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -56,7 +56,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_recall_chat_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_recall_chat_memory
         echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -65,7 +65,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_archival_memory_retrieval
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_archival_memory_retrieval
         echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -74,7 +74,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_edit_core_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_edit_core_memory
         echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 

diff --git a/.github/workflows/test_memgpt_hosted.yml b/.github/workflows/test_memgpt_hosted.yml
@@ -23,9 +23,9 @@ jobs:
 
     - name: Test LLM endpoint
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_llm_endpoint_letta_hosted
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_llm_endpoint_letta_hosted
       continue-on-error: true
 
     - name: Test embedding endpoint
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_letta_hosted
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_letta_hosted
diff --git a/.github/workflows/test_ollama.yml b/.github/workflows/test_ollama.yml
@@ -34,11 +34,11 @@ jobs:
 
     - name: Test LLM endpoint
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_llm_endpoint_ollama
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_llm_endpoint_ollama
 
     - name: Test embedding endpoint
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_ollama
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_ollama
 
     - name: Test provider
       run: |

diff --git a/.github/workflows/test_openai.yml b/.github/workflows/test_openai.yml
@@ -29,53 +29,53 @@ jobs:
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_returns_valid_first_message
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_returns_valid_first_message
 
     - name: Test model sends message with keyword
       id: test_keyword_message
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_returns_keyword
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_returns_keyword
 
     - name: Test model uses external tool correctly
       id: test_external_tool
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_uses_external_tool
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_uses_external_tool
 
     - name: Test model recalls chat memory
       id: test_chat_memory
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_recall_chat_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_recall_chat_memory
 
     - name: Test model uses 'archival_memory_search' to find secret
       id: test_archival_memory_search
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_archival_memory_retrieval
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_archival_memory_retrieval
 
     - name: Test model uses 'archival_memory_insert' to insert archival memories
       id: test_archival_memory_insert
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_archival_memory_insert
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_archival_memory_insert
 
     - name: Test model can edit core memories
       id: test_core_memory
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_edit_core_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_edit_core_memory
 
     - name: Test embedding endpoint
       id: test_embedding_endpoint
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_openai
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_openai
diff --git a/letta/agent.py b/letta/agent.py
@@ -48,6 +48,7 @@
 from letta.schemas.usage import LettaUsageStatistics
 from letta.services.source_manager import SourceManager
 from letta.services.user_manager import UserManager
+from letta.streaming_interface import StreamingRefreshCLIInterface
 from letta.system import (
     get_heartbeat,
     get_initial_boot_messages,
@@ -229,7 +230,7 @@ def update_state(self) -> AgentState:
 class Agent(BaseAgent):
     def __init__(
         self,
-        interface: Optional[AgentInterface],
+        interface: Optional[Union[AgentInterface, StreamingRefreshCLIInterface]],
         # agents can be created from providing agent_state
         agent_state: AgentState,
         tools: List[Tool],

diff --git a/tests/configs/llm_model_configs/claude-3-opus.json b/tests/configs/llm_model_configs/claude-3-5-haiku.json
@@ -1,6 +1,6 @@
 {
     "context_window": 200000,
-    "model": "claude-3-opus-20240229",
+    "model": "claude-3-5-haiku-20241022",
     "model_endpoint_type": "anthropic",
     "model_endpoint": "https://api.anthropic.com/v1",
     "model_wrapper": null,

diff --git a/tests/integration_test_summarizer.py b/tests/integration_test_summarizer.py
@@ -0,0 +1,68 @@
+import json
+import os
+import uuid
+
+import pytest
+
+from letta import create_client
+from letta.agent import Agent
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.llm_config import LLMConfig
+from letta.streaming_interface import StreamingRefreshCLIInterface
+from tests.helpers.endpoints_helper import EMBEDDING_CONFIG_PATH
+from tests.helpers.utils import cleanup
+
+# constants
+LLM_CONFIG_DIR = "tests/configs/llm_model_configs"
+SUMMARY_KEY_PHRASE = "The following is a summary"
+
+
+@pytest.mark.parametrize(
+    "config_filename",
+    [
+        "openai-gpt-4o.json",
+        "azure-gpt-4o-mini.json",
+        "claude-3-5-haiku.json",
+        # "groq.json", TODO: Support groq, rate limiting currently makes it impossible to test
+        # "gemini-pro.json", TODO: Gemini is broken
+    ],
+)
+def test_summarizer(config_filename):
+    """Run a short conversation with the given LLM config, summarize it in place, and check for the summary phrase."""
+    # uuid5 is name-based, so each config file maps to the same agent name on every run
+    agent_name = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"integration-test-summarizer-{config_filename}"))
+
+    # Load the LLM and embedding configs; the with-block closes both files instead of leaking the handles
+    with open(os.path.join(LLM_CONFIG_DIR, config_filename), "r") as llm_f, open(EMBEDDING_CONFIG_PATH) as emb_f:
+        llm_config = LLMConfig(**json.load(llm_f))
+        embedding_config = EmbeddingConfig(**json.load(emb_f))
+
+    # Create client and clean up agents
+    client = create_client()
+    client.set_default_llm_config(llm_config)
+    client.set_default_embedding_config(embedding_config)
+    cleanup(client=client, agent_uuid=agent_name)
+
+    # Create agent
+    agent_state = client.create_agent(name=agent_name, llm_config=llm_config, embedding_config=embedding_config)
+    tools = [client.get_tool(client.get_tool_id(name=tool_name)) for tool_name in agent_state.tools]
+    letta_agent = Agent(interface=StreamingRefreshCLIInterface(), agent_state=agent_state, tools=tools, first_message_verify_mono=False)
+
+    # Make conversation
+    messages = [
+        "Did you know that honey never spoils? Archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still perfectly edible.",
+        "Octopuses have three hearts, and two of them stop beating when they swim.",
+    ]
+
+    for m in messages:
+        letta_agent.step_user_message(
+            user_message_str=m,
+            first_message=False,
+            skip_verify=False,
+            stream=False,
+            ms=client.server.ms,
+        )
+
+    # Summarize the history in place; the message at index 1 is expected to carry the summary text
+    letta_agent.summarize_messages_inplace(preserve_last_N_messages=False)
+    assert SUMMARY_KEY_PHRASE in letta_agent.messages[1]["content"], f"Test failed for config: {config_filename}"