Commit f76c86e

Merge branch 'main' into o1

kl2806 authored Oct 15, 2024
2 parents 4375065 + cca1cb8

Showing 12 changed files with 184 additions and 35 deletions.
compose.yaml (18 changes: 10 additions & 8 deletions)

@@ -31,16 +31,18 @@ services:
       - LETTA_PG_PASSWORD=${LETTA_PG_PASSWORD:-letta}
       - LETTA_PG_HOST=pgvector_db
       - LETTA_PG_PORT=5432
-      - LETTA_LLM_ENDPOINT=${LETTA_LLM_ENDPOINT}
-      - LETTA_LLM_ENDPOINT_TYPE=${LETTA_LLM_ENDPOINT_TYPE}
-      - LETTA_LLM_MODEL=${LETTA_LLM_MODEL:-gpt-4}
-      - LETTA_LLM_CONTEXT_WINDOW=${LETTA_LLM_CONTEXT_WINDOW:-8192}
-      - LETTA_EMBEDDING_ENDPOINT=${LETTA_EMBEDDING_ENDPOINT}
-      - LETTA_EMBEDDING_ENDPOINT_TYPE=${LETTA_EMBEDDING_ENDPOINT_TYPE}
-      - LETTA_EMBEDDING_DIM=${LETTA_EMBEDDING_DIM:-1536}
-      - LETTA_EMBEDDING_MODEL=${LETTA_EMBEDDING_MODEL:-text-embedding-ada-002}
       - LETTA_DEBUG=True
       - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - GROQ_API_KEY=${GROQ_API_KEY}
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
+      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL}
+      - AZURE_API_KEY=${AZURE_API_KEY}
+      - AZURE_BASE_URL=${AZURE_BASE_URL}
+      - AZURE_API_VERSION=${AZURE_API_VERSION}
+      - GEMINI_API_KEY=${GEMINI_API_KEY}
+      - VLLM_API_BASE=${VLLM_API_BASE}
+      - OPENLLM_AUTH_TYPE=${OPENLLM_AUTH_TYPE}
+      - OPENLLM_API_KEY=${OPENLLM_API_KEY}
     #volumes:
     #- ./configs/server_config.yaml:/root/.letta/config # config file
     #- ~/.letta/credentials:/root/.letta/credentials # credentials file
dev-compose.yaml (21 changes: 12 additions & 9 deletions)

@@ -28,18 +28,21 @@ services:
- "8083:8083"
- "8283:8283"
environment:
- SERPAPI_API_KEY=${SERPAPI_API_KEY}
- LETTA_PG_DB=${LETTA_PG_DB:-letta}
- LETTA_PG_USER=${LETTA_PG_USER:-letta}
- LETTA_PG_PASSWORD=${LETTA_PG_PASSWORD:-letta}
- LETTA_PG_HOST=pgvector_db
- LETTA_PG_PORT=5432
- LETTA_LLM_ENDPOINT=${LETTA_LLM_ENDPOINT}
- LETTA_LLM_ENDPOINT_TYPE=${LETTA_LLM_ENDPOINT_TYPE}
- LETTA_LLM_MODEL=${LETTA_LLM_MODEL:-gpt-4}
- LETTA_LLM_CONTEXT_WINDOW=${LETTA_LLM_CONTEXT_WINDOW:-None}
- LETTA_EMBEDDING_ENDPOINT=${LETTA_EMBEDDING_ENDPOINT}
- LETTA_EMBEDDING_ENDPOINT_TYPE=${LETTA_EMBEDDING_ENDPOINT_TYPE}
- LETTA_EMBEDDING_DIM=${LETTA_EMBEDDING_DIM:-None}
- LETTA_EMBEDDING_MODEL=${LETTA_EMBEDDING_MODEL}
- LETTA_DEBUG=True
- OPENAI_API_KEY=${OPENAI_API_KEY}
- SERPAPI_API_KEY=${SERPAPI_API_KEY}
- GROQ_API_KEY=${GROQ_API_KEY}
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
- OLLAMA_BASE_URL=${OLLAMA_BASE_URL}
- AZURE_API_KEY=${AZURE_API_KEY}
- AZURE_BASE_URL=${AZURE_BASE_URL}
- AZURE_API_VERSION=${AZURE_API_VERSION}
- GEMINI_API_KEY=${GEMINI_API_KEY}
- VLLM_API_BASE=${VLLM_API_BASE}
- OPENLLM_AUTH_TYPE=${OPENLLM_AUTH_TYPE}
- OPENLLM_API_KEY=${OPENLLM_API_KEY}
examples/composio_tool_usage.py (14 changes: 9 additions & 5 deletions)

@@ -2,6 +2,8 @@
 import uuid
 
 from letta import create_client
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import ChatMemory
 from letta.schemas.tool import Tool
 
@@ -10,6 +12,8 @@
"""
# Create a `LocalClient` (you can also use a `RESTClient`, see the letta_rest_client.py example)
client = create_client()
client.set_default_llm_config(LLMConfig.default_config("gpt-4o-mini"))
client.set_default_embedding_config(EmbeddingConfig.default_config(provider="openai"))

# Generate uuid for agent name for this example
namespace = uuid.NAMESPACE_DNS
@@ -59,18 +63,18 @@ def main():
"""

# Create an agent
agent_state = client.create_agent(name=agent_uuid, memory=ChatMemory(human="My name is Matt.", persona=persona), tools=[tool.name])
print(f"Created agent: {agent_state.name} with ID {str(agent_state.id)}")
agent = client.create_agent(name=agent_uuid, memory=ChatMemory(human="My name is Matt.", persona=persona), tools=[tool.name])
print(f"Created agent: {agent.name} with ID {str(agent.id)}")

# Send a message to the agent
send_message_response = client.user_message(agent_id=agent_state.id, message="Star a repo composio with owner composiohq on GitHub")
send_message_response = client.user_message(agent_id=agent.id, message="Star a repo composio with owner composiohq on GitHub")
for message in send_message_response.messages:
response_json = json.dumps(message.model_dump(), indent=4)
print(f"{response_json}\n")

# Delete agent
client.delete_agent(agent_id=agent_state.id)
print(f"Deleted agent: {agent_state.name} with ID {str(agent_state.id)}")
client.delete_agent(agent_id=agent.id)
print(f"Deleted agent: {agent.name} with ID {str(agent.id)}")


if __name__ == "__main__":
letta/__init__.py (2 changes: 1 addition & 1 deletion)

@@ -1,4 +1,4 @@
-__version__ = "0.4.1"
+__version__ = "0.5.0"
 
 # import clients
 from letta.client.admin import Admin
letta/agent.py (75 changes: 70 additions & 5 deletions)

@@ -23,14 +23,15 @@
 from letta.interface import AgentInterface
 from letta.llm_api.helpers import is_context_overflow_error
 from letta.llm_api.llm_api_tools import create
+from letta.local_llm.utils import num_tokens_from_messages
 from letta.memory import ArchivalMemory, RecallMemory, summarize_messages
 from letta.metadata import MetadataStore
 from letta.persistence_manager import LocalStateManager
 from letta.schemas.agent import AgentState, AgentStepResponse
 from letta.schemas.block import Block
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import MessageRole, OptionState
-from letta.schemas.memory import Memory
+from letta.schemas.memory import ContextWindowOverview, Memory
 from letta.schemas.message import Message, UpdateMessage
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.schemas.openai.chat_completion_response import (
@@ -951,10 +952,9 @@ def inner_step(
                 self.summarize_messages_inplace()
 
                 # Try step again
-                print("Retrying step after summarizing")
-                assert isinstance(self.agent_state.memory, Memory), f"Memory object is not of type Memory: {type(self.agent_state.memory)}"
-                return self.step(
-                    user_message,
+
+                return self.inner_step(
+                    messages=messages,
                     first_message=first_message,
                     first_message_retry_limit=first_message_retry_limit,
                     skip_verify=skip_verify,
@@ -1424,6 +1424,71 @@ def retry_message(self) -> List[Message]:
         assert all(isinstance(msg, Message) for msg in messages), "step() returned non-Message objects"
         return messages
 
+    def get_context_window(self) -> ContextWindowOverview:
+        """Get the context window of the agent"""
+
+        system_prompt = self.agent_state.system  # TODO is this the current system or the initial system?
+        num_tokens_system = count_tokens(system_prompt)
+        core_memory = self.memory.compile()
+        num_tokens_core_memory = count_tokens(core_memory)
+
+        # conversion of messages to OpenAI dict format, which is passed to the token counter
+        messages_openai_format = self.messages
+
+        # Check if there's a summary message in the message queue
+        if (
+            len(self._messages) > 1
+            and self._messages[1].role == MessageRole.user
+            and isinstance(self._messages[1].text, str)
+            # TODO remove hardcoding
+            and "The following is a summary of the previous " in self._messages[1].text
+        ):
+            # Summary message exists
+            assert self._messages[1].text is not None
+            summary_memory = self._messages[1].text
+            num_tokens_summary_memory = count_tokens(self._messages[1].text)
+            # with a summary message, the real messages start at index 2
+            num_tokens_messages = (
+                num_tokens_from_messages(messages=messages_openai_format[2:], model=self.model) if len(messages_openai_format) > 2 else 0
+            )
+
+        else:
+            summary_memory = None
+            num_tokens_summary_memory = 0
+            # with no summary message, the real messages start at index 1
+            num_tokens_messages = (
+                num_tokens_from_messages(messages=messages_openai_format[1:], model=self.model) if len(messages_openai_format) > 1 else 0
+            )
+
+        num_archival_memory = self.persistence_manager.archival_memory.storage.size()
+        num_recall_memory = self.persistence_manager.recall_memory.storage.size()
+        external_memory_summary = compile_memory_metadata_block(
+            memory_edit_timestamp=get_utc_time(),  # dummy timestamp
+            archival_memory=self.persistence_manager.archival_memory,
+            recall_memory=self.persistence_manager.recall_memory,
+        )
+        num_tokens_external_memory_summary = count_tokens(external_memory_summary)
+
+        return ContextWindowOverview(
+            # context window breakdown (in messages)
+            num_messages=len(self._messages),
+            num_archival_memory=num_archival_memory,
+            num_recall_memory=num_recall_memory,
+            num_tokens_external_memory_summary=num_tokens_external_memory_summary,
+            # top-level information
+            context_window_size_max=self.agent_state.llm_config.context_window,
+            context_window_size_current=num_tokens_system + num_tokens_core_memory + num_tokens_summary_memory + num_tokens_messages,
+            # context window breakdown (in tokens)
+            num_tokens_system=num_tokens_system,
+            system_prompt=system_prompt,
+            num_tokens_core_memory=num_tokens_core_memory,
+            core_memory=core_memory,
+            num_tokens_summary_memory=num_tokens_summary_memory,
+            summary_memory=summary_memory,
+            num_tokens_messages=num_tokens_messages,
+            messages=self._messages,
+        )
+
 
 def save_agent(agent: Agent, ms: MetadataStore):
     """Save agent to metadata store"""
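A minimal usage sketch for the new method, assuming a loaded Agent instance named `agent` (the variable name is illustrative; server-side callers obtain one via _get_or_load_agent). Per the construction above, the four per-component token counts should sum to context_window_size_current:

overview = agent.get_context_window()

print(f"context window: {overview.context_window_size_current} / {overview.context_window_size_max} tokens")
print(f"  system prompt:  {overview.num_tokens_system} tokens")
print(f"  core memory:    {overview.num_tokens_core_memory} tokens")
print(f"  summary memory: {overview.num_tokens_summary_memory} tokens")
print(f"  messages:       {overview.num_tokens_messages} tokens ({overview.num_messages} messages)")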
letta/schemas/llm_config.py (1 change: 1 addition & 0 deletions)

@@ -33,6 +33,7 @@ class LLMConfig(BaseModel):
"koboldcpp",
"vllm",
"hugging-face",
"mistral",
] = Field(..., description="The endpoint type for the model.")
model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
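With "mistral" accepted as an endpoint type, an LLMConfig can now describe a Mistral-backed model. A minimal sketch; the model name, endpoint URL, and context window below are illustrative values, not defaults shipped by this commit:

from letta.schemas.llm_config import LLMConfig

# Only model_endpoint_type="mistral" is new here; the other values are examples.
config = LLMConfig(
    model="mistral-large-latest",
    model_endpoint_type="mistral",
    model_endpoint="https://api.mistral.ai/v1",
    context_window=32000,
)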
letta/schemas/memory.py (37 changes: 37 additions & 0 deletions)

@@ -8,6 +8,43 @@
     from letta.agent import Agent
 
 from letta.schemas.block import Block
+from letta.schemas.message import Message
 
+
+class ContextWindowOverview(BaseModel):
+    """
+    Overview of the context window, including the number of messages and tokens.
+    """
+
+    # top-level information
+    context_window_size_max: int = Field(..., description="The maximum amount of tokens the context window can hold.")
+    context_window_size_current: int = Field(..., description="The current number of tokens in the context window.")
+
+    # context window breakdown (in messages)
+    # (technically not in the context window, but useful to know)
+    num_messages: int = Field(..., description="The number of messages in the context window.")
+    num_archival_memory: int = Field(..., description="The number of messages in the archival memory.")
+    num_recall_memory: int = Field(..., description="The number of messages in the recall memory.")
+    num_tokens_external_memory_summary: int = Field(
+        ..., description="The number of tokens in the external memory summary (archival + recall metadata)."
+    )
+
+    # context window breakdown (in tokens)
+    # this should all add up to context_window_size_current
+
+    num_tokens_system: int = Field(..., description="The number of tokens in the system prompt.")
+    system_prompt: str = Field(..., description="The content of the system prompt.")
+
+    num_tokens_core_memory: int = Field(..., description="The number of tokens in the core memory.")
+    core_memory: str = Field(..., description="The content of the core memory.")
+
+    num_tokens_summary_memory: int = Field(..., description="The number of tokens in the summary memory.")
+    summary_memory: Optional[str] = Field(None, description="The content of the summary memory.")
+
+    num_tokens_messages: int = Field(..., description="The number of tokens in the messages list.")
+    # TODO make list of messages?
+    # messages: List[dict] = Field(..., description="The messages in the context window.")
+    messages: List[Message] = Field(..., description="The messages in the context window.")
 
 class Memory(BaseModel, validate_assignment=True):
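The schema comment pins down an invariant worth keeping in mind when reviewing: the token-level fields are meant to sum to context_window_size_current (the external memory summary is tracked separately and not included in the sum). A hypothetical helper illustrating the check, not part of this commit:

from letta.schemas.memory import ContextWindowOverview

def tokens_add_up(overview: ContextWindowOverview) -> bool:
    # Mirrors the sum used in Agent.get_context_window()
    parts = (
        overview.num_tokens_system
        + overview.num_tokens_core_memory
        + overview.num_tokens_summary_memory
        + overview.num_tokens_messages
    )
    return parts == overview.context_window_size_current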
letta/server/rest_api/routers/v1/agents.py (15 changes: 15 additions & 0 deletions)

@@ -19,6 +19,7 @@
 from letta.schemas.memory import (
     ArchivalMemorySummary,
     BasicBlockMemory,
+    ContextWindowOverview,
     CreateArchivalMemory,
     Memory,
     RecallMemorySummary,
@@ -51,6 +52,20 @@ def list_agents(
     return server.list_agents(user_id=actor.id)
 
+
+@router.get("/{agent_id}/context", response_model=ContextWindowOverview, operation_id="get_agent_context_window")
+def get_agent_context_window(
+    agent_id: str,
+    server: "SyncServer" = Depends(get_letta_server),
+    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
+):
+    """
+    Retrieve the context window of a specific agent.
+    """
+    actor = server.get_user_or_default(user_id=user_id)
+
+    return server.get_agent_context_window(user_id=actor.id, agent_id=agent_id)
 
 @router.post("/", response_model=AgentState, operation_id="create_agent")
 def create_agent(
     agent: CreateAgent = Body(...),
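The new route can be exercised like any other v1 agents endpoint. A sketch using requests, assuming a local server on port 8283 (as in the compose files), the /v1/agents prefix, and placeholder ids:

import requests

# The user_id header is optional; the server falls back to the default user (see get_user_or_default).
resp = requests.get(
    "http://localhost:8283/v1/agents/agent-00000000/context",
    headers={"user_id": "user-00000000"},
)
resp.raise_for_status()
overview = resp.json()
print(overview["context_window_size_current"], "/", overview["context_window_size_max"])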
letta/server/server.py (24 changes: 23 additions & 1 deletion)

@@ -48,6 +48,7 @@
     AnthropicProvider,
     AzureProvider,
     GoogleAIProvider,
+    GroqProvider,
     LettaProvider,
     OllamaProvider,
     OpenAIProvider,
@@ -72,7 +73,12 @@
 from letta.schemas.job import Job
 from letta.schemas.letta_message import LettaMessage
 from letta.schemas.llm_config import LLMConfig
-from letta.schemas.memory import ArchivalMemorySummary, Memory, RecallMemorySummary
+from letta.schemas.memory import (
+    ArchivalMemorySummary,
+    ContextWindowOverview,
+    Memory,
+    RecallMemorySummary,
+)
 from letta.schemas.message import Message, MessageCreate, MessageRole, UpdateMessage
 from letta.schemas.organization import Organization, OrganizationCreate
 from letta.schemas.passage import Passage
@@ -297,6 +303,12 @@ def __init__(
                     base_url=model_settings.vllm_api_base,
                 )
             )
+        if model_settings.groq_api_key:
+            self._enabled_providers.append(
+                GroqProvider(
+                    api_key=model_settings.groq_api_key,
+                )
+            )
 
     def save_agents(self):
         """Saves all the agents that are in the in-memory object store"""
@@ -2031,3 +2043,13 @@ def add_llm_model(self, request: LLMConfig) -> LLMConfig:

     def add_embedding_model(self, request: EmbeddingConfig) -> EmbeddingConfig:
         """Add a new embedding model"""
+
+    def get_agent_context_window(
+        self,
+        user_id: str,
+        agent_id: str,
+    ) -> ContextWindowOverview:
+
+        # Get the current message
+        letta_agent = self._get_or_load_agent(agent_id=agent_id)
+        return letta_agent.get_context_window()
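Callers holding a SyncServer can go through the new wrapper instead of touching the agent object directly. A minimal sketch with placeholder ids, assuming an existing server instance:

# `server` is an existing SyncServer; the ids are placeholders.
overview = server.get_agent_context_window(
    user_id="user-00000000",
    agent_id="agent-00000000",
)
print(f"{overview.context_window_size_current}/{overview.context_window_size_max} tokens in context")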
(3 more changed files are not shown.)