Feature docs update #172

Merged
merged 12 commits
Oct 10, 2024
4 changes: 4 additions & 0 deletions council/llm/llm_base.py
@@ -23,6 +23,10 @@ def default_timeout(self) -> int:


class LLMResult:
"""
Represents a response from the LLM
"""

def __init__(
self,
choices: Sequence[str],
1 change: 1 addition & 0 deletions council/llm/llm_message.py
@@ -91,6 +91,7 @@ def __init__(self, content: str) -> None:

@staticmethod
def ephemeral() -> LLMCacheControlData:
"""Represents ephemeral cache type"""
return LLMCacheControlData(content="ephemeral")


21 changes: 21 additions & 0 deletions council/llm/llm_middleware.py
@@ -29,6 +29,7 @@ def kwargs(self) -> Any:

@staticmethod
def default(messages: Sequence[LLMMessage], **kwargs: Any) -> LLMRequest:
"""Creates a default LLMRequest with an empty context."""
return LLMRequest(LLMContext.empty(), messages, **kwargs)


@@ -52,25 +53,37 @@ def duration(self) -> float:

@staticmethod
def empty(request: LLMRequest) -> LLMResponse:
"""Creates an empty LLMResponse for a given request."""
return LLMResponse(request, None, -1.0)


ExecuteLLMRequest = Callable[[LLMRequest], LLMResponse]


class LLMMiddleware(Protocol):
"""
Protocol for defining LLM middleware.

Middleware can intercept and modify requests and responses between the client and the LLM, introducing custom logic.
"""

def __call__(self, llm: LLMBase, execute: ExecuteLLMRequest, request: LLMRequest) -> LLMResponse: ...


class LLMMiddlewareChain:
"""Manages a chain of LLM middlewares and executes requests through them."""

def __init__(self, llm: LLMBase, middlewares: Optional[Sequence[LLMMiddleware]] = None) -> None:
self._llm = llm
self._middlewares: list[LLMMiddleware] = list(middlewares) if middlewares else []

def add_middleware(self, middleware: LLMMiddleware) -> None:
"""Add middleware to a chain."""
self._middlewares.append(middleware)

def execute(self, request: LLMRequest) -> LLMResponse:
"""Execute middleware chain."""

def execute_request(r: LLMRequest) -> LLMResponse:
start = time.time()
result = self._llm.post_chat_request(r.context, request.messages, **r.kwargs)
@@ -93,6 +106,8 @@ def wrapped(request: LLMRequest) -> LLMResponse:


class LLMLoggingMiddleware:
"""Middleware for logging LLM requests and responses."""

def __call__(self, llm: LLMBase, execute: ExecuteLLMRequest, request: LLMRequest) -> LLMResponse:
request.context.logger.info(
f"Sending request with {len(request.messages)} message(s) to {llm.configuration.model_name()}"
@@ -106,6 +121,12 @@ def __call__(self, llm: LLMBase, execute: ExecuteLLMRequest, request: LLMRequest


class LLMRetryMiddleware:
"""
Middleware for implementing retry logic for LLM requests.

Attempts to retry failed requests a specified number of times with a delay between attempts.
"""

def __init__(self, retries: int, delay: float, exception_to_check: Optional[type[Exception]] = None) -> None:
self._retries = retries
self._delay = delay
10 changes: 5 additions & 5 deletions council/llm/llm_response_parser.py
@@ -57,10 +57,10 @@ def _try_create(cls: Type[T], **kwargs) -> T:


class CodeBlocksResponseParser(BaseModelResponseParser):
"""Parser for responses containing multiple named code blocks"""

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing multiple named code blocks"""
llm_response = response.value
parsed_blocks: Dict[str, Any] = {}

@@ -74,10 +74,10 @@ def from_response(cls: Type[T], response: LLMResponse) -> T:


class YAMLBlockResponseParser(BaseModelResponseParser):
"""Parser for responses containing a single YAML code block"""

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing a single YAML code block"""
llm_response = response.value

yaml_block = CodeParser.find_first("yaml", llm_response)
@@ -89,10 +89,10 @@ def from_response(cls: Type[T], response: LLMResponse) -> T:


class YAMLResponseParser(BaseModelResponseParser):
"""Parser for responses containing raw YAML content"""

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing raw YAML content"""
llm_response = response.value

yaml_content = YAMLResponseParser.parse(llm_response)
@@ -107,10 +107,10 @@ def parse(content: str) -> Dict[str, Any]:


class JSONBlockResponseParser(BaseModelResponseParser):
"""Parser for responses containing a single JSON code block"""

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing a single JSON code block"""
llm_response = response.value

json_block = CodeParser.find_first("json", llm_response)
@@ -122,10 +122,10 @@ def from_response(cls: Type[T], response: LLMResponse) -> T:


class JSONResponseParser(BaseModelResponseParser):
"""Parser for responses containing raw JSON content"""

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing raw JSON content"""
llm_response = response.value

json_content = JSONResponseParser.parse(llm_response)
2 changes: 1 addition & 1 deletion docs/source/reference/llm/gemini_llm.md
@@ -1,4 +1,4 @@
# AnthropicLLM
# GeminiLLM

```{eval-rst}
.. autoclasstree:: council.llm.GeminiLLM
2 changes: 1 addition & 1 deletion docs/source/reference/llm/gemini_llm_configuration.md
@@ -1,4 +1,4 @@
# AnthropicLLMConfiguration
# GeminiLLMConfiguration

```{eval-rst}
.. autoclass:: council.llm.GeminiLLMConfiguration
5 changes: 5 additions & 0 deletions docs/source/reference/llm/llm_base.rst
@@ -2,3 +2,8 @@ LLMBase
-------

.. autoclass:: council.llm.LLMBase

LLMResult
---------

.. autoclass:: council.llm.LLMResult
102 changes: 102 additions & 0 deletions docs/source/reference/llm/llm_function.rst
@@ -0,0 +1,102 @@
LLMFunction
-----------

.. autoclass:: council.llm.LLMFunction

Here's how you can use LLMFunction for a sample SQL generation task.

.. code-block:: python

from __future__ import annotations

import os

# !pip install council-ai==0.0.24

from council import OpenAILLM
from council.llm import LLMParsingException, LLMResponse
from council.llm.llm_function import LLMFunction
from council.utils.code_parser import CodeParser

SYSTEM_PROMPT = """
You are a SQL expert producing a SQL query to answer the user question.

# Instructions
- Assess whether the question is reasonable and possible to solve
given the database schema.
- Follow `Response format` for output format
- Always use LIMIT in your SQL query

# Dataset info

The dataset contains information about Airbnb listings.

Table Name: listings

### Columns
For each column, the name and data type are given as follows:
{name}: {data type}
name: TEXT
price: INTEGER

# Response format

Your entire response must be inside the following code blocks.
All code blocks are mandatory.

```solved
True/False, indicating whether the task is solved based on the provided database schema
```

```sql
SQL query answering the question if the task could be solved; leave empty otherwise
```
"""


# Define a response type object with from_response() method
class SQLResult:
def __init__(self, solved: bool, sql: str) -> None:
self.solved = solved
self.sql = sql

@staticmethod
def from_response(response: LLMResponse) -> SQLResult:
response_str = response.value
solved_block = CodeParser.find_first("solved", response_str)
if solved_block is None:
raise LLMParsingException("No `solved` code block found!")

solved = solved_block.code.lower() == "true"
if not solved:
return SQLResult(solved=False, sql="")

sql_block = CodeParser.find_first("sql", response_str)
if sql_block is None:
raise LLMParsingException("No `sql` code block found!")

sql = sql_block.code

if "limit" not in sql.lower():
raise LLMParsingException("Generated SQL query should contain a LIMIT clause")

return SQLResult(solved=True, sql=sql)


os.environ["OPENAI_API_KEY"] = "sk-YOUR-KEY-HERE"
os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18"
llm = OpenAILLM.from_env()

# Create a function based on LLM, response parser and system prompt
llm_function: LLMFunction[SQLResult] = LLMFunction(
llm, SQLResult.from_response, SYSTEM_PROMPT
)

# Execute a function with user input
response = llm_function.execute(
user_message="Show me first 5 rows of the dataset ordered by price"
)
print(type(response))
print(response.sql)

You can simplify this example with :class:`council.llm.llm_response_parser.CodeBlocksResponseParser`.
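For illustration, here's a rough sketch of that simplification, reusing ``llm`` and ``SYSTEM_PROMPT`` from the example above. It assumes ``CodeBlocksResponseParser`` populates pydantic-style fields from the code blocks with matching names; the class name ``SQLResultFromBlocks`` is ours, and the LIMIT-clause check from the hand-written parser is omitted.

.. code-block:: python

from council.llm.llm_response_parser import CodeBlocksResponseParser


# Each field is expected to be filled from the code block with the same name
class SQLResultFromBlocks(CodeBlocksResponseParser):
    solved: bool
    sql: str = ""  # default, since the `sql` block may be empty when unsolved


llm_function_blocks: LLMFunction[SQLResultFromBlocks] = LLMFunction(
    llm, SQLResultFromBlocks.from_response, SYSTEM_PROMPT
)

response = llm_function_blocks.execute(
    user_message="Show me first 5 rows of the dataset ordered by price"
)
print(response.sql)

With this approach, the block names in the system prompt (``solved``, ``sql``) double as the schema of the parsed result.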
4 changes: 4 additions & 0 deletions docs/source/reference/llm/llm_function_with_prompt.rst
@@ -0,0 +1,4 @@
LLMFunctionWithPrompt
---------------------

.. autoclass:: council.llm.LLMFunctionWithPrompt
49 changes: 49 additions & 0 deletions docs/source/reference/llm/llm_message.rst
@@ -2,3 +2,52 @@ LLMMessage
----------

.. autoclass:: council.llm.LLMMessage

LLMMessageData
--------------

.. autoclass:: council.llm.llm_message.LLMMessageData

LLMCacheControlData
-------------------

.. autoclass:: council.llm.llm_message.LLMCacheControlData
:no-inherited-members:

Here's how you can use Anthropic prompt caching with council.

.. code-block:: python

import os

# !pip install council-ai==0.0.24

from council.llm import AnthropicLLM
from council.llm.llm_message import LLMMessage, LLMCacheControlData
from council.contexts import LLMContext

os.environ["ANTHROPIC_API_KEY"] = "sk-YOUR-KEY-HERE"
os.environ["ANTHROPIC_LLM_MODEL"] = "claude-3-haiku-20240307"

# Ensure that the number of tokens in a cacheable message exceeds
# the minimum cacheable token count, which is 2048 for Haiku;
# otherwise, the message will not be cached.
HUGE_STATIC_SYSTEM_PROMPT = ""

# Create a system message with ephemeral caching
system_message_with_cache = LLMMessage.system_message(
HUGE_STATIC_SYSTEM_PROMPT,
data=[LLMCacheControlData.ephemeral()]
)

# Initialize the messages list with the cacheable system message
messages = [
system_message_with_cache,
LLMMessage.user_message("What are benefits of using caching?")
]

llm = AnthropicLLM.from_env()

result = llm.post_chat_request(LLMContext.empty(), messages)
print(result.first_choice)
print(result.raw_response["usage"])
29 changes: 29 additions & 0 deletions docs/source/reference/llm/llm_middleware.rst
@@ -0,0 +1,29 @@
LLMMiddleware
-------------

.. autoclass:: council.llm.LLMMiddleware

LLMMiddlewareChain
------------------

.. autoclass:: council.llm.LLMMiddlewareChain

LLMLoggingMiddleware
--------------------

.. autoclass:: council.llm.LLMLoggingMiddleware

LLMRetryMiddleware
------------------

.. autoclass:: council.llm.LLMRetryMiddleware

LLMRequest
-----------

.. autoclass:: council.llm.LLMRequest

LLMResponse
-----------

.. autoclass:: council.llm.LLMResponse
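
Here's a rough sketch of how these pieces could fit together. It assumes ``LLMLoggingMiddleware`` takes no constructor arguments and that ``LLMResponse.value`` returns the text of the first choice; the ``TimingMiddleware`` class, model name, and retry settings are illustrative only.

.. code-block:: python

import os

# !pip install council-ai==0.0.24

from council import OpenAILLM
from council.llm import (
    LLMBase,
    LLMLoggingMiddleware,
    LLMMessage,
    LLMMiddlewareChain,
    LLMRequest,
    LLMResponse,
    LLMRetryMiddleware,
)
from council.llm.llm_middleware import ExecuteLLMRequest

os.environ["OPENAI_API_KEY"] = "sk-YOUR-KEY-HERE"
os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18"
llm = OpenAILLM.from_env()


# A middleware only needs to be callable with (llm, execute, request);
# this one logs how long the downstream call took.
class TimingMiddleware:
    def __call__(self, llm: LLMBase, execute: ExecuteLLMRequest, request: LLMRequest) -> LLMResponse:
        response = execute(request)
        request.context.logger.info(f"LLM call took {response.duration:.2f}s")
        return response


# Build the chain: log requests and responses, retry failures up to 3 times
# with a 5 second delay between attempts, and time each call
llm_with_middlewares = LLMMiddlewareChain(llm, middlewares=[LLMLoggingMiddleware()])
llm_with_middlewares.add_middleware(LLMRetryMiddleware(retries=3, delay=5.0))
llm_with_middlewares.add_middleware(TimingMiddleware())

request = LLMRequest.default([LLMMessage.user_message("What is the capital of France?")])
response = llm_with_middlewares.execute(request)
print(response.value)  # assumed to hold the first choice of the LLM result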