From ea5f5a797b9e7c3fe36745c564e0f55754e4b377 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Thu, 10 Oct 2024 09:15:44 -0400 Subject: [PATCH 01/12] Fix Gemini titles --- docs/source/reference/llm/gemini_llm.md | 2 +- docs/source/reference/llm/gemini_llm_configuration.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/reference/llm/gemini_llm.md b/docs/source/reference/llm/gemini_llm.md index 3e5d2804..039f8bc4 100644 --- a/docs/source/reference/llm/gemini_llm.md +++ b/docs/source/reference/llm/gemini_llm.md @@ -1,4 +1,4 @@ -# AnthropicLLM +# GeminiLLM ```{eval-rst} .. autoclasstree:: council.llm.GeminiLLM diff --git a/docs/source/reference/llm/gemini_llm_configuration.md b/docs/source/reference/llm/gemini_llm_configuration.md index 12acb006..68d77268 100644 --- a/docs/source/reference/llm/gemini_llm_configuration.md +++ b/docs/source/reference/llm/gemini_llm_configuration.md @@ -1,4 +1,4 @@ -# AnthropicLLMConfiguration +# GeminiLLMConfiguration ```{eval-rst} .. autoclass:: council.llm.GeminiLLMConfiguration From 9eed493231ef3b6f95596584b2209800079cf6db Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Thu, 10 Oct 2024 09:32:14 -0400 Subject: [PATCH 02/12] Add LLMResult docs --- council/llm/llm_base.py | 4 ++++ docs/source/reference/llm/llm_base.rst | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/council/llm/llm_base.py b/council/llm/llm_base.py index b8de23b1..9e6e08d3 100644 --- a/council/llm/llm_base.py +++ b/council/llm/llm_base.py @@ -23,6 +23,10 @@ def default_timeout(self) -> int: class LLMResult: + """ + Represents a response from the LLM + """ + def __init__( self, choices: Sequence[str], diff --git a/docs/source/reference/llm/llm_base.rst b/docs/source/reference/llm/llm_base.rst index 736411bc..8740f06f 100644 --- a/docs/source/reference/llm/llm_base.rst +++ b/docs/source/reference/llm/llm_base.rst @@ -2,3 +2,8 @@ LLMBase ------- .. autoclass:: council.llm.LLMBase + +LLMResult +--------- + +.. autoclass:: council.llm.LLMResult From 0454745a5f5117f98c42944af074a7d7f2066537 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Thu, 10 Oct 2024 09:32:35 -0400 Subject: [PATCH 03/12] Update LLMMessage docs with Anthropic prompt caching example --- docs/source/reference/llm/llm_message.rst | 45 +++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/docs/source/reference/llm/llm_message.rst b/docs/source/reference/llm/llm_message.rst index 324b22db..156d447f 100644 --- a/docs/source/reference/llm/llm_message.rst +++ b/docs/source/reference/llm/llm_message.rst @@ -2,3 +2,48 @@ LLMMessage ---------- .. autoclass:: council.llm.LLMMessage + +LLMMessageData +-------------- + +.. autoclass:: council.llm.llm_message.LLMMessageData + +LLMCacheControlData +------------------- + +.. autoclass:: council.llm.llm_message.LLMCacheControlData + :no-inherited-members: + +Here's how you can use Anthropic prompt caching with council. + +.. 
testcode:: + + import os + + from council.llm import AnthropicLLM + from council.llm.llm_message import LLMMessage, LLMCacheControlData + from council.contexts import LLMContext + + os.environ["ANTHROPIC_API_KEY"] = "YOUR-KEY-HERE" + os.environ["ANTHROPIC_LLM_MODEL"] = "claude-3-haiku-20240307" + + # Create a system message with ephemeral caching + system_message_with_cache = LLMMessage.system_message( + HUGE_STATIC_SYSTEM_PROMPT, + data=[LLMCacheControlData.ephemeral()] + ) + # Ensure that the number of tokens in a cacheable message exceeds + # the minimum cacheable token count, which is 2048 for Haiku; + # otherwise, the message will not be cached. + + # Initialize the messages list with cachable system message + messages = [ + system_message_with_cache, + LLMMessage.user_message("What are benefits of using caching?") + ] + + llm = AnthropicLLM.from_env() + + result = llm.post_chat_request(LLMContext.empty(), messages) + print(result.first_choice) + print(result.raw_response["usage"]) From c0fa381ce3811ce235e54e51bbf7153ab5ff45f8 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Thu, 10 Oct 2024 09:49:28 -0400 Subject: [PATCH 04/12] Add docstring for LLMCacheControlData.ephemeral() --- council/llm/llm_message.py | 1 + 1 file changed, 1 insertion(+) diff --git a/council/llm/llm_message.py b/council/llm/llm_message.py index c30a0bdb..02834ee1 100644 --- a/council/llm/llm_message.py +++ b/council/llm/llm_message.py @@ -91,6 +91,7 @@ def __init__(self, content: str) -> None: @staticmethod def ephemeral() -> LLMCacheControlData: + """Represents ephemeral cache type""" return LLMCacheControlData(content="ephemeral") From dd0190a9817a07c84c62e96ee0cd4e7daab43ad4 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Thu, 10 Oct 2024 09:49:41 -0400 Subject: [PATCH 05/12] Add LLMMiddleware docs --- council/llm/llm_middleware.py | 21 ++++++++++++++ docs/source/reference/llm/llm_middleware.rst | 29 ++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 docs/source/reference/llm/llm_middleware.rst diff --git a/council/llm/llm_middleware.py b/council/llm/llm_middleware.py index c9f0384b..0693cd28 100644 --- a/council/llm/llm_middleware.py +++ b/council/llm/llm_middleware.py @@ -29,6 +29,7 @@ def kwargs(self) -> Any: @staticmethod def default(messages: Sequence[LLMMessage], **kwargs: Any) -> LLMRequest: + """Creates a default LLMRequest with an empty context.""" return LLMRequest(LLMContext.empty(), messages, **kwargs) @@ -52,6 +53,7 @@ def duration(self) -> float: @staticmethod def empty(request: LLMRequest) -> LLMResponse: + """Creates an empty LLMResponse for a given request.""" return LLMResponse(request, None, -1.0) @@ -59,18 +61,29 @@ def empty(request: LLMRequest) -> LLMResponse: class LLMMiddleware(Protocol): + """ + Protocol for defining LLM middleware. + + Middleware can intercept and modify requests and responses between the client and the LLM, introducing custom logic. + """ + def __call__(self, llm: LLMBase, execute: ExecuteLLMRequest, request: LLMRequest) -> LLMResponse: ... 
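+# Illustrative usage sketch for the middleware classes below (a sketch only,
+# assuming an OpenAI-compatible LLM configured through environment variables):
+#
+#     from council import OpenAILLM
+#     from council.llm import (
+#         LLMLoggingMiddleware,
+#         LLMMiddlewareChain,
+#         LLMRequest,
+#         LLMRetryMiddleware,
+#     )
+#     from council.llm.llm_message import LLMMessage
+#
+#     llm = OpenAILLM.from_env()
+#     # log every request/response and retry failed calls up to 3 times
+#     chain = LLMMiddlewareChain(llm, middlewares=[LLMLoggingMiddleware()])
+#     chain.add_middleware(LLMRetryMiddleware(retries=3, delay=2.0))
+#
+#     request = LLMRequest.default([LLMMessage.user_message("Hello!")])
+#     response = chain.execute(request)
+#     print(response.value)
+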
class LLMMiddlewareChain: + """Manages a chain of LLM middlewares and executes requests through them.""" + def __init__(self, llm: LLMBase, middlewares: Optional[Sequence[LLMMiddleware]] = None) -> None: self._llm = llm self._middlewares: list[LLMMiddleware] = list(middlewares) if middlewares else [] def add_middleware(self, middleware: LLMMiddleware) -> None: + """Add middleware to a chain.""" self._middlewares.append(middleware) def execute(self, request: LLMRequest) -> LLMResponse: + """Execute middleware chain.""" + def execute_request(r: LLMRequest) -> LLMResponse: start = time.time() result = self._llm.post_chat_request(r.context, request.messages, **r.kwargs) @@ -93,6 +106,8 @@ def wrapped(request: LLMRequest) -> LLMResponse: class LLMLoggingMiddleware: + """Middleware for logging LLM requests and responses.""" + def __call__(self, llm: LLMBase, execute: ExecuteLLMRequest, request: LLMRequest) -> LLMResponse: request.context.logger.info( f"Sending request with {len(request.messages)} message(s) to {llm.configuration.model_name()}" @@ -106,6 +121,12 @@ def __call__(self, llm: LLMBase, execute: ExecuteLLMRequest, request: LLMRequest class LLMRetryMiddleware: + """ + Middleware for implementing retry logic for LLM requests. + + Attempts to retry failed requests a specified number of times with a delay between attempts. + """ + def __init__(self, retries: int, delay: float, exception_to_check: Optional[type[Exception]] = None) -> None: self._retries = retries self._delay = delay diff --git a/docs/source/reference/llm/llm_middleware.rst b/docs/source/reference/llm/llm_middleware.rst new file mode 100644 index 00000000..62c7e499 --- /dev/null +++ b/docs/source/reference/llm/llm_middleware.rst @@ -0,0 +1,29 @@ +LLMMiddleware +------------- + +.. autoclass:: council.llm.LLMMiddleware + +LLMMiddlewareChain +------------------ + +.. autoclass:: council.llm.LLMMiddlewareChain + +LLMLoggingMiddleware +-------------------- + +.. autoclass:: council.llm.LLMLoggingMiddleware + +LLMRetryMiddleware +------------------ + +.. autoclass:: council.llm.LLMRetryMiddleware + +LLMRequest +----------- + +.. autoclass:: council.llm.LLMRequest + +LLMResponse +----------- + +.. autoclass:: council.llm.LLMResponse From 0c752d89c0208743e5b7acdb3b02138f7bd82b27 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Thu, 10 Oct 2024 12:46:58 -0400 Subject: [PATCH 06/12] Add LLMFunction docs --- docs/source/reference/llm/llm_function.rst | 100 ++++++++++++++++++ .../llm/llm_function_with_prompt.rst | 4 + 2 files changed, 104 insertions(+) create mode 100644 docs/source/reference/llm/llm_function.rst create mode 100644 docs/source/reference/llm/llm_function_with_prompt.rst diff --git a/docs/source/reference/llm/llm_function.rst b/docs/source/reference/llm/llm_function.rst new file mode 100644 index 00000000..5fd8838f --- /dev/null +++ b/docs/source/reference/llm/llm_function.rst @@ -0,0 +1,100 @@ +LLMFunction +----------- + +.. autoclass:: council.llm.LLMFunction + +Here's how you can use LLMFunction for a sample SQL generation task. + +.. testcode:: + + from __future__ import annotations + + import os + + from council import OpenAILLM + from council.llm import LLMParsingException, LLMResponse + from council.llm.llm_function import LLMFunction + from council.utils.code_parser import CodeParser + + SYSTEM_PROMPT = """ + You are a SQL expert producing SQL query to answer user question. + + # Instructions + - Assess whether the question is reasonable and possible to solve + given the database schema. 
+ - Follow `Response format` for output format + - Always use LIMIT in your SQL query + + # Dataset info + + The dataset contains information about Airbnb listings. + + Table Name: listings + + ### Columns + For each column, the name and data type are given as follows: + {name}: {data type} + name: TEXT + price: INTEGER + + # Response format + + Your entire response must be inside the following code blocks. + All code blocks are mandatory. + + ```solved + True/False, indicating whether the task is solved based on the provided database schema + ``` + + ```sql + SQL query answering the question if the task could be solved; leave empty otherwise + ``` + """ + + + # Define a response type object with from_response() method + class SQLResult: + def __init__(self, solved: bool, sql: str) -> None: + self.solved = solved + self.sql = sql + + @staticmethod + def from_response(response: LLMResponse) -> SQLResult: + response_str = response.value + solved_block = CodeParser.find_first("solved", response_str) + if solved_block is None: + raise LLMParsingException("No `solved` code block found!") + + solved = solved_block.code.lower() == "true" + if not solved: + return SQLResult(solved=False, sql="") + + sql_block = CodeParser.find_first("sql", response_str) + if sql_block is None: + raise LLMParsingException("No `sql` code block found!") + + sql = sql_block.code + + if "limit" not in sql.lower(): + raise LLMParsingException("Generated SQL query should contain a LIMIT clause") + + return SQLResult(solved=True, sql=sql) + + + os.environ["OPENAI_API_KEY"] = "YOUR-KEY-HERE" + os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18" + llm = OpenAILLM.from_env() + + # Create a function based on LLM, response parser and system prompt + llm_function: LLMFunction[SQLResult] = LLMFunction( + llm, SQLResult.from_response, SYSTEM_PROMPT + ) + + # Execute a function with user input + response = llm_function.execute( + user_message="Show me first 5 rows of the dataset ordered by price" + ) + print(type(response)) + print(response.sql) + +You can simplify this example with :class:`council.llm.llm_response_parser.CodeBlocksResponseParser`. diff --git a/docs/source/reference/llm/llm_function_with_prompt.rst b/docs/source/reference/llm/llm_function_with_prompt.rst new file mode 100644 index 00000000..650a6d2e --- /dev/null +++ b/docs/source/reference/llm/llm_function_with_prompt.rst @@ -0,0 +1,4 @@ +LLMFunctionWithPrompt +--------------------- + +.. 
autoclass:: council.llm.LLMFunctionWithPrompt From 15f2ad0fb660e7a07298ca74ab8f622da90b1128 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Thu, 10 Oct 2024 12:54:19 -0400 Subject: [PATCH 07/12] Add LLMResponseParsers docs --- council/llm/llm_response_parser.py | 10 +- .../reference/llm/llm_response_parser.rst | 170 ++++++++++++++++++ 2 files changed, 175 insertions(+), 5 deletions(-) create mode 100644 docs/source/reference/llm/llm_response_parser.rst diff --git a/council/llm/llm_response_parser.py b/council/llm/llm_response_parser.py index 2bef4337..96bda1be 100644 --- a/council/llm/llm_response_parser.py +++ b/council/llm/llm_response_parser.py @@ -57,10 +57,10 @@ def _try_create(cls: Type[T], **kwargs) -> T: class CodeBlocksResponseParser(BaseModelResponseParser): - """Parser for responses containing multiple named code blocks""" @classmethod def from_response(cls: Type[T], response: LLMResponse) -> T: + """LLMFunction ResponseParser for response containing multiple named code blocks""" llm_response = response.value parsed_blocks: Dict[str, Any] = {} @@ -74,10 +74,10 @@ def from_response(cls: Type[T], response: LLMResponse) -> T: class YAMLBlockResponseParser(BaseModelResponseParser): - """Parser for responses containing a single YAML code block""" @classmethod def from_response(cls: Type[T], response: LLMResponse) -> T: + """LLMFunction ResponseParser for response containing a single YAML code block""" llm_response = response.value yaml_block = CodeParser.find_first("yaml", llm_response) @@ -89,10 +89,10 @@ def from_response(cls: Type[T], response: LLMResponse) -> T: class YAMLResponseParser(BaseModelResponseParser): - """Parser for responses containing raw YAML content""" @classmethod def from_response(cls: Type[T], response: LLMResponse) -> T: + """LLMFunction ResponseParser for response containing raw YAML content""" llm_response = response.value yaml_content = YAMLResponseParser.parse(llm_response) @@ -107,10 +107,10 @@ def parse(content: str) -> Dict[str, Any]: class JSONBlockResponseParser(BaseModelResponseParser): - """Parser for responses containing a single JSON code block""" @classmethod def from_response(cls: Type[T], response: LLMResponse) -> T: + """LLMFunction ResponseParser for response containing a single JSON code block""" llm_response = response.value json_block = CodeParser.find_first("json", llm_response) @@ -122,10 +122,10 @@ def from_response(cls: Type[T], response: LLMResponse) -> T: class JSONResponseParser(BaseModelResponseParser): - """Parser for responses containing raw JSON content""" @classmethod def from_response(cls: Type[T], response: LLMResponse) -> T: + """LLMFunction ResponseParser for response containing raw JSON content""" llm_response = response.value json_content = JSONResponseParser.parse(llm_response) diff --git a/docs/source/reference/llm/llm_response_parser.rst b/docs/source/reference/llm/llm_response_parser.rst new file mode 100644 index 00000000..3bfc42aa --- /dev/null +++ b/docs/source/reference/llm/llm_response_parser.rst @@ -0,0 +1,170 @@ +BaseModelResponseParser +----------------------- + +.. autoclass:: council.llm.llm_response_parser.BaseModelResponseParser + :no-inherited-members: + :exclude-members: model_computed_fields, model_config, model_fields + +CodeBlocksResponseParser +------------------------ + +.. 
autoclass:: council.llm.llm_response_parser.CodeBlocksResponseParser + :no-inherited-members: + :exclude-members: model_computed_fields, model_config, model_fields + +Here's how you can simplify :class:`council.llm.LLMFunction` example for a sample SQL generation task. + +.. testcode:: + + import os + + from council import OpenAILLM + from council.llm import LLMParsingException + from council.llm.llm_function import LLMFunction + from council.llm.llm_response_parser import CodeBlocksResponseParser + + + # CodeBlocksResponseParser will provide from_response() automatically for you + class SQLResultFromCodeBlocks(CodeBlocksResponseParser): + solved: bool + sql: str + + def validator(self) -> None: + if "limit" not in self.sql.lower(): + raise LLMParsingException("Generated SQL query should contain a LIMIT clause") + + + os.environ["OPENAI_API_KEY"] = "YOUR-KEY-HERE" + os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18" + llm = OpenAILLM.from_env() + + # All the remaining code stays the same + llm_function: LLMFunction[SQLResultFromCodeBlocks] = LLMFunction( + llm, SQLResultFromCodeBlocks.from_response, SYSTEM_PROMPT # system prompt is the same + ) + + response = llm_function.execute( + user_message="Show me first 5 rows of the dataset ordered by price" + ) + print(type(response)) + print(response.sql) + +YAMLBlockResponseParser +----------------------- + +.. autoclass:: council.llm.llm_response_parser.YAMLBlockResponseParser + :no-inherited-members: + :exclude-members: model_computed_fields, model_config, model_fields + +Usage example: + +.. testcode:: + + import os + from typing import Literal + + from council import OpenAILLM + from council.llm.llm_function import LLMFunction + from council.llm.llm_response_parser import YAMLBlockResponseParser + from pydantic import Field + + SYSTEM_PROMPT = """ + Output RPG character info in the following YAML block: + + ```yaml + character_class: # character's class (Warrior, Mage, Rogue, Bard or Tech Support) + name: # character's name + description: # character's tragic backstory, 50 chars minimum + health: # character's health, integer, from 1 to 100 points + ``` + """ + + + class RPGCharacterFromYAMLBlock(YAMLBlockResponseParser): + name: str + character_class: Literal["Warrior", "Mage", "Rogue", "Bard", "Tech Support"] + description: str = Field(..., min_length=50) + health: int = Field(..., ge=1, le=100) + + + os.environ["OPENAI_API_KEY"] = "YOUR-KEY-HERE" + os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18" + llm = OpenAILLM.from_env() + + llm_function: LLMFunction[RPGCharacterFromYAMLBlock] = LLMFunction( + llm, RPGCharacterFromYAMLBlock.from_response, SYSTEM_PROMPT + ) + + character = llm_function.execute(user_message="Create some wise mage") + print(type(character)) + print(f"{character.name}, {character.character_class} ({character.health}/100 hp)") + print(character.description) + + +YAMLResponseParser +------------------ + +.. autoclass:: council.llm.llm_response_parser.YAMLResponseParser + :no-inherited-members: + :exclude-members: model_computed_fields, model_config, model_fields + +JSONBlockResponseParser +----------------------- + +.. autoclass:: council.llm.llm_response_parser.JSONBlockResponseParser + :no-inherited-members: + :exclude-members: model_computed_fields, model_config, model_fields + +JSONResponseParser +------------------ + +.. 
autoclass:: council.llm.llm_response_parser.JSONResponseParser + :no-inherited-members: + :exclude-members: model_computed_fields, model_config, model_fields + +Usage example with OpenAI json mode: + +.. testcode:: + + import os + from typing import Literal + + from council import OpenAILLM + from council.llm.llm_function import LLMFunction + from council.llm.llm_response_parser import JSONResponseParser + from pydantic import Field + + SYSTEM_PROMPT = """ + Output RPG character info in the following JSON format: + + { + character_class: # character's class (Warrior, Mage, Rogue, Bard or Tech Support) + name: # character's name + description: # character's tragic backstory, 50 chars minimum + health: # character's health, integer, from 1 to 100 points + } + """ + + + class RPGCharacterFromJSON(JSONResponseParser): + name: str + character_class: Literal["Warrior", "Mage", "Rogue", "Bard", "Tech Support"] + description: str = Field(..., min_length=50) + health: int = Field(..., ge=1, le=100) + + + os.environ["OPENAI_API_KEY"] = "YOUR-KEY-HERE" + os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18" + llm = OpenAILLM.from_env() + + llm_function: LLMFunction[RPGCharacterFromJSON] = LLMFunction( + llm, RPGCharacterFromJSON.from_response, SYSTEM_PROMPT + ) + + character = llm_function.execute( + user_message="Create some wise mage", + response_format={"type": "json_object"} # using OpenAI's json mode + ) + print(type(character)) + print(f"{character.name}, {character.character_class} ({character.health}/100 hp)") + print(character.description) From ab3c19859a0bb61550cd22d42aedc10b2a59d8d7 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Thu, 10 Oct 2024 14:00:29 -0400 Subject: [PATCH 08/12] Update key prefix --- docs/source/reference/llm/llm_function.rst | 2 +- docs/source/reference/llm/llm_message.rst | 2 +- docs/source/reference/llm/llm_response_parser.rst | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/reference/llm/llm_function.rst b/docs/source/reference/llm/llm_function.rst index 5fd8838f..cdda36b4 100644 --- a/docs/source/reference/llm/llm_function.rst +++ b/docs/source/reference/llm/llm_function.rst @@ -81,7 +81,7 @@ Here's how you can use LLMFunction for a sample SQL generation task. return SQLResult(solved=True, sql=sql) - os.environ["OPENAI_API_KEY"] = "YOUR-KEY-HERE" + os.environ["OPENAI_API_KEY"] = "sk-YOUR-KEY-HERE" os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18" llm = OpenAILLM.from_env() diff --git a/docs/source/reference/llm/llm_message.rst b/docs/source/reference/llm/llm_message.rst index 156d447f..3d9fb5a9 100644 --- a/docs/source/reference/llm/llm_message.rst +++ b/docs/source/reference/llm/llm_message.rst @@ -24,7 +24,7 @@ Here's how you can use Anthropic prompt caching with council. 
from council.llm.llm_message import LLMMessage, LLMCacheControlData from council.contexts import LLMContext - os.environ["ANTHROPIC_API_KEY"] = "YOUR-KEY-HERE" + os.environ["ANTHROPIC_API_KEY"] = "sk-YOUR-KEY-HERE" os.environ["ANTHROPIC_LLM_MODEL"] = "claude-3-haiku-20240307" # Create a system message with ephemeral caching diff --git a/docs/source/reference/llm/llm_response_parser.rst b/docs/source/reference/llm/llm_response_parser.rst index 3bfc42aa..0a72faf9 100644 --- a/docs/source/reference/llm/llm_response_parser.rst +++ b/docs/source/reference/llm/llm_response_parser.rst @@ -34,7 +34,7 @@ Here's how you can simplify :class:`council.llm.LLMFunction` example for a sampl raise LLMParsingException("Generated SQL query should contain a LIMIT clause") - os.environ["OPENAI_API_KEY"] = "YOUR-KEY-HERE" + os.environ["OPENAI_API_KEY"] = "sk-YOUR-KEY-HERE" os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18" llm = OpenAILLM.from_env() @@ -87,7 +87,7 @@ Usage example: health: int = Field(..., ge=1, le=100) - os.environ["OPENAI_API_KEY"] = "YOUR-KEY-HERE" + os.environ["OPENAI_API_KEY"] = "sk-YOUR-KEY-HERE" os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18" llm = OpenAILLM.from_env() @@ -153,7 +153,7 @@ Usage example with OpenAI json mode: health: int = Field(..., ge=1, le=100) - os.environ["OPENAI_API_KEY"] = "YOUR-KEY-HERE" + os.environ["OPENAI_API_KEY"] = "sk-YOUR-KEY-HERE" os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18" llm = OpenAILLM.from_env() From a92d67abe51c76cfad95761ef65512f39724cdb9 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Thu, 10 Oct 2024 14:24:09 -0400 Subject: [PATCH 09/12] Code fixes for examples --- docs/source/reference/llm/llm_message.rst | 8 +++++--- docs/source/reference/llm/llm_response_parser.rst | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/source/reference/llm/llm_message.rst b/docs/source/reference/llm/llm_message.rst index 3d9fb5a9..22a6add1 100644 --- a/docs/source/reference/llm/llm_message.rst +++ b/docs/source/reference/llm/llm_message.rst @@ -27,14 +27,16 @@ Here's how you can use Anthropic prompt caching with council. os.environ["ANTHROPIC_API_KEY"] = "sk-YOUR-KEY-HERE" os.environ["ANTHROPIC_LLM_MODEL"] = "claude-3-haiku-20240307" + # Ensure that the number of tokens in a cacheable message exceeds + # the minimum cacheable token count, which is 2048 for Haiku; + # otherwise, the message will not be cached. + HUGE_STATIC_SYSTEM_PROMPT = "" + # Create a system message with ephemeral caching system_message_with_cache = LLMMessage.system_message( HUGE_STATIC_SYSTEM_PROMPT, data=[LLMCacheControlData.ephemeral()] ) - # Ensure that the number of tokens in a cacheable message exceeds - # the minimum cacheable token count, which is 2048 for Haiku; - # otherwise, the message will not be cached. 
# Initialize the messages list with cachable system message messages = [ diff --git a/docs/source/reference/llm/llm_response_parser.rst b/docs/source/reference/llm/llm_response_parser.rst index 0a72faf9..4ea0c576 100644 --- a/docs/source/reference/llm/llm_response_parser.rst +++ b/docs/source/reference/llm/llm_response_parser.rst @@ -23,6 +23,8 @@ Here's how you can simplify :class:`council.llm.LLMFunction` example for a sampl from council.llm.llm_function import LLMFunction from council.llm.llm_response_parser import CodeBlocksResponseParser + SYSTEM_PROMPT = "same system prompt as in LLMFunction example" + # CodeBlocksResponseParser will provide from_response() automatically for you class SQLResultFromCodeBlocks(CodeBlocksResponseParser): @@ -40,7 +42,7 @@ Here's how you can simplify :class:`council.llm.LLMFunction` example for a sampl # All the remaining code stays the same llm_function: LLMFunction[SQLResultFromCodeBlocks] = LLMFunction( - llm, SQLResultFromCodeBlocks.from_response, SYSTEM_PROMPT # system prompt is the same + llm, SQLResultFromCodeBlocks.from_response, SYSTEM_PROMPT ) response = llm_function.execute( From dac37b61bd5fb38482fa38cbe55aeacc9b884364 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Thu, 10 Oct 2024 15:00:37 -0400 Subject: [PATCH 10/12] Change testcode to code-block and specify council version --- docs/source/reference/llm/llm_function.rst | 4 +++- docs/source/reference/llm/llm_message.rst | 4 +++- docs/source/reference/llm/llm_response_parser.rst | 12 +++++++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/source/reference/llm/llm_function.rst b/docs/source/reference/llm/llm_function.rst index cdda36b4..692016b1 100644 --- a/docs/source/reference/llm/llm_function.rst +++ b/docs/source/reference/llm/llm_function.rst @@ -5,12 +5,14 @@ LLMFunction Here's how you can use LLMFunction for a sample SQL generation task. -.. testcode:: +.. code-block:: python from __future__ import annotations import os + # !pip install council-ai==0.0.24 + from council import OpenAILLM from council.llm import LLMParsingException, LLMResponse from council.llm.llm_function import LLMFunction diff --git a/docs/source/reference/llm/llm_message.rst b/docs/source/reference/llm/llm_message.rst index 22a6add1..d6fcfc81 100644 --- a/docs/source/reference/llm/llm_message.rst +++ b/docs/source/reference/llm/llm_message.rst @@ -16,10 +16,12 @@ LLMCacheControlData Here's how you can use Anthropic prompt caching with council. -.. testcode:: +.. code-block:: python import os + # !pip install council-ai==0.0.24 + from council.llm import AnthropicLLM from council.llm.llm_message import LLMMessage, LLMCacheControlData from council.contexts import LLMContext diff --git a/docs/source/reference/llm/llm_response_parser.rst b/docs/source/reference/llm/llm_response_parser.rst index 4ea0c576..80c9c364 100644 --- a/docs/source/reference/llm/llm_response_parser.rst +++ b/docs/source/reference/llm/llm_response_parser.rst @@ -14,10 +14,12 @@ CodeBlocksResponseParser Here's how you can simplify :class:`council.llm.LLMFunction` example for a sample SQL generation task. -.. testcode:: +.. code-block:: python import os + # !pip install council-ai==0.0.24 + from council import OpenAILLM from council.llm import LLMParsingException from council.llm.llm_function import LLMFunction @@ -60,11 +62,13 @@ YAMLBlockResponseParser Usage example: -.. testcode:: +.. 
code-block:: python
 
     import os
     from typing import Literal
 
+    # !pip install council-ai==0.0.24
+
     from council import OpenAILLM
     from council.llm.llm_function import LLMFunction
     from council.llm.llm_response_parser import YAMLBlockResponseParser
@@ -126,11 +130,13 @@ JSONResponseParser
 
 Usage example with OpenAI json mode:
 
-.. testcode::
+.. code-block:: python
 
     import os
     from typing import Literal
 
+    # !pip install council-ai==0.0.24
+
     from council import OpenAILLM
     from council.llm.llm_function import LLMFunction
     from council.llm.llm_response_parser import JSONResponseParser

From 43e8c7d376ec5b15d691d9ff27d5881e2485b425 Mon Sep 17 00:00:00 2001
From: Nikolaiev Dmytro
Date: Thu, 10 Oct 2024 15:30:42 -0400
Subject: [PATCH 11/12] Update council/llm/llm_message.py

Co-authored-by: Guillaume Koch <39165367+gkoch78@users.noreply.github.com>
---
 council/llm/llm_message.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/council/llm/llm_message.py b/council/llm/llm_message.py
index 02834ee1..2b520aa0 100644
--- a/council/llm/llm_message.py
+++ b/council/llm/llm_message.py
@@ -91,7 +91,7 @@ def __init__(self, content: str) -> None:
 
     @staticmethod
     def ephemeral() -> LLMCacheControlData:
-        """Represents ephemeral cache type"""
+        """Returns ephemeral cache type"""
         return LLMCacheControlData(content="ephemeral")
 
 

From 57f28fc4ba0abfeff3bd741e27b50065b8ed6e55 Mon Sep 17 00:00:00 2001
From: Nikolaiev Dmytro
Date: Thu, 10 Oct 2024 15:31:06 -0400
Subject: [PATCH 12/12] Update docs/source/reference/llm/llm_message.rst

Co-authored-by: Guillaume Koch <39165367+gkoch78@users.noreply.github.com>
---
 docs/source/reference/llm/llm_message.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/reference/llm/llm_message.rst b/docs/source/reference/llm/llm_message.rst
index d6fcfc81..ae5e7470 100644
--- a/docs/source/reference/llm/llm_message.rst
+++ b/docs/source/reference/llm/llm_message.rst
@@ -29,7 +29,7 @@ Here's how you can use Anthropic prompt caching with council.
     os.environ["ANTHROPIC_API_KEY"] = "sk-YOUR-KEY-HERE"
     os.environ["ANTHROPIC_LLM_MODEL"] = "claude-3-haiku-20240307"
 
-    # Ensure that the number of tokens in a cacheable message exceeds
+    # The number of tokens in a cacheable message must exceed
     # the minimum cacheable token count, which is 2048 for Haiku;
     # otherwise, the message will not be cached.
     HUGE_STATIC_SYSTEM_PROMPT = ""