Feature docs update #172

Merged
merged 12 commits
Oct 10, 2024
4 changes: 4 additions & 0 deletions council/llm/llm_base.py
@@ -23,6 +23,10 @@ def default_timeout(self) -> int:


class LLMResult:
"""
Represents a response from the LLM
"""

def __init__(
self,
choices: Sequence[str],
1 change: 1 addition & 0 deletions council/llm/llm_message.py
@@ -91,6 +91,7 @@ def __init__(self, content: str) -> None:

@staticmethod
def ephemeral() -> LLMCacheControlData:
"""Represents ephemeral cache type"""
return LLMCacheControlData(content="ephemeral")


21 changes: 21 additions & 0 deletions council/llm/llm_middleware.py
@@ -29,6 +29,7 @@ def kwargs(self) -> Any:

@staticmethod
def default(messages: Sequence[LLMMessage], **kwargs: Any) -> LLMRequest:
"""Creates a default LLMRequest with an empty context."""
return LLMRequest(LLMContext.empty(), messages, **kwargs)


@@ -52,25 +53,37 @@ def duration(self) -> float:

@staticmethod
def empty(request: LLMRequest) -> LLMResponse:
"""Creates an empty LLMResponse for a given request."""
return LLMResponse(request, None, -1.0)


ExecuteLLMRequest = Callable[[LLMRequest], LLMResponse]


class LLMMiddleware(Protocol):
"""
Protocol for defining LLM middleware.

Middleware can intercept and modify requests and responses between the client and the LLM, introducing custom logic.
"""

def __call__(self, llm: LLMBase, execute: ExecuteLLMRequest, request: LLMRequest) -> LLMResponse: ...


class LLMMiddlewareChain:
"""Manages a chain of LLM middlewares and executes requests through them."""

def __init__(self, llm: LLMBase, middlewares: Optional[Sequence[LLMMiddleware]] = None) -> None:
self._llm = llm
self._middlewares: list[LLMMiddleware] = list(middlewares) if middlewares else []

def add_middleware(self, middleware: LLMMiddleware) -> None:
"""Add middleware to a chain."""
self._middlewares.append(middleware)

def execute(self, request: LLMRequest) -> LLMResponse:
"""Execute middleware chain."""

def execute_request(r: LLMRequest) -> LLMResponse:
start = time.time()
result = self._llm.post_chat_request(r.context, request.messages, **r.kwargs)
@@ -93,6 +106,8 @@ def wrapped(request: LLMRequest) -> LLMResponse:


class LLMLoggingMiddleware:
"""Middleware for logging LLM requests and responses."""

def __call__(self, llm: LLMBase, execute: ExecuteLLMRequest, request: LLMRequest) -> LLMResponse:
request.context.logger.info(
f"Sending request with {len(request.messages)} message(s) to {llm.configuration.model_name()}"
@@ -106,6 +121,12 @@ def __call__(self, llm: LLMBase, execute: ExecuteLLMRequest, request: LLMRequest


class LLMRetryMiddleware:
"""
Middleware for implementing retry logic for LLM requests.

Attempts to retry failed requests a specified number of times with a delay between attempts.
"""

def __init__(self, retries: int, delay: float, exception_to_check: Optional[type[Exception]] = None) -> None:
self._retries = retries
self._delay = delay
10 changes: 5 additions & 5 deletions council/llm/llm_response_parser.py
@@ -57,10 +57,10 @@ def _try_create(cls: Type[T], **kwargs) -> T:


class CodeBlocksResponseParser(BaseModelResponseParser):
"""Parser for responses containing multiple named code blocks"""

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing multiple named code blocks"""
llm_response = response.value
parsed_blocks: Dict[str, Any] = {}

@@ -74,10 +74,10 @@ def from_response(cls: Type[T], response: LLMResponse) -> T:


class YAMLBlockResponseParser(BaseModelResponseParser):
"""Parser for responses containing a single YAML code block"""

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing a single YAML code block"""
llm_response = response.value

yaml_block = CodeParser.find_first("yaml", llm_response)
@@ -89,10 +89,10 @@ def from_response(cls: Type[T], response: LLMResponse) -> T:


class YAMLResponseParser(BaseModelResponseParser):
"""Parser for responses containing raw YAML content"""

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing raw YAML content"""
llm_response = response.value

yaml_content = YAMLResponseParser.parse(llm_response)
@@ -107,10 +107,10 @@ def parse(content: str) -> Dict[str, Any]:


class JSONBlockResponseParser(BaseModelResponseParser):
"""Parser for responses containing a single JSON code block"""

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing a single JSON code block"""
llm_response = response.value

json_block = CodeParser.find_first("json", llm_response)
@@ -122,10 +122,10 @@ def from_response(cls: Type[T], response: LLMResponse) -> T:


class JSONResponseParser(BaseModelResponseParser):
"""Parser for responses containing raw JSON content"""

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing raw JSON content"""
llm_response = response.value

json_content = JSONResponseParser.parse(llm_response)
2 changes: 1 addition & 1 deletion docs/source/reference/llm/gemini_llm.md
@@ -1,4 +1,4 @@
# AnthropicLLM
# GeminiLLM

```{eval-rst}
.. autoclasstree:: council.llm.GeminiLLM
2 changes: 1 addition & 1 deletion docs/source/reference/llm/gemini_llm_configuration.md
@@ -1,4 +1,4 @@
# AnthropicLLMConfiguration
# GeminiLLMConfiguration

```{eval-rst}
.. autoclass:: council.llm.GeminiLLMConfiguration
5 changes: 5 additions & 0 deletions docs/source/reference/llm/llm_base.rst
@@ -2,3 +2,8 @@ LLMBase
-------

.. autoclass:: council.llm.LLMBase

LLMResult
---------

.. autoclass:: council.llm.LLMResult
102 changes: 102 additions & 0 deletions docs/source/reference/llm/llm_function.rst
@@ -0,0 +1,102 @@
LLMFunction
-----------

.. autoclass:: council.llm.LLMFunction

Here's how you can use LLMFunction for a sample SQL generation task.

.. code-block:: python

from __future__ import annotations

import os

# !pip install council-ai==0.0.24

from council import OpenAILLM
from council.llm import LLMParsingException, LLMResponse
from council.llm.llm_function import LLMFunction
from council.utils.code_parser import CodeParser

SYSTEM_PROMPT = """
You are a SQL expert producing a SQL query to answer the user question.

# Instructions
- Assess whether the question is reasonable and possible to solve
given the database schema.
- Follow `Response format` for output format
- Always use LIMIT in your SQL query

# Dataset info

The dataset contains information about Airbnb listings.

Table Name: listings

### Columns
For each column, the name and data type are given as follows:
{name}: {data type}
name: TEXT
price: INTEGER

# Response format

Your entire response must be inside the following code blocks.
All code blocks are mandatory.

```solved
True/False, indicating whether the task is solved based on the provided database schema
```

```sql
SQL query answering the question if the task could be solved; leave empty otherwise
```
"""


# Define a response type object with from_response() method
class SQLResult:
def __init__(self, solved: bool, sql: str) -> None:
self.solved = solved
self.sql = sql

@staticmethod
def from_response(response: LLMResponse) -> SQLResult:
response_str = response.value
solved_block = CodeParser.find_first("solved", response_str)
if solved_block is None:
raise LLMParsingException("No `solved` code block found!")

solved = solved_block.code.lower() == "true"
if not solved:
return SQLResult(solved=False, sql="")

sql_block = CodeParser.find_first("sql", response_str)
if sql_block is None:
raise LLMParsingException("No `sql` code block found!")

sql = sql_block.code

if "limit" not in sql.lower():
raise LLMParsingException("Generated SQL query should contain a LIMIT clause")

return SQLResult(solved=True, sql=sql)


os.environ["OPENAI_API_KEY"] = "sk-YOUR-KEY-HERE"
os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18"
llm = OpenAILLM.from_env()

# Create a function based on LLM, response parser and system prompt
llm_function: LLMFunction[SQLResult] = LLMFunction(
llm, SQLResult.from_response, SYSTEM_PROMPT
)

# Execute a function with user input
response = llm_function.execute(
user_message="Show me first 5 rows of the dataset ordered by price"
)
print(type(response))
print(response.sql)

You can simplify this example with :class:`council.llm.llm_response_parser.CodeBlocksResponseParser`.
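For illustration, here's a rough sketch of that simplification, reusing ``llm`` and ``SYSTEM_PROMPT`` from the example above. It assumes ``CodeBlocksResponseParser`` populates pydantic-style fields from the code blocks with matching names; the class name ``SQLResultFromBlocks`` is ours, and the LIMIT-clause check from the hand-written parser is omitted.

.. code-block:: python

from council.llm.llm_response_parser import CodeBlocksResponseParser


# Each field is expected to be filled from the code block with the same name
class SQLResultFromBlocks(CodeBlocksResponseParser):
    solved: bool
    sql: str = ""  # default, since the `sql` block may be empty when unsolved


llm_function_blocks: LLMFunction[SQLResultFromBlocks] = LLMFunction(
    llm, SQLResultFromBlocks.from_response, SYSTEM_PROMPT
)

response = llm_function_blocks.execute(
    user_message="Show me first 5 rows of the dataset ordered by price"
)
print(response.sql)

With this approach, the block names in the system prompt (``solved``, ``sql``) double as the schema of the parsed result.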
4 changes: 4 additions & 0 deletions docs/source/reference/llm/llm_function_with_prompt.rst
@@ -0,0 +1,4 @@
LLMFunctionWithPrompt
---------------------

.. autoclass:: council.llm.LLMFunctionWithPrompt
49 changes: 49 additions & 0 deletions docs/source/reference/llm/llm_message.rst
@@ -2,3 +2,52 @@ LLMMessage
----------

.. autoclass:: council.llm.LLMMessage

LLMMessageData
--------------

.. autoclass:: council.llm.llm_message.LLMMessageData

LLMCacheControlData
-------------------

.. autoclass:: council.llm.llm_message.LLMCacheControlData
:no-inherited-members:

Here's how you can use Anthropic prompt caching with council.

.. code-block:: python

import os

# !pip install council-ai==0.0.24

from council.llm import AnthropicLLM
from council.llm.llm_message import LLMMessage, LLMCacheControlData
from council.contexts import LLMContext

os.environ["ANTHROPIC_API_KEY"] = "sk-YOUR-KEY-HERE"
os.environ["ANTHROPIC_LLM_MODEL"] = "claude-3-haiku-20240307"

# Ensure that the number of tokens in a cacheable message exceeds
# the minimum cacheable token count, which is 2048 for Haiku;
# otherwise, the message will not be cached.
HUGE_STATIC_SYSTEM_PROMPT = ""

# Create a system message with ephemeral caching
system_message_with_cache = LLMMessage.system_message(
HUGE_STATIC_SYSTEM_PROMPT,
data=[LLMCacheControlData.ephemeral()]
)

# Initialize the messages list with the cacheable system message
messages = [
system_message_with_cache,
LLMMessage.user_message("What are benefits of using caching?")
]

llm = AnthropicLLM.from_env()

result = llm.post_chat_request(LLMContext.empty(), messages)
print(result.first_choice)
print(result.raw_response["usage"])
29 changes: 29 additions & 0 deletions docs/source/reference/llm/llm_middleware.rst
@@ -0,0 +1,29 @@
LLMMiddleware
-------------

.. autoclass:: council.llm.LLMMiddleware

LLMMiddlewareChain
------------------

.. autoclass:: council.llm.LLMMiddlewareChain

LLMLoggingMiddleware
--------------------

.. autoclass:: council.llm.LLMLoggingMiddleware

LLMRetryMiddleware
------------------

.. autoclass:: council.llm.LLMRetryMiddleware

LLMRequest
-----------

.. autoclass:: council.llm.LLMRequest

LLMResponse
-----------

.. autoclass:: council.llm.LLMResponse
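
Here's a rough sketch of how these pieces could fit together. It assumes ``LLMLoggingMiddleware`` takes no constructor arguments and that ``LLMResponse.value`` returns the text of the first choice; the ``TimingMiddleware`` class, model name, and retry settings are illustrative only.

.. code-block:: python

import os

# !pip install council-ai==0.0.24

from council import OpenAILLM
from council.llm import (
    LLMBase,
    LLMLoggingMiddleware,
    LLMMessage,
    LLMMiddlewareChain,
    LLMRequest,
    LLMResponse,
    LLMRetryMiddleware,
)
from council.llm.llm_middleware import ExecuteLLMRequest

os.environ["OPENAI_API_KEY"] = "sk-YOUR-KEY-HERE"
os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18"
llm = OpenAILLM.from_env()


# A middleware only needs to be callable with (llm, execute, request);
# this one logs how long the downstream call took.
class TimingMiddleware:
    def __call__(self, llm: LLMBase, execute: ExecuteLLMRequest, request: LLMRequest) -> LLMResponse:
        response = execute(request)
        request.context.logger.info(f"LLM call took {response.duration:.2f}s")
        return response


# Build the chain: log requests and responses, retry failures up to 3 times
# with a 5 second delay between attempts, and time each call
llm_with_middlewares = LLMMiddlewareChain(llm, middlewares=[LLMLoggingMiddleware()])
llm_with_middlewares.add_middleware(LLMRetryMiddleware(retries=3, delay=5.0))
llm_with_middlewares.add_middleware(TimingMiddleware())

request = LLMRequest.default([LLMMessage.user_message("What is the capital of France?")])
response = llm_with_middlewares.execute(request)
print(response.value)  # assumed to hold the first choice of the LLM result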