Commit 582fdfe

Merge branch 'main' into feature-initial-response-parsers

Winston-503 committed Sep 19, 2024
2 parents 02b1761 + 1550bde
Showing 5 changed files with 109 additions and 69 deletions.
14 changes: 12 additions & 2 deletions council/llm/llm_base.py
@@ -1,5 +1,5 @@
 import abc
-from typing import Any, Final, Generic, Optional, Sequence, TypeVar
+from typing import Any, Dict, Final, Generic, Optional, Sequence, TypeVar
 
 from council.contexts import Consumption, LLMContext, Monitorable
 
@@ -23,9 +23,15 @@ def default_timeout(self) -> int:
 
 
 class LLMResult:
-    def __init__(self, choices: Sequence[str], consumptions: Optional[Sequence[Consumption]] = None) -> None:
+    def __init__(
+        self,
+        choices: Sequence[str],
+        consumptions: Optional[Sequence[Consumption]] = None,
+        raw_response: Optional[Dict[str, Any]] = None,
+    ) -> None:
         self._choices = list(choices)
         self._consumptions = list(consumptions) if consumptions is not None else []
+        self._raw_response = raw_response if raw_response is not None else {}
 
     @property
     def first_choice(self) -> str:
@@ -39,6 +45,10 @@ def choices(self) -> Sequence[str]:
     def consumptions(self) -> Sequence[Consumption]:
         return self._consumptions
 
+    @property
+    def raw_response(self) -> Dict[str, Any]:
+        return self._raw_response
+
 
 class LLMBase(Generic[T_Configuration], Monitorable, abc.ABC):
     """
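With these changes, LLMResult can carry the provider's full JSON payload alongside the parsed choices, and raw_response defaults to an empty dict so existing call sites keep working. A minimal usage sketch; the import path is inferred from the file layout, and the payload dict is illustrative rather than a captured response:

from council.llm.llm_base import LLMResult

# Illustrative payload; in practice this is the provider's decoded JSON body.
payload = {"id": "chatcmpl-123", "model": "gpt-4o-2024-05-13"}

result = LLMResult(choices=["hello"], raw_response=payload)
print(result.first_choice)           # "hello"
print(result.raw_response["model"])  # provider-specific fields stay accessible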
41 changes: 30 additions & 11 deletions council/llm/openai_chat_completions_llm.py
@@ -82,14 +82,25 @@ def from_dict(obj: Any) -> Usage:
 
 class OpenAIChatCompletionsResult:
 
-    def __init__(self, id: str, object: str, created: int, model: str, choices: List[Choice], usage: Usage) -> None:
+    def __init__(
+        self,
+        id: str,
+        object: str,
+        created: int,
+        model: str,
+        choices: List[Choice],
+        usage: Usage,
+        raw_response: Dict[str, Any],
+    ) -> None:
         self._id = id
         self._object = object
         self._usage = usage
         self._model = model
         self._choices = choices
         self._created = created
 
+        self._raw_response = raw_response
+
     @property
     def id(self) -> str:
         return self._id
@@ -106,6 +117,10 @@ def usage(self) -> Usage:
     def choices(self) -> Sequence[Choice]:
         return self._choices
 
+    @property
+    def raw_response(self) -> Dict[str, Any]:
+        return self._raw_response
+
     def to_consumptions(self) -> Sequence[Consumption]:
         return [
             Consumption(1, "call", f"{self.model}"),
@@ -115,14 +130,14 @@ def to_consumptions(self) -> Sequence[Consumption]:
         ]
 
     @staticmethod
-    def from_dict(obj: Any) -> OpenAIChatCompletionsResult:
-        _id = str(obj.get("id"))
-        _object = str(obj.get("object"))
-        _created = int(obj.get("created"))
-        _model = str(obj.get("model"))
-        _choices = [Choice.from_dict(y) for y in obj.get("choices")]
-        _usage = Usage.from_dict(obj.get("usage"))
-        return OpenAIChatCompletionsResult(_id, _object, _created, _model, _choices, _usage)
+    def from_response(response: Dict[str, Any]) -> OpenAIChatCompletionsResult:
+        _id = str(response.get("id"))
+        _object = str(response.get("object"))
+        _created = int(response.get("created", -1))
+        _model = str(response.get("model"))
+        _choices = [Choice.from_dict(y) for y in response.get("choices", [])]
+        _usage = Usage.from_dict(response.get("usage"))
+        return OpenAIChatCompletionsResult(_id, _object, _created, _model, _choices, _usage, response)
 
 
 class OpenAIChatCompletionsModel(LLMBase[ChatGPTConfigurationBase]):
@@ -153,14 +168,18 @@ def _post_chat_request(self, context: LLMContext, messages: Sequence[LLMMessage]
         context.logger.debug(
             f'message="Got chat GPT completions result from {self._name}" id="{r.id}" model="{r.model}" {r.usage}'
         )
-        return LLMResult(choices=[c.message.content for c in r.choices], consumptions=r.to_consumptions())
+        return LLMResult(
+            choices=[c.message.content for c in r.choices],
+            consumptions=r.to_consumptions(),
+            raw_response=r.raw_response,
+        )
 
     def _post_request(self, payload) -> OpenAIChatCompletionsResult:
         response = self._provider.__call__(payload)
         if response.status_code != httpx.codes.OK:
             raise LLMCallException(response.status_code, response.text, self._name)
 
-        return OpenAIChatCompletionsResult.from_dict(response.json())
+        return OpenAIChatCompletionsResult.from_response(response.json())
 
     def _build_payload(self, messages: Sequence[LLMMessage]):
         payload = self._configuration.build_default_payload()
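A sketch of the renamed parser in action; the response dict below is a trimmed, hand-written illustration of the chat completions JSON shape, assuming Choice.from_dict and Usage.from_dict accept the standard OpenAI fields:

# Hand-written example of a chat completions response body, trimmed for brevity.
response = {
    "id": "chatcmpl-abc",
    "object": "chat.completion",
    "created": 1726700000,
    "model": "gpt-4o-2024-05-13",
    "choices": [{"index": 0, "message": {"role": "assistant", "content": "Hi"}, "finish_reason": "stop"}],
    "usage": {"prompt_tokens": 5, "completion_tokens": 2, "total_tokens": 7},
}

result = OpenAIChatCompletionsResult.from_response(response)
assert result.raw_response is response  # the full payload is retained, not only the parsed fields

Note the defensive defaults: created falls back to -1 and choices to an empty list, so a payload missing those keys no longer raises a TypeError inside the parser.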
2 changes: 1 addition & 1 deletion council/llm/openai_chat_gpt_configuration.py
@@ -38,7 +38,7 @@ def __init__(self, api_key: str, api_host: str, model: str, timeout: Optional[int]
         """
         super().__init__()
         self._model = Parameter.string(
-            name="model", required=True, value=model, validator=prefix_any_validator(["gpt-", "ft:gpt-"])
+            name="model", required=True, value=model, validator=prefix_any_validator(["gpt-", "ft:gpt-", "o1-"])
         )
         self._timeout = Parameter.int(
             name="timeout", required=False, default=timeout or self.default_timeout, validator=greater_than_validator(0)
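The only change here widens the accepted model-name prefixes so o1 models pass validation. As a rough sketch of what prefix validation amounts to (the real prefix_any_validator lives in council's parameter utilities; its exact signature and error type are not shown in this diff):

# Hypothetical stand-in for council's prefix_any_validator, for illustration only.
def prefix_any_validator(prefixes):
    def validate(value: str) -> None:
        if not any(value.startswith(prefix) for prefix in prefixes):
            raise ValueError(f"{value!r} must start with one of {prefixes}")
    return validate

validate = prefix_any_validator(["gpt-", "ft:gpt-", "o1-"])
validate("o1-preview")  # accepted after this change
validate("gpt-4o")      # unchanged behavior for existing models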
111 changes: 58 additions & 53 deletions council/llm/openai_token_counter.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-from typing import List, Optional, Sequence
+from typing import List, Mapping, Optional, Sequence
 
 import tiktoken
 from tiktoken import Encoding
@@ -11,13 +11,62 @@
 logger = logging.getLogger(__name__)
 

class TokenInfo:
def __init__(self, *, tokens_limit: int, tokens_per_message: int, tokens_per_name: int) -> None:
self.tokens_limit = tokens_limit
self.tokens_per_message = tokens_per_message
self.tokens_per_name = tokens_per_name

@classmethod
def for_model(cls, model: str) -> Optional[TokenInfo]:
if model.startswith("gpt-3.5-turbo"):
return cls._for_gpt_35_family(model)
elif model.startswith("gpt-4"):
return cls._for_gpt_4_family(model)
elif model.startswith("gpt-4o"):
return cls._for_gpt_4o_family(model)
elif model.startswith("o1"):
return cls._for_o1_family(model)

return None

@staticmethod
def _for_gpt_35_family(model: str) -> TokenInfo:
tokens_limit = 4_096 if model == "gpt-3.5-turbo-instruct" else 16_385
return TokenInfo(tokens_limit=tokens_limit, tokens_per_message=3, tokens_per_name=1)

@staticmethod
def _for_gpt_4_family(model: str) -> TokenInfo:
tokens_limit = 8_192 if model in ["gpt-4-0613", "gpt-4-0314"] else 128_000
return TokenInfo(tokens_limit=tokens_limit, tokens_per_message=3, tokens_per_name=1)

@staticmethod
def _for_gpt_4o_family(model: str) -> TokenInfo:
return TokenInfo(tokens_limit=128_000, tokens_per_message=3, tokens_per_name=1)

@staticmethod
def _for_o1_family(model: str) -> TokenInfo:
return TokenInfo(tokens_limit=128_000, tokens_per_message=3, tokens_per_name=1)


 class OpenAITokenCounter(LLMessageTokenCounterBase):
     """
     See https://github.com/openai/openai-python/blob/main/chatml.md for information on
     how messages are converted to tokens.
     https://platform.openai.com/docs/models/overview for tokens
     """
 
+    LATEST_ALIASES: Mapping[str, str] = {
+        "gpt-3.5-turbo": "gpt-3.5-turbo-0125",
+        "gpt-4-turbo": "gpt-4-turbo-2024-04-09",
+        "gpt-4-turbo-preview": "gpt-4-0125-preview",
+        "gpt-4": "gpt-4-0613",
+        "gpt-4o": "gpt-4o-2024-05-13",
+        "gpt-4o-mini": "gpt-4o-mini-2024-07-18",
+        "o1-preview": "o1-preview-2024-09-12",
+        "o1-mini": "o1-mini-2024-09-12",
+    }
+
     def __init__(
         self, encoding: Encoding, model: str, limit: int = -1, tokens_per_message: int = 0, tokens_per_name: int = 0
     ) -> None:
@@ -115,63 +164,19 @@ def from_model(model: str) -> Optional[OpenAITokenCounter]:
             logger.warning(f"model {model} not found. Using cl100k_base encoding.")
             encoding = tiktoken.get_encoding("cl100k_base")
 
-        if model in {
-            "gpt-3.5-turbo-0301",
-            "gpt-3.5-turbo-0613",
-            "gpt-3.5-turbo-1106",
-            "gpt-3.5-turbo-16k-0613",
-        }:
-            tokens_limit = 16384 if ("-16k-" in model) or ("-1106" in model) else 4096
-            tokens_per_message = 3
-            tokens_per_name = 1
-        elif model in {
-            "gpt-4-0314",
-            "gpt-4-0613",
-            "gpt-4-32k-0314",
-            "gpt-4-32k-0613",
-        }:
-            tokens_limit = 32768 if "-32k-" in model else 8192
-            tokens_per_message = 3
-            tokens_per_name = 1
-        elif model in {
-            "gpt-4o-2024-05-13",
-            "gpt-4-1106-preview",
-            "gpt-4-0125-preview",
-            "gpt-4-turbo-2024-04-09",
-            "gpt-4-1106-vision-preview",
-        }:
-            tokens_limit = 128000
-            tokens_per_message = 3
-            tokens_per_name = 1
-        elif model == "gpt-3.5-turbo-0301":
-            tokens_limit = 4096
-            tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
-            tokens_per_name = -1  # if there's a name, the role is omitted
-        elif model == "gpt-3.5-turbo":
-            return OpenAITokenCounter._return_alias(model, "gpt-3.5-turbo-0613")
-        elif model == "gpt-3.5-turbo-16k":
-            return OpenAITokenCounter._return_alias(model, "gpt-3.5-turbo-16k-0613")
-        elif model == "gpt-4o":
-            return OpenAITokenCounter._return_alias(model, "gpt-4o-2024-05-13")
-        elif model == "gpt-4":
-            return OpenAITokenCounter._return_alias(model, "gpt-4-0613")
-        elif model == "gpt-4-turbo":
-            return OpenAITokenCounter._return_alias(model, "gpt-4-turbo-2024-04-09")
-        elif model == "gpt-4-32k":
-            return OpenAITokenCounter._return_alias(model, "gpt-4-32k-0613")
-        elif model == "gpt-4-turbo-preview":
-            return OpenAITokenCounter._return_alias(model, "gpt-4-1106-vision-preview")
-        elif model == "gpt-4-vision-preview":
-            return OpenAITokenCounter._return_alias(model, "gpt-4-0125-preview")
-        else:
+        if model in OpenAITokenCounter.LATEST_ALIASES:
+            return OpenAITokenCounter._return_alias(model, OpenAITokenCounter.LATEST_ALIASES[model])
 
+        info = TokenInfo.for_model(model)
+        if info is None:
+            return None
 
         return OpenAITokenCounter(
             encoding,
             model=model,
-            limit=tokens_limit,
-            tokens_per_message=tokens_per_message,
-            tokens_per_name=tokens_per_name,
+            limit=info.tokens_limit,
+            tokens_per_message=info.tokens_per_message,
+            tokens_per_name=info.tokens_per_name,
         )
 
     @staticmethod
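Taken together, the rewrite replaces the long if/elif chain with two table-driven steps: resolve a latest alias to its dated snapshot via LATEST_ALIASES, then look up limits through TokenInfo.for_model. A usage sketch, assuming the import path from the file layout and the token_limit property exercised by the tests below:

from council.llm.openai_token_counter import OpenAITokenCounter

counter = OpenAITokenCounter.from_model("o1-preview")  # resolves to o1-preview-2024-09-12
if counter is not None:
    print(counter.token_limit)  # 128000, per TokenInfo._for_o1_family

Supporting a new model family now means adding one entry to LATEST_ALIASES and one prefix branch to TokenInfo.for_model, instead of threading three parallel local variables through the chain.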
10 changes: 8 additions & 2 deletions tests/unit/llm/test_openai_token_counter.py
@@ -9,6 +9,7 @@ def test_token_counter_gpt_35(self):
         counter = OpenAITokenCounter.from_model(model)
         messages = self._get_messages()
 
+        self.assertEqual(counter.token_limit, 16385)
         self.assertEqual(129, counter.count_messages_token(messages))
 
     def test_token_counter_gpt_4_turbo(self):
@@ -21,6 +22,11 @@ def test_token_counter_gpt_4o(self):
         counter = OpenAITokenCounter.from_model(model)
         self.assertEqual(counter.token_limit, 128000)
 
+    def test_token_counter_o1(self):
+        model = "o1-preview"
+        counter = OpenAITokenCounter.from_model(model)
+        self.assertEqual(counter.token_limit, 128000)
+
     def test_token_counter_exception(self):
         model = "gpt-4"
         counter = OpenAITokenCounter.from_model(model)
@@ -33,7 +39,7 @@ def test_token_counter_exception(self):
         )
 
     def test_filter_first_messages(self):
-        model = "gpt-3.5-turbo"
+        model = "gpt-3.5-turbo-instruct"
         counter = OpenAITokenCounter.from_model(model)
         messages = self._get_messages()
 
@@ -45,7 +51,7 @@
         self.assertGreaterEqual(counter.token_limit - counter.count_messages_token(filtered), 4000)
 
     def test_filter_last_messages(self):
-        model = "gpt-3.5-turbo"
+        model = "gpt-3.5-turbo-instruct"
         counter = OpenAITokenCounter.from_model(model)
         messages = self._get_messages()
