Commit 582fdfe

Merge branch 'main' into feature-initial-response-parsers

Winston-503 committed Sep 19, 2024
2 parents 02b1761 + 1550bde
Showing 5 changed files with 109 additions and 69 deletions.
14 changes: 12 additions & 2 deletions council/llm/llm_base.py
@@ -1,5 +1,5 @@
 import abc
-from typing import Any, Final, Generic, Optional, Sequence, TypeVar
+from typing import Any, Dict, Final, Generic, Optional, Sequence, TypeVar
 
 from council.contexts import Consumption, LLMContext, Monitorable
 
@@ -23,9 +23,15 @@ def default_timeout(self) -> int:
 
 
 class LLMResult:
-    def __init__(self, choices: Sequence[str], consumptions: Optional[Sequence[Consumption]] = None) -> None:
+    def __init__(
+        self,
+        choices: Sequence[str],
+        consumptions: Optional[Sequence[Consumption]] = None,
+        raw_response: Optional[Dict[str, Any]] = None,
+    ) -> None:
         self._choices = list(choices)
         self._consumptions = list(consumptions) if consumptions is not None else []
+        self._raw_response = raw_response if raw_response is not None else {}
 
     @property
     def first_choice(self) -> str:
@@ -39,6 +45,10 @@ def choices(self) -> Sequence[str]:
     def consumptions(self) -> Sequence[Consumption]:
         return self._consumptions
 
+    @property
+    def raw_response(self) -> Dict[str, Any]:
+        return self._raw_response
+
 
 class LLMBase(Generic[T_Configuration], Monitorable, abc.ABC):
     """
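With these changes, LLMResult can carry the provider's full JSON payload alongside the parsed choices, and raw_response defaults to an empty dict so existing call sites keep working. A minimal usage sketch; the import path is inferred from the file layout, and the payload dict is illustrative rather than a captured response:

from council.llm.llm_base import LLMResult

# Illustrative payload; in practice this is the provider's decoded JSON body.
payload = {"id": "chatcmpl-123", "model": "gpt-4o-2024-05-13"}

result = LLMResult(choices=["hello"], raw_response=payload)
print(result.first_choice)           # "hello"
print(result.raw_response["model"])  # provider-specific fields stay accessible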
41 changes: 30 additions & 11 deletions council/llm/openai_chat_completions_llm.py
@@ -82,14 +82,25 @@ def from_dict(obj: Any) -> Usage:
 
 class OpenAIChatCompletionsResult:
 
-    def __init__(self, id: str, object: str, created: int, model: str, choices: List[Choice], usage: Usage) -> None:
+    def __init__(
+        self,
+        id: str,
+        object: str,
+        created: int,
+        model: str,
+        choices: List[Choice],
+        usage: Usage,
+        raw_response: Dict[str, Any],
+    ) -> None:
         self._id = id
         self._object = object
         self._usage = usage
         self._model = model
         self._choices = choices
         self._created = created
 
+        self._raw_response = raw_response
+
     @property
     def id(self) -> str:
         return self._id
@@ -106,6 +117,10 @@ def usage(self) -> Usage:
     def choices(self) -> Sequence[Choice]:
         return self._choices
 
+    @property
+    def raw_response(self) -> Dict[str, Any]:
+        return self._raw_response
+
     def to_consumptions(self) -> Sequence[Consumption]:
         return [
             Consumption(1, "call", f"{self.model}"),
@@ -115,14 +130,14 @@ def to_consumptions(self) -> Sequence[Consumption]:
         ]
 
     @staticmethod
-    def from_dict(obj: Any) -> OpenAIChatCompletionsResult:
-        _id = str(obj.get("id"))
-        _object = str(obj.get("object"))
-        _created = int(obj.get("created"))
-        _model = str(obj.get("model"))
-        _choices = [Choice.from_dict(y) for y in obj.get("choices")]
-        _usage = Usage.from_dict(obj.get("usage"))
-        return OpenAIChatCompletionsResult(_id, _object, _created, _model, _choices, _usage)
+    def from_response(response: Dict[str, Any]) -> OpenAIChatCompletionsResult:
+        _id = str(response.get("id"))
+        _object = str(response.get("object"))
+        _created = int(response.get("created", -1))
+        _model = str(response.get("model"))
+        _choices = [Choice.from_dict(y) for y in response.get("choices", [])]
+        _usage = Usage.from_dict(response.get("usage"))
+        return OpenAIChatCompletionsResult(_id, _object, _created, _model, _choices, _usage, response)
 
 
 class OpenAIChatCompletionsModel(LLMBase[ChatGPTConfigurationBase]):
@@ -153,14 +168,18 @@ def _post_chat_request(self, context: LLMContext, messages: Sequence[LLMMessage]
         context.logger.debug(
             f'message="Got chat GPT completions result from {self._name}" id="{r.id}" model="{r.model}" {r.usage}'
         )
-        return LLMResult(choices=[c.message.content for c in r.choices], consumptions=r.to_consumptions())
+        return LLMResult(
+            choices=[c.message.content for c in r.choices],
+            consumptions=r.to_consumptions(),
+            raw_response=r.raw_response,
+        )
 
     def _post_request(self, payload) -> OpenAIChatCompletionsResult:
         response = self._provider.__call__(payload)
         if response.status_code != httpx.codes.OK:
             raise LLMCallException(response.status_code, response.text, self._name)
 
-        return OpenAIChatCompletionsResult.from_dict(response.json())
+        return OpenAIChatCompletionsResult.from_response(response.json())
 
     def _build_payload(self, messages: Sequence[LLMMessage]):
         payload = self._configuration.build_default_payload()
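A sketch of the renamed parser in action; the response dict below is a trimmed, hand-written illustration of the chat completions JSON shape, assuming Choice.from_dict and Usage.from_dict accept the standard OpenAI fields:

# Hand-written example of a chat completions response body, trimmed for brevity.
response = {
    "id": "chatcmpl-abc",
    "object": "chat.completion",
    "created": 1726700000,
    "model": "gpt-4o-2024-05-13",
    "choices": [{"index": 0, "message": {"role": "assistant", "content": "Hi"}, "finish_reason": "stop"}],
    "usage": {"prompt_tokens": 5, "completion_tokens": 2, "total_tokens": 7},
}

result = OpenAIChatCompletionsResult.from_response(response)
assert result.raw_response is response  # the full payload is retained, not only the parsed fields

Note the defensive defaults: created falls back to -1 and choices to an empty list, so a payload missing those keys no longer raises a TypeError inside the parser.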
2 changes: 1 addition & 1 deletion council/llm/openai_chat_gpt_configuration.py
@@ -38,7 +38,7 @@ def __init__(self, api_key: str, api_host: str, model: str, timeout: Optional[int]
         """
         super().__init__()
         self._model = Parameter.string(
-            name="model", required=True, value=model, validator=prefix_any_validator(["gpt-", "ft:gpt-"])
+            name="model", required=True, value=model, validator=prefix_any_validator(["gpt-", "ft:gpt-", "o1-"])
         )
         self._timeout = Parameter.int(
             name="timeout", required=False, default=timeout or self.default_timeout, validator=greater_than_validator(0)
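The only change here widens the accepted model-name prefixes so o1 models pass validation. As a rough sketch of what prefix validation amounts to (the real prefix_any_validator lives in council's parameter utilities; its exact signature and error type are not shown in this diff):

# Hypothetical stand-in for council's prefix_any_validator, for illustration only.
def prefix_any_validator(prefixes):
    def validate(value: str) -> None:
        if not any(value.startswith(prefix) for prefix in prefixes):
            raise ValueError(f"{value!r} must start with one of {prefixes}")
    return validate

validate = prefix_any_validator(["gpt-", "ft:gpt-", "o1-"])
validate("o1-preview")  # accepted after this change
validate("gpt-4o")      # unchanged behavior for existing models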
111 changes: 58 additions & 53 deletions council/llm/openai_token_counter.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-from typing import List, Optional, Sequence
+from typing import List, Mapping, Optional, Sequence
 
 import tiktoken
 from tiktoken import Encoding
@@ -11,13 +11,62 @@
 logger = logging.getLogger(__name__)
 

class TokenInfo:
def __init__(self, *, tokens_limit: int, tokens_per_message: int, tokens_per_name: int) -> None:
self.tokens_limit = tokens_limit
self.tokens_per_message = tokens_per_message
self.tokens_per_name = tokens_per_name

@classmethod
def for_model(cls, model: str) -> Optional[TokenInfo]:
if model.startswith("gpt-3.5-turbo"):
return cls._for_gpt_35_family(model)
elif model.startswith("gpt-4"):
return cls._for_gpt_4_family(model)
elif model.startswith("gpt-4o"):
return cls._for_gpt_4o_family(model)
elif model.startswith("o1"):
return cls._for_o1_family(model)

return None

@staticmethod
def _for_gpt_35_family(model: str) -> TokenInfo:
tokens_limit = 4_096 if model == "gpt-3.5-turbo-instruct" else 16_385
return TokenInfo(tokens_limit=tokens_limit, tokens_per_message=3, tokens_per_name=1)

@staticmethod
def _for_gpt_4_family(model: str) -> TokenInfo:
tokens_limit = 8_192 if model in ["gpt-4-0613", "gpt-4-0314"] else 128_000
return TokenInfo(tokens_limit=tokens_limit, tokens_per_message=3, tokens_per_name=1)

@staticmethod
def _for_gpt_4o_family(model: str) -> TokenInfo:
return TokenInfo(tokens_limit=128_000, tokens_per_message=3, tokens_per_name=1)

@staticmethod
def _for_o1_family(model: str) -> TokenInfo:
return TokenInfo(tokens_limit=128_000, tokens_per_message=3, tokens_per_name=1)


 class OpenAITokenCounter(LLMessageTokenCounterBase):
     """
     See https://github.com/openai/openai-python/blob/main/chatml.md for information on
     how messages are converted to tokens.
     https://platform.openai.com/docs/models/overview for tokens
     """
 
+    LATEST_ALIASES: Mapping[str, str] = {
+        "gpt-3.5-turbo": "gpt-3.5-turbo-0125",
+        "gpt-4-turbo": "gpt-4-turbo-2024-04-09",
+        "gpt-4-turbo-preview": "gpt-4-0125-preview",
+        "gpt-4": "gpt-4-0613",
+        "gpt-4o": "gpt-4o-2024-05-13",
+        "gpt-4o-mini": "gpt-4o-mini-2024-07-18",
+        "o1-preview": "o1-preview-2024-09-12",
+        "o1-mini": "o1-mini-2024-09-12",
+    }
+
     def __init__(
         self, encoding: Encoding, model: str, limit: int = -1, tokens_per_message: int = 0, tokens_per_name: int = 0
     ) -> None:
@@ -115,63 +164,19 @@ def from_model(model: str) -> Optional[OpenAITokenCounter]:
             logger.warning(f"model {model} not found. Using cl100k_base encoding.")
             encoding = tiktoken.get_encoding("cl100k_base")
 
-        if model in {
-            "gpt-3.5-turbo-0301",
-            "gpt-3.5-turbo-0613",
-            "gpt-3.5-turbo-1106",
-            "gpt-3.5-turbo-16k-0613",
-        }:
-            tokens_limit = 16384 if ("-16k-" in model) or ("-1106" in model) else 4096
-            tokens_per_message = 3
-            tokens_per_name = 1
-        elif model in {
-            "gpt-4-0314",
-            "gpt-4-0613",
-            "gpt-4-32k-0314",
-            "gpt-4-32k-0613",
-        }:
-            tokens_limit = 32768 if "-32k-" in model else 8192
-            tokens_per_message = 3
-            tokens_per_name = 1
-        elif model in {
-            "gpt-4o-2024-05-13",
-            "gpt-4-1106-preview",
-            "gpt-4-0125-preview",
-            "gpt-4-turbo-2024-04-09",
-            "gpt-4-1106-vision-preview",
-        }:
-            tokens_limit = 128000
-            tokens_per_message = 3
-            tokens_per_name = 1
-        elif model == "gpt-3.5-turbo-0301":
-            tokens_limit = 4096
-            tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
-            tokens_per_name = -1  # if there's a name, the role is omitted
-        elif model == "gpt-3.5-turbo":
-            return OpenAITokenCounter._return_alias(model, "gpt-3.5-turbo-0613")
-        elif model == "gpt-3.5-turbo-16k":
-            return OpenAITokenCounter._return_alias(model, "gpt-3.5-turbo-16k-0613")
-        elif model == "gpt-4o":
-            return OpenAITokenCounter._return_alias(model, "gpt-4o-2024-05-13")
-        elif model == "gpt-4":
-            return OpenAITokenCounter._return_alias(model, "gpt-4-0613")
-        elif model == "gpt-4-turbo":
-            return OpenAITokenCounter._return_alias(model, "gpt-4-turbo-2024-04-09")
-        elif model == "gpt-4-32k":
-            return OpenAITokenCounter._return_alias(model, "gpt-4-32k-0613")
-        elif model == "gpt-4-turbo-preview":
-            return OpenAITokenCounter._return_alias(model, "gpt-4-1106-vision-preview")
-        elif model == "gpt-4-vision-preview":
-            return OpenAITokenCounter._return_alias(model, "gpt-4-0125-preview")
-        else:
+        if model in OpenAITokenCounter.LATEST_ALIASES:
+            return OpenAITokenCounter._return_alias(model, OpenAITokenCounter.LATEST_ALIASES[model])
 
+        info = TokenInfo.for_model(model)
+        if info is None:
+            return None
 
         return OpenAITokenCounter(
             encoding,
             model=model,
-            limit=tokens_limit,
-            tokens_per_message=tokens_per_message,
-            tokens_per_name=tokens_per_name,
+            limit=info.tokens_limit,
+            tokens_per_message=info.tokens_per_message,
+            tokens_per_name=info.tokens_per_name,
         )
 
     @staticmethod
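Taken together, the rewrite replaces the long if/elif chain with two table-driven steps: resolve a latest alias to its dated snapshot via LATEST_ALIASES, then look up limits through TokenInfo.for_model. A usage sketch, assuming the import path from the file layout and the token_limit property exercised by the tests below:

from council.llm.openai_token_counter import OpenAITokenCounter

counter = OpenAITokenCounter.from_model("o1-preview")  # resolves to o1-preview-2024-09-12
if counter is not None:
    print(counter.token_limit)  # 128000, per TokenInfo._for_o1_family

Supporting a new model family now means adding one entry to LATEST_ALIASES and one prefix branch to TokenInfo.for_model, instead of threading three parallel local variables through the chain.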
10 changes: 8 additions & 2 deletions tests/unit/llm/test_openai_token_counter.py
@@ -9,6 +9,7 @@ def test_token_counter_gpt_35(self):
         counter = OpenAITokenCounter.from_model(model)
         messages = self._get_messages()
 
+        self.assertEqual(counter.token_limit, 16385)
         self.assertEqual(129, counter.count_messages_token(messages))
 
     def test_token_counter_gpt_4_turbo(self):
@@ -21,6 +22,11 @@ def test_token_counter_gpt_4o(self):
         counter = OpenAITokenCounter.from_model(model)
         self.assertEqual(counter.token_limit, 128000)
 
+    def test_token_counter_o1(self):
+        model = "o1-preview"
+        counter = OpenAITokenCounter.from_model(model)
+        self.assertEqual(counter.token_limit, 128000)
+
     def test_token_counter_exception(self):
         model = "gpt-4"
         counter = OpenAITokenCounter.from_model(model)
@@ -33,7 +39,7 @@ def test_token_counter_exception(self):
         )
 
     def test_filter_first_messages(self):
-        model = "gpt-3.5-turbo"
+        model = "gpt-3.5-turbo-instruct"
         counter = OpenAITokenCounter.from_model(model)
         messages = self._get_messages()
 
@@ -45,7 +51,7 @@
         self.assertGreaterEqual(counter.token_limit - counter.count_messages_token(filtered), 4000)
 
     def test_filter_last_messages(self):
-        model = "gpt-3.5-turbo"
+        model = "gpt-3.5-turbo-instruct"
         counter = OpenAITokenCounter.from_model(model)
         messages = self._get_messages()
