Skip to content

Commit

Permalink
Add cross-model fallback support and error handling
Browse files Browse the repository at this point in the history
- Add model-agnostic fallback system in base ChatModel
- Implement error handling with retryable errors
- Add model tracking in responses
- Add comprehensive test suite for fallback behavior
- Update Gemini model to use new error handling system
  • Loading branch information
openhands-agent committed Dec 26, 2024
1 parent f32e8dc commit 0b36103
Show file tree
Hide file tree
Showing 21 changed files with 1,600 additions and 167 deletions.
126 changes: 120 additions & 6 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 13 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ repository = "https://github.com/wandb/wandbot"
include = ["src/**/*", "LICENSE", "README.md"]

[tool.poetry.dependencies]
python = ">=3.10.0,<=3.12.4"
python = ">=3.10.0,<=3.12.8"
numpy = "^1.26.1"
pandas = "^2.1.2"
pydantic-settings = "^2.0.3"
Expand All @@ -27,6 +27,8 @@ tree-sitter-languages = "^1.7.1"
markdownify = "^0.11.6"
uvicorn = "^0.24.0"
openai = "^1.3.2"
google-generativeai = ">=0.8.3"
anthropic = "^0.18.1"
weave = "^0.50.12"
colorlog = "^6.8.0"
litellm = "^1.15.1"
Expand All @@ -53,6 +55,16 @@ ragas = "^0.1.7"
dataclasses-json = "^0.6.4"
llama-index = "^0.10.30"


[tool.poetry.group.dev.dependencies]
pytest = "^8.3.4"

[tool.pytest.ini_options]
filterwarnings = [
'ignore:.*Type google._upb._message.*uses PyType_Spec.*:DeprecationWarning',
'ignore:.*custom tp_new.*:DeprecationWarning'
]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
Expand Down
4 changes: 2 additions & 2 deletions src/wandbot/chat/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class ChatConfig(BaseSettings):
english_reranker_model: str = "rerank-english-v2.0"
multilingual_reranker_model: str = "rerank-multilingual-v2.0"
# Response synthesis settings
response_synthesizer_model: str = "gpt-4-0125-preview"
response_synthesizer_model: str = "openai/gpt-4-0125-preview" # Format: provider/model_name
response_synthesizer_temperature: float = 0.1
response_synthesizer_fallback_model: str = "gpt-4-0125-preview"
response_synthesizer_fallback_model: str = "openai/gpt-4-0125-preview" # Format: provider/model_name
response_synthesizer_fallback_temperature: float = 0.1
6 changes: 6 additions & 0 deletions src/wandbot/chat/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .base import ChatModel
from .openai_model import OpenAIChatModel
from .gemini_model import GeminiChatModel
from .anthropic_model import AnthropicChatModel

__all__ = ["ChatModel", "OpenAIChatModel", "GeminiChatModel", "AnthropicChatModel"]
44 changes: 44 additions & 0 deletions src/wandbot/chat/models/anthropic_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from typing import List, Dict, Any

from anthropic import Anthropic

from .base import ChatModel

class AnthropicChatModel(ChatModel):
    """Chat model backed by Anthropic's Messages API.

    Accepts OpenAI-style message dicts ({"role": ..., "content": ...}) and
    translates them to the format Anthropic expects. Unlike OpenAI, the
    Anthropic API takes system instructions via the top-level ``system``
    parameter of ``messages.create`` rather than as a message with a
    "system" role, so system messages are extracted and passed separately.
    """

    def __init__(self, model_name: str = "claude-3-opus-20240229", temperature: float = 0.1):
        super().__init__(model_name, temperature)
        # Anthropic() reads the API key from the ANTHROPIC_API_KEY env var.
        self.client = Anthropic()

    def generate_response(
        self,
        messages: List[Dict[str, str]],
        max_tokens: int = 1000,
    ) -> Dict[str, Any]:
        """Generate a completion for the given conversation.

        Args:
            messages: OpenAI-style message dicts with "role" and "content".
                Roles other than system/user/assistant are ignored.
            max_tokens: Maximum number of tokens to generate.

        Returns:
            Dict with "content" plus token-usage counts
            ("total_tokens", "prompt_tokens", "completion_tokens").
        """
        # BUG FIX: system messages were previously converted to assistant-role
        # messages, which the Anthropic API does not treat as instructions
        # (and an assistant-first conversation is invalid). Pass them through
        # the dedicated `system` parameter instead.
        system_parts = [
            msg["content"] for msg in messages if msg["role"] == "system"
        ]
        anthropic_messages = [
            {"role": msg["role"], "content": msg["content"]}
            for msg in messages
            if msg["role"] in ("user", "assistant")
        ]

        request_kwargs: Dict[str, Any] = {
            "model": self.model_name,
            "messages": anthropic_messages,
            "temperature": self.temperature,
            "max_tokens": max_tokens,
        }
        if system_parts:
            # Multiple system messages are concatenated into one prompt.
            request_kwargs["system"] = "\n\n".join(system_parts)

        response = self.client.messages.create(**request_kwargs)

        return {
            "content": response.content[0].text,
            "total_tokens": response.usage.input_tokens + response.usage.output_tokens,
            "prompt_tokens": response.usage.input_tokens,
            "completion_tokens": response.usage.output_tokens,
        }

    @property
    def system_role_key(self) -> str:
        # Callers may still label instructions with the "system" role;
        # generate_response routes them to the API's `system` parameter.
        return "system"
Loading

0 comments on commit 0b36103

Please sign in to comment.