diff --git a/api/db/init_data.py b/api/db/init_data.py
index 0854602a0ba..74244959536 100644
--- a/api/db/init_data.py
+++ b/api/db/init_data.py
@@ -162,6 +162,11 @@ def init_superuser():
     "logo": "",
     "tags": "LLM,TEXT EMBEDDING",
     "status": "1",
+},{
+    "name": "Azure-OpenAI",
+    "logo": "",
+    "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
+    "status": "1",
 }
 # {
 #     "name": "文心一言",
@@ -646,6 +651,83 @@ def init_llm_factory():
         "max_tokens": 8192,
         "model_type": LLMType.EMBEDDING
     },
+    # ------------------------ Azure OpenAI -----------------------
+    # Please ensure the llm_name is the same as the deployment name in
+    # Azure OpenAI (e.g., azure-gpt-4o). And the llm_name must be
+    # different from the OpenAI llm_name
+    #
+    # Each model must be deployed in the Azure OpenAI service, otherwise,
+    # you will receive an error message 'The API deployment for
+    # this resource does not exist'
+    {
+        "fid": factory_infos[15]["name"],
+        "llm_name": "azure-gpt-4o",
+        "tags": "LLM,CHAT,128K",
+        "max_tokens": 128000,
+        "model_type": LLMType.CHAT.value + "," + LLMType.IMAGE2TEXT.value
+    }, {
+        "fid": factory_infos[15]["name"],
+        "llm_name": "azure-gpt-35-turbo",
+        "tags": "LLM,CHAT,4K",
+        "max_tokens": 4096,
+        "model_type": LLMType.CHAT.value
+    }, {
+        "fid": factory_infos[15]["name"],
+        "llm_name": "azure-gpt-35-turbo-16k",
+        "tags": "LLM,CHAT,16k",
+        "max_tokens": 16385,
+        "model_type": LLMType.CHAT.value
+    }, {
+        "fid": factory_infos[15]["name"],
+        "llm_name": "azure-text-embedding-ada-002",
+        "tags": "TEXT EMBEDDING,8K",
+        "max_tokens": 8191,
+        "model_type": LLMType.EMBEDDING.value
+    }, {
+        "fid": factory_infos[15]["name"],
+        "llm_name": "azure-text-embedding-3-small",
+        "tags": "TEXT EMBEDDING,8K",
+        "max_tokens": 8191,
+        "model_type": LLMType.EMBEDDING.value
+    }, {
+        "fid": factory_infos[15]["name"],
+        "llm_name": "azure-text-embedding-3-large",
+        "tags": "TEXT EMBEDDING,8K",
+        "max_tokens": 8191,
+        "model_type": LLMType.EMBEDDING.value
+    },{
+        "fid": factory_infos[15]["name"],
+        "llm_name": "azure-whisper-1",
+        "tags": "SPEECH2TEXT",
+        "max_tokens": 25 * 1024 * 1024,
+        "model_type": LLMType.SPEECH2TEXT.value
+    },
+    {
+        "fid": factory_infos[15]["name"],
+        "llm_name": "azure-gpt-4",
+        "tags": "LLM,CHAT,8K",
+        "max_tokens": 8191,
+        "model_type": LLMType.CHAT.value
+    }, {
+        "fid": factory_infos[15]["name"],
+        "llm_name": "azure-gpt-4-turbo",
+        "tags": "LLM,CHAT,8K",
+        "max_tokens": 8191,
+        "model_type": LLMType.CHAT.value
+    }, {
+        "fid": factory_infos[15]["name"],
+        "llm_name": "azure-gpt-4-32k",
+        "tags": "LLM,CHAT,32K",
+        "max_tokens": 32768,
+        "model_type": LLMType.CHAT.value
+    }, {
+        "fid": factory_infos[15]["name"],
+        "llm_name": "azure-gpt-4-vision-preview",
+        "tags": "LLM,CHAT,IMAGE2TEXT",
+        "max_tokens": 765,
+        "model_type": LLMType.IMAGE2TEXT.value
+    },
+    ]
 for info in factory_infos:
     try:
diff --git a/api/settings.py b/api/settings.py
index 78f3d231aaa..7dbf2f610ba 100644
--- a/api/settings.py
+++ b/api/settings.py
@@ -69,6 +69,12 @@
         "image2text_model": "gpt-4-vision-preview",
         "asr_model": "whisper-1",
     },
+    "Azure-OpenAI": {
+        "chat_model": "azure-gpt-35-turbo",
+        "embedding_model": "azure-text-embedding-ada-002",
+        "image2text_model": "azure-gpt-4-vision-preview",
+        "asr_model": "azure-whisper-1",
+    },
     "ZHIPU-AI": {
         "chat_model": "glm-3-turbo",
         "embedding_model": "embedding-2",
diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
index 18adbfebdba..9127d0c979d 100644
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -22,6 +22,7 @@
 EmbeddingModel = {
     "Ollama": OllamaEmbed,
     "OpenAI": OpenAIEmbed,
+    "Azure-OpenAI": AzureEmbed,
     "Xinference": XinferenceEmbed,
     "Tongyi-Qianwen": QWenEmbed,
     "ZHIPU-AI": ZhipuEmbed,
@@ -36,6 +37,7 @@
 
 CvModel = {
     "OpenAI": GptV4,
+    "Azure-OpenAI": AzureGptV4,
     "Ollama": OllamaCV,
     "Xinference": XinferenceCV,
     "Tongyi-Qianwen": QWenCV,
@@ -46,6 +48,7 @@
 
 ChatModel = {
     "OpenAI": GptTurbo,
+    "Azure-OpenAI": AzureChat,
     "ZHIPU-AI": ZhipuChat,
     "Tongyi-Qianwen": QWenChat,
     "Ollama": OllamaChat,
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index f6c0666fe78..70e2906ac25 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -13,6 +13,7 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
+from openai.lib.azure import AzureOpenAI
 from zhipuai import ZhipuAI
 from dashscope import Generation
 from abc import ABC
@@ -94,6 +95,11 @@ def __init__(self, key, model_name="deepseek-chat", base_url="https://api.deepse
         if not base_url: base_url="https://api.deepseek.com/v1"
         super().__init__(key, model_name, base_url)
 
 
+class AzureChat(Base):
+    def __init__(self, key, model_name, **kwargs):
+        self.client = AzureOpenAI(api_key=key, azure_endpoint=kwargs["base_url"], api_version="2024-02-01")
+        self.model_name = model_name
+
 class BaiChuanChat(Base):
     def __init__(self, key, model_name="Baichuan3-Turbo", base_url="https://api.baichuan-ai.com/v1"):
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index 5a6fdc6d94a..9f2d91a963f 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -13,6 +13,7 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
+from openai.lib.azure import AzureOpenAI
 from zhipuai import ZhipuAI
 import io
 from abc import ABC
@@ -87,6 +88,25 @@ def describe(self, image, max_tokens=300):
         )
         return res.choices[0].message.content.strip(), res.usage.total_tokens
 
+class AzureGptV4(Base):
+    def __init__(self, key, model_name, lang="Chinese", **kwargs):
+        self.client = AzureOpenAI(api_key=key, azure_endpoint=kwargs["base_url"], api_version="2024-02-01")
+        self.model_name = model_name
+        self.lang = lang
+
+    def describe(self, image, max_tokens=300):
+        b64 = self.image2base64(image)
+        prompt = self.prompt(b64)
+        for i in range(len(prompt)):
+            for c in prompt[i]["content"]:
+                if "text" in c: c["type"] = "text"
+
+        res = self.client.chat.completions.create(
+            model=self.model_name,
+            messages=prompt,
+            max_tokens=max_tokens,
+        )
+        return res.choices[0].message.content.strip(), res.usage.total_tokens
 
 class QWenCV(Base):
     def __init__(self, key, model_name="qwen-vl-chat-v1", lang="Chinese", **kwargs):
diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
index 596c1e3531f..eeba7f7b9ed 100644
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@@ -18,6 +18,7 @@
 import threading
 import requests
 from huggingface_hub import snapshot_download
+from openai.lib.azure import AzureOpenAI
 from zhipuai import ZhipuAI
 import os
 from abc import ABC
@@ -110,6 +111,11 @@ def encode_queries(self, text):
         return np.array(res.data[0].embedding), res.usage.total_tokens
 
 
+class AzureEmbed(Base):
+    def __init__(self, key, model_name, **kwargs):
+        self.client = AzureOpenAI(api_key=key, azure_endpoint=kwargs["base_url"], api_version="2024-02-01")
+        self.model_name = model_name
+
 class BaiChuanEmbed(OpenAIEmbed):
     def __init__(self, key, model_name='Baichuan-Text-Embedding',