diff --git a/README.md b/README.md index 796346f88..521fcf16d 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ - [x] **多端部署:** 有多种部署方式可选择且功能完备,目前已支持个人微信,微信公众号和企业微信应用等部署方式 - [x] **基础对话:** 私聊及群聊的消息智能回复,支持多轮会话上下文记忆,支持 GPT-3.5, GPT-4, claude, 文心一言, 讯飞星火 - [x] **语音识别:** 可识别语音消息,通过文字或语音回复,支持 azure, baidu, google, openai等多种语音模型 -- [x] **图片生成:** 支持图片生成 和 图生图(如照片修复),可选择 Dell-E, stable diffusion, replicate, midjourney模型 +- [x] **图片生成:** 支持图片生成 和 图生图(如照片修复),可选择 Dall-E, stable diffusion, replicate, midjourney模型 - [x] **丰富插件:** 支持个性化插件扩展,已实现多角色切换、文字冒险、敏感词过滤、聊天记录总结、文档总结和对话等插件 - [X] **Tool工具:** 与操作系统和互联网交互,支持最新信息搜索、数学计算、天气和资讯查询、网页总结,基于 [chatgpt-tool-hub](https://github.com/goldfishh/chatgpt-tool-hub) 实现 - [x] **知识库:** 通过上传知识库文件自定义专属机器人,可作为数字分身、领域知识库、智能客服使用,基于 [LinkAI](https://chat.link-ai.tech/console) 实现 diff --git a/bot/chatgpt/chat_gpt_session.py b/bot/chatgpt/chat_gpt_session.py index 7b8ea6e63..e7dabecfd 100644 --- a/bot/chatgpt/chat_gpt_session.py +++ b/bot/chatgpt/chat_gpt_session.py @@ -62,10 +62,10 @@ def num_tokens_from_messages(messages, model): import tiktoken - if model in ["gpt-3.5-turbo-0301", "gpt-35-turbo"]: + if model in ["gpt-3.5-turbo-0301", "gpt-35-turbo", "gpt-3.5-turbo-1106"]: return num_tokens_from_messages(messages, model="gpt-3.5-turbo") elif model in ["gpt-4-0314", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0613", "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-35-turbo-16k", const.GPT4_PREVIEW, const.GPT4_VISION_PREVIEW]: + "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-35-turbo-16k", const.GPT4_TURBO_PREVIEW, const.GPT4_VISION_PREVIEW]: return num_tokens_from_messages(messages, model="gpt-4") try: diff --git a/bot/linkai/link_ai_bot.py b/bot/linkai/link_ai_bot.py index 00ad0e9cf..3788c6bd4 100644 --- a/bot/linkai/link_ai_bot.py +++ b/bot/linkai/link_ai_bot.py @@ -15,7 +15,7 @@ from config import conf, pconf -class LinkAIBot(Bot, OpenAIImage): +class LinkAIBot(Bot): # authentication failed AUTH_FAILED_CODE = 401 
NO_QUOTA_CODE = 406 @@ -193,6 +193,32 @@ def reply_text(self, session: ChatGPTSession, app_code="", retry_count=0) -> dic return self.reply_text(session, app_code, retry_count + 1) + def create_img(self, query, retry_count=0, api_key=None): + try: + logger.info("[LinkImage] image_query={}".format(query)) + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {conf().get('linkai_api_key')}" + } + data = { + "prompt": query, + "n": 1, + "model": conf().get("text_to_image") or "dall-e-2", + "response_format": "url", + "img_proxy": conf().get("image_proxy") + } + url = conf().get("linkai_api_base", "https://api.link-ai.chat") + "/v1/images/generations" + res = requests.post(url, headers=headers, json=data, timeout=(5, 90)) + t2 = time.time() + image_url = res.json()["data"][0]["url"] + logger.info("[OPEN_AI] image_url={}".format(image_url)) + return True, image_url + + except Exception as e: + logger.error(format(e)) + return False, "画图出现问题,请休息一下再问我吧" + + def _fetch_knowledge_search_suffix(self, response) -> str: try: if response.get("knowledge_base"): diff --git a/bot/openai/open_ai_image.py b/bot/openai/open_ai_image.py index 89449a2b3..974bf8256 100644 --- a/bot/openai/open_ai_image.py +++ b/bot/openai/open_ai_image.py @@ -24,7 +24,8 @@ def create_img(self, query, retry_count=0, api_key=None): api_key=api_key, prompt=query, # 图片描述 n=1, # 每次生成图片的数量 - size=conf().get("image_create_size", "256x256"), # 图片大小,可选有 256x256, 512x512, 1024x1024 + model=conf().get("text_to_image") or "dall-e-2", + # size=conf().get("image_create_size", "256x256"), # 图片大小,可选有 256x256, 512x512, 1024x1024 ) image_url = response["data"][0]["url"] logger.info("[OPEN_AI] image_url={}".format(image_url)) @@ -36,7 +37,7 @@ def create_img(self, query, retry_count=0, api_key=None): logger.warn("[OPEN_AI] ImgCreate RateLimit exceed, 第{}次重试".format(retry_count + 1)) return self.create_img(query, retry_count + 1) else: - return False, "提问太快啦,请休息一下再问我吧" + return False, 
"画图出现问题,请休息一下再问我吧" except Exception as e: logger.exception(e) - return False, str(e) + return False, "画图出现问题,请休息一下再问我吧" diff --git a/bridge/bridge.py b/bridge/bridge.py index 4add2b8d7..fe6984f4a 100644 --- a/bridge/bridge.py +++ b/bridge/bridge.py @@ -29,6 +29,10 @@ def __init__(self): self.btype["chat"] = const.XUNFEI if conf().get("use_linkai") and conf().get("linkai_api_key"): self.btype["chat"] = const.LINKAI + if not conf().get("voice_to_text") or conf().get("voice_to_text") in ["openai"]: + self.btype["voice_to_text"] = const.LINKAI + if not conf().get("text_to_voice") or conf().get("text_to_voice") in [const.TTS_1, const.TTS_1_HD]: + self.btype["text_to_voice"] = const.LINKAI if model_type in ["claude"]: self.btype["chat"] = const.CLAUDEAI self.bots = {} diff --git a/channel/chat_channel.py b/channel/chat_channel.py index 0122d8676..648eaaccc 100644 --- a/channel/chat_channel.py +++ b/channel/chat_channel.py @@ -91,6 +91,7 @@ def _compose_context(self, ctype: ContextType, content, **kwargs): # 消息内容匹配过程,并处理content if ctype == ContextType.TEXT: if first_in and "」\n- - - - - - -" in content: # 初次匹配 过滤引用消息 + logger.debug(content) logger.debug("[WX]reference query skipped") return None diff --git a/common/const.py b/common/const.py index 92fbfcaa8..6c790f794 100644 --- a/common/const.py +++ b/common/const.py @@ -7,9 +7,12 @@ LINKAI = "linkai" CLAUDEAI = "claude" -MODEL_LIST = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "wenxin", "wenxin-4", "xunfei", "claude"] - # model GPT4 = "gpt-4" -GPT4_PREVIEW = "gpt-4-1106-preview" +GPT4_TURBO_PREVIEW = "gpt-4-1106-preview" GPT4_VISION_PREVIEW = "gpt-4-vision-preview" +WHISPER_1 = "whisper-1" +TTS_1 = "tts-1" +TTS_1_HD = "tts-1-hd" + +MODEL_LIST = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "wenxin", "wenxin-4", "xunfei", "claude", "gpt-4-turbo", GPT4_TURBO_PREVIEW] diff --git a/config-template.json b/config-template.json index 699b96b32..f18f83b43 100644 --- a/config-template.json +++ b/config-template.json @@ 
-1,7 +1,10 @@ { + "channel_type": "wx", "open_ai_api_key": "YOUR API KEY", "model": "gpt-3.5-turbo", - "channel_type": "wx", + "text_to_image": "dall-e-2", + "voice_to_text": "openai", + "text_to_voice": "openai", "proxy": "", "hot_reload": false, "single_chat_prefix": [ @@ -22,9 +25,10 @@ "image_create_prefix": [ "画" ], - "speech_recognition": false, + "speech_recognition": true, "group_speech_recognition": false, "voice_reply_voice": false, + "tts_voice_id": "alloy", "conversation_max_tokens": 1000, "expires_in_seconds": 3600, "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。", diff --git a/config.py b/config.py index 3dd27f5ef..f421db4b0 100644 --- a/config.py +++ b/config.py @@ -34,9 +34,11 @@ "group_chat_in_one_session": ["ChatGPT测试群"], # 支持会话上下文共享的群名称 "group_welcome_msg": "", # 配置新人进群固定欢迎语,不配置则使用随机风格欢迎 "trigger_by_self": False, # 是否允许机器人触发 + "text_to_image": "dall-e-2", # 图片生成模型,可选 dall-e-2, dall-e-3 + "image_proxy": True, # 是否需要图片代理,国内访问LinkAI时需要 "image_create_prefix": ["画", "看", "找"], # 开启图片回复的前缀 "concurrency_in_session": 1, # 同一会话最多有多少条消息在处理中,大于1可能乱序 - "image_create_size": "256x256", # 图片大小,可选有 256x256, 512x512, 1024x1024 + "image_create_size": "256x256", # 图片大小,可选有 256x256, 512x512, 1024x1024 (dall-e-3默认为1024x1024) # chatgpt会话参数 "expires_in_seconds": 3600, # 无操作会话的过期时间 # 人格描述 @@ -66,12 +68,13 @@ # wework的通用配置 "wework_smart": True, # 配置wework是否使用已登录的企业微信,False为多开 # 语音设置 - "speech_recognition": False, # 是否开启语音识别 + "speech_recognition": True, # 是否开启语音识别 "group_speech_recognition": False, # 是否开启群组语音识别 "voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key "always_reply_voice": False, # 是否一直使用语音回复 "voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,azure - "text_to_voice": "baidu", # 语音合成引擎,支持baidu,google,pytts(offline),azure,elevenlabs + "text_to_voice": "tts-1", # 语音合成引擎,支持tts-1,tts-1-hd,baidu,google,pytts(offline),azure,elevenlabs + "tts_voice_id": "alloy", # baidu 语音api配置, 使用百度语音识别和语音合成时需要 "baidu_app_id": "", 
"baidu_api_key": "", diff --git a/plugins/godcmd/godcmd.py b/plugins/godcmd/godcmd.py index 1baf5b102..e1c47c803 100644 --- a/plugins/godcmd/godcmd.py +++ b/plugins/godcmd/godcmd.py @@ -271,7 +271,7 @@ def on_handle_context(self, e_context: EventContext): if args[0] not in const.MODEL_LIST: ok, result = False, "模型名称不存在" else: - conf()["model"] = args[0] + conf()["model"] = self.model_mapping(args[0]) Bridge().reset_bot() ok, result = True, "模型设置为: " + str(conf().get("model")) elif cmd == "id": @@ -467,3 +467,9 @@ def is_admin_in_group(self, context): if context["isgroup"]: return context.kwargs.get("msg").actual_user_id in global_config["admin_users"] return False + + + def model_mapping(self, model) -> str: + if model == "gpt-4-turbo": + return const.GPT4_TURBO_PREVIEW + return model diff --git a/plugins/linkai/config.json.template b/plugins/linkai/config.json.template index ccd896774..b6c7a0425 100644 --- a/plugins/linkai/config.json.template +++ b/plugins/linkai/config.json.template @@ -14,6 +14,7 @@ "summary": { "enabled": true, "group_enabled": true, - "max_file_size": 5000 + "max_file_size": 5000, + "type": ["FILE", "SHARING", "IMAGE"] } } diff --git a/plugins/linkai/linkai.py b/plugins/linkai/linkai.py index 126bbe95e..b590d49fa 100644 --- a/plugins/linkai/linkai.py +++ b/plugins/linkai/linkai.py @@ -46,19 +46,23 @@ def on_handle_context(self, e_context: EventContext): # filter content no need solve return - if context.type == ContextType.FILE and self._is_summary_open(context): + if context.type in [ContextType.FILE, ContextType.IMAGE] and self._is_summary_open(context): # 文件处理 context.get("msg").prepare() file_path = context.content if not LinkSummary().check_file(file_path, self.sum_config): return - _send_info(e_context, "正在为你加速生成摘要,请稍后") + if context.type != ContextType.IMAGE: + _send_info(e_context, "正在为你加速生成摘要,请稍后") res = LinkSummary().summary_file(file_path) if not res: - _set_reply_text("因为神秘力量无法获取文章内容,请稍后再试吧", e_context, level=ReplyType.TEXT) + 
_set_reply_text("因为神秘力量无法获取内容,请稍后再试吧", e_context, level=ReplyType.TEXT) return - USER_FILE_MAP[_find_user_id(context) + "-sum_id"] = res.get("summary_id") - _set_reply_text(res.get("summary") + "\n\n💬 发送 \"开启对话\" 可以开启与文件内容的对话", e_context, level=ReplyType.TEXT) + summary_text = res.get("summary") + if context.type != ContextType.IMAGE: + USER_FILE_MAP[_find_user_id(context) + "-sum_id"] = res.get("summary_id") + summary_text += "\n\n💬 发送 \"开启对话\" 可以开启与文件内容的对话" + _set_reply_text(summary_text, e_context, level=ReplyType.TEXT) os.remove(file_path) return @@ -187,6 +191,11 @@ def _is_summary_open(self, context) -> bool: return False if context.kwargs.get("isgroup") and not self.sum_config.get("group_enabled"): return False + support_type = self.sum_config.get("type") + if not support_type: + return True + if context.type.name not in support_type: + return False return True # LinkAI 对话任务处理 diff --git a/plugins/linkai/summary.py b/plugins/linkai/summary.py index 5ab052421..c945896b0 100644 --- a/plugins/linkai/summary.py +++ b/plugins/linkai/summary.py @@ -13,7 +13,8 @@ def summary_file(self, file_path: str): "file": open(file_path, "rb"), "name": file_path.split("/")[-1], } - res = requests.post(url=self.base_url() + "/v1/summary/file", headers=self.headers(), files=file_body, timeout=(5, 300)) + url = self.base_url() + "/v1/summary/file" + res = requests.post(url, headers=self.headers(), files=file_body, timeout=(5, 300)) return self._parse_summary_res(res) def summary_url(self, url: str): @@ -71,7 +72,7 @@ def check_file(self, file_path: str, sum_config: dict) -> bool: return False suffix = file_path.split(".")[-1] - support_list = ["txt", "csv", "docx", "pdf", "md"] + support_list = ["txt", "csv", "docx", "pdf", "md", "jpg", "jpeg", "png"] if suffix not in support_list: logger.warn(f"[LinkSum] unsupported file, suffix={suffix}, support_list={support_list}") return False diff --git a/voice/factory.py b/voice/factory.py index d591a4f39..8725e29d5 100644 --- 
a/voice/factory.py +++ b/voice/factory.py @@ -33,4 +33,8 @@ def create_voice(voice_type): from voice.elevent.elevent_voice import ElevenLabsVoice return ElevenLabsVoice() + + elif voice_type == "linkai": + from voice.linkai.linkai_voice import LinkAIVoice + return LinkAIVoice() raise RuntimeError diff --git a/voice/linkai/linkai_voice.py b/voice/linkai/linkai_voice.py new file mode 100644 index 000000000..55f0fcbc5 --- /dev/null +++ b/voice/linkai/linkai_voice.py @@ -0,0 +1,80 @@ +""" +LinkAI voice service +""" +import json +import os +import random + +import openai +import requests + +from bridge.reply import Reply, ReplyType +from common.log import logger +from config import conf +from voice.voice import Voice +from common import const +import datetime + +class LinkAIVoice(Voice): + def __init__(self): + pass + + def voiceToText(self, voice_file): + """Transcribe voice_file through the LinkAI /v1/audio/transcriptions endpoint; returns a TEXT Reply, an ERROR Reply if an exception is raised, or None on a non-200 response.""" + logger.debug("[LinkVoice] voice file name={}".format(voice_file)) + try: + url = conf().get("linkai_api_base", "https://api.link-ai.chat") + "/v1/audio/transcriptions" + headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")} + model = None + if not conf().get("voice_to_text") or conf().get("voice_to_text") == "openai": + model = const.WHISPER_1 + file = open(voice_file, "rb") + file_body = { + "file": file + } + data = { + "model": model + } + res = requests.post(url, files=file_body, headers=headers, data=data, timeout=(5, 60)) + if res.status_code == 200: + text = res.json().get("text") + else: + res_json = res.json() + logger.error(f"[LinkVoice] voiceToText error, status_code={res.status_code}, msg={res_json.get('message')}") + return None + reply = Reply(ReplyType.TEXT, text) + logger.info(f"[LinkVoice] voiceToText success, text={text}, file name={voice_file}") + except Exception as e: + logger.error(e) + reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~") + return reply + + def textToVoice(self, text): + """Synthesize text through the LinkAI /v1/audio/speech endpoint and save the audio under tmp/; returns a VOICE Reply, an ERROR Reply if an exception is raised, or None on a non-200 response.""" + try: + url = conf().get("linkai_api_base", "https://api.link-ai.chat") + "/v1/audio/speech" 
+ headers = {"Authorization": "Bearer " + conf().get("linkai_api_key")} + model = const.TTS_1 + if not conf().get("text_to_voice") or conf().get("text_to_voice") in [const.TTS_1, const.TTS_1_HD]: + model = conf().get("text_to_voice") or const.TTS_1 + data = { + "model": model, + "input": text, + "voice": conf().get("tts_voice_id") + } + res = requests.post(url, headers=headers, json=data, timeout=(5, 120)) + if res.status_code == 200: + tmp_file_name = "tmp/" + datetime.datetime.now().strftime('%Y%m%d%H%M%S') + str(random.randint(0, 1000)) + ".mp3" + with open(tmp_file_name, 'wb') as f: + f.write(res.content) + reply = Reply(ReplyType.VOICE, tmp_file_name) + logger.info(f"[LinkVoice] textToVoice success, input={text}, model={model}, voice_id={data.get('voice')}") + return reply + else: + res_json = res.json() + logger.error(f"[LinkVoice] textToVoice error, status_code={res.status_code}, msg={res_json.get('message')}") + return None + except Exception as e: + logger.error(e) + reply = Reply(ReplyType.ERROR, "遇到了一点小问题,请稍后再问我吧") + return reply diff --git a/voice/openai/openai_voice.py b/voice/openai/openai_voice.py index b02d92651..d159beed2 100644 --- a/voice/openai/openai_voice.py +++ b/voice/openai/openai_voice.py @@ -24,6 +24,6 @@ def voiceToText(self, voice_file): reply = Reply(ReplyType.TEXT, text) logger.info("[Openai] voiceToText text={} voice file name={}".format(text, voice_file)) except Exception as e: - reply = Reply(ReplyType.ERROR, str(e)) + reply = Reply(ReplyType.ERROR, "我暂时还无法听清您的语音,请稍后再试吧~") finally: return reply