From 4f692455a2774abcfab901b03c3170afea156a32 Mon Sep 17 00:00:00 2001 From: SedationH Date: Sun, 1 Dec 2024 21:17:15 +0800 Subject: [PATCH] feat: implement knowledge update functionality for bots - Added a new hook `useKnowledgeUpdate` to manage knowledge updates. - Integrated knowledge update feature in `BotCard` component with loading state. - Created `updateKnowledge` API in `BotsController` to handle knowledge updates. - Updated FastAPI router to include endpoint for knowledge updates, validating bot existence. - Refactored retrieval logic to check and update knowledge based on document changes. --- .../app/factory/list/components/BotCard.tsx | 43 +++++++++++++++---- client/app/hooks/useKnowledgeUpdate.ts | 9 ++++ client/app/services/BotsController.ts | 7 +++ petercat_utils/rag_helper/git_doc_task.py | 2 +- petercat_utils/rag_helper/retrieval.py | 37 ++++++++++++++++ server/rag/router.py | 43 ++++++++++++++++++- 6 files changed, 130 insertions(+), 11 deletions(-) create mode 100644 client/app/hooks/useKnowledgeUpdate.ts diff --git a/client/app/factory/list/components/BotCard.tsx b/client/app/factory/list/components/BotCard.tsx index 36c5fb71..6a1e67a4 100644 --- a/client/app/factory/list/components/BotCard.tsx +++ b/client/app/factory/list/components/BotCard.tsx @@ -25,6 +25,7 @@ import ErrorBadgeIcon from '@/public/icons/ErrorBadgeIcon'; import CheckBadgeIcon from '@/public/icons/CheckBadgeIcon'; import LoadingIcon from '@/public/icons/LoadingIcon'; import { RagTask } from '@/app/services/BotsController'; +import { useKnowledgeUpdate } from '@/app/hooks/useKnowledgeUpdate'; declare type Bot = Tables<'bots'>; @@ -34,6 +35,8 @@ const BotCard = (props: { bot: Bot }) => { const router = useRouter(); const { deleteBot, isLoading, isSuccess } = useBotDelete(); const { data: taskInfo } = useGetBotRagTask(bot.id, true, false); + const { mutate: updateKnowledge, isPending: isUpdating } = + useKnowledgeUpdate(); useEffect(() => { if (isSuccess) { @@ -69,6 +72,19 @@ 
const BotCard = (props: { bot: Bot }) => { ); }; + const handleUpdateKnowledge = () => { + updateKnowledge( + { + bot_id: bot.id, + }, + { + onSuccess: () => { + // TODO + }, + }, + ); + }; + return ( <> { placement="top" content={I18N.components.BotCard.gengXinZhiShiKu} classNames={{ - base: [ - // arrow color - 'before:bg-[#3F3F46] dark:before:bg-white', - ], + base: ['before:bg-[#3F3F46] dark:before:bg-white'], content: [ 'py-2 px-4 rounded-lg shadow-xl text-white', 'bg-[#3F3F46]', ], }} > - {I18N.components.BotCard.gengXinZhiShi} + {isUpdating ? ( +
+ + + +
+              ) : (
+                {
+                  e.stopPropagation();
+                  handleUpdateKnowledge();
+                }}
+                alt={I18N.components.BotCard.gengXinZhiShi}
+                className="z-10 cursor-pointer"
+              />
+            )}
diff --git a/client/app/hooks/useKnowledgeUpdate.ts b/client/app/hooks/useKnowledgeUpdate.ts
new file mode 100644
index 00000000..be9dd5fb
--- /dev/null
+++ b/client/app/hooks/useKnowledgeUpdate.ts
@@ -0,0 +1,9 @@
+import { useMutation } from '@tanstack/react-query';
+import { updateKnowledge } from '../services/BotsController';
+
+export function useKnowledgeUpdate() {
+  return useMutation({
+    mutationKey: ['updateKnowledge'],
+    mutationFn: updateKnowledge
+  });
+}
diff --git a/client/app/services/BotsController.ts b/client/app/services/BotsController.ts
index 4908337c..d0db8a37 100644
--- a/client/app/services/BotsController.ts
+++ b/client/app/services/BotsController.ts
@@ -135,3 +135,10 @@ export async function bindBotToRepo(repsConfigs: BindBotToRepoConfig[]) {
   });
   return response.data;
 }
+
+// Knowledge update API: POSTs the given config ({ bot_id }) to /api/rag/update_knowledge
+export async function updateKnowledge(config: {
+  bot_id: string;
+}) {
+  return axios.post(`${apiDomain}/api/rag/update_knowledge`, config);
+}
diff --git a/petercat_utils/rag_helper/git_doc_task.py b/petercat_utils/rag_helper/git_doc_task.py
index 74d16e0d..b278c0da 100644
--- a/petercat_utils/rag_helper/git_doc_task.py
+++ b/petercat_utils/rag_helper/git_doc_task.py
@@ -142,7 +142,7 @@ def handle_tree_node(self):
         )
 
     def handle_blob_node(self):
-        retrieval.add_knowledge_by_doc(
+        retrieval.check_and_update_knowledge(
             RAGGitDocConfig(
                 repo_name=self.repo_name,
                 file_path=self.path,
diff --git a/petercat_utils/rag_helper/retrieval.py b/petercat_utils/rag_helper/retrieval.py
index 5a3207dd..fd988ac5 100644
--- a/petercat_utils/rag_helper/retrieval.py
+++ b/petercat_utils/rag_helper/retrieval.py
@@ -197,3 +197,40 @@ def get_chunk_list(repo_name: str, page_size: int, page_number: int):
     )
     total_count = len(count_response.data)
     return {"rows": query.data, "total": total_count}
+
+
+def check_and_update_knowledge(config: RAGGitDocConfig):
+    # Initialize the GitHub loader to obtain the latest file info (its file_sha)
+    loader = init_github_file_loader(config)
+    latest_sha = loader.file_sha
+
+    # Fetch the currently stored documents for this repo_name / file_path
+    client = get_client()
+    existing_docs = (
+        client.table(TABLE_NAME)
+        .select("id, file_sha")
+        .eq("repo_name", config.repo_name)
+        .eq("file_path", config.file_path)
+        .execute()
+    )
+
+    if not existing_docs.data:
+        # No stored document exists yet, so add it directly
+        return add_knowledge_by_doc(config)
+
+    # Check whether the SHA has changed (only the first stored row is compared)
+    current_sha = existing_docs.data[0]["file_sha"]
+
+    if current_sha == latest_sha:
+        return False
+
+    # The SHA differs, so the stored knowledge needs to be updated
+    # 1. Delete the old documents (every row matching this repo_name / file_path)
+    client.table(TABLE_NAME)\
+        .delete()\
+        .eq("repo_name", config.repo_name)\
+        .eq("file_path", config.file_path)\
+        .execute()
+
+    # 2. Add the new documents
+    return add_knowledge_by_doc(config)
\ No newline at end of file
diff --git a/server/rag/router.py b/server/rag/router.py
index d15c2c2d..08e7f380 100644
--- a/server/rag/router.py
+++ b/server/rag/router.py
@@ -1,6 +1,8 @@
 import json
-from typing import Optional
+from typing import Annotated, Optional
+from pydantic import BaseModel
+from auth.get_user_info import get_user_id
 
 from fastapi import APIRouter, Depends
 
 from petercat_utils.db.client.supabase import get_client
@@ -116,3 +118,42 @@ def get_rag_task(repo_name: str):
         return response
     except Exception as e:
         return json.dumps({"success": False, "message": str(e)})
+
+class UpdateKnowledgeRequest(BaseModel):
+    bot_id: str
+
+@router.post("/rag/update_knowledge", dependencies=[Depends(verify_rate_limit)])
+def update_knowledge(request: UpdateKnowledgeRequest, user_id: Annotated[str | None, Depends(get_user_id)] = None):
+    try:
+        # Look up the bot row by bot_id, restricted to rows whose uid equals user_id
+        supabase = get_client()
+        response = (
+            supabase.table("bots")
+            .select("*")
+            .eq("id", request.bot_id)
+            .eq("uid", user_id)
+            .single()
+            .execute()
+        )
+
+        if not response.data:
+            return json.dumps({
+                "success": False,
+                "message": f"Bot with id {request.bot_id} not found"
+            })
+
+        bot_config = RAGGitDocConfig(**response.data)
+        result = retrieval.check_and_update_knowledge(bot_config)
+
+        if result:
+            return json.dumps({
+                "success": True,
+                "message": "Knowledge updated successfully!"
+            })
+        else:
+            return json.dumps({
+                "success": False,
+                "message": "Knowledge not updated!"
+            })
+    except Exception as e:
+        return json.dumps({"success": False, "message": str(e)})