add knowledge bank

FredericW · Apr 29, 2024 · 3ee3266 · 3ee3266
1 parent 0895b10
commit 3ee3266
Show file tree

Hide file tree

Showing 9 changed files with 323 additions and 417 deletions.
diff --git a/examples/conversation_with_RAG_agents/configs/agent_config.json b/examples/conversation_with_RAG_agents/configs/agent_config.json
@@ -8,29 +8,10 @@
       "model_config_name": "qwen_config",
       "emb_model_config_name": "qwen_emb_config",
       "rag_config": {
-            "index_config": [
-              {
-                "load_data": {
-                  "loader": {
-                    "create_object": true,
-                    "module": "llama_index.core",
-                    "class": "SimpleDirectoryReader",
-                    "init_args": {
-                      "input_dir": "../../docs/sphinx_doc/en/source/tutorial",
-                      "required_exts": [
-                        ".md"
-                      ]
-                    }
-                  }
-                }
-              }
-            ],
-            "chunk_size": 2048,
-            "chunk_overlap": 40,
+            "knowledge_id": "agentscope_tutorial_rag",
             "similarity_top_k": 5,
             "log_retrieval": false,
-            "recent_n_mem": 1,
-            "persist_dir": "./rag_storage/tutorial_assist"
+            "recent_n_mem": 1
       }
     }
   },
@@ -43,43 +24,10 @@
       "model_config_name": "qwen_config",
       "emb_model_config_name": "qwen_emb_config",
       "rag_config": {
-            "index_config": [
-              {
-                "load_data": {
-                  "loader": {
-                    "create_object": true,
-                    "module": "llama_index.core",
-                    "class": "SimpleDirectoryReader",
-                    "init_args": {
-                      "input_dir": "../../src/agentscope",
-                      "recursive": true,
-                      "required_exts": [
-                        ".py"
-                      ]
-                    }
-                  }
-                },
-                "store_and_index": {
-                  "transformations": [
-                    {
-                      "create_object": true,
-                      "module": "llama_index.core.node_parser",
-                      "class": "CodeSplitter",
-                      "init_args": {
-                        "language": "python",
-                        "chunk_lines": 100
-                      }
-                    }
-                  ]
-                }
-              }
-            ],
-            "chunk_size": 2048,
-            "chunk_overlap": 40,
+            "knowledge_id": "agentscope_code_rag",
             "similarity_top_k": 5,
             "log_retrieval": false,
-            "recent_n_mem": 1,
-            "persist_dir": "./rag_storage/code_assist"
+            "recent_n_mem": 1
       }
     }
   },
@@ -92,32 +40,13 @@
       "model_config_name": "qwen_config",
       "emb_model_config_name": "qwen_emb_config",
       "rag_config": {
-          "index_config": [
-            {
-              "load_data": {
-                "loader": {
-                  "create_object": true,
-                  "module": "llama_index.core",
-                  "class": "SimpleDirectoryReader",
-                  "init_args": {
-                    "input_dir": "../../docs/docstring_html/",
-                    "required_exts": [
-                      ".html"
-                    ]
-                  }
-                }
-              }
-            }
-          ],
-          "chunk_size": 2048,
-          "chunk_overlap": 40,
+          "knowledge_id": "agentscope_api_rag",
           "similarity_top_k": 3,
           "log_retrieval": true,
           "recent_n_mem": 1,
-          "persist_dir": "./rag_storage/api_assist",
           "repo_base": "../../",
           "file_dir": "../../docs/docstring_html/"
-          }
+      }
     }
   },
   {
@@ -139,54 +68,7 @@
       "model_config_name": "qwen_config",
       "emb_model_config_name": "qwen_emb_config",
       "rag_config": {
-            "index_config": [
-              {
-                "load_data": {
-                  "loader": {
-                    "create_object": true,
-                    "module": "llama_index.core",
-                    "class": "SimpleDirectoryReader",
-                    "init_args": {
-                      "input_dir": "../../docs/sphinx_doc/en/source/tutorial",
-                      "required_exts": [
-                        ".md"
-                      ]
-                    }
-                  }
-                }
-              },
-              {
-                "load_data": {
-                  "loader": {
-                    "create_object": true,
-                    "module": "llama_index.core",
-                    "class": "SimpleDirectoryReader",
-                    "init_args": {
-                      "input_dir": "../../src/agentscope",
-                      "recursive": true,
-                      "required_exts": [
-                        ".py"
-                      ]
-                    }
-                  }
-                },
-                "store_and_index": {
-                  "transformations": [
-                    {
-                      "create_object": true,
-                      "module": "llama_index.core.node_parser",
-                      "class": "CodeSplitter",
-                      "init_args": {
-                        "language": "python",
-                        "chunk_lines": 100
-                      }
-                    }
-                  ]
-                }
-              }
-            ],
-            "chunk_size": 2048,
-            "chunk_overlap": 40,
+            "knowledge_id": "agentscope_global_rag",
             "similarity_top_k": 5,
             "log_retrieval": false,
             "recent_n_mem": 1,

diff --git a/examples/conversation_with_RAG_agents/configs/detailed_rag_config_example.json b/examples/conversation_with_RAG_agents/configs/detailed_rag_config_example.json
@@ -0,0 +1,114 @@
+[
+  {
+    "knowledge_id": "agentscope_code_rag",
+    "persist_dir": "./rag_storage/searching_assist",
+    "chunk_size": 2048,
+    "chunk_overlap": 40,
+    "data_processing": [
+      {
+        "load_data": {
+          "loader": {
+            "create_object": true,
+            "module": "llama_index.core",
+            "class": "SimpleDirectoryReader",
+            "init_args": {
+              "input_dir": "../../src/agentscope",
+              "recursive": true,
+              "required_exts": [
+                ".py"
+              ]
+            }
+          }
+        },
+        "store_and_index": {
+          "transformations": [
+            {
+              "create_object": true,
+              "module": "llama_index.core.node_parser",
+              "class": "CodeSplitter",
+              "init_args": {
+                "language": "python",
+                "chunk_lines": 100
+              }
+            }
+          ]
+        }
+      }
+    ]
+  },
+  {
+    "knowledge_id": "agentscope_api_rag",
+    "persist_dir": "./rag_storage/searching_assist",
+    "chunk_size": 2048,
+    "chunk_overlap": 40,
+    "data_processing": [
+      {
+        "load_data": {
+          "loader": {
+            "create_object": true,
+            "module": "llama_index.core",
+            "class": "SimpleDirectoryReader",
+            "init_args": {
+              "input_dir": "../../docs/docstring_html/",
+              "required_exts": [
+                ".html"
+              ]
+            }
+          }
+        }
+      }
+    ]
+  },
+  {
+    "knowledge_id": "agentscope_global_rag",
+    "persist_dir": "./rag_storage/searching_assist",
+    "chunk_size": 2048,
+    "chunk_overlap": 40,
+    "data_processing": [
+      {
+        "load_data": {
+          "loader": {
+            "create_object": true,
+            "module": "llama_index.core",
+            "class": "SimpleDirectoryReader",
+            "init_args": {
+              "input_dir": "../../docs/sphinx_doc/en/source/tutorial",
+              "required_exts": [
+                ".md"
+              ]
+            }
+          }
+        }
+      },
+      {
+        "load_data": {
+          "loader": {
+            "create_object": true,
+            "module": "llama_index.core",
+            "class": "SimpleDirectoryReader",
+            "init_args": {
+              "input_dir": "../../src/agentscope",
+              "recursive": true,
+              "required_exts": [
+                ".py"
+              ]
+            }
+          }
+        },
+        "store_and_index": {
+          "transformations": [
+            {
+              "create_object": true,
+              "module": "llama_index.core.node_parser",
+              "class": "CodeSplitter",
+              "init_args": {
+                "language": "python",
+                "chunk_lines": 100
+              }
+            }
+          ]
+        }
+      }
+    ]
+  }
+]
diff --git a/examples/conversation_with_RAG_agents/rag_example.py b/examples/conversation_with_RAG_agents/rag_example.py
@@ -10,6 +10,7 @@
 
 import agentscope
 from agentscope.agents import UserAgent, DialogAgent, LlamaIndexAgent
+from agentscope.rag import KnowledgeBank
 
 
 AGENT_CHOICE_PROMPT = """
@@ -59,6 +60,31 @@ def main() -> None:
             config["api_key"] = f"{os.environ.get('DASHSCOPE_API_KEY')}"
     agentscope.init(model_configs=model_configs)
 
+    # initialize knowledge bank (for RAG)
+    knowledge_bank = KnowledgeBank()
+    # a simple example of importing data to RAG
+    knowledge_bank.add_data_for_rag(
+        knowledge_id="agentscope_tutorial_rag",
+        emb_model_name="qwen_emb_config",
+        data_dirs_and_types={
+            "../../docs/sphinx_doc/en/source/tutorial": [".md"],
+        },
+        persist_dir="./rag_storage/tutorial_assist",
+    )
+    # a more detailed configuration can be provided by loading a config file
+    with open(
+        "configs/detailed_rag_config_example.json",
+        "r",
+        encoding="utf-8",
+    ) as f:
+        knowledge_configs = json.load(f)
+    for config in knowledge_configs:
+        knowledge_bank.add_data_for_rag(
+            knowledge_id=config["knowledge_id"],
+            emb_model_name="qwen_emb_config",
+            index_config=config,
+        )
+
     with open("configs/agent_config.json", "r", encoding="utf-8") as f:
         agent_configs = json.load(f)
 
@@ -76,13 +102,20 @@ def main() -> None:
 
     searching_agent = LlamaIndexAgent(**agent_configs[4]["args"])
 
-    rag_agents = [
+    rag_agent_list = [
         tutorial_agent,
         code_explain_agent,
         api_agent,
         searching_agent,
     ]
-    rag_agent_names = [agent.name for agent in rag_agents]
+    rag_agent_names = [agent.name for agent in rag_agent_list]
+
+    for rag_agent in rag_agent_list:
+        rag_agent.init_rag(
+            rag_module=knowledge_bank.get_rag(
+                rag_agent.rag_config["knowledge_id"],
+            ),
+        )
 
     # define a guide agent
     rag_agent_descriptions = [
@@ -91,7 +124,7 @@ def main() -> None:
         + "\n agent description："
         + agent.description
         + "\n"
-        for agent in rag_agents
+        for agent in rag_agent_list
     ]
     agent_configs[3]["args"].pop("description")
     agent_configs[3]["args"]["sys_prompt"] = agent_configs[3]["args"][
@@ -114,14 +147,14 @@ def main() -> None:
         x.role = "user"  # to enforce dashscope requirement on roles
         if len(x["content"]) == 0 or str(x["content"]).startswith("exit"):
             break
-        speak_list = filter_agents(x.get("content", ""), rag_agents)
+        speak_list = filter_agents(x.get("content", ""), rag_agent_list)
         if len(speak_list) == 0:
             guide_response = guide_agent(x)
             # Only one agent can be called in the current version;
             # multi-agent conversation may be supported later.
             speak_list = filter_agents(
                 guide_response.get("content", ""),
-                rag_agents,
+                rag_agent_list,
             )
         agent_name_list = [agent.name for agent in speak_list]
         for agent_name, agent in zip(agent_name_list, speak_list):