Refactor

Signed-off-by: Jael Gu <[email protected]>
DrJay7 · Jul 13, 2023 · 6aa7a09 · 6aa7a09
1 parent 8ec6749
commit 6aa7a09
Show file tree

Hide file tree

Showing 54 changed files with 46 additions and 47 deletions.
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
@@ -20,6 +20,6 @@ jobs:
     - name: Python pylint
       run: |
         pip install pylint==2.10.2
-        pylint --rcfile=.pylintrc --output-format=colorized towhee_src
-        pylint --rcfile=.pylintrc --output-format=colorized langchain_src
+        pylint --rcfile=.pylintrc --output-format=colorized src_towhee
+        pylint --rcfile=.pylintrc --output-format=colorized src_langchain
         pylint --rcfile=.pylintrc --output-format=colorized offline_tools
diff --git a/README.md b/README.md
@@ -58,34 +58,34 @@ It also supports different integrations of LLM service and databases:
 
 The option using Towhee simplifies the process of building a system by providing [pre-defined pipelines](https://towhee.io/tasks/pipeline). These built-in pipelines require less coding and make system building much easier. If you require customization, you can either simply modify configuration or create your own pipeline with rich options of [Towhee Operators](https://towhee.io/tasks/operator).
 
-- [Pipelines](./towhee_src/pipelines)
+- [Pipelines](./src_towhee/pipelines)
     - **Insert:**
         The insert pipeline builds a knowledge base by saving documents and corresponding data in database(s).
     - **Search:**
         The search pipeline enables the question-answering capability powered by information retrieval (semantic search and optional keyword match) and LLM service.
     - **Prompt:** a prompt operator prepares messages for LLM by assembling system message, chat history, and the user's query processed by template.
 
-- [Memory](./towhee_src/memory):
-    The memory storage stores chat history to support context in conversation. (available: [most SQL](./towhee_src/memory/sql.py))
+- [Memory](./src_towhee/memory):
+    The memory storage stores chat history to support context in conversation. (available: [most SQL](./src_towhee/memory/sql.py))
 
 
 ### Option 2: LangChain
 
 The option using LangChain employs an [Agent](https://python.langchain.com/docs/modules/agents) to enable the LLM to utilize specific tools, which places a greater demand on the LLM's ability to comprehend tasks and make informed decisions.
 
-- [Agent](./langchain_src/agent)
+- [Agent](./src_langchain/agent)
     - **ChatAgent:** the agent assembles all modules together to build up the QA system.
     - Other agents (todo)
-- [LLM](./langchain_src/llm)
+- [LLM](./src_langchain/llm)
     - **ChatLLM:** large language model or service to generate answers.
-- [Embedding](./langchain_src/embedding/)
+- [Embedding](./src_langchain/embedding/)
     - **TextEncoder:** encoder converts each text input to a vector.
     - Other encoders (todo)
-- [Store](./langchain_src/store)
+- [Store](./src_langchain/store)
     - **VectorStore:** vector database stores document chunks in embeddings, and performs document retrieval via semantic search.
-    - **ScalarStore:** optional, database stores metadata for each document chunk, which supports additional information retrieval. (available: [Elastic](langchain_src/store/scalar_store/es.py))
+    - **ScalarStore:** optional, database stores metadata for each document chunk, which supports additional information retrieval. (available: [Elastic](src_langchain/store/scalar_store/es.py))
     - **MemoryStore:** memory storage stores chat history to support context in conversation.
-- [DataLoader](./langchain_src/data_loader/)
+- [DataLoader](./src_langchain/data_loader/)
     - **DataParser:** a tool that loads data from a given source and then splits the documents into processed doc chunks.
 
 ## Deployment
@@ -187,7 +187,7 @@ The option using LangChain employs the use of [Agent](https://python.langchain.c
 
 ## Load data
 
-The `insert` function in [operations](./langchain_src/operations.py) loads project data from url(s) or file(s).
+The `insert` function in [operations](./src_langchain/operations.py) loads project data from url(s) or file(s).
 
 There are 2 options to load project data:
 

diff --git a/gradio_demo.py b/gradio_demo.py
@@ -1,4 +1,3 @@
-import os
 import uuid
 import argparse
 import gradio as gr
@@ -17,9 +16,9 @@
     'The service should start with either "--langchain" or "--towhee".'
 
 if USE_LANGCHAIN:
-    from langchain_src.operations import chat, insert, check, drop, get_history, clear_history
+    from src_langchain.operations import chat, insert, check, drop, get_history, clear_history
 if USE_TOWHEE:
-    from towhee_src.operations import chat, insert, check, drop, get_history, clear_history
+    from src_towhee.operations import chat, insert, check, drop, get_history, clear_history
 
 
 def create_session_id():

diff --git a/main.py b/main.py
@@ -19,9 +19,9 @@
     'The service should start with either "--langchain" or "--towhee".'
 
 if USE_LANGCHAIN:
-    from langchain_src.operations import chat, insert, drop
+    from src_langchain.operations import chat, insert, drop
 if USE_TOWHEE:
-    from towhee_src.operations import chat, insert, drop
+    from src_towhee.operations import chat, insert, drop
 
 app = FastAPI()
 origins = ['*']

diff --git a/offline_tools/insert.py b/offline_tools/insert.py
@@ -10,7 +10,7 @@
 from offline_tools.utils.load_npy import langchain_load
 from offline_tools.utils.stackoverflow_json2csv import stackoverflow_json2csv
 from offline_tools.generator_questions import get_output_csv
-from langchain_src.embedding import TextEncoder
+from src_langchain.embedding import TextEncoder
 
 
 def split_df_by_row(df, n):

diff --git a/offline_tools/utils/load_npy.py b/offline_tools/utils/load_npy.py
@@ -5,7 +5,7 @@
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 
-from langchain_src.store import DocStore
+from src_langchain.store import DocStore
 
 
 class DBReader(object):

diff --git a/langchain_src/__init__.py → src_langchain/__init__.py b/langchain_src/__init__.py → src_langchain/__init__.py
diff --git a/langchain_src/agent/README.md → src_langchain/agent/README.md b/langchain_src/agent/README.md → src_langchain/agent/README.md
diff --git a/langchain_src/agent/__init__.py → src_langchain/agent/__init__.py b/langchain_src/agent/__init__.py → src_langchain/agent/__init__.py
diff --git a/langchain_src/agent/chat_agent.py → src_langchain/agent/chat_agent.py b/langchain_src/agent/chat_agent.py → src_langchain/agent/chat_agent.py
diff --git a/langchain_src/agent/output_parser.py → src_langchain/agent/output_parser.py b/langchain_src/agent/output_parser.py → src_langchain/agent/output_parser.py
diff --git a/langchain_src/agent/prompt.py → src_langchain/agent/prompt.py b/langchain_src/agent/prompt.py → src_langchain/agent/prompt.py
diff --git a/langchain_src/data_loader/README.md → src_langchain/data_loader/README.md b/langchain_src/data_loader/README.md → src_langchain/data_loader/README.md
diff --git a/langchain_src/data_loader/__init__.py → src_langchain/data_loader/__init__.py b/langchain_src/data_loader/__init__.py → src_langchain/data_loader/__init__.py
diff --git a/langchain_src/data_loader/data_parser.py → src_langchain/data_loader/data_parser.py b/langchain_src/data_loader/data_parser.py → src_langchain/data_loader/data_parser.py
diff --git a/langchain_src/data_loader/data_splitter.py → src_langchain/data_loader/data_splitter.py b/langchain_src/data_loader/data_splitter.py → src_langchain/data_loader/data_splitter.py
diff --git a/langchain_src/embedding/README.md → src_langchain/embedding/README.md b/langchain_src/embedding/README.md → src_langchain/embedding/README.md
diff --git a/langchain_src/embedding/__init__.py → src_langchain/embedding/__init__.py b/langchain_src/embedding/__init__.py → src_langchain/embedding/__init__.py
diff --git a/...in_src/embedding/langchain_huggingface.py → ...gchain/embedding/langchain_huggingface.py b/...in_src/embedding/langchain_huggingface.py → ...gchain/embedding/langchain_huggingface.py
diff --git a/langchain_src/embedding/openai_embedding.py → src_langchain/embedding/openai_embedding.py b/langchain_src/embedding/openai_embedding.py → src_langchain/embedding/openai_embedding.py
diff --git a/langchain_src/llm/README.md → src_langchain/llm/README.md b/langchain_src/llm/README.md → src_langchain/llm/README.md
diff --git a/langchain_src/llm/__init__.py → src_langchain/llm/__init__.py b/langchain_src/llm/__init__.py → src_langchain/llm/__init__.py
diff --git a/langchain_src/llm/dolly_chat.py → src_langchain/llm/dolly_chat.py b/langchain_src/llm/dolly_chat.py → src_langchain/llm/dolly_chat.py
diff --git a/langchain_src/llm/ernie.py → src_langchain/llm/ernie.py b/langchain_src/llm/ernie.py → src_langchain/llm/ernie.py
diff --git a/langchain_src/llm/minimax_chat.py → src_langchain/llm/minimax_chat.py b/langchain_src/llm/minimax_chat.py → src_langchain/llm/minimax_chat.py
diff --git a/langchain_src/llm/openai_chat.py → src_langchain/llm/openai_chat.py b/langchain_src/llm/openai_chat.py → src_langchain/llm/openai_chat.py
diff --git a/langchain_src/operations.py → src_langchain/operations.py b/langchain_src/operations.py → src_langchain/operations.py
diff --git a/langchain_src/store/README.md → src_langchain/store/README.md b/langchain_src/store/README.md → src_langchain/store/README.md
diff --git a/langchain_src/store/__init__.py → src_langchain/store/__init__.py b/langchain_src/store/__init__.py → src_langchain/store/__init__.py
diff --git a/langchain_src/store/memory_store/__init__.py → src_langchain/store/memory_store/__init__.py b/langchain_src/store/memory_store/__init__.py → src_langchain/store/memory_store/__init__.py
diff --git a/langchain_src/store/memory_store/pg.py → src_langchain/store/memory_store/pg.py b/langchain_src/store/memory_store/pg.py → src_langchain/store/memory_store/pg.py
diff --git a/langchain_src/store/scalar_store/__init__.py → src_langchain/store/scalar_store/__init__.py b/langchain_src/store/scalar_store/__init__.py → src_langchain/store/scalar_store/__init__.py
diff --git a/langchain_src/store/scalar_store/es.py → src_langchain/store/scalar_store/es.py b/langchain_src/store/scalar_store/es.py → src_langchain/store/scalar_store/es.py
diff --git a/langchain_src/store/vector_store/__init__.py → src_langchain/store/vector_store/__init__.py b/langchain_src/store/vector_store/__init__.py → src_langchain/store/vector_store/__init__.py
diff --git a/langchain_src/store/vector_store/milvus.py → src_langchain/store/vector_store/milvus.py b/langchain_src/store/vector_store/milvus.py → src_langchain/store/vector_store/milvus.py
diff --git a/towhee_src/base.py → src_towhee/base.py b/towhee_src/base.py → src_towhee/base.py
diff --git a/towhee_src/memory/README.md → src_towhee/memory/README.md b/towhee_src/memory/README.md → src_towhee/memory/README.md
diff --git a/towhee_src/memory/__init__.py → src_towhee/memory/__init__.py b/towhee_src/memory/__init__.py → src_towhee/memory/__init__.py
diff --git a/towhee_src/memory/sql.py → src_towhee/memory/sql.py b/towhee_src/memory/sql.py → src_towhee/memory/sql.py
@@ -9,7 +9,7 @@
 sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
 
 from config import MEMORYDB_CONFIG
-from towhee_src.base import BaseMemory
+from src_towhee.base import BaseMemory
 
 
 class MemoryStore(BaseMemory):

diff --git a/towhee_src/operations.py → src_towhee/operations.py b/towhee_src/operations.py → src_towhee/operations.py
@@ -4,8 +4,8 @@
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 
-from towhee_src.memory import MemoryStore
-from towhee_src.pipelines import TowheePipelines
+from src_towhee.memory import MemoryStore
+from src_towhee.pipelines import TowheePipelines
 
 
 logger = logging.getLogger(__name__)

diff --git a/towhee_src/pipelines/README.md → src_towhee/pipelines/README.md b/towhee_src/pipelines/README.md → src_towhee/pipelines/README.md
diff --git a/towhee_src/pipelines/__init__.py → src_towhee/pipelines/__init__.py b/towhee_src/pipelines/__init__.py → src_towhee/pipelines/__init__.py
@@ -7,8 +7,8 @@
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
 
-from towhee_src.base import BasePipelines
-from towhee_src.pipelines.prompts import PROMPT_OP
+from src_towhee.base import BasePipelines
+from src_towhee.pipelines.prompts import PROMPT_OP
 from config import (
     USE_SCALAR, LLM_OPTION,
     TEXTENCODER_CONFIG, CHAT_CONFIG,

diff --git a/towhee_src/pipelines/prompts.py → src_towhee/pipelines/prompts.py b/towhee_src/pipelines/prompts.py → src_towhee/pipelines/prompts.py
diff --git a/tests/unit_tests/langchain_src/agent/test_agent.py b/tests/unit_tests/langchain_src/agent/test_agent.py
@@ -7,7 +7,7 @@
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
 
-from langchain_src.agent import ChatAgent
+from src_langchain.agent import ChatAgent
 
 
 class TestChatAgent(unittest.TestCase):

diff --git a/tests/unit_tests/langchain_src/agent/test_output_parser.py b/tests/unit_tests/langchain_src/agent/test_output_parser.py
@@ -6,8 +6,8 @@
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
 
-from langchain_src.agent.prompt import FORMAT_INSTRUCTIONS
-from langchain_src.agent.output_parser import OutputParser
+from src_langchain.agent.prompt import FORMAT_INSTRUCTIONS
+from src_langchain.agent.output_parser import OutputParser
 
 
 class TestOutputParser(unittest.TestCase):

diff --git a/tests/unit_tests/langchain_src/data_loader/test_data_parser.py b/tests/unit_tests/langchain_src/data_loader/test_data_parser.py
@@ -10,7 +10,7 @@
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../../../..'))
 
-from langchain_src.data_loader import DataParser
+from src_langchain.data_loader import DataParser
 
 
 class TestDataParser(unittest.TestCase):

diff --git a/tests/unit_tests/langchain_src/data_loader/test_data_splitter.py b/tests/unit_tests/langchain_src/data_loader/test_data_splitter.py
@@ -4,7 +4,7 @@
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../../../..'))
 
-from langchain_src.data_loader.data_splitter import MarkDownSplitter
+from src_langchain.data_loader.data_splitter import MarkDownSplitter
 
 
 class TestMarkDownSplitter(unittest.TestCase):

diff --git a/tests/unit_tests/langchain_src/llm/test_dolly_chat.py b/tests/unit_tests/langchain_src/llm/test_dolly_chat.py
@@ -18,7 +18,7 @@ def __call__(self, prompt):
 
         with patch('transformers.pipeline') as mock_pipelines:
             mock_pipelines.return_value = MockGenerateText()
-            from langchain_src.llm.dolly_chat import ChatLLM
+            from src_langchain.llm.dolly_chat import ChatLLM
 
             chat_llm = ChatLLM(model_name='mock', device='cpu', )
             messages = [HumanMessage(content='hello')]

diff --git a/tests/unit_tests/langchain_src/llm/test_ernie.py b/tests/unit_tests/langchain_src/llm/test_ernie.py
@@ -18,7 +18,7 @@ def test_generate(self):
             mock_res2._content = b'{ "result" : "mock answer", "usage" : 2 }'
             mock_post.return_value = mock_res1
             mock_request.return_value = mock_res2
-            from langchain_src.llm.ernie import ChatLLM
+            from src_langchain.llm.ernie import ChatLLM
 
             chat_llm = ChatLLM(api_key='mock-key', secret_key='mock-key')
             messages = [HumanMessage(content='hello')]

diff --git a/tests/unit_tests/langchain_src/llm/test_openai_chat.py b/tests/unit_tests/langchain_src/llm/test_openai_chat.py
@@ -7,7 +7,7 @@
 
 class TestOpenAIChat(unittest.TestCase):
     def test_init(self):
-        from langchain_src.llm.openai_chat import ChatLLM
+        from src_langchain.llm.openai_chat import ChatLLM
         chat_llm = ChatLLM(openai_api_key='mock-key')
         self.assertEqual(chat_llm.__class__.__name__, 'ChatLLM')
 

diff --git a/tests/unit_tests/towhee_src/memory/test_sql.py b/tests/unit_tests/towhee_src/memory/test_sql.py
@@ -4,8 +4,8 @@
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
 
-from towhee_src.memory.sql import MemoryStore
-from towhee_src.base import BaseMemory
+from src_towhee.memory.sql import MemoryStore
+from src_towhee.base import BaseMemory
 
 
 class TestSql(unittest.TestCase):

diff --git a/tests/unit_tests/towhee_src/pipelines/test_pipelines.py b/tests/unit_tests/towhee_src/pipelines/test_pipelines.py
@@ -9,8 +9,8 @@
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
 
-from towhee_src.base import BasePipelines
-from towhee_src.pipelines import TowheePipelines
+from src_towhee.base import BasePipelines
+from src_towhee.pipelines import TowheePipelines
 from config import CHAT_CONFIG, TEXTENCODER_CONFIG, VECTORDB_CONFIG
 
 

diff --git a/tests/unit_tests/towhee_src/pipelines/test_prompts.py b/tests/unit_tests/towhee_src/pipelines/test_prompts.py
@@ -5,7 +5,7 @@
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
 
-from towhee_src.pipelines.prompts import PROMPT_OP, QUERY_PROMPT, SYSTEM_PROMPT
+from src_towhee.pipelines.prompts import PROMPT_OP, QUERY_PROMPT, SYSTEM_PROMPT
 
 
 class TestPrompts(unittest.TestCase):

diff --git a/tests/unit_tests/towhee_src/test_operations.py b/tests/unit_tests/towhee_src/test_operations.py
@@ -67,21 +67,21 @@ class TestOperations(unittest.TestCase):
 
     def test_chat(self):
 
-        with patch('towhee_src.pipelines.TowheePipelines') as mock_pipelines, \
-             patch('towhee_src.memory.MemoryStore') as mock_memory:
+        with patch('src_towhee.pipelines.TowheePipelines') as mock_pipelines, \
+             patch('src_towhee.memory.MemoryStore') as mock_memory:
             mock_pipelines.return_value = MockPipeline()
             mock_memory.return_value = MockStore()
 
-            from towhee_src.pipelines import TowheePipelines
-            from towhee_src.memory import MemoryStore
+            from src_towhee.pipelines import TowheePipelines
+            from src_towhee.memory import MemoryStore
 
 
             with patch.object(TowheePipelines, 'search_pipeline', mock_pipelines.search_pipeline), \
                  patch.object(MemoryStore, 'add_history', mock_memory.add_history), \
                  patch.object(MemoryStore, 'get_history', mock_memory.get_history), \
                  patch.object(MemoryStore, 'drop', mock_memory.drop):
 
-                from towhee_src.operations import chat, get_history, clear_history
+                from src_towhee.operations import chat, get_history, clear_history
 
                 answer = chat(self.session_id, self.project, self.question)
                 assert answer == self.expect_answer
@@ -96,13 +96,13 @@ def test_chat(self):
 
     def test_insert(self):
 
-        with patch('towhee_src.pipelines.TowheePipelines') as mock_pipelines, \
-             patch('towhee_src.memory.MemoryStore') as mock_memory:
+        with patch('src_towhee.pipelines.TowheePipelines') as mock_pipelines, \
+             patch('src_towhee.memory.MemoryStore') as mock_memory:
             mock_pipelines.return_value = MockPipeline()
             mock_memory.return_value = MockStore()
 
-            from towhee_src.pipelines import TowheePipelines
-            from towhee_src.memory import MemoryStore
+            from src_towhee.pipelines import TowheePipelines
+            from src_towhee.memory import MemoryStore
 
             with patch.object(TowheePipelines, 'insert_pipeline', mock_pipelines.insert_pipeline), \
                 patch.object(TowheePipelines, 'count_entities', mock_pipelines.count_entities), \
@@ -111,7 +111,7 @@ def test_insert(self):
                 patch.object(MemoryStore, 'check', mock_memory.check), \
                 patch.object(MemoryStore, 'drop', mock_memory.drop):
 
-                from towhee_src.operations import insert, check, drop
+                from src_towhee.operations import insert, check, drop
 
                 count = insert(self.test_src, self.project)
                 assert count == self.expect_len