This repository was archived by the owner on Sep 13, 2024. It is now read-only.

Commit 129fa0c

feat: chain service

1 parent: 3d800ce

13 files changed: +170 -45 lines

config.py

+1-1
@@ -14,7 +14,7 @@
 PGPORT = os.getenv("PGPORT")
 
 EMBED_MODEL_PATH = os.getenv("EMBED_MODEL_PATH")
-TEXT_GENERATION_MODEL_PATH = os.getenv("TEXT_GENERATION_MODEL_PATH")
+TEXT_GENERATION_MODEL = os.getenv("TEXT_GENERATION_MODEL")
 DOCUMENT_PATH = os.getenv("DOCUMENT_PATH")
 BASE_KNOWLEDGE_DOCUMENT_PATH = os.getenv("BASE_KNOWLEDGE_DOCUMENT_PATH")
 BASE_KNOWLEDGE_DOCUMENT_NAME = os.getenv("BASE_KNOWLEDGE_DOCUMENT_NAME")

example.env

+11-3
@@ -1,8 +1,16 @@
 PORT=5000
 OPENAI_API_KEY=
 HUGGINGFACEHUB_API_TOKEN=
+TOKENIZERS_PARALLELISM=true
 
-EMBED_MODEL_PATH=./src/commons/models/indo-sentence-bert-base
-BASE_KNOWLEDGE_DOCUMENT_PATH=./src/commons/documents/ITS-profile
+PGUSER=
+PGHOST=
+PGPASSWORD=
+PGDATABASE=
+PGPORT=5435
+
+EMBED_MODEL_PATH=src/commons/models/indo-sentence-bert-base
+DOCUMENT_PATH=src/commons/documents
+BASE_KNOWLEDGE_DOCUMENT_PATH=src/commons/documents/ITS-profile
 BASE_KNOWLEDGE_DOCUMENT_NAME=ITS-Profile.pdf
-TEXT_GENERATION_MODEL_PATH=
+TEXT_GENERATION_MODEL=gpt-3.5-turbo
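Note: config.py reads these variables with os.getenv (see the config.py diff above). A minimal sketch of the flow, assuming the app loads the .env file with python-dotenv (the loading step itself is not visible in this commit):

import os
from dotenv import load_dotenv

load_dotenv()  # copies KEY=VALUE pairs from the .env file into the process environment

# config.py-style lookups then resolve to the values above
TEXT_GENERATION_MODEL = os.getenv("TEXT_GENERATION_MODEL")  # "gpt-3.5-turbo"
PGPORT = os.getenv("PGPORT")                                # "5435" (a string, not an int)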

installer.txt

+3-1
@@ -12,7 +12,9 @@ pip3 install pytest==8.2.1
 pip3 install pytest_cov==5.0.0
 pip3 install aiofiles==23.2.1
 pip3 install asyncpg==0.29.0
-pip3 install langchain_community==0.2.0
+pip3 install langchain_community==0.2.1
+pip3 install openai==1.30.2
+pip3 install langchain-openai==0.1.7
 
 INSTALLATION WITH IF

request.py

+62-27
@@ -1,29 +1,64 @@
 import requests
+from config import PORT
 
-url = f"http://localhost:5000"
-
-# ping = requests.get(f'{url}/ping')
-# print(ping.text)
-
-payload = {
-    'id': "1",
-    'question': 'hello world'
-}
-
-headers = {
-    'Content-Type': 'application/json'
-}
-
-def get_stream(url):
-    s = requests.Session()
-    with s.post(url, headers=headers, json=payload, stream=True) as resp:
-    # with s.post(url, headers={'Content-Type': 'application/json'}, stream=True) as resp:
-        for line in resp.iter_lines():
-            if line:
-                print("triggered => ")
-                yield line.decode('utf-8')
-
-url = f'{url}/questions/stream-generator'
-# url = 'https://jsonplaceholder.typicode.com/posts/1'
-for line in get_stream(url):
-    print(line)
+URL = f"http://localhost:{PORT}"
+
+def get_ping():
+    ping = requests.get(f'{URL}/ping')
+    print(ping.text)
+
+def post_stream_generator():
+    payload = {
+        'id': "1",
+        'question': 'hello world'
+    }
+
+    headers = {
+        'Content-Type': 'application/json'
+    }
+
+    def get_stream(url):
+        s = requests.Session()
+        with s.post(url, headers=headers, json=payload, stream=True) as resp:
+        # with s.post(url, headers={'Content-Type': 'application/json'}, stream=True) as resp:
+            for line in resp.iter_lines():
+                if line:
+                    print("triggered => ")
+                    yield line.decode('utf-8')
+
+    url = f'{URL}/questions/stream-generator'
+    # url = 'https://jsonplaceholder.typicode.com/posts/1'
+    data_rcv = ''
+    for line in get_stream(url):
+        data_rcv += line[6:]
+        print(f"{data_rcv}\n")
+
+def post_questions(question):
+    payload = {
+        'id': "1",
+        'question': question
+    }
+
+    headers = {
+        'Content-Type': 'application/json'
+    }
+
+    def get_stream(url):
+        s = requests.Session()
+        with s.post(url, headers=headers, json=payload, stream=True) as resp:
+        # with s.post(url, headers={'Content-Type': 'application/json'}, stream=True) as resp:
+            for line in resp.iter_lines():
+                if line:
+                    print("triggered => ")
+                    yield line.decode('utf-8')
+
+    url = f'{URL}/questions'
+    # url = 'https://jsonplaceholder.typicode.com/posts/1'
+    data_rcv = ''
+    for line in get_stream(url):
+        data_rcv += line[6:]
+        print(f"{data_rcv}\n")
+
+# get_ping()
+# post_stream_generator()
+post_questions("bagaimana Pengambilan MK non-Konversi di semester yang sama dengan pengambilan MK Konversi? jawab bahasa indonesia")
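The `line[6:]` slice in both client functions strips the `data: ` prefix (six characters) that sse-starlette prepends to every SSE data line, before the chunks are concatenated back into one answer. A slightly more defensive variant (a hypothetical helper, not part of this commit) checks the prefix instead of hard-coding the offset:

def strip_sse_data_prefix(line: str) -> str:
    # SSE data lines arrive as "data: <payload>"; strip the prefix only if present
    prefix = "data: "
    return line[len(prefix):] if line.startswith(prefix) else line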

src/api/files/handler.py

+1-1
@@ -25,7 +25,7 @@ async def put_embed_files_handler(self, name: str = Form(...), file: UploadFile
 
     7. update vectorstore
 
-    8. update chain
+    8. update chain (to test: does the chain still need updating?)
     """
 
     full_path: str = await self._file_storage_service.save_file_to_folder(name, file)

src/api/questions/__init__.py

+2-2
@@ -1,6 +1,6 @@
 from src.api.questions.handler import QuestionsHandler
 from src.api.questions.routes import routes
 
-def register(lorem_generator_service):
-    questions_handler = QuestionsHandler(lorem_generator_service)
+def register(lorem_generator_service, chain_service):
+    questions_handler = QuestionsHandler(lorem_generator_service, chain_service)
     return routes(questions_handler)

src/api/questions/handler.py

+11-3
@@ -1,11 +1,19 @@
 from src.commons.types.questions_api_handler_type import PostQuestionStreamGeneratorType
+from sse_starlette.sse import EventSourceResponse
 
 class QuestionsHandler:
-    def __init__(self, lorem_generator_service):
+    def __init__(self, lorem_generator_service, chain_service):
         self._lorem_generator_service = lorem_generator_service
+        self._chain_service = chain_service
 
     async def post_question_stream_generator_handler(self, payload: PostQuestionStreamGeneratorType):
-        # ignore the payload and question
+        # ignore the id and question
         return await self._lorem_generator_service.generate_lorem()
-
+
+    async def post_question_stream_handler(self, payload: PostQuestionStreamGeneratorType):
+        return EventSourceResponse(self._chain_streamer(payload.question, payload.id), media_type='text/event-stream')
+
+    async def _chain_streamer(self, question, id):
+        async for chunk in self._chain_service.get_chain().astream(question):
+            yield chunk
 
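EventSourceResponse consumes the async generator and writes each yielded chunk to the client as a separate SSE `data:` event, so tokens stream out as the chain produces them. A self-contained sketch of the same pattern, with a toy token stream standing in for `chain.astream(question)` (illustrative only, not the project's code):

import asyncio

from fastapi import FastAPI
from sse_starlette.sse import EventSourceResponse

app = FastAPI()

async def token_stream():
    # stand-in for `async for chunk in chain.astream(question)`
    for token in ["streamed", " ", "tokens"]:
        await asyncio.sleep(0.1)  # simulate model latency
        yield token  # delivered to the client as: data: <token>

@app.post("/questions")
async def post_question():
    return EventSourceResponse(token_stream(), media_type='text/event-stream')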

src/api/questions/routes.py

+5
@@ -8,5 +8,10 @@ def routes(handler) -> List[HandlerRequestType]:
             method=Method.POST.value,
             path="/questions/stream-generator",
             handler=handler.post_question_stream_generator_handler
+        ),
+        HandlerRequestType(
+            method=Method.POST.value,
+            path="/questions",
+            handler=handler.post_question_stream_handler
         )
     ]
src/commons/types/questions_api_handler_type.py

+2-1

@@ -1,5 +1,6 @@
+from typing import Optional
 from pydantic import BaseModel
 
 class PostQuestionStreamGeneratorType(BaseModel):
-    id: str
+    id: Optional[str] = None
     question: str
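With the `Optional[str] = None` default, request bodies that omit `id` now pass validation instead of failing with a 422. For illustration:

from typing import Optional
from pydantic import BaseModel

class PostQuestionStreamGeneratorType(BaseModel):
    id: Optional[str] = None
    question: str

req = PostQuestionStreamGeneratorType(question="hello world")  # no id: valid now
print(req.id)  # None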

src/server/__init__.py

+3-1
@@ -13,6 +13,7 @@
 from src.services.postgres import PostgresDb
 from src.services.rag.embedding_service import EmbeddingService
 from src.services.rag.vectorstore_service import VectorstoreService
+from src.services.rag.chain_service import ChainService
 
 # endpoints
 import src.api.questions as questions_endpoint
@@ -49,14 +50,15 @@ def configure_endpoint(self):
         files_db_service = FilesDbService(db)
         embedding_service = EmbeddingService(embedding_model)
         vectorstore_service = VectorstoreService(embedding_model, files_db_service)
+        chain_service = ChainService(files_db_service, vectorstore_service)
 
         # service builder
         vectorstore_service.load_all_local_embedding()
 
         # routes initiation
         endpoint_factory = EndpointFactory(self._app)
         endpoint_factory.routes_creator(health_check_endpoint.register())
-        endpoint_factory.routes_creator(questions_endpoint.register(lorem_generator_service))
+        endpoint_factory.routes_creator(questions_endpoint.register(lorem_generator_service, chain_service))
         endpoint_factory.routes_creator(files_endpoint.register(file_storage_service, files_db_service, embedding_service, vectorstore_service))
 
     def run(self):

src/services/rag/chain_service.py

+45-2
@@ -1,3 +1,46 @@
+from langchain_openai import ChatOpenAI
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.prompts.chat import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_community.vectorstores import FAISS
+
+import os
+
+from config import TEXT_GENERATION_MODEL, BASE_KNOWLEDGE_DOCUMENT_PATH
+
 class ChainService:
-    def __init__(self) -> None:
-        pass
+    def __init__(self, files_db_service, vectorstore_service) -> None:
+        self._files_db_service = files_db_service
+        self._vectorstore_service = vectorstore_service
+        self._chain = self._init_chain()
+
+    def _init_chain(self):
+        return (
+            {
+                "context": self._vectorstore_service.get_retriever(),
+                "question": RunnablePassthrough()
+            }
+            | self._init_prompt()
+            | self._init_llm()
+            | StrOutputParser()
+        )
+
+    def _init_prompt(self):
+        template = """Answer the question based only on the following context:
+        {context}
+
+        Question: {question}
+        """
+
+        prompt = ChatPromptTemplate.from_template(template)
+        return prompt
+
+    def _init_llm(self):
+        return ChatOpenAI(model_name=TEXT_GENERATION_MODEL, temperature=0, streaming=True)
+
+    def get_chain(self):
+        '''
+        The chain picks up vectorstore updates automatically; no reinitialization is needed.
+        '''
+        return self._chain
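The chain is plain LCEL composition: the dict at the head is coerced to a RunnableParallel that fans the incoming question out to the retriever (filling `context`) and passes it through unchanged as `question`; the result then flows through the prompt, the streaming ChatOpenAI model, and StrOutputParser. A minimal runnable sketch of the same shape, with a stub in place of the FAISS retriever (the stub and model name are illustrative, not from this commit):

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI

# any Runnable can sit where the FAISS retriever does
fake_retriever = RunnableLambda(lambda q: f"(documents retrieved for: {q})")

prompt = ChatPromptTemplate.from_template(
    "Answer the question based only on the following context:\n"
    "{context}\n\nQuestion: {question}"
)

chain = (
    {"context": fake_retriever, "question": RunnablePassthrough()}
    | prompt
    | ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True)
    | StrOutputParser()
)

# chain.invoke("hello world") returns the full answer as one str;
# `async for chunk in chain.astream("hello world")` yields partial strings.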

src/services/rag/vectorstore_service.py

+15-1
@@ -3,14 +3,24 @@
 import os
 
 from config import BASE_KNOWLEDGE_DOCUMENT_PATH
-from src.services.postgres.models.tables import Files
 
 class VectorstoreService:
     def __init__(self, embedding_model, files_db_service) -> None:
         self._embedding_model = embedding_model
         self._vectorstore = FAISS.load_local(folder_path=f"{BASE_KNOWLEDGE_DOCUMENT_PATH}/embedding", embeddings=self._embedding_model, allow_dangerous_deserialization=True)
         self._files_db_service = files_db_service
 
+    def get_retriever(self):
+        retriever = self._vectorstore.as_retriever(search_kwargs={"k": 1})
+        if retriever is None:
+            raise ValueError("Vectorstore as retriever returned None, expected a valid retriever.")
+        return retriever
+
+    def get_vectorstore(self):
+        if self._vectorstore is None:
+            raise ValueError("Vectorstore is None, expected a valid vectorstore instance.")
+        return self._vectorstore
+
     def load_all_local_embedding(self):
         files = self._files_db_service.get_all_file()
         for file in files:
@@ -23,3 +33,7 @@ def add_vectostore(self, path):
             self._vectorstore.merge_from(local_vectorstore)
         else:
             self._files_db_service.delete_file_by_id(path)
+
+    def similarity_search(self, question):
+        ss = self._vectorstore.similarity_search(question, k=1)
+        return ss
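`as_retriever(search_kwargs={"k": 1})` limits retrieval to the single nearest chunk per query (the same `k=1` used in `similarity_search`), which keeps the prompt context small at the cost of recall. Typical usage, sketched (construction of the service omitted):

retriever = vectorstore_service.get_retriever()
docs = retriever.invoke("hello world")   # list of at most 1 Document
print(docs[0].page_content)

# equivalent direct lookup, bypassing the retriever wrapper:
docs = vectorstore_service.similarity_search("hello world")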

version_checker.py

+9-2
@@ -67,5 +67,12 @@
 print(asyncpg.__version__)
 
 import langchain_community
-# 0.2.0
-print(langchain_community.__version__)
+# 0.2.1
+print(langchain_community.__version__)
+
+import openai
+# 1.30.2
+print(openai.__version__)
+
+# check with: pip3 show langchain_openai
+# langchain_openai 0.1.7
