azure_app.py
import datetime
import os
import shutil

import streamlit as st
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain import PromptTemplate

load_dotenv()
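# NOTE (assumption): the .env file loaded above is expected to provide the OpenAI /
# Azure OpenAI credentials used below (e.g. OPENAI_API_KEY, and for Azure-hosted
# deployments typically OPENAI_API_TYPE, OPENAI_API_BASE and OPENAI_API_VERSION);
# the exact variable names depend on how the endpoint is configured.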
print("Streamlit run has started")
# Title
st.title("Ask PDF")


def create_folder(folder_path):
    """Create folder_path if it does not already exist."""
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)


def delete_files_in_folder(folder_path):
    """Delete every file inside folder_path, surfacing failures in the UI."""
    files = os.listdir(folder_path)
    for file in files:
        file_path = os.path.join(folder_path, file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            st.error(f"Error deleting file {file_path}: {e}")


def delete_folder(folder_path):
    """Remove the (now empty) folder itself."""
    try:
        os.rmdir(folder_path)
    except Exception as e:
        st.error(f"Error deleting folder {folder_path}: {e}")

TARGET_DIR = "./uploaded_data"
# Uploading Files
uploaded_files = st.file_uploader("Upload your files", type=['pdf'], accept_multiple_files=True)

@st.cache_resource
def load_uploaded_files(uploaded_files: list, target_dir: str):
    """Persist the uploaded PDFs, build a Chroma index over them and pick a model name."""
    # delete_files_in_folder("./docs")
    # delete_folder("./docs")
    # shutil.rmtree("./docs")
    # try:
    #     os.remove("./docs/chroma/chroma.sqlite3")
    # except:
    #     print("No such directory")
    if len(uploaded_files) > 0:
        # Create a fresh folder for the uploaded data
        create_folder(target_dir)
        print("Created the directory for the uploaded files")
        for uploaded_file in uploaded_files:
            file_path = os.path.join(target_dir, uploaded_file.name)
            with open(file_path, "wb") as f:
                f.write(uploaded_file.read())
        print("Wrote all the files successfully")
        st.write("Successfully uploaded the files")
        # Load the PDFs
        docs = []
        for file in os.listdir(target_dir):
            loader = PyPDFLoader(os.path.join(target_dir, file))
            docs.extend(loader.load())
        print(f"Length of the docs: {len(docs)}")
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100, separators=["\n\n", "\n"])
        splits = text_splitter.split_documents(docs)
        print(f"Length of the splits: {len(splits)}")
        # "ada-002" is the name of the embedding deployment
        embedding = OpenAIEmbeddings(deployment="ada-002")
        print("Loaded the OpenAI embeddings function")
        # Create the vector database and persist it
        persist_directory = './docs/chroma/'
        vectordb = Chroma.from_documents(
            documents=splits,
            embedding=embedding,
            persist_directory=persist_directory
        )
        # Create a retriever
        retriever = vectordb.as_retriever()
        print("Created Vector DB")
        # Select a model: use the older snapshot while it is still available
        current_date = datetime.datetime.now().date()
        if current_date < datetime.date(2023, 9, 2):
            llm_name = "gpt-3.5-turbo-0301"
        else:
            llm_name = "gpt-3.5-turbo"
        print(llm_name)
        print("Chose the LLM")
        # Clean up the uploaded copies now that they are indexed
        delete_files_in_folder(target_dir)
        delete_folder(target_dir)
        print("Deleted the files and directory")
        return llm_name, retriever

if len(uploaded_files) > 0:
    PROMPT_TEMPLATE = """You are an AI Assistant. Please refer to the given context and answer the user's question.
    Context: {context}
    Question: {question}
    As you respond, adhere to these ground rules delimited by four hashtags:
    ####
    Begin the conversation with a warm greeting.
    Maintain a consistently polite tone in all interactions.
    Provide answers relevant solely to the context; if the question falls outside this scope, politely mention that it is beyond your knowledge.
    Comprehend the context thoroughly and offer responses based solely on the provided information. Refrain from generating irrelevant or speculative answers.
    ####"""
    input_variables = ['context', 'question']
    prompt = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=input_variables)
    llm_name, retriever = load_uploaded_files(uploaded_files, target_dir=TARGET_DIR)
    # llm = AzureChatOpenAI(model_name=llm_name, temperature=0)
    llm = ChatOpenAI(model="gpt-4-32k", deployment_id="dse-copilot-gpt4-32k")
    # Keep the running conversation across Streamlit reruns
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    # Wire the custom prompt into the "stuff" combine-documents chain so it is actually used
    qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever,
                                                     chain_type="stuff",
                                                     combine_docs_chain_kwargs={"prompt": prompt})
    print("Started the chatbot")
    st.title("QA Bot")
    if "messages" not in st.session_state:
        st.session_state.messages = [{"role": "assistant", "content": "How can I help you?"}]
    for msg in st.session_state.messages:
        st.chat_message(msg["role"]).write(msg["content"])
    question = st.chat_input("Ask a Question")
    if question:
        st.session_state.messages.append({"role": "user", "content": question})
        st.chat_message("user").write(question)
        result = qa_chain({"question": question,
                           "chat_history": st.session_state.chat_history})['answer']
        st.session_state.chat_history.append((question, result))
        st.session_state.messages.append({"role": "assistant", "content": result})
        st.chat_message("assistant").write(result)
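
# To run the app locally (assuming the dependencies above and the Streamlit CLI
# are installed, and a .env file with the API credentials is present):
#     streamlit run azure_app.py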