-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
145 lines (110 loc) · 5.22 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from html_template import css, bot_template, user_template
from langchain.llms import HuggingFaceHub
from streamlit_pdf_viewer import pdf_viewer
import os
from io import BytesIO
def get_pdf_text(pdf) -> str:
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf: The PDF source; it is handed unchanged to ``PdfReader``.

    Returns:
        The extracted text of all pages, joined in page order with no
        separator added between pages.
    """
    pdf_reader = PdfReader(pdf)
    # ``or ""`` keeps a page that yields no text (e.g. a scanned image) from
    # breaking the join; ``str.join`` avoids repeated string re-allocation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def get_text_chunks(text):
    """Split ``text`` into overlapping chunks for embedding.

    The text is split on newlines into chunks of at most 1000 characters
    (measured with ``len``) with a 200-character overlap between chunks.

    Returns:
        The list of chunks produced by ``CharacterTextSplitter.split_text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)
def get_vectorstore(text_chunks):
    """Build a FAISS vector store from text chunks using OpenAI embeddings.

    Args:
        text_chunks: The texts to embed and index.

    Returns:
        The store returned by ``FAISS.from_texts``.
    """
    # Alternative embedding model kept for reference:
    # HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    return FAISS.from_texts(texts=text_chunks, embedding=OpenAIEmbeddings())
def get_conversation_chain(vectorstore):
    """Create a conversational retrieval chain over ``vectorstore``.

    The chain uses a ``ChatOpenAI`` model at temperature 0 and a
    ``ConversationBufferMemory`` stored under the ``chat_history`` key.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm``.
    """
    chat_model = ChatOpenAI(temperature=0)
    history_buffer = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    chain_options = {
        "llm": chat_model,
        "retriever": vectorstore.as_retriever(),
        "memory": history_buffer,
    }
    return ConversationalRetrievalChain.from_llm(**chain_options)
def handle_userinput(user_question, show_user=False):
    """Send a question to the conversation chain and render the chat history.

    Args:
        user_question: The prompt passed to the chain under the
            ``question`` key.
        show_user: When true, even-indexed history entries are rendered with
            the user template; when false they are skipped. Odd-indexed
            entries are always rendered with the bot template.

    The history returned by the chain is stored in
    ``st.session_state.chat_history`` and rendered newest-first. If the
    vector store has not been created, an error is shown and nothing is sent.
    """
    # Local import so this block brings its own stdlib dependency into scope.
    from html import escape

    if not st.session_state.vector_store_created:
        st.error("The PDFs where not loaded properly..... please try reloading them again", icon="🚨")
        return

    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in reversed(list(enumerate(st.session_state.chat_history))):
        is_even_entry = i % 2 == 0
        if is_even_entry and not show_user:
            continue
        template = user_template if is_even_entry else bot_template
        # The templates are rendered as raw HTML, so the message text must be
        # escaped to keep markup in a question or a reply from being injected.
        st.write(
            template.replace("{{MSG}}", escape(message.content)),
            unsafe_allow_html=True,
        )
def handle_streamlit_config():
    """Configure the page, inject the CSS and initialise session state.

    Sets the page title and icon, writes the shared CSS, makes sure the
    ``conversation``, ``chat_history`` and ``vector_store_created`` session
    keys exist, and renders the page header.
    """
    st.set_page_config(
        page_title="Chat with Arara",
        page_icon="images/icon.png",
    )
    st.write(css, unsafe_allow_html=True)

    # Only keys that are absent are initialised; existing values are kept.
    session_defaults = (
        ("conversation", None),
        ("chat_history", None),
        ("vector_store_created", False),
    )
    for key, initial_value in session_defaults:
        if key not in st.session_state:
            st.session_state[key] = initial_value

    st.header("Chat with Arara :parrot:")
def call_pdf_render(page, name):
    """Render one page of a PDF with the ``pdf_viewer`` component.

    Args:
        page: The page number to render; passed as the only entry of
            ``pages_to_render``.
        name: The PDF source, passed through to ``pdf_viewer`` unchanged.
    """
    viewer_options = {
        "width": 700,
        "height": 500,
        "pages_to_render": [page],
    }
    pdf_viewer(name, **viewer_options)
#handle_user_question()
def display_pdf_page(pdf_reader, current_page, uploaded_file):
    """Show the current page of the uploaded PDF and (re)index the document.

    Args:
        pdf_reader: Reader whose ``pages`` give the total page count.
        current_page: Zero-based index of the page to display.
        uploaded_file: The uploaded PDF; it is rendered and then handed to
            ``process_pdf``.
    """
    num_pages = len(pdf_reader.pages)
    st.write(f"Page {current_page + 1} of {num_pages}")

    # An uploaded file is held in memory, so its ``name`` is only the original
    # filename and need not exist on disk. Prefer the raw bytes when the
    # object exposes them; otherwise fall back to the name as before.
    read_bytes = getattr(uploaded_file, "getvalue", None)
    pdf_source = read_bytes() if callable(read_bytes) else uploaded_file.name
    call_pdf_render(current_page + 1, pdf_source)

    # NOTE(review): this rebuilds the embeddings every time a page is shown;
    # confirm whether indexing once per upload would be enough.
    process_pdf(uploaded_file)
def process_pdf(uploaded_file):
    """Index a PDF and store a ready-to-use conversation chain in the session.

    Extracts the text, splits it into chunks, builds the vector store and the
    conversation chain, and only then marks the vector store as created.

    Args:
        uploaded_file: The PDF to process; passed to ``get_pdf_text``.
    """
    # Reset first so a failure below cannot leave a stale "ready" flag set.
    st.session_state.vector_store_created = False

    raw_text = get_pdf_text(uploaded_file)
    text_chunks = get_text_chunks(raw_text)
    if not text_chunks:
        # Nothing to embed (e.g. a scanned PDF with no text layer); building
        # an index from an empty list would fail.
        st.error("No text could be extracted from this PDF.")
        return

    vectorstore = get_vectorstore(text_chunks)
    st.session_state.conversation = get_conversation_chain(vectorstore)
    # Flag readiness only once the chain actually exists.
    st.session_state.vector_store_created = True
def handle_user_question():
    """Render the prompt buttons and the question box, and dispatch prompts.

    Each button sends a canned prompt to ``handle_userinput``. Text typed in
    the input box is sent with a tutor-style instruction appended, but only
    when the box is not empty.
    """
    b1 = st.button("Let's start learning!")
    b2 = st.button("I want to build flashcards!")
    # TODO: keep a history of the answers, hide the canned prompt questions,
    # and take a look at how the conversation memory behaves.
    follow_up = "Give me topics to make flashcards about this material. Start by saying that you are happy for the journey with me and that I should keep up, say it's all about the analyzed pdf and that you are here to help on enhancing the understanding, without shortcuts"
    initial_question = "Start saying hello and being cordial. Explaining why is it important to understand this concept for a computer scientist, say that you can comment slide by slide and enrich the experience using an iteractive apporach, say that you can ask any question for help as well, start by making a opening question to me to grasp by attention. Show me as well the main bullet points of the material"
    tutor_suffix = ". Be friendly and act as a tutor and try making questions to make me engage with the material"

    if b1:
        handle_userinput(initial_question)
    if b2:
        handle_userinput(follow_up)

    typed_question = st.text_input("Ask me aditional questions about this material: ")
    # Test the raw input: once the suffix is appended the string is never
    # empty, which would send a prompt to the model on every rerun.
    if typed_question.strip():
        handle_userinput(typed_question + tutor_suffix)
def handle_side_bar():
    """Render the sidebar (logo and PDF uploader) and return the upload.

    Returns:
        The value returned by ``st.file_uploader`` for the PDF upload widget.
    """
    with st.sidebar:
        # Read the logo inside a context manager so the handle is closed.
        with open('images/logo.png', 'rb') as logo_file:
            logo = logo_file.read()
        st.image(logo)
        st.subheader("Your documents")
        uploaded_file = st.file_uploader("Upload your PDF here: ", type="pdf")
    return uploaded_file