Initial Commit

ramachaitanya0 · Nov 7, 2023 · 4b23874 · 4b23874
1 parent c8fa331
commit 4b23874
Show file tree

Hide file tree

Showing 10 changed files with 179 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+.env
+.idea
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Rama Chaitnaya Karanam
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -1 +1,44 @@
-# ask_pdf
+# Ask_Pdf
+
+This chatbot lets users upload documents (PDF only) and then uses OpenAI ChatGPT and LangChain
+to answer questions based on the content of the uploaded documents.
+Users can engage in interactive Q&A sessions with the chatbot, making it a powerful tool for document
+exploration and retrieval.
+
+Key Features
+1. Document upload capability.
+2. Interactive and user-friendly chat interface.
+3. Potential for customization and extension.
+
+# Installation
+
+1. Clone the GitHub repo into your local workspace using the command below.
+```sh
+git clone https://github.com/ramachaitanya0/ask_pdf.git 
+```
+
+2. Create a Conda Environment.
+```sh
+conda create -n <env_name> python=3.11.4
+```
+
+3. Install all the required Packages using requirements.txt file.
+```sh
+pip install -r requirements.txt
+```
+4. Add a `.env` file to the repo root and put your OpenAI API key in it.
+
+```sh
+OPENAI_API_KEY=<OPENAI_API_KEY>
+```
+
+# Usage
+
+Run the Streamlit app using the command below.
+```sh
+streamlit run app.py
+```
+
+
+
+
diff --git a/app.py b/app.py
@@ -0,0 +1,102 @@
+import streamlit as st
+import os
+from dotenv import load_dotenv
+import datetime
+from langchain.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.chat_models import ChatOpenAI
+from langchain.chains import  ConversationalRetrievalChain
+load_dotenv()
+print("Streamlit run has started")
+# Title
+st.title("Ask PDF")
+
+
+target_dir = "./uploaded_data"
+try :
+    for file in os.listdir(target_dir):
+        os.remove(os.path.join(target_dir + "/" + file))
+except :
+    print("Error in accessing the target directory ")
+
+# Uploading Files
+uploaded_files = st.file_uploader("Upload your files", type=['pdf'], accept_multiple_files=True)
+
+
+@st.cache_resource
+def load_uploaded_files(uploaded_files: list,target_dir:str):
+    if len(uploaded_files) > 0:
+        for uploaded_file in uploaded_files:
+            file_path = os.path.join(target_dir, uploaded_file.name)
+            with open(file_path, "wb") as f:
+                f.write(uploaded_file.read())
+        print("Written all the files successfully")
+        st.write("Successfully Uploaded the files")
+
+        # Loading pdfs
+        docs = []
+        for file  in os.listdir(target_dir):
+            loader = PyPDFLoader(target_dir + '/' + file)
+            docs.extend(loader.load())
+        print(f"length of the docs {len(docs)}")
+
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+        splits = text_splitter.split_documents(docs)
+        print(f"length of the splits {len(splits)}")
+        embedding = OpenAIEmbeddings()
+        print("loaded the Open AI Embeddings Function")
+
+        # Deleting the previous content
+        # shutil.rmtree("./docs/chroma/")
+        # print("Deleted the db")
+
+        persist_directory = 'docs/chroma/'
+        vectordb = Chroma.from_documents(
+            documents=splits,
+            embedding=embedding,
+            persist_directory=persist_directory
+        )
+        retriever = vectordb.as_retriever()
+
+        print("Created Vector DB ")
+        current_date = datetime.datetime.now().date()
+        if current_date < datetime.date(2023, 9, 2):
+            llm_name = "gpt-3.5-turbo-0301"
+        else:
+            llm_name = "gpt-3.5-turbo"
+        print(llm_name)
+        print("chose the llm")
+
+        return llm_name, retriever
+
+if len(uploaded_files) > 0 :
+    llm_name,retriever = load_uploaded_files(uploaded_files,target_dir=target_dir)
+
+    llm = ChatOpenAI(model_name=llm_name, temperature=0)
+    chat_history = []
+    qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever,
+                                                     chain_type="stuff")
+
+
+print("started Chatbot")
+st.title("QA Bot")
+if "messages" not in st.session_state.keys():
+    st.session_state.messages = [{"role": "Assistant", "content": "How can i help you"}]
+
+for msg in st.session_state.messages:
+    st.chat_message(msg["role"]).write(msg["content"])
+
+question = st.chat_input("Ask a Question")
+#
+if question is not None:
+    st.session_state.messages.append({"role": "user", "content": question})
+    st.chat_message("user").write(question)
+    result = qa_chain({"question": question, "chat_history": chat_history})['answer']
+    chat_history = [(question, result)]
+    st.session_state.messages.append({"role": 'Assistant', "content": result})
+    st.chat_message("Assistant").write(result)
+
+
+
diff --git a/docs/chroma/a01819bb-4432-43f1-9ddd-4f9a00807020/data_level0.bin b/docs/chroma/a01819bb-4432-43f1-9ddd-4f9a00807020/data_level0.bin
diff --git a/docs/chroma/a01819bb-4432-43f1-9ddd-4f9a00807020/header.bin b/docs/chroma/a01819bb-4432-43f1-9ddd-4f9a00807020/header.bin
diff --git a/docs/chroma/a01819bb-4432-43f1-9ddd-4f9a00807020/length.bin b/docs/chroma/a01819bb-4432-43f1-9ddd-4f9a00807020/length.bin
diff --git a/docs/chroma/a01819bb-4432-43f1-9ddd-4f9a00807020/link_lists.bin b/docs/chroma/a01819bb-4432-43f1-9ddd-4f9a00807020/link_lists.bin
diff --git a/docs/chroma/chroma.sqlite3 b/docs/chroma/chroma.sqlite3
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,10 @@
+streamlit
+openai==0.28.1
+langchain==0.0.331
+numpy
+pandas
+PyPDF2
+python-dotenv
+tiktoken
+pypdf
+chromadb