Added Azure Version of the Code

ramachaitanya0 · Feb 16, 2024 · cc458bf · cc458bf
1 parent 4e38ca3
commit cc458bf
Show file tree

Hide file tree

Showing 3 changed files with 165 additions and 0 deletions.
diff --git a/azure.env b/azure.env
@@ -0,0 +1,11 @@
+OPENAI_API_TYPE="azure"
+OPENAI_API_BASE="https://tvsmazoogoaidev03.openai.azure.com/"
+OPENAI_API_VERSION="2023-07-01-preview"
+OPENAI_API_KEY="155c5a32ffe04391a86ee48aac8e5f39"
+LANGCHAIN_TRACING_V2="true"
+LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
+LANGCHAIN_API_KEY="ls__a5475b0b04fb458d8cc5b350be47b1bb"
+LANGCHAIN_PROJECT="ask_pdf"
+MAIL_ACCOUNT_NAME="[email protected]"
+MAIL_APP_PASSWORD="Kramvenu@1224"
+
diff --git a/azure_app.py b/azure_app.py
@@ -0,0 +1,154 @@
+import shutil
+
+import streamlit as st
+import os
+from dotenv import load_dotenv
+import datetime
+from langchain.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
+from langchain.chains import  ConversationalRetrievalChain
+from langchain import PromptTemplate
+# Read the key/value pairs in ./azure.env into the process environment
+# before any client that depends on them is constructed.
+load_dotenv("./azure.env")
+# Console trace marking that the script has started executing.
+print("Streamlit run has started")
+# Title
+st.title("Ask PDF")
+
+def create_folder(folder_path):
+    if not os.path.exists(folder_path):
+        os.makedirs(folder_path)
+
+def delete_files_in_folder(folder_path):
+    files = os.listdir(folder_path)
+    for file in files:
+        file_path = os.path.join(folder_path, file)
+        try:
+            if os.path.isfile(file_path):
+                os.unlink(file_path)
+        except Exception as e:
+            st.error(f"Error deleting file {file_path}: {e}")
+
+def delete_folder(folder_path):
+    try:
+        os.rmdir(folder_path)
+    except Exception as e:
+        st.error(f"Error deleting folder {folder_path}: {e}")
+
+
+# Directory passed to load_uploaded_files() as the place to write uploads.
+TARGET_DIR = "./uploaded_data"
+
+# Uploading Files
+# Widget restricted to the 'pdf' type; several files may be selected at once.
+uploaded_files = st.file_uploader("Upload your files", type=['pdf'], accept_multiple_files=True)
+
+@st.cache_resource
+def load_uploaded_files(uploaded_files: list,target_dir:str):
+    # delete_files_in_folder("./docs")
+    # delete_folder("./docs")
+    # shutil.rmtree("./docs")
+    # try :
+    #     os.remove("./docs/chroma/chroma.sqlite3")
+    # except :
+    #     print("No such directory")
+    if len(uploaded_files) > 0:
+        # creates new folder for the uploaded data
+        create_folder(target_dir)
+        print("Created the directory for the uploaded files")
+
+        for uploaded_file in uploaded_files:
+            file_path = os.path.join(target_dir, uploaded_file.name)
+            with open(file_path, "wb") as f:
+                f.write(uploaded_file.read())
+        print("Written all the files successfully")
+        st.write("Successfully Uploaded the files")
+
+        # Loading pdfs
+        docs = []
+        for file  in os.listdir(target_dir):
+            loader = PyPDFLoader(target_dir + '/' + file)
+            docs.extend(loader.load())
+        print(f"length of the docs {len(docs)}")
+
+
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100,separators=["\n\n","\n"])
+        splits = text_splitter.split_documents(docs)
+
+        print(f"length of the splits {len(splits)}")
+        embedding = OpenAIEmbeddings(deployment="ada-002")
+        print("loaded the Open AI Embeddings Function")
+
+
+        # Creating Vector Database and persisting it
+        persist_directory = './docs/chroma/'
+        vectordb = Chroma.from_documents(
+            documents=splits,
+            embedding=embedding,
+            persist_directory=persist_directory
+        )
+        # Creating a retriever
+        retriever = vectordb.as_retriever()
+        print("Created Vector DB ")
+
+        # Selecting a Model
+        current_date = datetime.datetime.now().date()
+        if current_date < datetime.date(2023, 9, 2):
+            llm_name = "gpt-3.5-turbo-0301"
+        else:
+            llm_name = "gpt-3.5-turbo"
+        print(llm_name)
+        print("chose the llm")
+
+        delete_files_in_folder(target_dir)
+        delete_folder(target_dir)
+        print("Deleted the files and directory")
+
+
+        return llm_name, retriever
+
+
+if len(uploaded_files) > 0 :
+    PROMPT_TEMPLATE = """You are an AI Assistant. Please refer to the context given and answer your query.
+
+Context: {context}
+Question: {question}
+
+As you respond to your query, adhere to these ground rules delimited by four hashtags:
+####
+    Begin the conversation with a warm greetings with the user.
+    Maintain a consistently polite tone in all interactions.
+    Provide answers solely relevant to the context , If the question falls outside this scope, politely mention that it's beyond your knowledge.
+Comprehend the context thoroughly and offer responses based solely on the provided information. Refrain from generating irrelevant or speculative answers.
+#### """
+    input_variables = ['context','question']
+    prompt = PromptTemplate(template=PROMPT_TEMPLATE,input_variables=input_variables)
+    llm_name,retriever = load_uploaded_files(uploaded_files,target_dir=TARGET_DIR)
+
+    # llm = AzureChatOpenAI(model_name=llm_name, temperature=0)
+    llm = ChatOpenAI(model="gpt-4-32k", deployment_id="dse-copilot-gpt4-32k")
+    chat_history = []
+    qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever,
+                                                     chain_type="stuff")
+
+
+print("started Chatbot")
+st.title("QA Bot")
+if "messages" not in st.session_state.keys():
+    st.session_state.messages = [{"role": "Assistant", "content": "How can i help you"}]
+
+for msg in st.session_state.messages:
+    st.chat_message(msg["role"]).write(msg["content"])
+
+question = st.chat_input("Ask a Question")
+#
+if question is not None:
+    st.session_state.messages.append({"role": "user", "content": question})
+    st.chat_message("user").write(question)
+    result = qa_chain({"question": question, "chat_history": chat_history})['answer']
+    chat_history = [(question, result)]
+    st.session_state.messages.append({"role": 'Assistant', "content": result})
+    st.chat_message("Assistant").write(result)
+
+
+
+
diff --git a/docs/chroma/a01819bb-4432-43f1-9ddd-4f9a00807020/link_lists.bin b/docs/chroma/a01819bb-4432-43f1-9ddd-4f9a00807020/link_lists.bin