Skip to content

Commit

Permalink
Added Azure Version of the Code
Browse files Browse the repository at this point in the history
  • Loading branch information
ramachaitanya0 committed Feb 16, 2024
1 parent 4e38ca3 commit cc458bf
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 0 deletions.
11 changes: 11 additions & 0 deletions azure.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# SECURITY: live API keys and a mailbox password are committed here in plain
# text. Rotate every credential below and load them from a secret manager or
# an untracked local .env file instead of version control.
OPENAI_API_TYPE="azure"
OPENAI_API_BASE="https://tvsmazoogoaidev03.openai.azure.com/"
OPENAI_API_VERSION="2023-07-01-preview"
OPENAI_API_KEY="155c5a32ffe04391a86ee48aac8e5f39"
LANGCHAIN_TRACING_V2="true"
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
LANGCHAIN_API_KEY="ls__a5475b0b04fb458d8cc5b350be47b1bb"
LANGCHAIN_PROJECT="ask_pdf"
MAIL_ACCOUNT_NAME="[email protected]"
MAIL_APP_PASSWORD="Kramvenu@1224"

154 changes: 154 additions & 0 deletions azure_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import shutil

import streamlit as st
import os
from dotenv import load_dotenv
import datetime
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain import PromptTemplate
# Load OPENAI_* / LANGCHAIN_* / MAIL_* settings from azure.env into os.environ.
load_dotenv("./azure.env")
print("Streamlit run has started")
# Page title rendered at the top of the Streamlit app.
st.title("Ask PDF")

def create_folder(folder_path):
    """Create *folder_path* (including parents) if it does not already exist.

    Args:
        folder_path: Directory path to create.
    """
    # exist_ok=True closes the check-then-create race of the original
    # `if not os.path.exists(...)` guard: a directory created by another
    # process between the check and makedirs no longer raises.
    os.makedirs(folder_path, exist_ok=True)

def delete_files_in_folder(folder_path):
    """Delete every regular file directly inside *folder_path*.

    Sub-directories are left untouched. A failure to delete one file is
    reported through the Streamlit UI and the loop continues with the rest.
    """
    for entry in os.listdir(folder_path):
        file_path = os.path.join(folder_path, entry)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            st.error(f"Error deleting file {file_path}: {e}")

def delete_folder(folder_path):
    """Remove the directory *folder_path* (must be empty).

    Any failure (non-empty directory, missing path, permissions) is shown
    in the Streamlit UI instead of propagating.
    """
    try:
        os.rmdir(folder_path)
    except Exception as e:
        st.error(f"Error deleting folder {folder_path}: {e}")


# Scratch directory where uploaded PDFs are written before indexing; it is
# emptied and removed again inside load_uploaded_files().
TARGET_DIR = "./uploaded_data"

# Streamlit multi-file upload widget; yields a (possibly empty) list of
# uploaded PDF files on every script rerun.
uploaded_files = st.file_uploader("Upload your files", type=['pdf'], accept_multiple_files=True)

@st.cache_resource
def load_uploaded_files(uploaded_files: list, target_dir: str):
    """Persist uploaded PDFs, build a Chroma retriever over them, pick a model.

    Pipeline: write the uploaded files into *target_dir*, load them with
    PyPDFLoader, chunk the documents, embed the chunks into a persistent
    Chroma store, then delete the scratch files and directory again.

    Args:
        uploaded_files: Streamlit UploadedFile objects (expected non-empty;
            the caller only invokes this when at least one file was uploaded —
            with an empty list and no pre-existing *target_dir*, os.listdir
            below would raise).
        target_dir: Scratch directory for the uploaded PDFs.

    Returns:
        Tuple of (llm_name, retriever): the chosen chat-model name and the
        Chroma vector-store retriever.
    """
    if len(uploaded_files) > 0:
        # exist_ok avoids a race if the directory already exists.
        os.makedirs(target_dir, exist_ok=True)
        print("Created the directory for the uploaded files")

        for uploaded_file in uploaded_files:
            file_path = os.path.join(target_dir, uploaded_file.name)
            with open(file_path, "wb") as f:
                f.write(uploaded_file.read())
        print("Written all the files successfully")
        st.write("Successfully Uploaded the files")

    # Load every PDF in the scratch directory into LangChain documents.
    docs = []
    for file_name in os.listdir(target_dir):
        # os.path.join instead of manual '/' concatenation.
        loader = PyPDFLoader(os.path.join(target_dir, file_name))
        docs.extend(loader.load())
    print(f"length of the docs {len(docs)}")

    # Chunk the documents for embedding.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=100, separators=["\n\n", "\n"]
    )
    splits = text_splitter.split_documents(docs)
    print(f"length of the splits {len(splits)}")

    embedding = OpenAIEmbeddings(deployment="ada-002")
    print("loaded the Open AI Embeddings Function")

    # Build and persist the vector database, then expose it as a retriever.
    persist_directory = './docs/chroma/'
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=embedding,
        persist_directory=persist_directory
    )
    retriever = vectordb.as_retriever()
    print("Created Vector DB ")

    # Model selection by date. NOTE(review): the cutoff (2023-09-02) is in
    # the past, so this always resolves to "gpt-3.5-turbo" now — kept as-is
    # to preserve behavior; consider removing the dead branch.
    current_date = datetime.datetime.now().date()
    if current_date < datetime.date(2023, 9, 2):
        llm_name = "gpt-3.5-turbo-0301"
    else:
        llm_name = "gpt-3.5-turbo"
    print(llm_name)
    print("chose the llm")

    # Clean up the scratch files now that everything is embedded.
    delete_files_in_folder(target_dir)
    delete_folder(target_dir)
    print("Deleted the files and directory")

    return llm_name, retriever


if len(uploaded_files) > 0:
    # Prompt for the underlying "stuff" documents chain; {context} and
    # {question} are filled in by ConversationalRetrievalChain.
    PROMPT_TEMPLATE = """You are an AI Assistant. Please refer to the context given and answer your query.
Context: {context}
Question: {question}
As you respond to your query, adhere to these ground rules delimited by four hashtags:
####
Begin the conversation with a warm greetings with the user.
Maintain a consistently polite tone in all interactions.
Provide answers solely relevant to the context , If the question falls outside this scope, politely mention that it's beyond your knowledge.
Comprehend the context thoroughly and offer responses based solely on the provided information. Refrain from generating irrelevant or speculative answers.
#### """
    input_variables = ['context', 'question']
    prompt = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=input_variables)

    llm_name, retriever = load_uploaded_files(uploaded_files, target_dir=TARGET_DIR)

    # llm = AzureChatOpenAI(model_name=llm_name, temperature=0)
    llm = ChatOpenAI(model="gpt-4-32k", deployment_id="dse-copilot-gpt4-32k")

    # BUG FIX: the custom prompt above was built but never handed to the
    # chain, so it had no effect. combine_docs_chain_kwargs routes it to the
    # "stuff" documents chain.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        combine_docs_chain_kwargs={"prompt": prompt},
    )

    print("started Chatbot")
    st.title("QA Bot")

    if "messages" not in st.session_state.keys():
        st.session_state.messages = [{"role": "Assistant", "content": "How can i help you"}]
    # BUG FIX: chat_history used to be a plain local reset to [] on every
    # Streamlit rerun, so the chain never saw past turns. Keep it in
    # session_state so the conversation actually persists.
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    # Replay the stored transcript on each rerun.
    for msg in st.session_state.messages:
        st.chat_message(msg["role"]).write(msg["content"])

    # st.chat_input returns None until the user submits a message.
    question = st.chat_input("Ask a Question")
    if question is not None:
        st.session_state.messages.append({"role": "user", "content": question})
        st.chat_message("user").write(question)
        result = qa_chain(
            {"question": question, "chat_history": st.session_state.chat_history}
        )['answer']
        st.session_state.chat_history.append((question, result))
        st.session_state.messages.append({"role": 'Assistant', "content": result})
        st.chat_message("Assistant").write(result)




Empty file.

0 comments on commit cc458bf

Please sign in to comment.