Chroma db #801

Open · wants to merge 18 commits into base: main
Binary file added Results/New Microsoft Word Document.docx
Binary file not shown.
Binary file removed SOURCE_DOCUMENTS/Orca_paper.pdf
Binary file not shown.
Binary file added SOURCE_DOCUMENTS/Saudi Sanitaryware Market.pdf
Binary file not shown.
Binary file added chroma.sqlite3
Binary file not shown.
23 changes: 15 additions & 8 deletions constants.py
@@ -2,11 +2,12 @@

# from dotenv import load_dotenv
from chromadb.config import Settings
# from faissdb.config import Settings

# https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/excel.html?highlight=xlsx#microsoft-excel
from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
from langchain.document_loaders import UnstructuredFileLoader, UnstructuredMarkdownLoader
from langchain.document_loaders import UnstructuredHTMLLoader
from langchain_community.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
from langchain_community.document_loaders import UnstructuredFileLoader, UnstructuredMarkdownLoader
from langchain_community.document_loaders import UnstructuredHTMLLoader


# load_dotenv()
@@ -19,6 +20,9 @@

MODELS_PATH = "./models"

# INDEX_PATH = "faiss_index.index"
# METADATA_PATH = "faiss_metadata.pkl"

# Can be changed to a specific number
INGEST_THREADS = os.cpu_count() or 8

@@ -59,7 +63,7 @@

# Default Instructor Model
EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Accuracy with lower VRAM usage)

# EMBEDDING_MODEL_NAME = 'TheBloke/Mistral-7B-Instruct-v0.1-GGUF'
####
#### OTHER EMBEDDING MODEL OPTIONS
####
@@ -107,15 +111,18 @@
# MODEL_BASENAME = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"

# LLAMA 3 # use for Apple Silicon
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
MODEL_BASENAME = None
# MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
# MODEL_ID = "TheBloke/Llama-2-7B-32K-Instruct-GPTQ"
# MODEL_BASENAME = None#"Llama-2-7B-32K-Instruct-GPTQ"
# MODEL_BASENAME = "model.safetensors.awq"


# LLAMA 3 # use for NVIDIA GPUs
# MODEL_ID = "unsloth/llama-3-8b-bnb-4bit"
# MODEL_BASENAME = None

# MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
# MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"

# MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
# MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"
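
For context, a minimal sketch of how a GGUF pair like the active MODEL_ID / MODEL_BASENAME above is typically fetched and wrapped for inference. The hf_hub_download and LlamaCpp calls are illustrative assumptions, not part of this diff; in the repo itself this path is handled by load_model.

# Illustrative sketch (assumed workflow, not part of this PR): download the quantized
# GGUF file named by MODEL_BASENAME from the MODEL_ID repository and expose it as an LLM.
from huggingface_hub import hf_hub_download
from langchain_community.llms import LlamaCpp

from constants import MODEL_BASENAME, MODEL_ID

model_path = hf_hub_download(repo_id=MODEL_ID, filename=MODEL_BASENAME)
llm = LlamaCpp(model_path=model_path, n_ctx=4096, max_tokens=512, temperature=0.2)
print(llm.invoke("Summarize the Saudi sanitaryware market in one sentence."))
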
104 changes: 102 additions & 2 deletions ingest.py
@@ -1,13 +1,17 @@
import logging
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
import faiss
import pickle
from transformers import AutoModel, AutoTokenizer

import click
import torch
from langchain.docstore.document import Document
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import Chroma, FAISS
from utils import get_embeddings
from langchain_community.docstore.in_memory import InMemoryDocstore

from constants import (
CHROMA_SETTINGS,
@@ -16,9 +20,13 @@
INGEST_THREADS,
PERSIST_DIRECTORY,
SOURCE_DIRECTORY,
# INDEX_PATH,
# METADATA_PATH,

)



def file_log(logentry):
file1 = open("file_ingest.log", "a")
file1.write(logentry + "\n")
@@ -142,7 +150,30 @@ def split_documents(documents: list[Document]) -> tuple[list[Document], list[Doc
),
help="Device to run on. (Default is cuda)",
)
def save_faiss_index(db, index_path, metadata_path):
    faiss.write_index(db.index, index_path)
    metadata = {
        "index_to_docstore_id": db.index_to_docstore_id,
        "docstore": db.docstore,
    }
    with open(metadata_path, "wb") as f:
        pickle.dump(metadata, f)

def load_faiss_index(index_path, metadata_path):
    index = faiss.read_index(index_path)
    with open(metadata_path, "rb") as f:
        metadata = pickle.load(f)
    docstore = metadata["docstore"]
    index_to_docstore_id = metadata["index_to_docstore_id"]
    db = FAISS(index=index,
               docstore=docstore,
               index_to_docstore_id=index_to_docstore_id)
    return db



def main(device_type):
    print(f"Running on device: {device_type}")
    # Load documents and split in chunks
    logging.info(f"Loading documents from {SOURCE_DIRECTORY}")
    documents = load_documents(SOURCE_DIRECTORY)
@@ -161,6 +192,7 @@ def main(device_type):

(2) Provides additional arguments for instructor and BGE models to improve results, pursuant to the instructions contained on
their respective huggingface repository, project page or github repository.

"""

embeddings = get_embeddings(device_type)
@@ -173,10 +205,78 @@
persist_directory=PERSIST_DIRECTORY,
client_settings=CHROMA_SETTINGS,
)
# if os.path.exists(INDEX_PATH) and os.path.exists(METADATA_PATH):
# db = load_faiss_index(INDEX_PATH, METADATA_PATH)
# logging.info("Loaded FAISS index and metadata from disk.")
# else:

# d = embeddings.shape[1]
# index = faiss.IndexFlatL2(d)
# index.add(embeddings)

# docstore = InMemoryDocstore()
# index_to_docstore_id = {i: doc["id"] for i, doc in enumerate(texts)}

# db = FAISS(index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id)

# save_faiss_index(db, INDEX_PATH, METADATA_PATH)
# logging.info("Saved FAISS index and metadata to disk.")

# Load the model and tokenizer
# model_name = EMBEDDING_MODEL_NAME
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModel.from_pretrained(model_name)
# # Tokenize the input texts
# inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
# # Get the embeddings from the model
# with torch.no_grad():
# outputs = model(**inputs)
# # Extract the last hidden states (embeddings)
# embeddings = outputs.last_hidden_state
# # Pool the embeddings (e.g., mean pooling)
# pooled_embeddings = embeddings.mean(dim=1)
# # Convert the embeddings to a NumPy array
# numpy_embeddings = pooled_embeddings.cpu().numpy()

# # Get the dimension of the vectors
# vector_dimension = numpy_embeddings.shape[1]

# Create the FAISS index
# faiss_index = faiss.IndexFlatL2(vector_dimension)
# print(faiss_index.is_trained)
# # Add the embeddings to the index
# faiss_index.add(numpy_embeddings)
# # Save the index
# faiss.write_index(faiss_index, index_file_path)
# print(f"Index saved to {index_file_path}")
# print(faiss_index.ntotal)

# Define the directory and file name to save the index
# persist_dir = PERSIST_DIRECTORY
# index_file_path = os.path.join(persist_dir, 'faiss_index.index')

# # Load the index to verify
# faiss_index_loaded = faiss.read_index(index_file_path)
# print(f"Index loaded from {index_file_path}")

# Verify the loaded index
# print(f"Number of vectors in the loaded index: {faiss_index_loaded.ntotal}")

# db = FAISS.from_documents(
# texts,
# embeddings,
# # persist_directory=PERSIST_DIRECTORY,
# # client_settings=CHROMA_SETTINGS,
# )
# db.save_local("DB/faiss")

import argparse

if __name__ == "__main__":
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s", level=logging.INFO
    )
    main()
    parser = argparse.ArgumentParser(description="Ingest script for localGPT")
    parser.add_argument("--device_type", type=str, required=True, help="Device type (cpu or gpu)")
    args = parser.parse_args()
    main(args.device_type)
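
The commented-out blocks above explore two ways to persist the FAISS store: the manual faiss.write_index plus pickle pair in save_faiss_index / load_faiss_index, and LangChain's built-in save_local / load_local (the db.save_local("DB/faiss") line). A minimal sketch of the built-in route, with a small in-memory list of documents standing in for the real split chunks:

# Minimal sketch of FAISS persistence via LangChain's built-ins (assumed usage,
# not the PR's final code path). The sample Document stands in for the split chunks.
from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS
from utils import get_embeddings

embeddings = get_embeddings("cpu")
texts = [Document(page_content="Example chunk of a source PDF.", metadata={"source": "demo.pdf"})]

db = FAISS.from_documents(texts, embeddings)   # build the index in memory
db.save_local("DB/faiss")                      # write index + docstore to disk

# Reloading requires explicitly allowing pickle deserialization of the docstore.
db = FAISS.load_local("DB/faiss", embeddings, allow_dangerous_deserialization=True)
retriever = db.as_retriever()
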
8 changes: 5 additions & 3 deletions localGPT_UI.py
@@ -2,14 +2,16 @@
import subprocess
import streamlit as st
from run_localGPT import load_model
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import Chroma
from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from streamlit_extras.add_vertical_space import add_vertical_space
from streamlit_extras import add_vertical_space
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

# Use the function in your Streamlit app
add_vertical_space(10)

def model_memory():
# Adding history to the model.
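
The PromptTemplate and ConversationBufferMemory imports above feed the model_memory() helper; a minimal sketch of the prompt-plus-memory pairing they imply (the template text and key names are assumptions, not the PR's exact wording):

# Illustrative sketch (assumed template and keys) of wiring chat history into the prompt.
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

template = (
    "Use the following context and chat history to answer the question.\n"
    "Context: {context}\n"
    "History: {history}\n"
    "Question: {question}\n"
    "Answer:"
)
prompt = PromptTemplate(input_variables=["context", "history", "question"], template=template)
memory = ConversationBufferMemory(input_key="question", memory_key="history")
# These would typically be handed to RetrievalQA via chain_type_kwargs (assumption).
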
61 changes: 56 additions & 5 deletions run_localGPT.py
@@ -4,18 +4,27 @@
import torch
import utils
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler # for streaming response
from langchain.callbacks.manager import CallbackManager
from transformers import AutoModel, AutoTokenizer

from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.docstore.in_memory import InMemoryDocstore
import faiss
from langchain_community.vectorstores.faiss import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.docstore.in_memory import InMemoryDocstore

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

from prompt_template_utils import get_prompt_template
from utils import get_embeddings

# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import Chroma, FAISS
from langchain_community.vectorstores import chroma
from transformers import (
GenerationConfig,
pipeline,
@@ -38,6 +47,9 @@
CHROMA_SETTINGS,
)

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def load_model(device_type, model_id, model_basename=None, LOGGING=logging):
"""
@@ -132,14 +144,51 @@ def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="llama"):
logging.info(f"Loaded embeddings from {EMBEDDING_MODEL_NAME}")

# load the vectorstore
db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
db = Chroma(persist_directory=PERSIST_DIRECTORY,
            embedding_function=embeddings,
            client_settings=CHROMA_SETTINGS
            )


# print(embeddings)

# Initialize the FAISS index
# faiss_index = faiss.IndexFlatL2(768)

# # # Initialize the docstore
# docstore = InMemoryDocstore()
# # # Initialize the index_to_docstore_id
# index_to_docstore_id = {}
# # Add the embeddings to the index
# faiss_index.add(embeddings)
# Loading the saved embeddings
# db =FAISS.load_local("DB/faiss", embeddings, allow_dangerous_deserialization=True)
# db = FAISS(
# embedding_function=embeddings,
# index=faiss_index,
# # docstore=docstore,
# # index_to_docstore_id=index_to_docstore_id
# )

# # Add documents and their embeddings to the FAISS index and the docstore
# for i, (text, embedding) in enumerate(zip(df['Text'].tolist(), embeddings)):
# db.add_document(doc_id=i, text=text, embedding=embedding)
retriever = db.as_retriever()

# get the prompt template and memory if set by the user.
prompt, memory = get_prompt_template(promptTemplate_type=promptTemplate_type, history=use_history)
prompt, memory = get_prompt_template(promptTemplate_type=promptTemplate_type,
                                     history=use_history)

# load the llm pipeline
llm = load_model(device_type, model_id=MODEL_ID, model_basename=MODEL_BASENAME, LOGGING=logging)

# # Ensure the model is on CPU
# device = torch.device("cpu")
# llm.to(device)

if use_history:
qa = RetrievalQA.from_chain_type(
@@ -256,7 +305,9 @@ def main(device_type, show_sources, use_history, model_type, save_qa):
if query == "exit":
    break
# Get the answer from the chain
res = qa(query)
# res = qa(query)
res = qa.invoke(query)

answer, docs = res["result"], res["source_documents"]

# Print the result
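
To put the res = qa(query) → res = qa.invoke(query) change in context, a minimal sketch of how the chain is assembled and queried; chain_type="stuff" and return_source_documents=True are assumptions consistent with the res["source_documents"] access above.

# Minimal sketch (assumed setup) showing the invoke() call style that replaces
# the deprecated res = qa(query) form.
import logging

from langchain.chains import RetrievalQA
from langchain_community.vectorstores import Chroma

from constants import CHROMA_SETTINGS, MODEL_BASENAME, MODEL_ID, PERSIST_DIRECTORY
from run_localGPT import load_model
from utils import get_embeddings

embeddings = get_embeddings("cpu")
db = Chroma(persist_directory=PERSIST_DIRECTORY,
            embedding_function=embeddings,
            client_settings=CHROMA_SETTINGS)
llm = load_model("cpu", model_id=MODEL_ID, model_basename=MODEL_BASENAME, LOGGING=logging)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",                 # assumed chain type
    retriever=db.as_retriever(),
    return_source_documents=True,       # needed for res["source_documents"]
)

res = qa.invoke("What does the Saudi Sanitaryware Market report cover?")
answer, docs = res["result"], res["source_documents"]
print(answer)
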
6 changes: 3 additions & 3 deletions run_localGPT_API.py
@@ -7,14 +7,14 @@
import torch
from flask import Flask, jsonify, request
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.embeddings import HuggingFaceInstructEmbeddings

# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain_community.embeddings import HuggingFaceEmbeddings
from run_localGPT import load_model
from prompt_template_utils import get_prompt_template

# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import Chroma
from werkzeug.utils import secure_filename

from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME
6 changes: 3 additions & 3 deletions utils.py
@@ -2,9 +2,9 @@
import csv
from datetime import datetime
from constants import EMBEDDING_MODEL_NAME
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings


def log_to_csv(question, answer):
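
These three embedding classes are what utils.get_embeddings chooses between; a minimal sketch of the selection logic the imports imply (the name-matching heuristic and kwargs are assumptions, not the verbatim function):

# Minimal sketch (assumed logic) of how the three imported embedding classes map
# onto EMBEDDING_MODEL_NAME: instructor models, BGE models, everything else.
from constants import EMBEDDING_MODEL_NAME
from langchain_community.embeddings import (
    HuggingFaceBgeEmbeddings,
    HuggingFaceEmbeddings,
    HuggingFaceInstructEmbeddings,
)


def get_embeddings_sketch(device_type="cpu"):
    if "instructor" in EMBEDDING_MODEL_NAME:
        return HuggingFaceInstructEmbeddings(
            model_name=EMBEDDING_MODEL_NAME,
            model_kwargs={"device": device_type},
        )
    if "bge" in EMBEDDING_MODEL_NAME:
        return HuggingFaceBgeEmbeddings(
            model_name=EMBEDDING_MODEL_NAME,
            model_kwargs={"device": device_type},
        )
    return HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL_NAME,
        model_kwargs={"device": device_type},
    )
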