Chroma db #801

Open · wants to merge 18 commits into base: main
Binary file added Results/New Microsoft Word Document.docx
Binary file not shown.
Binary file removed SOURCE_DOCUMENTS/Orca_paper.pdf
Binary file not shown.
Binary file added SOURCE_DOCUMENTS/Saudi Sanitaryware Market.pdf
Binary file not shown.
Binary file added chroma.sqlite3
Binary file not shown.
23 changes: 15 additions & 8 deletions constants.py
@@ -2,11 +2,12 @@

# from dotenv import load_dotenv
from chromadb.config import Settings
# from faissdb.config import Settings

# https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/excel.html?highlight=xlsx#microsoft-excel
from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
from langchain.document_loaders import UnstructuredFileLoader, UnstructuredMarkdownLoader
from langchain.document_loaders import UnstructuredHTMLLoader
from langchain_community.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
from langchain_community.document_loaders import UnstructuredFileLoader, UnstructuredMarkdownLoader
from langchain_community.document_loaders import UnstructuredHTMLLoader


# load_dotenv()
@@ -19,6 +20,9 @@

MODELS_PATH = "./models"

# INDEX_PATH = "faiss_index.index"
# METADATA_PATH = "faiss_metadata.pkl"

# Can be changed to a specific number
INGEST_THREADS = os.cpu_count() or 8

@@ -59,7 +63,7 @@

# Default Instructor Model
EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Accuracy with lower VRAM usage)

# EMBEDDING_MODEL_NAME = 'TheBloke/Mistral-7B-Instruct-v0.1-GGUF'
####
#### OTHER EMBEDDING MODEL OPTIONS
####
@@ -107,15 +111,18 @@
# MODEL_BASENAME = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"

# LLAMA 3 # use for Apple Silicon
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
MODEL_BASENAME = None
# MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
# MODEL_ID = "TheBloke/Llama-2-7B-32K-Instruct-GPTQ"
# MODEL_BASENAME = None#"Llama-2-7B-32K-Instruct-GPTQ"
# MODEL_BASENAME = "model.safetensors.awq"


# LLAMA 3 # use for NVIDIA GPUs
# MODEL_ID = "unsloth/llama-3-8b-bnb-4bit"
# MODEL_BASENAME = None

# MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
# MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"

# MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
# MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"
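
For context, a minimal sketch of how a GGUF pair like the active MODEL_ID / MODEL_BASENAME above is typically fetched and wrapped for inference. The hf_hub_download and LlamaCpp calls are illustrative assumptions, not part of this diff; in the repo itself this path is handled by load_model.

# Illustrative sketch (assumed workflow, not part of this PR): download the quantized
# GGUF file named by MODEL_BASENAME from the MODEL_ID repository and expose it as an LLM.
from huggingface_hub import hf_hub_download
from langchain_community.llms import LlamaCpp

from constants import MODEL_BASENAME, MODEL_ID

model_path = hf_hub_download(repo_id=MODEL_ID, filename=MODEL_BASENAME)
llm = LlamaCpp(model_path=model_path, n_ctx=4096, max_tokens=512, temperature=0.2)
print(llm.invoke("Summarize the Saudi sanitaryware market in one sentence."))
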
104 changes: 102 additions & 2 deletions ingest.py
@@ -1,13 +1,17 @@
import logging
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
import faiss
import pickle
from transformers import AutoModel, AutoTokenizer

import click
import torch
from langchain.docstore.document import Document
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import Chroma, FAISS
from utils import get_embeddings
from langchain_community.docstore.in_memory import InMemoryDocstore

from constants import (
CHROMA_SETTINGS,
@@ -16,9 +20,13 @@
INGEST_THREADS,
PERSIST_DIRECTORY,
SOURCE_DIRECTORY,
# INDEX_PATH,
# METADATA_PATH,

)



def file_log(logentry):
file1 = open("file_ingest.log", "a")
file1.write(logentry + "\n")
@@ -142,7 +150,30 @@ def split_documents(documents: list[Document]) -> tuple[list[Document], list[Doc
),
help="Device to run on. (Default is cuda)",
)
def save_faiss_index(db, index_path, metadata_path):
    faiss.write_index(db.index, index_path)
    metadata = {
        "index_to_docstore_id": db.index_to_docstore_id,
        "docstore": db.docstore,
    }
    with open(metadata_path, "wb") as f:
        pickle.dump(metadata, f)

def load_faiss_index(index_path, metadata_path):
    index = faiss.read_index(index_path)
    with open(metadata_path, "rb") as f:
        metadata = pickle.load(f)
    docstore = metadata["docstore"]
    index_to_docstore_id = metadata["index_to_docstore_id"]
    db = FAISS(index=index,
               docstore=docstore,
               index_to_docstore_id=index_to_docstore_id)
    return db



def main(device_type):
    print(f"Running on device: {device_type}")
    # Load documents and split in chunks
    logging.info(f"Loading documents from {SOURCE_DIRECTORY}")
    documents = load_documents(SOURCE_DIRECTORY)
@@ -161,6 +192,7 @@ def main(device_type):

(2) Provides additional arguments for instructor and BGE models to improve results, pursuant to the instructions contained on
their respective huggingface repository, project page or github repository.

"""

embeddings = get_embeddings(device_type)
@@ -173,10 +205,78 @@
persist_directory=PERSIST_DIRECTORY,
client_settings=CHROMA_SETTINGS,
)
# if os.path.exists(INDEX_PATH) and os.path.exists(METADATA_PATH):
# db = load_faiss_index(INDEX_PATH, METADATA_PATH)
# logging.info("Loaded FAISS index and metadata from disk.")
# else:

# d = embeddings.shape[1]
# index = faiss.IndexFlatL2(d)
# index.add(embeddings)

# docstore = InMemoryDocstore()
# index_to_docstore_id = {i: doc["id"] for i, doc in enumerate(texts)}

# db = FAISS(index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id)

# save_faiss_index(db, INDEX_PATH, METADATA_PATH)
# logging.info("Saved FAISS index and metadata to disk.")

# Load the model and tokenizer
# model_name = EMBEDDING_MODEL_NAME
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModel.from_pretrained(model_name)
# # Tokenize the input texts
# inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
# # Get the embeddings from the model
# with torch.no_grad():
# outputs = model(**inputs)
# # Extract the last hidden states (embeddings)
# embeddings = outputs.last_hidden_state
# # Pool the embeddings (e.g., mean pooling)
# pooled_embeddings = embeddings.mean(dim=1)
# # Convert the embeddings to a NumPy array
# numpy_embeddings = pooled_embeddings.cpu().numpy()

# # Get the dimension of the vectors
# vector_dimension = numpy_embeddings.shape[1]

# Create the FAISS index
# faiss_index = faiss.IndexFlatL2(vector_dimension)
# print(faiss_index.is_trained)
# # Add the embeddings to the index
# faiss_index.add(numpy_embeddings)
# # Save the index
# faiss.write_index(faiss_index, index_file_path)
# print(f"Index saved to {index_file_path}")
# print(faiss_index.ntotal)

# Define the directory and file name to save the index
# persist_dir = PERSIST_DIRECTORY
# index_file_path = os.path.join(persist_dir, 'faiss_index.index')

# # Load the index to verify
# faiss_index_loaded = faiss.read_index(index_file_path)
# print(f"Index loaded from {index_file_path}")

# Verify the loaded index
# print(f"Number of vectors in the loaded index: {faiss_index_loaded.ntotal}")

# db = FAISS.from_documents(
# texts,
# embeddings,
# # persist_directory=PERSIST_DIRECTORY,
# # client_settings=CHROMA_SETTINGS,
# )
# db.save_local("DB/faiss")

import argparse

if __name__ == "__main__":
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s", level=logging.INFO
    )
    main()
    parser = argparse.ArgumentParser(description="Ingest script for localGPT")
    parser.add_argument("--device_type", type=str, required=True, help="Device type (cpu or gpu)")
    args = parser.parse_args()
    main(args.device_type)
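
The commented-out blocks above explore two ways to persist the FAISS store: the manual faiss.write_index plus pickle pair in save_faiss_index / load_faiss_index, and LangChain's built-in save_local / load_local (the db.save_local("DB/faiss") line). A minimal sketch of the built-in route, with a small in-memory list of documents standing in for the real split chunks:

# Minimal sketch of FAISS persistence via LangChain's built-ins (assumed usage,
# not the PR's final code path). The sample Document stands in for the split chunks.
from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS
from utils import get_embeddings

embeddings = get_embeddings("cpu")
texts = [Document(page_content="Example chunk of a source PDF.", metadata={"source": "demo.pdf"})]

db = FAISS.from_documents(texts, embeddings)   # build the index in memory
db.save_local("DB/faiss")                      # write index + docstore to disk

# Reloading requires explicitly allowing pickle deserialization of the docstore.
db = FAISS.load_local("DB/faiss", embeddings, allow_dangerous_deserialization=True)
retriever = db.as_retriever()
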
8 changes: 5 additions & 3 deletions localGPT_UI.py
@@ -2,14 +2,16 @@
import subprocess
import streamlit as st
from run_localGPT import load_model
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import Chroma
from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from streamlit_extras.add_vertical_space import add_vertical_space
from streamlit_extras import add_vertical_space
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

# Use the function in your Streamlit app
add_vertical_space(10)

def model_memory():
# Adding history to the model.
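
The PromptTemplate and ConversationBufferMemory imports above feed the model_memory() helper; a minimal sketch of the prompt-plus-memory pairing they imply (the template text and key names are assumptions, not the PR's exact wording):

# Illustrative sketch (assumed template and keys) of wiring chat history into the prompt.
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

template = (
    "Use the following context and chat history to answer the question.\n"
    "Context: {context}\n"
    "History: {history}\n"
    "Question: {question}\n"
    "Answer:"
)
prompt = PromptTemplate(input_variables=["context", "history", "question"], template=template)
memory = ConversationBufferMemory(input_key="question", memory_key="history")
# These would typically be handed to RetrievalQA via chain_type_kwargs (assumption).
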
61 changes: 56 additions & 5 deletions run_localGPT.py
@@ -4,18 +4,27 @@
import torch
import utils
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler # for streaming response
from langchain.callbacks.manager import CallbackManager
from transformers import AutoModel, AutoTokenizer

from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.docstore.in_memory import InMemoryDocstore
import faiss
from langchain_community.vectorstores.faiss import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.docstore.in_memory import InMemoryDocstore

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

from prompt_template_utils import get_prompt_template
from utils import get_embeddings

# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import Chroma, FAISS
from langchain_community.vectorstores import chroma
from transformers import (
GenerationConfig,
pipeline,
@@ -38,6 +47,9 @@
CHROMA_SETTINGS,
)

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def load_model(device_type, model_id, model_basename=None, LOGGING=logging):
"""
@@ -132,14 +144,51 @@ def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="llama"):
logging.info(f"Loaded embeddings from {EMBEDDING_MODEL_NAME}")

# load the vectorstore
db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
db = Chroma(persist_directory=PERSIST_DIRECTORY,
            embedding_function=embeddings,
            client_settings=CHROMA_SETTINGS
            )


# print(embeddings)

# Initialize the FAISS index
# faiss_index = faiss.IndexFlatL2(768)

# # # Initialize the docstore
# docstore = InMemoryDocstore()
# # # Initialize the index_to_docstore_id
# index_to_docstore_id = {}
# # Add the embeddings to the index
# faiss_index.add(embeddings)
# Loading the saved embeddings
# db =FAISS.load_local("DB/faiss", embeddings, allow_dangerous_deserialization=True)
# db = FAISS(
# embedding_function=embeddings,
# index=faiss_index,
# # docstore=docstore,
# # index_to_docstore_id=index_to_docstore_id
# )

# # Add documents and their embeddings to the FAISS index and the docstore
# for i, (text, embedding) in enumerate(zip(df['Text'].tolist(), embeddings)):
# db.add_document(doc_id=i, text=text, embedding=embedding)
retriever = db.as_retriever()

# get the prompt template and memory if set by the user.
prompt, memory = get_prompt_template(promptTemplate_type=promptTemplate_type, history=use_history)
prompt, memory = get_prompt_template(promptTemplate_type=promptTemplate_type,
                                     history=use_history)

# load the llm pipeline
llm = load_model(device_type, model_id=MODEL_ID, model_basename=MODEL_BASENAME, LOGGING=logging)

# # Ensure the model is on CPU
# device = torch.device("cpu")
# llm.to(device)

if use_history:
qa = RetrievalQA.from_chain_type(
@@ -256,7 +305,9 @@ def main(device_type, show_sources, use_history, model_type, save_qa):
if query == "exit":
    break
# Get the answer from the chain
res = qa(query)
# res = qa(query)
res = qa.invoke(query)

answer, docs = res["result"], res["source_documents"]

# Print the result
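
To put the res = qa(query) → res = qa.invoke(query) change in context, a minimal sketch of how the chain is assembled and queried; chain_type="stuff" and return_source_documents=True are assumptions consistent with the res["source_documents"] access above.

# Minimal sketch (assumed setup) showing the invoke() call style that replaces
# the deprecated res = qa(query) form.
import logging

from langchain.chains import RetrievalQA
from langchain_community.vectorstores import Chroma

from constants import CHROMA_SETTINGS, MODEL_BASENAME, MODEL_ID, PERSIST_DIRECTORY
from run_localGPT import load_model
from utils import get_embeddings

embeddings = get_embeddings("cpu")
db = Chroma(persist_directory=PERSIST_DIRECTORY,
            embedding_function=embeddings,
            client_settings=CHROMA_SETTINGS)
llm = load_model("cpu", model_id=MODEL_ID, model_basename=MODEL_BASENAME, LOGGING=logging)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",                 # assumed chain type
    retriever=db.as_retriever(),
    return_source_documents=True,       # needed for res["source_documents"]
)

res = qa.invoke("What does the Saudi Sanitaryware Market report cover?")
answer, docs = res["result"], res["source_documents"]
print(answer)
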
6 changes: 3 additions & 3 deletions run_localGPT_API.py
@@ -7,14 +7,14 @@
import torch
from flask import Flask, jsonify, request
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.embeddings import HuggingFaceInstructEmbeddings

# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain_community.embeddings import HuggingFaceEmbeddings
from run_localGPT import load_model
from prompt_template_utils import get_prompt_template

# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import Chroma
from werkzeug.utils import secure_filename

from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME
6 changes: 3 additions & 3 deletions utils.py
@@ -2,9 +2,9 @@
import csv
from datetime import datetime
from constants import EMBEDDING_MODEL_NAME
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings


def log_to_csv(question, answer):
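
These three embedding classes are what utils.get_embeddings chooses between; a minimal sketch of the selection logic the imports imply (the name-matching heuristic and kwargs are assumptions, not the verbatim function):

# Minimal sketch (assumed logic) of how the three imported embedding classes map
# onto EMBEDDING_MODEL_NAME: instructor models, BGE models, everything else.
from constants import EMBEDDING_MODEL_NAME
from langchain_community.embeddings import (
    HuggingFaceBgeEmbeddings,
    HuggingFaceEmbeddings,
    HuggingFaceInstructEmbeddings,
)


def get_embeddings_sketch(device_type="cpu"):
    if "instructor" in EMBEDDING_MODEL_NAME:
        return HuggingFaceInstructEmbeddings(
            model_name=EMBEDDING_MODEL_NAME,
            model_kwargs={"device": device_type},
        )
    if "bge" in EMBEDDING_MODEL_NAME:
        return HuggingFaceBgeEmbeddings(
            model_name=EMBEDDING_MODEL_NAME,
            model_kwargs={"device": device_type},
        )
    return HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL_NAME,
        model_kwargs={"device": device_type},
    )
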