pixegami · issadarkthing · Aug 3, 2024 · Aug 3, 2024 · Aug 3, 2024 · Aug 3, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
 *.pyc
 .DS_Store
 backup
-chroma
+chroma
+venv
diff --git a/README.md b/README.md
@@ -1 +1,14 @@
 # rag-tutorial-v2
+
+## Getting Started
+Before running, make sure that Ollama is installed. If it is not, install it from [here](https://ollama.com/download). After that, you **must** run `ollama serve`.
+
+1. `python -m venv venv`
+2. `source venv/bin/activate`
+3. `pip install -r requirements.txt`
+4. `python ./populate_database.py`
+5. `python ./query_data.py "How much does each player get for each round in Monopoly?"`
+
+## Update document
+1. Place the PDF file inside the `data` directory
+2. `python ./populate_database.py`
diff --git a/get_embedding_function.py b/get_embedding_function.py
@@ -3,8 +3,8 @@
 
 
 def get_embedding_function():
-    embeddings = BedrockEmbeddings(
-        credentials_profile_name="default", region_name="us-east-1"
-    )
-    # embeddings = OllamaEmbeddings(model="nomic-embed-text")
+    # embeddings = BedrockEmbeddings(
+    #     credentials_profile_name="default", region_name="us-east-1"
+    # )
+    embeddings = OllamaEmbeddings(model="mxbai-embed-large")
     return embeddings
diff --git a/populate_database.py b/populate_database.py
@@ -1,11 +1,11 @@
 import argparse
 import os
 import shutil
-from langchain.document_loaders.pdf import PyPDFDirectoryLoader
+from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain.schema.document import Document
 from get_embedding_function import get_embedding_function
-from langchain.vectorstores.chroma import Chroma
+from langchain_chroma import Chroma
 
 
 CHROMA_PATH = "chroma"
@@ -67,7 +67,6 @@ def add_to_chroma(chunks: list[Document]):
         print(f"👉 Adding new documents: {len(new_chunks)}")
         new_chunk_ids = [chunk.metadata["id"] for chunk in new_chunks]
         db.add_documents(new_chunks, ids=new_chunk_ids)
-        db.persist()
     else:
         print("✅ No new documents to add")
 

diff --git a/query_data.py b/query_data.py
@@ -1,5 +1,5 @@
 import argparse
-from langchain.vectorstores.chroma import Chroma
+from langchain_chroma import Chroma
 from langchain.prompts import ChatPromptTemplate
 from langchain_community.llms.ollama import Ollama
 

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,6 @@
 pypdf
 langchain
-chromadb # Vector storage
+langchain_community
+langchain_chroma
 pytest
 boto3