From ccbd9e4a0d0e7fdd162b3ab48eed49f130412df4 Mon Sep 17 00:00:00 2001 From: raziman Date: Sun, 4 Aug 2024 00:18:14 +0800 Subject: [PATCH 1/6] fix library deprecations --- populate_database.py | 4 ++-- query_data.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/populate_database.py b/populate_database.py index 3d2a1ab8..d1f1a476 100644 --- a/populate_database.py +++ b/populate_database.py @@ -1,11 +1,11 @@ import argparse import os import shutil -from langchain.document_loaders.pdf import PyPDFDirectoryLoader +from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader from langchain_text_splitters import RecursiveCharacterTextSplitter from langchain.schema.document import Document from get_embedding_function import get_embedding_function -from langchain.vectorstores.chroma import Chroma +from langchain_chroma import Chroma CHROMA_PATH = "chroma" diff --git a/query_data.py b/query_data.py index 33299e58..482ef57a 100644 --- a/query_data.py +++ b/query_data.py @@ -1,5 +1,5 @@ import argparse -from langchain.vectorstores.chroma import Chroma +from langchain_chroma import Chroma from langchain.prompts import ChatPromptTemplate from langchain_community.llms.ollama import Ollama From 5bbb019377d3c452de815f80d1ecea3cb8077110 Mon Sep 17 00:00:00 2001 From: raziman Date: Sun, 4 Aug 2024 00:18:28 +0800 Subject: [PATCH 2/6] use local LLM --- get_embedding_function.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/get_embedding_function.py b/get_embedding_function.py index 79d04113..07986737 100644 --- a/get_embedding_function.py +++ b/get_embedding_function.py @@ -3,8 +3,8 @@ def get_embedding_function(): - embeddings = BedrockEmbeddings( - credentials_profile_name="default", region_name="us-east-1" - ) - # embeddings = OllamaEmbeddings(model="nomic-embed-text") + # embeddings = BedrockEmbeddings( + # credentials_profile_name="default", region_name="us-east-1" + # ) + embeddings = 
OllamaEmbeddings(model="mxbai-embed-large") return embeddings From 267b9a06ce7006a57b54ad0b100ba7f53c2fd55d Mon Sep 17 00:00:00 2001 From: raziman Date: Sun, 4 Aug 2024 00:18:34 +0800 Subject: [PATCH 3/6] ignore venv --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ea09c3ec..5ab5d45a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pyc .DS_Store backup -chroma \ No newline at end of file +chroma +venv From 30a1678a69a0eb5e2dc2a9026ee82e567483ec56 Mon Sep 17 00:00:00 2001 From: raziman Date: Sun, 4 Aug 2024 00:25:05 +0800 Subject: [PATCH 4/6] add missing dependencies --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b290b554..d5c081c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ pypdf langchain -chromadb # Vector storage +langchain_community +langchain_chroma pytest boto3 From 73ce044ca6bf1e95a01c9d020ad08e0489333e58 Mon Sep 17 00:00:00 2001 From: raziman Date: Sun, 4 Aug 2024 00:25:24 +0800 Subject: [PATCH 5/6] fix missing attribute --- populate_database.py | 1 - 1 file changed, 1 deletion(-) diff --git a/populate_database.py b/populate_database.py index d1f1a476..966643f9 100644 --- a/populate_database.py +++ b/populate_database.py @@ -67,7 +67,6 @@ def add_to_chroma(chunks: list[Document]): print(f"👉 Adding new documents: {len(new_chunks)}") new_chunk_ids = [chunk.metadata["id"] for chunk in new_chunks] db.add_documents(new_chunks, ids=new_chunk_ids) - db.persist() else: print("✅ No new documents to add") From 38e8d98c271b8f300aa32b71d0734a850bed104e Mon Sep 17 00:00:00 2001 From: raziman Date: Sun, 4 Aug 2024 00:38:17 +0800 Subject: [PATCH 6/6] update documentation --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 6af3a525..4af16f6c 100644 --- a/README.md +++ b/README.md @@ -1 +1,14 @@ # rag-tutorial-v2 + +## Getting Started 
+Before running, ensure that Ollama is installed. If it is not, install it from [here](https://ollama.com/download). After that, you **must** run `ollama serve`. + +1. `python -m venv venv` +2. `source venv/bin/activate` +3. `pip install -r requirements.txt` +4. `python ./populate_database.py` +5. `python ./query_data.py "How much each players get for each round in Monopoly?"` + +## Update document +1. Place the PDF file inside the `data` directory +2. `python ./populate_database.py`