Skip to content

Commit

Permalink
trying to fix vectorstore being empty
Browse files Browse the repository at this point in the history
  • Loading branch information
mdr223 committed Sep 8, 2023
1 parent ab467e7 commit e640f7c
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
1 change: 1 addition & 0 deletions A2rchi/chains/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def __init__(self):


def update_vectorstore(self):
    """Refresh the vector store and rebuild the chain on top of it.

    Asks the data manager to update its vector store, fetches the
    resulting store into ``self.vectorstore``, and then recreates
    ``self.chain`` from ``self.llm`` and a retriever over that store,
    with source documents included in the chain's output.
    """
    manager = self.dataManager

    # Bring the store up to date before fetching it, so the retriever
    # built below is backed by the refreshed store.
    manager.update_vectorstore()
    self.vectorstore = manager.fetch_vectorstore()

    retriever = self.vectorstore.as_retriever()
    self.chain = BaseChain.from_llm(
        self.llm,
        retriever,
        return_source_documents=True,
    )

Expand Down
5 changes: 3 additions & 2 deletions A2rchi/utils/data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def update_vectorstore(self):
# remove obsolete files
files_to_remove = list(set(files_in_vstore) - set(files_in_data))
ids_to_remove = [id for id, file in zip(ids_in_vstore, files_in_vstore) if file in files_to_remove]
vstore._collection.delete(ids_to_remove)
if ids_to_remove:
vstore._collection.delete(ids_to_remove)

# add new files to vectorstore; will do nothing if files_to_add is empty
files_to_add = list(set(files_in_data) - set(files_in_vstore))
Expand All @@ -70,7 +71,7 @@ def loader(self, file_path):
# return the document loader from a path, with the correct loader given the extension
_, file_extension = os.path.splitext(file_path)
if file_extension == ".txt" : return TextLoader(file_path)
elif file_extension == ".html" : return BSHTMLLoader(file_path)
elif file_extension == ".html" : return BSHTMLLoader(file_path, bs_kwargs={"features": "html.parser"})
elif file_extension == ".pdf" : return PyPDFLoader(file_path)
else: print(file_path, " Error: format not supported")

Expand Down

0 comments on commit e640f7c

Please sign in to comment.