Skip to content

Commit

Permalink
refactor(docstore): remove DocumentStore alias and enhance test coverage
Browse files Browse the repository at this point in the history
- Removed the `DocumentStore` alias for `ChromaDBDocStore` to streamline the codebase and avoid confusion.
- Expanded the test suite in `test_docstore.py` to cover multiple document store implementations (`LanceDBDocStore`, `ChromaDBDocStore`, and `BM25DocStore`).
- Introduced parameterized tests using Hypothesis to ensure robust testing across different document store types.
- Simplified the `test_add_documents` function to focus on single document retrieval, improving test clarity and execution speed.
  • Loading branch information
ericmjl committed Mar 18, 2024
1 parent 7604d8d commit 7103b84
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 22 deletions.
3 changes: 0 additions & 3 deletions llamabot/components/docstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,6 @@ def reset(self):
)


# Define DocumentStore as an alias for ChromaDBDocStore for backwards compatibility.
DocumentStore = ChromaDBDocStore

registry = get_registry()
func = registry.get(name="sentence-transformers").create()

Expand Down
50 changes: 31 additions & 19 deletions tests/components/test_docstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,40 @@


from pathlib import Path
from llamabot.components.docstore import DocumentStore
from llamabot.components.docstore import BM25DocStore, ChromaDBDocStore, LanceDBDocStore
from hypothesis import HealthCheck, given, settings, strategies as st


def test_document_store():
"""Test the DocumentStore class."""
docstore = DocumentStore(collection_name="test_collection")
assert docstore.collection_name == "test_collection"
docstore.client.delete_collection("test_collection")
def lancedb():
"""Return a LanceDBDocStore."""
store = LanceDBDocStore(table_name="test_lancedb", storage_path=Path("/tmp"))
store.reset()
return store


def test_add_documents(tmp_path: Path, docstore_class):
"""Test the add_documents method of DocumentStore."""
# Create a temporary collection for testing
collection_name = "test_collection"
storage_path = Path.home() / ".llamabot" / "test_chroma.db"
docstore = DocumentStore(collection_name=collection_name, storage_path=storage_path)
def chromadb():
"""Return a ChromaDBDocStore."""
store = ChromaDBDocStore(collection_name="test_chromadb", storage_path=Path("/tmp"))
store.reset()
return store


def bm25():
"""Return a BM25DocStore."""
return BM25DocStore()


docstore_strategies = [
st.just(lancedb()),
st.just(chromadb()),
st.just(bm25()),
]


@given(docstore=st.one_of(docstore_strategies))
@settings(suppress_health_check=[HealthCheck.function_scoped_fixture], deadline=None)
def test_add_documents(tmp_path: Path, docstore):
"""Test the add_documents method of DocumentStore."""
# Add a single document
# document_path = Path("path/to/document.txt")
document_path = tmp_path / "document.txt"
Expand All @@ -32,9 +49,6 @@ def test_add_documents(tmp_path: Path, docstore_class):
# Assert that the retrieved document matches the added document
assert retrieved_documents == ["content of the document"]

# Reset the document store
docstore.reset()

# Add multiple documents
document_paths = [tmp_path / "document1.txt", tmp_path / "document2.txt"]
for i, document_path in enumerate(document_paths):
Expand All @@ -43,12 +57,10 @@ def test_add_documents(tmp_path: Path, docstore_class):
docstore.add_documents(document_paths=document_paths)

# Retrieve the documents from the store
retrieved_documents = docstore.retrieve("query", n_results=2)
retrieved_documents = docstore.retrieve("document1", n_results=1)

# Assert that the retrieved documents match the added documents
assert set(retrieved_documents) == set(
["content of document1", "content of document2"]
)
assert set(retrieved_documents) == set(["content of document1"])

# Clean up the temporary collection
docstore.reset()

0 comments on commit 7103b84

Please sign in to comment.