Skip to content

Commit

Permalink
test: reorganize docstore test suite to isolate dataframe tests (#8684)
Browse files Browse the repository at this point in the history
* reorganize docstore test suite to isolate dataframe tests

* improve docstring

* include FilterDocumentsTestWithDataframe in InMemoryDocumentStore tests
  • Loading branch information
anakin87 authored Jan 8, 2025
1 parent 5539f6c commit bc30105
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 106 deletions.
249 changes: 145 additions & 104 deletions haystack/testing/document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,74 +174,86 @@ def test_delete_documents_non_existing_document(self, document_store: DocumentSt
assert document_store.count_documents() == 1


class FilterableDocsFixtureMixin:
def create_filterable_docs(include_dataframe_docs: bool = False) -> List[Document]:
"""
Mixin class that adds a filterable_docs() fixture to a test class.
Create a list of filterable documents to be used in the filterable_docs and filterable_docs_with_dataframe fixtures.
"""

@pytest.fixture
def filterable_docs(self) -> List[Document]:
"""Fixture that returns a list of Documents that can be used to test filtering."""
documents = []
for i in range(3):
documents.append(
Document(
content=f"A Foo Document {i}",
meta={
"name": f"name_{i}",
"page": "100",
"chapter": "intro",
"number": 2,
"date": "1969-07-21T20:17:40",
},
embedding=_random_embeddings(768),
)
documents = []
for i in range(3):
documents.append(
Document(
content=f"A Foo Document {i}",
meta={
"name": f"name_{i}",
"page": "100",
"chapter": "intro",
"number": 2,
"date": "1969-07-21T20:17:40",
},
embedding=_random_embeddings(768),
)
documents.append(
Document(
content=f"A Bar Document {i}",
meta={
"name": f"name_{i}",
"page": "123",
"chapter": "abstract",
"number": -2,
"date": "1972-12-11T19:54:58",
},
embedding=_random_embeddings(768),
)
)
documents.append(
Document(
content=f"A Bar Document {i}",
meta={
"name": f"name_{i}",
"page": "123",
"chapter": "abstract",
"number": -2,
"date": "1972-12-11T19:54:58",
},
embedding=_random_embeddings(768),
)
documents.append(
Document(
content=f"A Foobar Document {i}",
meta={
"name": f"name_{i}",
"page": "90",
"chapter": "conclusion",
"number": -10,
"date": "1989-11-09T17:53:00",
},
embedding=_random_embeddings(768),
)
)
documents.append(
Document(
content=f"A Foobar Document {i}",
meta={
"name": f"name_{i}",
"page": "90",
"chapter": "conclusion",
"number": -10,
"date": "1989-11-09T17:53:00",
},
embedding=_random_embeddings(768),
)
documents.append(
Document(
content=f"Document {i} without embedding",
meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"},
)
)
documents.append(
Document(
content=f"Document {i} without embedding",
meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"},
)
)
documents.append(
Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1)
)
documents.append(
Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2)
)

if include_dataframe_docs:
for i in range(3):
documents.append(Document(dataframe=pd.DataFrame([i]), meta={"name": f"table_doc_{i}"}))
documents.append(
Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1)
)
documents.append(
Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2)
)
return documents

return documents


class FilterableDocsFixtureMixin:
    """
    Mixin that contributes a `filterable_docs` pytest fixture to the test class inheriting from it.

    The fixture delegates to `create_filterable_docs` with `include_dataframe_docs=False`.
    """

    @pytest.fixture
    def filterable_docs(self) -> List[Document]:
        """Fixture providing the Documents used by the filtering tests, with dataframe documents left out."""
        docs = create_filterable_docs(include_dataframe_docs=False)
        return docs


class FilterDocumentsTest(AssertDocumentsEqualMixin, FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using different types of filters.
Utility class to test a Document Store `filter_documents` method using different types of filters.
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
Example usage:
Expand Down Expand Up @@ -270,16 +282,6 @@ def test_comparison_equal(self, document_store, filterable_docs):
result = document_store.filter_documents(filters={"field": "meta.number", "operator": "==", "value": 100})
self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") == 100])

def test_comparison_equal_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with == comparator and dataframe"""
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(
filters={"field": "dataframe", "operator": "==", "value": pd.DataFrame([1])}
)
self.assert_documents_are_equal(
result, [d for d in filterable_docs if d.dataframe is not None and d.dataframe.equals(pd.DataFrame([1]))]
)

def test_comparison_equal_with_none(self, document_store, filterable_docs):
"""Test filter_documents() with == comparator and None"""
document_store.write_documents(filterable_docs)
Expand All @@ -293,16 +295,6 @@ def test_comparison_not_equal(self, document_store, filterable_docs):
result = document_store.filter_documents({"field": "meta.number", "operator": "!=", "value": 100})
self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") != 100])

def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with != comparator and dataframe"""
document_store.write_documents(filterable_docs)
result = document_store.filter_documents(
filters={"field": "dataframe", "operator": "!=", "value": pd.DataFrame([1])}
)
self.assert_documents_are_equal(
result, [d for d in filterable_docs if d.dataframe is None or not d.dataframe.equals(pd.DataFrame([1]))]
)

def test_comparison_not_equal_with_none(self, document_store, filterable_docs):
"""Test filter_documents() with != comparator and None"""
document_store.write_documents(filterable_docs)
Expand Down Expand Up @@ -340,12 +332,6 @@ def test_comparison_greater_than_with_string(self, document_store, filterable_do
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "meta.number", "operator": ">", "value": "1"})

def test_comparison_greater_than_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with > comparator and dataframe"""
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "dataframe", "operator": ">", "value": pd.DataFrame([1])})

def test_comparison_greater_than_with_list(self, document_store, filterable_docs):
"""Test filter_documents() with > comparator and list"""
document_store.write_documents(filterable_docs)
Expand Down Expand Up @@ -389,14 +375,6 @@ def test_comparison_greater_than_equal_with_string(self, document_store, filtera
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "meta.number", "operator": ">=", "value": "1"})

def test_comparison_greater_than_equal_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with >= comparator and dataframe"""
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
document_store.filter_documents(
filters={"field": "dataframe", "operator": ">=", "value": pd.DataFrame([1])}
)

def test_comparison_greater_than_equal_with_list(self, document_store, filterable_docs):
"""Test filter_documents() with >= comparator and list"""
document_store.write_documents(filterable_docs)
Expand Down Expand Up @@ -440,12 +418,6 @@ def test_comparison_less_than_with_string(self, document_store, filterable_docs)
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "meta.number", "operator": "<", "value": "1"})

def test_comparison_less_than_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with < comparator and dataframe"""
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "dataframe", "operator": "<", "value": pd.DataFrame([1])})

def test_comparison_less_than_with_list(self, document_store, filterable_docs):
"""Test filter_documents() with < comparator and list"""
document_store.write_documents(filterable_docs)
Expand Down Expand Up @@ -489,14 +461,6 @@ def test_comparison_less_than_equal_with_string(self, document_store, filterable
with pytest.raises(FilterError):
document_store.filter_documents(filters={"field": "meta.number", "operator": "<=", "value": "1"})

def test_comparison_less_than_equal_with_dataframe(self, document_store, filterable_docs):
"""Test filter_documents() with <= comparator and dataframe"""
document_store.write_documents(filterable_docs)
with pytest.raises(FilterError):
document_store.filter_documents(
filters={"field": "dataframe", "operator": "<=", "value": pd.DataFrame([1])}
)

def test_comparison_less_than_equal_with_list(self, document_store, filterable_docs):
"""Test filter_documents() with <= comparator and list"""
document_store.write_documents(filterable_docs)
Expand Down Expand Up @@ -638,6 +602,83 @@ def test_missing_condition_value_key(self, document_store, filterable_docs):
)


class FilterableDocsFixtureMixinWithDataframe:
    """
    Mixin that contributes a `filterable_docs_with_dataframe` pytest fixture to the inheriting test class.

    The fixture delegates to `create_filterable_docs` with `include_dataframe_docs=True`.
    """

    @pytest.fixture
    def filterable_docs_with_dataframe(self) -> List[Document]:
        """Fixture providing the filterable Documents with dataframe documents requested as well."""
        return create_filterable_docs(include_dataframe_docs=True)


class FilterDocumentsTestWithDataframe(AssertDocumentsEqualMixin, FilterableDocsFixtureMixinWithDataframe):
    """
    Utility class to test a Document Store `filter_documents` method on the `dataframe` field.

    Every test requests a `document_store` fixture, which this class does not define, together with
    the `filterable_docs_with_dataframe` fixture inherited from the mixin.
    """

    def test_comparison_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """Check that the == comparator on the dataframe field returns only the matching documents."""
        document_store.write_documents(filterable_docs_with_dataframe)
        condition = {"field": "dataframe", "operator": "==", "value": pd.DataFrame([1])}
        result = document_store.filter_documents(filters=condition)

        # The expectation uses its own DataFrame instance, separate from the one handed to the store.
        reference = pd.DataFrame([1])
        expected = []
        for doc in filterable_docs_with_dataframe:
            if doc.dataframe is None:
                continue
            if doc.dataframe.equals(reference):
                expected.append(doc)
        self.assert_documents_are_equal(result, expected)

    def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """Check that the != comparator on the dataframe field returns every non-matching document."""
        document_store.write_documents(filterable_docs_with_dataframe)
        condition = {"field": "dataframe", "operator": "!=", "value": pd.DataFrame([1])}
        result = document_store.filter_documents(filters=condition)

        # Documents without a dataframe count as "not equal" and are expected in the result.
        reference = pd.DataFrame([1])
        expected = []
        for doc in filterable_docs_with_dataframe:
            matches = doc.dataframe is not None and doc.dataframe.equals(reference)
            if not matches:
                expected.append(doc)
        self.assert_documents_are_equal(result, expected)

    def test_comparison_greater_than_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """Check that the > comparator with a dataframe value raises FilterError."""
        document_store.write_documents(filterable_docs_with_dataframe)
        condition = {"field": "dataframe", "operator": ">", "value": pd.DataFrame([1])}
        with pytest.raises(FilterError):
            document_store.filter_documents(filters=condition)

    def test_comparison_greater_than_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """Check that the >= comparator with a dataframe value raises FilterError."""
        document_store.write_documents(filterable_docs_with_dataframe)
        condition = {"field": "dataframe", "operator": ">=", "value": pd.DataFrame([1])}
        with pytest.raises(FilterError):
            document_store.filter_documents(filters=condition)

    def test_comparison_less_than_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """Check that the < comparator with a dataframe value raises FilterError."""
        document_store.write_documents(filterable_docs_with_dataframe)
        condition = {"field": "dataframe", "operator": "<", "value": pd.DataFrame([1])}
        with pytest.raises(FilterError):
            document_store.filter_documents(filters=condition)

    def test_comparison_less_than_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
        """Check that the <= comparator with a dataframe value raises FilterError."""
        document_store.write_documents(filterable_docs_with_dataframe)
        condition = {"field": "dataframe", "operator": "<=", "value": pd.DataFrame([1])}
        with pytest.raises(FilterError):
            document_store.filter_documents(filters=condition)


class DocumentStoreBaseTests(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest):
@pytest.fixture
def document_store(self) -> DocumentStore:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
enhancements:
- |
Reorganized the document store test suite to isolate dataframe filter tests.
This change prepares for potential future deprecation of the Document class's dataframe field.
4 changes: 2 additions & 2 deletions test/document_stores/test_in_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
from haystack import Document
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.testing.document_store import DocumentStoreBaseTests
from haystack.testing.document_store import DocumentStoreBaseTests, FilterDocumentsTestWithDataframe


class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
class TestMemoryDocumentStore(DocumentStoreBaseTests, FilterDocumentsTestWithDataframe): # pylint: disable=R0904
"""
Test InMemoryDocumentStore's specific features
"""
Expand Down

0 comments on commit bc30105

Please sign in to comment.