Skip to content

Commit

Permalink
bug: run parameter "ranking_mode" does not override init param in met…
Browse files Browse the repository at this point in the history
…a field ranker (#7375)

* bug: run parameter ranking_mode does not override init param in metafield ranker

* Added a release note

* Used pytest.approx for comparing floating point numbers in unit test
  • Loading branch information
mohitlal31 authored Mar 19, 2024
1 parent d4df9c0 commit 2807193
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 5 deletions.
12 changes: 8 additions & 4 deletions haystack/components/rankers/meta_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def run(
# Add the docs missing the meta_field back on the end
sorted_by_meta = [doc for meta, doc in tuple_sorted_by_meta]
sorted_documents = sorted_by_meta + docs_missing_meta_field
sorted_documents = self._merge_rankings(documents, sorted_documents, weight)
sorted_documents = self._merge_rankings(documents, sorted_documents, weight, ranking_mode)
return {"documents": sorted_documents[:top_k]}

def _parse_meta(
Expand Down Expand Up @@ -296,18 +296,22 @@ def _parse_meta(
return meta_values

def _merge_rankings(
self, documents: List[Document], sorted_documents: List[Document], weight: float
self,
documents: List[Document],
sorted_documents: List[Document],
weight: float,
ranking_mode: Literal["reciprocal_rank_fusion", "linear_score"],
) -> List[Document]:
"""
Merge the two different rankings for Documents sorted both by their content and by their meta field.
"""
scores_map: Dict = defaultdict(int)

if self.ranking_mode == "reciprocal_rank_fusion":
if ranking_mode == "reciprocal_rank_fusion":
for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
scores_map[document.id] += self._calculate_rrf(rank=i) * (1 - weight)
scores_map[sorted_doc.id] += self._calculate_rrf(rank=i) * weight
elif self.ranking_mode == "linear_score":
elif ranking_mode == "linear_score":
for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
score = float(0)
if document.score is None:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
fixes:
- |
Fixed a bug in the `MetaFieldRanker`: when the `ranking_mode` parameter was overridden in the `run` method,
the component was incorrectly using the `ranking_mode` parameter set in the `__init__` method.
17 changes: 16 additions & 1 deletion test/components/rankers/test_metafield.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def test_reciprocal_rank_fusion(self):
]
output = ranker.run(documents=docs_before)
docs_after = output["documents"]
assert docs_after[0].score == 0.01626123744050767
assert docs_after[0].score == pytest.approx(0.016261, abs=1e-5)

@pytest.mark.parametrize("score", [-1, 2, 1.3, 2.1])
def test_linear_score_raises_warning_if_doc_wrong_score(self, score, caplog):
Expand All @@ -221,3 +221,18 @@ def test_linear_score_raises_raises_warning_if_doc_without_score(self, caplog):
with caplog.at_level(logging.WARNING):
ranker.run(documents=docs_before)
assert "The score wasn't provided; defaulting to 0." in caplog.text

def test_different_ranking_mode_for_init_vs_run(self):
ranker = MetaFieldRanker(meta_field="rating", ranking_mode="linear_score", weight=0.5)
docs_before = [
Document(content="abc", meta={"rating": 1.3}, score=0.3),
Document(content="abc", meta={"rating": 0.7}, score=0.4),
Document(content="abc", meta={"rating": 2.1}, score=0.6),
]
output = ranker.run(documents=docs_before)
docs_after = output["documents"]
assert docs_after[0].score == 0.8

output = ranker.run(documents=docs_before, ranking_mode="reciprocal_rank_fusion")
docs_after = output["documents"]
assert docs_after[0].score == pytest.approx(0.016261, abs=1e-5)

0 comments on commit 2807193

Please sign in to comment.