bug: run parameter "ranking_mode" does not override init param in met…

…a field ranker (#7375) * bug: run parameter ranking_mode does not override init param in metafield ranker * Added a release note * Used pytest.approx for comparing floating point numbers in unit test
deepset-ai · Mar 19, 2024 · 2807193 · 2807193
1 parent d4df9c0
commit 2807193
Show file tree

Hide file tree

Showing 3 changed files with 29 additions and 5 deletions.
diff --git a/haystack/components/rankers/meta_field.py b/haystack/components/rankers/meta_field.py
@@ -250,7 +250,7 @@ def run(
         # Add the docs missing the meta_field back on the end
         sorted_by_meta = [doc for meta, doc in tuple_sorted_by_meta]
         sorted_documents = sorted_by_meta + docs_missing_meta_field
-        sorted_documents = self._merge_rankings(documents, sorted_documents, weight)
+        sorted_documents = self._merge_rankings(documents, sorted_documents, weight, ranking_mode)
         return {"documents": sorted_documents[:top_k]}
 
     def _parse_meta(
@@ -296,18 +296,22 @@ def _parse_meta(
         return meta_values
 
     def _merge_rankings(
-        self, documents: List[Document], sorted_documents: List[Document], weight: float
+        self,
+        documents: List[Document],
+        sorted_documents: List[Document],
+        weight: float,
+        ranking_mode: Literal["reciprocal_rank_fusion", "linear_score"],
     ) -> List[Document]:
         """
         Merge the two different rankings for Documents sorted both by their content and by their meta field.
         """
         scores_map: Dict = defaultdict(int)
 
-        if self.ranking_mode == "reciprocal_rank_fusion":
+        if ranking_mode == "reciprocal_rank_fusion":
             for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
                 scores_map[document.id] += self._calculate_rrf(rank=i) * (1 - weight)
                 scores_map[sorted_doc.id] += self._calculate_rrf(rank=i) * weight
-        elif self.ranking_mode == "linear_score":
+        elif ranking_mode == "linear_score":
             for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
                 score = float(0)
                 if document.score is None:

diff --git a/releasenotes/notes/updated-ranking-mode-in-meta-field-ranker-d1f1304cc422b202.yaml b/releasenotes/notes/updated-ranking-mode-in-meta-field-ranker-d1f1304cc422b202.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    Fixed a bug in the `MetaFieldRanker`: when the `ranking_mode` parameter was overridden in the `run` method,
+    the component was incorrectly using the `ranking_mode` parameter set in the `__init__` method.
diff --git a/test/components/rankers/test_metafield.py b/test/components/rankers/test_metafield.py
@@ -196,7 +196,7 @@ def test_reciprocal_rank_fusion(self):
         ]
         output = ranker.run(documents=docs_before)
         docs_after = output["documents"]
-        assert docs_after[0].score == 0.01626123744050767
+        assert docs_after[0].score == pytest.approx(0.016261, abs=1e-5)
 
     @pytest.mark.parametrize("score", [-1, 2, 1.3, 2.1])
     def test_linear_score_raises_warning_if_doc_wrong_score(self, score, caplog):
@@ -221,3 +221,18 @@ def test_linear_score_raises_raises_warning_if_doc_without_score(self, caplog):
         with caplog.at_level(logging.WARNING):
             ranker.run(documents=docs_before)
             assert "The score wasn't provided; defaulting to 0." in caplog.text
+
+    def test_different_ranking_mode_for_init_vs_run(self):
+        ranker = MetaFieldRanker(meta_field="rating", ranking_mode="linear_score", weight=0.5)
+        docs_before = [
+            Document(content="abc", meta={"rating": 1.3}, score=0.3),
+            Document(content="abc", meta={"rating": 0.7}, score=0.4),
+            Document(content="abc", meta={"rating": 2.1}, score=0.6),
+        ]
+        output = ranker.run(documents=docs_before)
+        docs_after = output["documents"]
+        assert docs_after[0].score == 0.8
+
+        output = ranker.run(documents=docs_before, ranking_mode="reciprocal_rank_fusion")
+        docs_after = output["documents"]
+        assert docs_after[0].score == pytest.approx(0.016261, abs=1e-5)