Skip to content

Commit

Permalink
Remove to_dict and from_dict and update docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
silvanocerza committed Mar 19, 2024
1 parent 781533e commit eb1a48c
Showing 1 changed file with 24 additions and 14 deletions.
38 changes: 24 additions & 14 deletions haystack/components/evaluators/answer_f1.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
- from typing import Any, Dict, List
+ from typing import Dict, List

- from haystack import default_from_dict, default_to_dict
from haystack.core.component import component


Expand All @@ -12,14 +11,21 @@ class AnswerF1Evaluator:
The result is a number from 0.0 to 1.0.
Each question can have multiple ground truth answers and multiple predicted answers.
"""
-     def to_dict(self) -> Dict[str, Any]:
-         return default_to_dict(self)
+     Usage example:
+     ```python
+     from haystack.components.evaluators import AnswerF1Evaluator
-     @classmethod
-     def from_dict(cls, data: Dict[str, Any]) -> "AnswerF1Evaluator":
-         return default_from_dict(cls, data)
+     evaluator = AnswerF1Evaluator()
+     result = evaluator.run(
+         questions=["What is the capital of Germany?", "What is the capital of France?"],
+         ground_truth_answers=[["Berlin"], ["Paris"]],
+         predicted_answers=[["Berlin"], ["London"]],
+     )
+     print(result["result"])
+     # 0.5
+     ```
"""

@component.output_types(result=float)
def run(
Expand All @@ -29,12 +35,16 @@ def run(
Run the AnswerF1Evaluator on the given inputs.
All lists must have the same length.
-     :param questions: A list of questions.
-     :param ground_truth_answers: A list of expected answers for each question.
-     :param predicted_answers: A list of predicted answers for each question.
-     :returns: A dictionary with the following outputs:
-         * `result` - A number from 0.0 to 1.0 that represents the average F1 score of the predicted
-           answer matched with the ground truth answers.
+     :param questions:
+         A list of questions.
+     :param ground_truth_answers:
+         A list of expected answers for each question.
+     :param predicted_answers:
+         A list of predicted answers for each question.
+     :returns:
+         A dictionary with the following outputs:
+         - `result`: A number from 0.0 to 1.0 that represents the average F1 score of the predicted
+           answer matched with the ground truth answers.
"""
if not len(questions) == len(ground_truth_answers) == len(predicted_answers):
raise ValueError("The length of questions, ground_truth_answers, and predicted_answers must be the same.")
Expand Down

0 comments on commit eb1a48c

Please sign in to comment.