-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into docstrings-linting
- Loading branch information
Showing
23 changed files
with
477 additions
and
79 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,7 @@ jobs: | |
name: Add new issues to project for triage | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/add-to-project@v1.0.0 | ||
- uses: actions/add-to-project@v1.0.1 | ||
with: | ||
project-url: https://github.com/orgs/deepset-ai/projects/5 | ||
github-token: ${{ secrets.GH_PROJECT_PAT }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# pydoc configuration for the "Evaluators" API reference page.
loaders:
  - type: haystack_pydoc_tools.loaders.CustomPythonLoader
    search_path: [../../../haystack/components/evaluators]
    # Each module is listed exactly once.
    modules:
      [
        "answer_exact_match",
        "context_relevance",
        "document_map",
        "document_mrr",
        "document_recall",
        "evaluation_result",
        "faithfulness",
        "llm_evaluator",
        "sas_evaluator",
      ]
    ignore_when_discovered: ["__init__"]
processors:
  - type: filter
    expression:
    documented_only: true
    do_not_filter_modules: false
    skip_empty_modules: true
  - type: smart
  - type: crossref
renderer:
  type: haystack_pydoc_tools.renderers.ReadmeCoreRenderer
  excerpt: Evaluate your pipelines or individual components.
  category_slug: haystack-api
  title: Evaluators
  slug: evaluators-api
  order: 5
  markdown:
    descriptive_class_title: false
    classdef_code_block: false
    descriptive_module_title: true
    add_method_class_prefix: true
    add_member_class_prefix: false
    filename: evaluators_api.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
from typing import Any, Dict, List, Optional | ||
|
||
from numpy import mean as np_mean | ||
|
||
from haystack import default_from_dict | ||
from haystack.components.evaluators.llm_evaluator import LLMEvaluator | ||
from haystack.core.component import component | ||
from haystack.utils import Secret, deserialize_secrets_inplace | ||
|
||
# Private module-level default: few-shot examples included in the prompt when the user does not provide any.
# Each example pairs an "inputs" dict (a question and its contexts) with an "outputs" dict holding the
# statements and a parallel list of 0/1 statement scores.
_DEFAULT_EXAMPLES = [
    {
        "inputs": {
            "questions": "What is the capital of Germany?",
            "contexts": ["Berlin is the capital of Germany and was founded in 1244."],
        },
        "outputs": {
            "statements": ["Berlin is the capital of Germany.", "Berlin was founded in 1244."],
            "statement_scores": [1, 0],
        },
    },
    {
        "inputs": {"questions": "What is the capital of France?", "contexts": ["Berlin is the capital of Germany."]},
        "outputs": {"statements": ["Berlin is the capital of Germany."], "statement_scores": [0]},
    },
    {
        "inputs": {"questions": "What is the capital of Italy?", "contexts": ["Rome is the capital of Italy."]},
        "outputs": {"statements": ["Rome is the capital of Italy."], "statement_scores": [1]},
    },
]
|
||
|
||
class ContextRelevanceEvaluator(LLMEvaluator):
    """
    Evaluator that checks if a provided context is relevant to the question.

    An LLM extracts statements from the provided context and marks each statement as relevant (1) or not
    relevant (0) for answering the question. The score for each question is a number from 0.0 to 1.0. It
    represents the proportion of extracted statements that are relevant to the question.

    Usage example:
    ```python
    from haystack.components.evaluators import ContextRelevanceEvaluator

    questions = ["Who created the Python language?"]
    contexts = [
        [
            "Python, created by Guido van Rossum in the late 1980s, is a high-level general-purpose programming "
            "language. Its design philosophy emphasizes code readability, and its language constructs aim to help "
            "programmers write clear, logical code for both small and large-scale software projects."
        ],
    ]

    evaluator = ContextRelevanceEvaluator()
    result = evaluator.run(questions=questions, contexts=contexts)
    print(result["score"])
    # 1.0
    print(result["individual_scores"])
    # [1.0]
    print(result["results"])
    # [{'statements': ['Python, created by Guido van Rossum in the late 1980s.'], 'statement_scores': [1], 'score': 1.0}]
    ```
    """

    def __init__(
        self,
        examples: Optional[List[Dict[str, Any]]] = None,
        api: str = "openai",
        api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
    ):
        """
        Creates an instance of ContextRelevanceEvaluator.

        :param examples:
            Optional few-shot examples conforming to the expected input and output format of
            ContextRelevanceEvaluator. Default examples will be used if none are provided.
            Each example must be a dictionary with keys "inputs" and "outputs".
            "inputs" must be a dictionary with keys "questions" and "contexts".
            "outputs" must be a dictionary with "statements" and "statement_scores".
            Expected format:
            [{
                "inputs": {
                    "questions": "What is the capital of Italy?", "contexts": ["Rome is the capital of Italy."],
                },
                "outputs": {
                    "statements": ["Rome is the capital of Italy."],
                    "statement_scores": [1],
                },
            }]
        :param api:
            The API to use for calling an LLM through a Generator.
            Supported APIs: "openai".
        :param api_key:
            The API key.
        """
        self.instructions = (
            "Your task is to judge how relevant the provided context is for answering a question. "
            "First, please extract statements from the provided context. "
            "Second, calculate a relevance score for each statement in the context. "
            "The score is 1 if the statement is relevant to answer the question or 0 if it is not relevant."
        )
        self.inputs = [("questions", List[str]), ("contexts", List[List[str]])]
        self.outputs = ["statements", "statement_scores"]
        # An empty or missing `examples` argument falls back to the module-level defaults.
        self.examples = examples or _DEFAULT_EXAMPLES
        self.api = api
        self.api_key = api_key

        super().__init__(
            instructions=self.instructions,
            inputs=self.inputs,
            outputs=self.outputs,
            examples=self.examples,
            api=self.api,
            api_key=self.api_key,
        )

    # Declare every key that `run` returns, not only `results`, so `score` and `individual_scores`
    # are exposed as component outputs too.
    @component.output_types(individual_scores=List[float], score=float, results=List[Dict[str, Any]])
    def run(self, questions: List[str], contexts: List[List[str]]) -> Dict[str, Any]:
        """
        Run the LLM evaluator.

        :param questions:
            A list of questions.
        :param contexts:
            A list of lists of contexts. Each list of contexts corresponds to one question.
        :returns:
            A dictionary with the following outputs:
                - `score`: Mean context relevance score over all the provided input questions.
                - `individual_scores`: A list of context relevance scores for each input question.
                - `results`: A list of dictionaries with `statements`, `statement_scores` and the per-question
                  `score` for each input context.
        """
        result = super().run(questions=questions, contexts=contexts)

        # Average statement relevance score per query. When no statements were extracted the mean is
        # undefined (numpy returns NaN and emits a RuntimeWarning), so score that query 0.0 instead of
        # letting NaN propagate into the aggregate score.
        for res in result["results"]:
            statement_scores = res["statement_scores"]
            res["score"] = np_mean(statement_scores) if statement_scores else 0.0

        # Average context relevance score over all queries.
        individual_scores = [res["score"] for res in result["results"]]
        result["score"] = np_mean(individual_scores)
        result["individual_scores"] = individual_scores

        return result

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ContextRelevanceEvaluator":
        """
        Deserialize this component from a dictionary.

        :param data:
            The dictionary representation of this component.
        :returns:
            The deserialized component instance.
        """
        # The serialized `api_key` must be turned back into a Secret before the constructor is called.
        deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
        return default_from_dict(cls, data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.