Skip to content

[WIP] Introduce a new evaluator to detect hallucinations #1461

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* correctness of the chat response based on the context.
*
* @author Mark Pollack
* @author Eddú Meléndez
* @since 1.0.0 M1
*/
public class EvaluationRequest {
Expand All @@ -26,6 +27,10 @@ public EvaluationRequest(String userText, String responseContent) {
this(userText, Collections.emptyList(), responseContent);
}

/**
 * Creates a request that carries only supporting data and a response.
 * Delegates to the three-argument constructor, passing an empty string
 * ({@code ""}) as the user text.
 * @param dataList the supporting data forwarded unchanged to the
 * three-argument constructor
 * @param responseContent the response content forwarded unchanged to the
 * three-argument constructor
 */
public EvaluationRequest(List<Content> dataList, String responseContent) {
this("", dataList, responseContent);
}

public EvaluationRequest(String userText, List<Content> dataList, String responseContent) {
this.userText = userText;
this.dataList = dataList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,29 @@

public class EvaluationResponse {

private boolean pass;
private final boolean pass;

private float score;
private final float score;

private String feedback;
private final String feedback;

Map<String, Object> metadata;
private final Map<String, Object> metadata;

/**
 * Creates a response with an explicitly supplied score.
 * @param pass the value later returned by {@link #isPass()}
 * @param score the score stored on this response
 * @param feedback the feedback text stored on this response
 * @param metadata the metadata map stored on this response; the reference is
 * kept as-is (no copy is made)
 * @deprecated NOTE(review): presumably superseded by
 * {@link #EvaluationResponse(boolean, String, Map)}, which fixes the score
 * at {@code 0} - confirm the intended replacement.
 */
@Deprecated
public EvaluationResponse(boolean pass, float score, String feedback, Map<String, Object> metadata) {
this.pass = pass;
this.score = score;
this.feedback = feedback;
this.metadata = metadata;
}

/**
 * Creates a response without an explicit score; the score is set to
 * {@code 0}.
 * @param pass the value later returned by {@link #isPass()}
 * @param feedback the feedback text stored on this response
 * @param metadata the metadata map stored on this response; the reference is
 * kept as-is (no copy is made)
 */
public EvaluationResponse(boolean pass, String feedback, Map<String, Object> metadata) {
	// This constructor takes no score, so the field is pinned to zero.
	this.score = 0.0f;
	this.metadata = metadata;
	this.feedback = feedback;
	this.pass = pass;
}

/**
 * Returns the pass flag assigned in the constructor.
 * @return the stored {@code pass} value
 */
public boolean isPass() {
return pass;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
package org.springframework.ai.evaluation;

import org.springframework.ai.model.Content;
import org.springframework.util.StringUtils;

import java.util.List;
import java.util.stream.Collectors;

/**
 * Single-method contract that turns an {@link EvaluationRequest} into an
 * {@link EvaluationResponse}. Also offers a default helper for flattening the
 * request's supporting data into one string.
 */
@FunctionalInterface
public interface Evaluator {

	/**
	 * Evaluates the given request.
	 * @param evaluationRequest the request to evaluate
	 * @return the outcome of the evaluation
	 */
	EvaluationResponse evaluate(EvaluationRequest evaluationRequest);

	/**
	 * Joins the textual content of the request's data list into a single
	 * string, one entry per line (separated by
	 * {@link System#lineSeparator()}). Entries whose content has no text
	 * according to {@link StringUtils#hasText(String)} are left out.
	 * @param evaluationRequest the request whose data list is read
	 * @return the joined content, or an empty string when the data list is
	 * {@code null}, empty, or contains no entry with text
	 */
	default String doGetSupportingData(EvaluationRequest evaluationRequest) {
		List<Content> data = evaluationRequest.getDataList();
		if (data == null) {
			// A request built with a null list carries no supporting data;
			// return what an empty list would produce instead of throwing.
			return "";
		}
		return data.stream()
			.map(Content::getContent)
			.filter(StringUtils::hasText)
			.collect(Collectors.joining(System.lineSeparator()));
	}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package org.springframework.ai.evaluation;

import org.springframework.ai.chat.client.ChatClient;

import java.util.Collections;

/**
 * {@link Evaluator} that sends the request's supporting data (as the
 * "document") and its response content (as the "claim") to a chat model and
 * passes the evaluation when the model answers "yes".
 */
public class HallucinationDetectionEvaluator implements Evaluator {

	// NOTE(review): inside a text block "\\n" yields a literal backslash
	// followed by 'n', not a line break - confirm this is what the prompt
	// template / target model expects.
	private static final String DEFAULT_EVALUATION_PROMPT_TEXT = """
	Document: \\n {document}\\n
	Claim: \\n {claim}
	""";

	private final ChatClient.Builder chatClientBuilder;

	/**
	 * Creates an evaluator backed by the given chat client builder.
	 * @param chatClientBuilder builder used to create a chat client on every
	 * call to {@link #evaluate(EvaluationRequest)}
	 */
	public HallucinationDetectionEvaluator(ChatClient.Builder chatClientBuilder) {
		this.chatClientBuilder = chatClientBuilder;
	}

	/**
	 * Asks the chat model whether the response content is supported by the
	 * supporting data of the request.
	 * @param evaluationRequest the request providing the response content and
	 * the supporting data
	 * @return a response that passes only when the model's answer, ignoring
	 * case and surrounding whitespace, is "yes"; feedback is empty and
	 * metadata is an empty map
	 */
	@Override
	public EvaluationResponse evaluate(EvaluationRequest evaluationRequest) {
		var response = evaluationRequest.getResponseContent();
		var context = doGetSupportingData(evaluationRequest);

		String evaluationResponse = this.chatClientBuilder.build()
			.prompt()
			.user(userSpec -> userSpec.text(DEFAULT_EVALUATION_PROMPT_TEXT)
				.param("document", context)
				.param("claim", response))
			.call()
			.content();

		// Model output may be absent or padded with whitespace/newlines
		// (e.g. "Yes\n"); treat a missing answer as a failure instead of
		// throwing, and ignore the padding when comparing.
		boolean passing = evaluationResponse != null && "yes".equalsIgnoreCase(evaluationResponse.strip());
		return new EvaluationResponse(passing, "", Collections.emptyMap());
	}

}
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
package org.springframework.ai.evaluation;

import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.model.Content;

import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.springframework.util.StringUtils;

public class RelevancyEvaluator implements Evaluator {

Expand Down Expand Up @@ -53,12 +49,4 @@ public EvaluationResponse evaluate(EvaluationRequest evaluationRequest) {
return new EvaluationResponse(passing, score, "", Collections.emptyMap());
}

/**
 * Flattens the request's data list into one string: the content of each
 * entry that has text (per {@link StringUtils#hasText(String)}) is kept and
 * the kept values are joined with {@link System#lineSeparator()}.
 * @param evaluationRequest the request whose data list is read
 * @return the joined content; empty when no entry has text
 */
protected String doGetSupportingData(EvaluationRequest evaluationRequest) {
	List<Content> supportingItems = evaluationRequest.getDataList();
	String separator = System.lineSeparator();
	return supportingItems.stream()
		.map(item -> item.getContent())
		.filter(text -> StringUtils.hasText(text))
		.collect(Collectors.joining(separator));
}

}