Commit 5799c92

Switched to llama index integration

1 parent: 0fbf8bc
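
In short, the commit swaps the scoring entry point from tonic_validate's native scorer to LlamaIndex's TonicValidateEvaluator wrapper, which takes parallel lists of questions, contexts, reference answers, and LLM answers instead of a list of LLMResponse objects. Both call shapes, as they appear verbatim in the diff below:

    # Before: tonic_validate's native scorer over a list of LLMResponse objects
    scorer = ValidateScorer(metrics)
    run = scorer.score_run(responses)

    # After: LlamaIndex's wrapper over parallel lists
    scorer = TonicValidateEvaluator(metrics, model_evaluator="gpt-4-1106-preview")
    run = scorer.evaluate_run(questions, context_lists, reference_answers, llm_answers)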

File tree: 1 file changed, +23 −20 lines

tests/validate_test.py (23 additions, 20 deletions)
@@ -1,7 +1,8 @@
 import json
 import os
-from tonic_validate import ValidateScorer, Benchmark, LLMResponse, ValidateApi
+from tonic_validate import ValidateApi
 from tonic_validate.metrics import AnswerSimilarityMetric, RetrievalPrecisionMetric, AugmentationPrecisionMetric, AnswerConsistencyMetric
+from llama_index.evaluation import TonicValidateEvaluator
 import requests

 from dotenv import load_dotenv
@@ -24,35 +25,37 @@ def get_llm_response(prompt):
     result = response['result']
     return result['content'], result['context']

-def test_llama_index():
+def get_q_and_a():
     # Load qa_pairs.json
     qa_pairs = json.load(open('./tests/qa_pairs.json'))
-    benchmark = Benchmark(
-        questions=[x['question'] for x in qa_pairs],
-        answers=[x['answer'] for x in qa_pairs]
-    )
+    return ([x['question'] for x in qa_pairs], [x['answer'] for x in qa_pairs])

-    # Save the responses into an array for scoring
-    responses = []
-    for item in benchmark:
-        llm_answer, llm_context_list = get_llm_response(item.question)
-        llm_response = LLMResponse(
-            llm_answer=llm_answer,
-            llm_context_list=llm_context_list,
-            benchmark_item=item
-        )
-        responses.append(llm_response)
-
-    # Score run
+def get_responses(questions):
+    llm_answers = []
+    context_lists = []
+    for item in questions:
+        llm_answer, llm_context_list = get_llm_response(item)
+        llm_answers.append(llm_answer)
+        context_lists.append(llm_context_list)
+    return (llm_answers, context_lists)
+
+def score_run(questions, context_lists, reference_answers, llm_answers):
     metrics = [
         AnswerSimilarityMetric(),
         RetrievalPrecisionMetric(),
         AugmentationPrecisionMetric(),
         AnswerConsistencyMetric()
     ]
-    scorer = ValidateScorer(metrics)
-    run = scorer.score_run(responses)
+    scorer = TonicValidateEvaluator(metrics, model_evaluator="gpt-4-1106-preview")
+    run = scorer.evaluate_run(
+        questions, context_lists, reference_answers, llm_answers
+    )
+    return run, metrics

+def test_llama_index():
+    questions, reference_answers = get_q_and_a()
+    llm_answers, context_lists = get_responses(questions)
+    run, metrics = score_run(questions, context_lists, reference_answers, llm_answers)
     # Upload results to web ui
     validate_api = ValidateApi()
     # Get project id from env
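
The diff is truncated right after the upload comment. For orientation, here is a minimal sketch of how that upload step presumably continues, based on tonic_validate's documented ValidateApi; the exact environment variable name for the project id is an assumption, since the diff only says "Get project id from env":

    import os
    from dotenv import load_dotenv
    from tonic_validate import ValidateApi

    load_dotenv()
    # With no key argument, ValidateApi reads TONIC_VALIDATE_API_KEY from the environment
    validate_api = ValidateApi()
    # Hypothetical variable name; the commit does not show it
    project_id = os.getenv("TONIC_VALIDATE_PROJECT_ID")
    # upload_run pushes a scored run to the Tonic Validate web UI
    validate_api.upload_run(project_id, run)

Running test_llama_index end to end also needs OPENAI_API_KEY set for the gpt-4-1106-preview judge model, plus the RAG endpoint queried by get_llm_response to be reachable.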
