Commit 5799c92
Switched to llama index integration
ethan-tonic committed Jan 24, 2024
1 parent 0fbf8bc commit 5799c92
Showing 1 changed file with 23 additions and 20 deletions.
tests/validate_test.py (43 changes: 23 additions & 20 deletions)
@@ -1,7 +1,8 @@
 import json
 import os
-from tonic_validate import ValidateScorer, Benchmark, LLMResponse, ValidateApi
+from tonic_validate import ValidateApi
 from tonic_validate.metrics import AnswerSimilarityMetric, RetrievalPrecisionMetric, AugmentationPrecisionMetric, AnswerConsistencyMetric
+from llama_index.evaluation import TonicValidateEvaluator
 import requests
 
 from dotenv import load_dotenv

@@ -24,35 +25,37 @@ def get_llm_response(prompt):
     result = response['result']
     return result['content'], result['context']
 
-def test_llama_index():
+def get_q_and_a():
     # Load qa_pairs.json
     qa_pairs = json.load(open('./tests/qa_pairs.json'))
-    benchmark = Benchmark(
-        questions=[x['question'] for x in qa_pairs],
-        answers=[x['answer'] for x in qa_pairs]
-    )
+    return ([x['question'] for x in qa_pairs], [x['answer'] for x in qa_pairs])
 
-    # Save the responses into an array for scoring
-    responses = []
-    for item in benchmark:
-        llm_answer, llm_context_list = get_llm_response(item.question)
-        llm_response = LLMResponse(
-            llm_answer=llm_answer,
-            llm_context_list=llm_context_list,
-            benchmark_item=item
-        )
-        responses.append(llm_response)
-
-    # Score run
+def get_responses(questions):
+    llm_answers = []
+    context_lists = []
+    for item in questions:
+        llm_answer, llm_context_list = get_llm_response(item)
+        llm_answers.append(llm_answer)
+        context_lists.append(llm_context_list)
+    return (llm_answers, context_lists)
+
+def score_run(questions, context_lists, reference_answers, llm_answers):
     metrics = [
         AnswerSimilarityMetric(),
         RetrievalPrecisionMetric(),
         AugmentationPrecisionMetric(),
         AnswerConsistencyMetric()
     ]
-    scorer = ValidateScorer(metrics)
-    run = scorer.score_run(responses)
+    scorer = TonicValidateEvaluator(metrics, model_evaluator="gpt-4-1106-preview")
+    run = scorer.evaluate_run(
+        questions, context_lists, reference_answers, llm_answers
+    )
+    return run, metrics
 
+def test_llama_index():
+    questions, reference_answers = get_q_and_a()
+    llm_answers, context_lists = get_responses(questions)
+    run, metrics = score_run(questions, context_lists, reference_answers, llm_answers)
     # Upload results to web ui
     validate_api = ValidateApi()
     # Get project id from env
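For quick reference, here is a minimal sketch of the test's scoring flow after this commit, assembled from the new code in the diff. The TonicValidateEvaluator construction and the evaluate_run call are taken directly from the changed file; the stubbed get_llm_response helper and the final print are illustrative placeholders added here, and the upload step that is cut off above is omitted.

# Minimal sketch of the post-commit scoring flow (illustrative only).
# Assumes the qa_pairs.json fixture used by the test; the stub
# get_llm_response below stands in for the real helper defined earlier
# in validate_test.py, which queries the RAG system under test.
import json

from llama_index.evaluation import TonicValidateEvaluator
from tonic_validate.metrics import (
    AnswerSimilarityMetric,
    RetrievalPrecisionMetric,
    AugmentationPrecisionMetric,
    AnswerConsistencyMetric,
)

def get_llm_response(prompt):
    # Placeholder: the real helper returns (answer, list_of_context_strings).
    return "stub answer", ["stub context"]

# Load the benchmark questions and reference answers.
qa_pairs = json.load(open("./tests/qa_pairs.json"))
questions = [x["question"] for x in qa_pairs]
reference_answers = [x["answer"] for x in qa_pairs]

# Collect the RAG system's answers and retrieved contexts.
llm_answers, context_lists = [], []
for question in questions:
    answer, context_list = get_llm_response(question)
    llm_answers.append(answer)
    context_lists.append(context_list)

# Score the run with the llama_index wrapper, as in the diff above.
metrics = [
    AnswerSimilarityMetric(),
    RetrievalPrecisionMetric(),
    AugmentationPrecisionMetric(),
    AnswerConsistencyMetric(),
]
scorer = TonicValidateEvaluator(metrics, model_evaluator="gpt-4-1106-preview")
run = scorer.evaluate_run(questions, context_lists, reference_answers, llm_answers)
print(run)  # inspect the scored run; the real test then uploads it via ValidateApi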

0 comments on commit 5799c92
