Skip to content

Commit

Permalink
Merge branch 'rag-qa-arena-benchmark' of https://github.com/Future-House/paper-qa into rag-qa-arena-benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
JoaquinPolonuer committed Feb 3, 2025
2 parents 7582ea2 + 8bfb740 commit 3e7a885
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
10 changes: 8 additions & 2 deletions gradable.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import asyncio
import os

from aviary.env import TaskDataset
from ldp.agent import SimpleAgent
from ldp.alg.callbacks import MeanMetricsCallback
Expand All @@ -8,6 +9,7 @@
from paperqa import Settings
from paperqa.agents.task import LFRQATaskDataset


async def evaluate() -> None:
settings = Settings()
settings.agent.index.name = "lfrqa_science_index_complete"
Expand All @@ -22,7 +24,11 @@ async def evaluate() -> None:

settings.parsing.use_doc_details = False

dataset = LFRQATaskDataset(data_path="rag-qa-benchmarking/lfrqa/questions.csv", num_questions=2, settings=settings)
dataset = LFRQATaskDataset(
data_path="rag-qa-benchmarking/lfrqa/questions.csv",
num_questions=2,
settings=settings,
)
metrics_callback = MeanMetricsCallback(eval_dataset=dataset)

evaluator = Evaluator(
Expand Down
13 changes: 11 additions & 2 deletions paperqa/agents/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,8 +579,17 @@ async def pairwise_evaluation(
print(f"PQa answer was:\n{pqa_answer} \n\n")
print(f"Human answer was:\n{human_answer} \n\n")
print(f"Winner is: {winner}\n")
self.log_results_to_json(self._settings.llm, qid, question, pqa_answer, human_answer, pqa_answer_index, winner, result)

self.log_results_to_json(
self._settings.llm,
qid,
question,
pqa_answer,
human_answer,
pqa_answer_index,
winner,
result,
)

reward = (
self._rewards["win"]
if winner == "paperqa"
Expand Down
3 changes: 1 addition & 2 deletions paperqa/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,7 @@ def get_summary_llm(self) -> LiteLLMModel:
self.summary_llm, self.temperature
),
)

def get_pairwise_eval_llm(self) -> LiteLLMModel:
return LiteLLMModel(
name=self.pair_eval_llm,
Expand All @@ -855,7 +855,6 @@ def get_agent_llm(self) -> LiteLLMModel:
self.agent.agent_llm, self.temperature
),
)


def get_embedding_model(self) -> EmbeddingModel:
return embedding_model_factory(self.embedding, **(self.embedding_config or {}))
Expand Down

0 comments on commit 3e7a885

Please sign in to comment.