From 02755bfec2b6d380bafb23cafcb748647758a8c0 Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Mon, 14 Oct 2024 16:34:39 -0500 Subject: [PATCH] revert judge score to accept prompt and response lists Signed-off-by: Jeffrey Martin --- garak/resources/red_team/evaluation.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/garak/resources/red_team/evaluation.py b/garak/resources/red_team/evaluation.py index d5ededd3..58efb976 100644 --- a/garak/resources/red_team/evaluation.py +++ b/garak/resources/red_team/evaluation.py @@ -3,7 +3,7 @@ import tiktoken -from . import conversation +from . import conversation, system_prompts from garak.generators.openai import context_lengths @@ -67,11 +67,14 @@ class EvaluationJudge: Mixin of this class requires instance attributes for: - evaluation_generator : Generator - evaluator_token_limit : int + - system_prompt_judge: str """ def _create_conv(self, full_prompt, system_prompt=None): if system_prompt is None: - raise ValueError("A system prompt must be provided.") + system_prompt = self.system_prompt_judge + if self.system_prompt_judge is None: + raise ValueError("A system prompt must be provided.") conv = conversation.get_template(self.evaluation_generator.name) conv.set_system_message(system_prompt) @@ -108,13 +111,10 @@ def _create_conv(self, full_prompt, system_prompt=None): return conv.to_openai_api_messages() - def judge_score(self, attempt, probe_system_prompt): + def judge_score(self, attack_prompt_list, target_response_list): convs_list = [ - self.create_conv( - get_evaluator_prompt(attempt.prompt, response), - system_prompt=probe_system_prompt, - ) - for response in attempt.all_outputs + self._create_conv(get_evaluator_prompt(prompt, response)) + for prompt, response in zip(attack_prompt_list, target_response_list) ] raw_outputs = [ self.evaluation_generator.generate(conv)[0] for conv in convs_list @@ -124,7 +124,7 @@ def judge_score(self, attempt, probe_system_prompt): def on_topic_score(self, attempt): convs_list = [ - self.create_conv( + self._create_conv( get_evaluator_prompt_on_topic(prompt), system_prompt=system_prompts.on_topic_prompt(attempt.prompt), )