-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexplaining_ar.py
executable file
·65 lines (57 loc) · 2.07 KB
/
explaining_ar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/python3.8
# Autorate using an autorater that gives an explanation
# The explanation is just a letter A,B,C,D - see the prompt for
# definitions but basically each letter should indicate a different
# level of confidence
import argparse
import json
import re
import sys
import util
def explaining_ar_prompt(qa_response):
    """Build the multiple-choice autorater prompt for one QA example.

    Args:
        qa_response: mapping from which three keys are read:
            'question' (str), 'answer' (the known acceptable answers, as a
            sequence of strings or a single string) and 'prediction' (the
            candidate answer to be judged).

    Returns:
        The prompt text: the question (with a '?' appended if it lacks
        one), the known answers joined with ' or ', the candidate answer,
        and the four response options (A)-(D).
    """
    # Drop trailing whitespace before testing for '?', so a question such
    # as "who wrote hamlet? " does not become "who wrote hamlet? ?".
    q = qa_response['question'].rstrip()
    if not q.endswith('?'):
        q += '?'

    known = qa_response['answer']
    if isinstance(known, str):
        # A bare string would otherwise be joined character by character.
        known = [known]
    answers = ' or '.join(known)

    candidate_answer = qa_response['prediction']

    # NOTE(review): "acceptible" below is misspelled; it is left untouched
    # because editing it changes the text the autorater is shown.
    return (f"Question: {q}\n"
            + f"Known acceptible answer(s): {answers}\n"
            + f"Candidate answer: {candidate_answer}\n\n"
            + "Is the candidate answer acceptable? Give the best response from those below.\n\n"
            + "(A) Yes - the candidate is essentially the same as one of the known acceptable answers.\n"
            + "(B) Yes - the candidate is different from each of the known acceptable answers, but I am confident it is also correct.\n"
            + "(C) Perhaps - the candidate is different from each of the known acceptable answers, but it might be acceptable.\n"
            + "(D) No - the candidate is different from the known acceptable answers, and is incorrect.\n")
def parse_multichoice_response(long_response):
    """Extract the chosen option letter from an autorater reply.

    Args:
        long_response: the free-text reply from the autorater.

    Returns:
        One of 'A', 'B', 'C' or 'D'. A parenthesised letter such as "(B)"
        takes precedence; otherwise the first standalone capital A-D is
        used; if neither is present the result is 'C'.
    """
    # The options are written as "(A)" ... "(D)" in the prompt, so look for
    # that form first. Searching for a bare letter first would mistake the
    # English article "A" (e.g. "A close match, but ... (D)") for choice A.
    m = re.search(r'\(([ABCD])\)', long_response)
    if m:
        return m.group(1)

    # Fall back to a bare letter, e.g. "The answer is B." or "B) Yes".
    m = re.search(r'\b[ABCD]\b', long_response)
    if m:
        return m.group(0)

    return 'C'  # ie "perhaps"
# Driver: read the command line, load the dataset, then hand every example
# to util.run_eval together with the prompt builder and response parser.

# Parse arguments before touching the dataset so that --help and invalid
# arguments are reported without first reading the whole file.
parser = argparse.ArgumentParser(prog='explaining_ar')
parser.add_argument(
    '--start',
    type=int,  # argparse reports a usage error for non-integer values
    help='example number to start at - assumes previous examples stored in buf/*.jsonl')
args = parser.parse_args()
print(args)
if args.start is not None and args.start < 0:
    # A negative index would slice from the end of the data while padding
    # zero placeholder results, which is never what a resume means.
    parser.error('--start must be non-negative')

# One JSON object per line. Read as UTF-8 regardless of the locale, and skip
# blank lines (e.g. a trailing newline) which json.loads would reject.
with open('data/NQ_FiD.jsonl', encoding='utf-8') as fp:
    fid = [json.loads(line) for line in fp if line.strip()]

if args.start is None:
    # Fresh run over the whole dataset.
    results = util.run_eval(
        fid,
        explaining_ar_prompt,
        parse_multichoice_response,
        filestem='buf/explain',
        time_interval=60)
else:
    # Resume: evaluate only the examples from index `lo` onwards.
    lo = args.start
    # NOTE(review): service='mistral' is passed only on this resume path; the
    # fresh-run call above relies on util.run_eval's default service. Confirm
    # the two are meant to match.
    # last_chkpt_k / k / results presumably let run_eval continue its
    # numbering from `lo` - verify against util.run_eval.
    results = util.run_eval(
        fid[lo:],
        explaining_ar_prompt,
        parse_multichoice_response,
        service='mistral',
        filestem='buf/explain',
        time_interval=60,
        last_chkpt_k=lo,
        k=lo,
        # One distinct placeholder per skipped example (no shared dict).
        results=[{'dummy': None} for _ in range(lo)])