-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_utils.py
56 lines (48 loc) · 1.98 KB
/
data_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import yaml
import json
def load_yaml(file_path):
    """Parse the YAML file at *file_path* and return the loaded object.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's locale encoding.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document, or ``None``
        when the document is not valid YAML (the parser error is printed).

    Raises:
        OSError: If the file cannot be opened.
    """
    # Without an explicit encoding, open() falls back to the locale default,
    # which corrupts or rejects non-ASCII UTF-8 content on some platforms.
    with open(file_path, 'r', encoding='utf-8') as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Best-effort loader: report the parse failure and signal it
            # to the caller with None instead of propagating.
            print(exc)
            return None
def verify_response(response):
    """Tell whether *response* is a usable answer.

    String responses are stripped of surrounding whitespace first. The
    response is rejected when it is ``None``, when it equals the empty
    string, or when it contains ``"Response Error"``; otherwise it is
    accepted.

    Returns:
        ``True`` for an accepted response, ``False`` for a rejected one.
    """
    cleaned = response.strip() if isinstance(response, str) else response

    # Nothing to check: missing or blank response.
    if cleaned is None or cleaned == "":
        return False

    # The containment test is applied to whatever is left, string or not.
    return "Response Error" not in cleaned
def build_query(sample, config, strategy):
    """Compose the text prompt and ground-truth content for one sample.

    The prompt combines the sample's context, question and (for multiple
    choice samples) its lettered options; any ``<image_n>`` token present in
    those texts is left in place.

    Args:
        sample: Mapping read for ``'context'``, ``'question'``, ``'type'``
            and ``'answer'``, plus ``'options'`` when the type is
            ``'multiple choice'`` (compared case-insensitively).
        config: Mapping read for ``'multi_choice_format'`` or
            ``'open_ended_format'`` (``str.format`` templates) and for
            ``'Strategy_Instruction'`` with ``'CoT'`` / ``'Directly'`` entries.
        strategy: ``'CoT'`` selects the CoT instruction; any other value
            selects the ``'Directly'`` instruction.

    Returns:
        A new dict holding ``'query'`` and ``'gt_content'`` followed by every
        key of *sample*. Keys of *sample* win on a name clash.
    """
    context_text = sample['context']
    question_text = sample['question']
    is_multiple_choice = sample['type'].lower() == 'multiple choice'

    if is_multiple_choice:
        options = sample['options']
        # One "<letter>: <option>" line per option, lettered from 'A' upwards.
        option_block = "".join(
            f"{chr(ord('A') + position)}: {option}\n"
            for position, option in enumerate(options)
        )
        prompt = config['multi_choice_format'].format(
            context=context_text, question=question_text, options=option_block
        )
    else:
        prompt = config['open_ended_format'].format(
            context=context_text, question=question_text
        )

    instruction_key = 'CoT' if strategy == 'CoT' else 'Directly'
    query = prompt + config['Strategy_Instruction'][instruction_key]

    if is_multiple_choice:
        # The answer is a letter; map it to the text of the matching option.
        ground_truth = options[ord(sample['answer'].upper()) - ord('A')]
    else:
        ground_truth = sample['answer']

    result = {'query': query, 'gt_content': ground_truth}
    # Carry over every existing key and value of the sample.
    result.update(sample)
    return result