forked from aliyun/cflue
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexec_application_eval.py
65 lines (57 loc) · 2.05 KB
/
exec_application_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import argparse
import os

from evaluator.evaluator import load_models_tokenizer, load_llama_models_tokenizer
from utils.dataset import load_dataset
from utils.compute_score import *
from tqdm import tqdm
def eval_application(args):
    """Run the model over the application eval set, save its answers, then score them.

    Args:
        args: Parsed options object. This function reads ``model_name``
            (selects the loader and names the output file), ``eval_type``
            (forwarded to ``load_dataset``) and ``save_result_dir`` (directory
            the result JSON is written to). ``args`` is also forwarded as-is
            to the model loader and to ``get_application_score``.

    Side effects:
        When ``save_result_dir`` is set, writes
        ``<save_result_dir>/<model_name>_application_result.json`` and then
        calls ``get_application_score(args)``. When it is empty or ``None``,
        nothing is written and scoring is skipped, because the scorer reads
        that file.
    """
    # Load model & tokenizer; model names containing 'llama' use the dedicated loader.
    if 'llama' in args.model_name:
        model, tokenizer = load_llama_models_tokenizer(args)
    else:
        model, tokenizer = load_models_tokenizer(args)

    # Load the evaluation set.
    dataset = load_dataset(args.eval_type)

    # Ask the model once per record and collect the answers in row order.
    responses = []
    # iterrows() is a plain generator, so pass the length for a usable progress bar.
    for _, record in tqdm(dataset.iterrows(), total=len(dataset)):
        prompt = record['instruction']
        model_response, _ = model.chat(
            tokenizer,
            prompt,
            history=None,
        )
        responses.append(model_response)

    # Without an output directory there is nowhere to save the answers, and the
    # scorer has no file to read. Bail out instead of failing inside
    # os.path.join(None, ...) or on a missing result file.
    if not args.save_result_dir:
        print('save_result_dir is not set; responses were not saved and scoring is skipped.')
        return

    result_path = os.path.join(args.save_result_dir, f"{args.model_name}_application_result.json")
    dataset["model_response"] = responses
    os.makedirs(args.save_result_dir, exist_ok=True)
    dataset.to_json(result_path, orient='records', force_ascii=False)

    # Compute the application scores from the file just written.
    get_application_score(args)
def get_application_score(args):
    """Score the saved model responses for every application task.

    Reads ``<save_result_dir>/<model_name>_application_result.json`` and passes
    that path to each task scorer in turn.

    Args:
        args: Parsed options object; ``save_result_dir`` and ``model_name``
            are read to locate the result file.

    Returns:
        dict: Task name mapped to its metrics, with keys ``'QA'``, ``'TG'``,
        ``'MT-e2zh'``, ``'MT-zh2e'``, ``'TC'`` and ``'RE'``.
        NOTE(review): this dict used to be built and then discarded; it is now
        returned so callers can consume it.
    """
    # Build the path the same way the writer does so both sides always agree.
    file_path = os.path.join(args.save_result_dir, f'{args.model_name}_application_result.json')
    result = {}
    print(f'Model: {args.model_name}')

    # QA
    rouge_l, qa_bert = compute_finqa(file_path)
    result['QA'] = {'rouge-L': rouge_l,
                    'Bert': qa_bert}

    # TG (only the first and third returned values are reported)
    rouge_l_tg, _, tg_bert, _ = compute_text_generation(file_path)
    result['TG'] = {'rouge-L': rouge_l_tg,
                    'Bert': tg_bert}

    # MT-e2zh
    bleu, comet = compute_nmt_en2zh(file_path)
    result['MT-e2zh'] = {'BLEU': bleu,
                         'COMET': comet}

    # MT-zh2e
    bleu, comet = compute_nmt_zh2en(file_path)
    result['MT-zh2e'] = {'BLEU': bleu,
                         'COMET': comet}

    # TC
    acc, _ = compute_text_classification(file_path)
    result['TC'] = {'ACC': acc}

    # RE
    f1, _ = compute_extraction(file_path)
    result['RE'] = {'F1-score': f1}

    return result