forked from guyfe/LongSumm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_output_jsons.py
76 lines (53 loc) · 3.08 KB
/
parse_output_jsons.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
'''
Parses the JSON output files of models into the required format
for the evaluation script.
Author: Tomas + Zhihao
'''
import os, json
# Directory that receives the reformatted files read by the evaluation script.
annotations_dir = "./annotations"
# Directory containing the raw model output files (note the trailing slash:
# the file name is appended to it directly below).
input_json_dir = "./predicted_summaries/"

# ---------------------------------------------------------------------------
# Input selection: keep exactly one `input_json_file` assignment active.
# The commented-out names are alternatives kept for quick switching.
# ---------------------------------------------------------------------------
#input_json_file = "simplified_my_longsumm_test_AIC_news_pretrained_allenai_output.json"
#input_json_file = "simplified_my_longsumm_test_AIC_my_pretrained_allenai_output.json"
# input_json_file = "simplified_my_longsumm_test_SEC_my_pretrained_allenai_output.json"
# input_json_file = "simplified_my_longsumm_test_SEC_news_pretrained_allenai_output.json"
# input_json_file = "simplified_my_longsumm_test_SEC_news_pretrained_BART_output.jsonl"
#input_json_file = "simplified_my_longsumm_test_AIC_news_pretrained_BART_output.jsonl"
# input_json_file = "my_longsumm_test_abs_AIC_news_pretrained_BART_output.jsonl"
# input_json_file = "my_longsumm_test_abs_AIC_news_pretrained_BART_ft_output.jsonl"
# input_json_file = "my_longsumm_test_abs_doc_alt_arxiv_pretrained_BigBird_output.jsonl"
# input_json_file = "my_longsumm_test_abs_AIC_alt_arxiv_pretrained_BigBird_output.jsonl"
# input_json_file = "my_longsumm_test_abs_AIC_arxiv_pretrained_BigBird_output.jsonl"
# input_json_file = "my_longsumm_test_abs_SEC_news_pretrained_BART_ft_output.jsonl"
# input_json_file = "simplified_my_longsumm_test_abs_AIC_alt_arxiv_pretrained_BigBird_output.jsonl"
# input_json_file = "simplified_my_longsumm_test_abs_AIC_news_pretrained_BART_ft_output.jsonl"
# input_json_file = "simplified_my_longsumm_test_abs_AIC_news_pretrained_BART_output.jsonl"
# input_json_file = "simplified_my_longsumm_test_abs_doc_alt_arxiv_pretrained_BigBird_output.jsonl"
input_json_file = "simplified_my_longsumm_test_abs_SEC_news_pretrained_BART_ft_output.jsonl"

# Full path of the selected input: plain concatenation of directory and name.
input_json_filepath = f"{input_json_dir}{input_json_file}"
# Format for evaluation script
# {
# "paper_id_1":"summary of paper 1",
# "paper_id_2":"summary of the paper 2"
# }
# Load the model output. The file is read line by line and every non-blank
# line is parsed as one JSON object of the form
# { "ground_truth": str, "prediction": str, "simplified_prediction": str }.
#
# The context manager closes the file even if reading fails, and the explicit
# encoding keeps decoding independent of the platform's default locale.
with open(input_json_filepath, "r", encoding="utf-8") as input_json_file_contents:
    # Skip blank lines (e.g. a trailing newline at the end of the file):
    # json.loads raises JSONDecodeError on an empty document.
    input_json_lines = [line for line in input_json_file_contents if line.strip()]

# One id -> summary mapping per output file. The id is the zero-based position
# of the record among the non-blank lines, so the three mappings share ids.
output_dict_ground_truth = {}
output_dict_simple_pred = {}
output_dict_pred = {}
for i, line in enumerate(input_json_lines):
    input_dict = json.loads(line)
    output_dict_ground_truth[i] = input_dict["ground_truth"]
    output_dict_simple_pred[i] = input_dict["simplified_prediction"]
    output_dict_pred[i] = input_dict["prediction"]
# Write the three id -> summary mappings as pretty-printed JSON objects into
# `annotations_dir`. Integer keys are emitted by the json module as strings
# ("0", "1", ...).

# Create the output directory on first use instead of failing in open().
os.makedirs(annotations_dir, exist_ok=True)

# (file name, mapping) pairs, written in this order:
#   1. ground truth
#   2. predicted summary      -> input file name with "simplified_" removed
#   3. simplified prediction  -> input file name unchanged
# NOTE(review): when `input_json_file` does not contain "simplified_", entries
# 2 and 3 resolve to the same path and the simplified prediction overwrites
# the plain prediction. Confirm whether such inputs are expected here.
output_targets = (
    ("my_test_ground_truth.json", output_dict_ground_truth),
    (input_json_file.replace("simplified_", ""), output_dict_pred),
    (input_json_file, output_dict_simple_pred),
)
for out_name, summaries in output_targets:
    out_path = os.path.join(annotations_dir, out_name)
    with open(out_path, "w", encoding="utf-8") as out_file:
        json.dump(summaries, out_file, indent=4, sort_keys=False)
# test_annotations_testsplit.json