-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathevaluator.py
134 lines (121 loc) · 6.57 KB
/
evaluator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import sys
sys.path.append('../../pycocoevalcap/bleu')
sys.path.append('../../pycocoevalcap/rouge')
sys.path.append('../../pycocoevalcap/meteor')
sys.path.append('../../pycocoevalcap/cider')
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.cider.cider import Cider
import os
import pickle
import time
from _coco import _CocoCaptions
import torch
from torch.utils.data import DataLoader
class Evaluator(object):
    """Caption a dataset with an encoder/generator pair and score the result.

    Reference captions are read from a pickle file and compared against the
    generated captions with BLEU-1..4, ROUGE, METEOR and CIDEr.
    """

    # Metric labels, in the order scores are returned, printed and logged.
    _METRIC_NAMES = ('BLEU-1', 'BLEU-2', 'BLEU-3', 'BLEU-4', 'ROUGE', 'METEOR', 'CIDEr')

    def __init__(self, caption_file, stoi, itos, max_sentence_length, transforms, verbosity=1):
        """Load the reference captions and build the inference data loader.

        Args:
            caption_file: Path to a pickle holding a mapping from image path
                to a list of captions, each caption being a sequence of
                string tokens (they are joined with spaces here).
            stoi: Token-to-index vocabulary, forwarded to the dataset.
            itos: Index-to-token lookup used to turn decoder output into words.
            max_sentence_length: Forwarded to the dataset.
            transforms: Image transforms, forwarded to the dataset.
            verbosity: Scores are printed to stdout when this is > 0.
        """
        self.BLEU_evaluator = Bleu(n=4)
        self.Rouge_evaluator = Rouge()
        self.Meteor_evaluator = Meteor()
        self.Cider_evaluator = Cider()
        self.caption_file = caption_file
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # caption_file at files from a trusted source.
        with open(self.caption_file, 'rb') as caption_handle:
            self.references = pickle.load(caption_handle)
        # The scorers are fed plain strings, so join each token sequence.
        # Only values of existing keys are replaced, which is safe while
        # iterating over the mapping.
        for image_path, captions in self.references.items():
            self.references[image_path] = [' '.join(caption) for caption in captions]
        # One hypothesis slot per image, filled in by evaluate()/decode().
        self.hypotheses = {image_path: [''] for image_path in self.references}
        self.stoi = stoi
        self.itos = itos
        self.max_sentence_length = max_sentence_length
        self.transforms = transforms
        self.verbosity = verbosity
        self.data = _CocoCaptions(self.caption_file, 'inference', self.stoi,
                                  self.max_sentence_length, transforms=self.transforms)
        # batch_size=1: evaluate()/decode() read exactly one image path per batch.
        self.dataloader = DataLoader(self.data, batch_size=1, shuffle=False, num_workers=0)

    def _caption(self, encoder, generator, images):
        """Return the space-joined caption generated for one batch of images."""
        # The batch is moved with .cuda(), so a CUDA device is required.
        images = images.cuda()
        image_feature, mean_image_feature = encoder(images)
        tokens = generator.decode(image_feature, mean_image_feature)
        return ' '.join(self.itos[token] for token in tokens)

    def _score(self):
        """Score the current hypotheses; return a tuple ordered like _METRIC_NAMES."""
        # Bleu(n=4) yields four corpus-level scores; the other scorers yield one.
        [bleu_1, bleu_2, bleu_3, bleu_4], _ = self.BLEU_evaluator.compute_score(
            self.references, self.hypotheses)
        rouge, _ = self.Rouge_evaluator.compute_score(self.references, self.hypotheses)
        meteor, _ = self.Meteor_evaluator.compute_score(self.references, self.hypotheses)
        cider, _ = self.Cider_evaluator.compute_score(self.references, self.hypotheses)
        return bleu_1, bleu_2, bleu_3, bleu_4, rouge, meteor, cider

    def _print_report(self, label, elapsed, scores, model_info):
        """Print the timing and metric summary to stdout when verbosity > 0."""
        if self.verbosity > 0:
            if model_info != '':
                print(model_info)
            print('%s time : %.3fs.' % (label, elapsed))
            for name, value in zip(self._METRIC_NAMES, scores):
                print(name + ' :', value)

    def _metric_lines(self, scores):
        """Return the per-metric lines written to the result files."""
        return [name + ' :' + str(value) + '\n'
                for name, value in zip(self._METRIC_NAMES, scores)]

    def evaluate(self, encoder, generator, model_info=''):
        """Caption every image, score the captions and log the scores.

        The summary is written to ``<generator.model_name>_evaluate/evaluate.txt``,
        overwriting any previous file.

        Args:
            encoder: Callable returning ``(image_feature, mean_image_feature)``
                for a batch of images; must provide ``eval()``.
            generator: Object providing ``model_name``, ``eval()`` and
                ``decode(image_feature, mean_image_feature)``, which returns
                the token indices of the caption.
            model_info: Optional free-form text printed and logged before the scores.

        Returns:
            Tuple ``(BLEU_1, BLEU_2, BLEU_3, BLEU_4, ROUGE, METEOR, CIDEr)``.
        """
        start_time = time.time()
        output_dir = generator.model_name + '_evaluate'
        # exist_ok avoids the check-then-create race and also creates parents.
        os.makedirs(output_dir, exist_ok=True)
        result_file = os.path.join(output_dir, 'evaluate.txt')
        with torch.no_grad():
            encoder.eval()
            generator.eval()
            for images, image_path_index in self.dataloader:
                image_path = self.data.image_path[image_path_index[0]]
                self.hypotheses[image_path][0] = self._caption(encoder, generator, images)
        scores = self._score()
        elapsed = time.time() - start_time
        self._print_report('Evaluate', elapsed, scores, model_info)
        with open(result_file, 'w') as evaluate_log_file:
            evaluate_log_file.write(model_info)
            # Keep model_info on its own line instead of gluing it to the
            # timing line that follows.
            if model_info and not model_info.endswith('\n'):
                evaluate_log_file.write('\n')
            evaluate_log_file.write('Evaluate time : %.3fs.\n' % elapsed)
            evaluate_log_file.writelines(self._metric_lines(scores))
        return scores

    def decode(self, encoder, generator, model_info=''):
        """Caption every image, save each caption, then score and log.

        ``<generator.model_name>_decode/decode.txt`` receives one
        ``"<image_path> : <caption>"`` line per image, followed by
        ``model_info`` and the score summary. Any previous file is overwritten.

        Args:
            encoder: Callable returning ``(image_feature, mean_image_feature)``
                for a batch of images; must provide ``eval()``.
            generator: Object providing ``model_name``, ``eval()`` and
                ``decode(image_feature, mean_image_feature)``, which returns
                the token indices of the caption.
            model_info: Optional free-form text printed and logged before the scores.

        Returns:
            Tuple ``(BLEU_1, BLEU_2, BLEU_3, BLEU_4, ROUGE, METEOR, CIDEr)``.
        """
        start_time = time.time()
        output_dir = generator.model_name + '_decode'
        # exist_ok avoids the check-then-create race and also creates parents.
        os.makedirs(output_dir, exist_ok=True)
        result_file = os.path.join(output_dir, 'decode.txt')
        with open(result_file, 'w') as decode_result_file:
            with torch.no_grad():
                encoder.eval()
                generator.eval()
                for images, image_path_index in self.dataloader:
                    image_path = self.data.image_path[image_path_index[0]]
                    hypothesis_caption = self._caption(encoder, generator, images)
                    decode_result_file.write('%s : %s\n' % (image_path, hypothesis_caption))
                    self.hypotheses[image_path][0] = hypothesis_caption
            scores = self._score()
            elapsed = time.time() - start_time
            self._print_report('Decode', elapsed, scores, model_info)
            decode_result_file.write(model_info)
            decode_result_file.write('\n\nDecode time : %.3fs.\n' % elapsed)
            decode_result_file.writelines(self._metric_lines(scores))
        return scores