get_rewards.py (forked from ruotianluo/self-critical.pytorch)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import time
import misc.utils as utils
from collections import OrderedDict
import torch
from torch.autograd import Variable
import sys
sys.path.append("cider")
from pyciderevalcap.ciderD.ciderD import CiderD
#from pyciderevalcap.cider.cider import Cider
CiderD_scorer = CiderD(df='coco-train-idxs')
#CiderD_scorer = CiderD(df='corpus')
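
# 'coco-train-idxs' loads n-gram document frequencies precomputed over the
# COCO training captions (built by the parent repo's prepro_ngrams script);
# the commented-out 'corpus' option computes df from the references instead.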

def array_to_str(arr):
    # Convert a sequence of word indices to a space-separated string,
    # stopping at (and including) the first 0, which marks end-of-sequence.
    out = ''
    for i in range(len(arr)):
        out += str(arr[i]) + ' '
        if arr[i] == 0:
            break
    return out.strip()
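
# For example, array_to_str(np.array([9, 4, 0, 0])) returns '9 4 0'
# (the token ids here are made up for illustration).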


def get_self_critical_reward(model, fc_feats, att_feats, data, gen_result):
    batch_size = gen_result.size(0)  # batch_size = sample_size * seq_per_img
    seq_per_img = batch_size // len(data['gts'])

    # Greedy decoding baseline: each sampled caption is rewarded relative
    # to what greedy search scores on the same image.
    greedy_res, _ = model.sample(Variable(fc_feats.data, volatile=True),
                                 Variable(att_feats.data, volatile=True))

    res = OrderedDict()
    gen_result = gen_result.cpu().numpy()
    greedy_res = greedy_res.cpu().numpy()
    # Candidates: entries [0, batch_size) are the sampled captions,
    # entries [batch_size, 2 * batch_size) are the greedy baselines.
    for i in range(batch_size):
        res[i] = [array_to_str(gen_result[i])]
    for i in range(batch_size):
        res[batch_size + i] = [array_to_str(greedy_res[i])]

    # References: one entry per image, a list of ground-truth caption strings.
    gts = OrderedDict()
    for i in range(len(data['gts'])):
        gts[i] = [array_to_str(data['gts'][i][j]) for j in range(len(data['gts'][i]))]

    #_, scores = Bleu(4).compute_score(gts, res)
    #scores = np.array(scores[3])
    res = [{'image_id': i, 'caption': res[i]} for i in range(2 * batch_size)]
    # Map every candidate (sampled and greedy) to its image's references:
    # (i % batch_size) // seq_per_img is the image index of candidate i.
    gts = {i: gts[(i % batch_size) // seq_per_img] for i in range(2 * batch_size)}
    _, scores = CiderD_scorer.compute_score(gts, res)
    print('Cider scores:', _)

    # Self-critical reward: sampled score minus greedy baseline score.
    scores = scores[:batch_size] - scores[batch_size:]

    # Broadcast each sequence-level reward to every time step of its caption.
    rewards = np.repeat(scores[:, np.newaxis], gen_result.shape[1], 1)

    return rewards
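
# Usage sketch: in self-critical training this reward typically feeds a
# policy-gradient criterion. The names below (`sample_logprobs`, `rl_crit`,
# the {'sample_max': 0} sampling option) follow the parent repo's train.py
# and are assumptions here, not part of this file:
#
#   gen_result, sample_logprobs = model.sample(fc_feats, att_feats, {'sample_max': 0})
#   reward = get_self_critical_reward(model, fc_feats, att_feats, data, gen_result)
#   loss = rl_crit(sample_logprobs, gen_result,
#                  Variable(torch.from_numpy(reward).float().cuda(), requires_grad=False))
#   loss.backward()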