-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreward_drawer.py
50 lines (43 loc) · 1.17 KB
/
reward_drawer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import matplotlib.pyplot as plt
import numpy as np
# --- Load the reward logs -------------------------------------------------
# Log files are read from rewards_log/<name><k>.log for k = 1..episodes;
# each file is parsed as whitespace-separated floats.
episodes = 3400
name = 'SSbot'
mean_rewards = []  # one value per log file: its mean reward, or -1 if empty
all_rewards = []   # every reward from every file, concatenated in file order

for episode in range(1, episodes + 1):
    log_path = 'rewards_log/' + name + str(episode) + '.log'
    with open(log_path, 'r') as log_file:
        episode_rewards = [float(tok) for tok in log_file.read().split()]

    if not episode_rewards:
        # Empty log: keep the per-episode list aligned with the file index
        # by recording a -1 placeholder.
        mean_rewards.append(-1)
        continue

    all_rewards.extend(episode_rewards)
    mean_rewards.append(sum(episode_rewards) / len(episode_rewards))
# --- Figure 1: rewards by step ---------------------------------------------
# Plot every individual reward against its 1-based position in all_rewards
# and overlay a degree-2 polynomial fit (np.polyfit) as a trend curve.
step_axis = range(1, 1 + len(all_rewards))
step_coeffs = np.polyfit(step_axis, all_rewards, 2)
step_trend = np.poly1d(step_coeffs)

plt.plot(step_axis, all_rewards)
plt.title('rewards by step')
plt.plot(step_axis, step_trend(step_axis), "r", lw=3)  # thick red trend
plt.show()
# --- Figure 2: rewards by episode ------------------------------------------
# Plot one mean reward per log file (x is the 1-based file index) together
# with a degree-2 polynomial fit. Files whose log was empty show up as the
# -1 placeholder stored by the loading loop.
episode_axis = range(1, 1 + len(mean_rewards))
episode_trend = np.poly1d(np.polyfit(episode_axis, mean_rewards, 2))

plt.plot(episode_axis, mean_rewards)
plt.plot(episode_axis, episode_trend(episode_axis), "r", lw=3)  # red trend
plt.title('rewards by episode')
plt.show()
# --- Figure 3: average rewards ---------------------------------------------
# Smooth the per-step rewards by averaging them over consecutive,
# non-overlapping windows of `window` samples, then plot the window means
# with a degree-2 polynomial trend curve.
window = 10000
rw = np.array(all_rewards)

# Largest multiple of `window` that fits in the data; the incomplete tail is
# dropped so the array can be reshaped into full rows. Floor division (//)
# keeps `size` an integer on both Python 2 and Python 3 -- true division
# would yield a float, which is not a valid slice bound or shape.
size = rw.shape[0] // window * window
print(size)  # number of samples actually used (works as statement or call)
rw = rw[:size]

size //= window  # from here on `size` is the number of complete windows
rw = rw.reshape((size, window))  # one row per window

points = range(size)
slope_rewards = rw.mean(axis=1)  # mean reward of each window
plt.plot(points, slope_rewards)
plt.title('average rewards')

# np.polyfit raises on empty input, which happens when fewer than `window`
# rewards were logged; in that case show the (empty) plot without a trend.
if size:
    z = np.polyfit(points, slope_rewards, 2)
    p = np.poly1d(z)
    plt.plot(points, p(points), "r", lw=3)  # thick red trend curve
plt.show()