-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEval_ICR_agent.py
72 lines (67 loc) · 2.74 KB
/
Eval_ICR_agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 11 12:40:07 2018
@author: ESC
"""
from Incremental_greedy_agent import ICR_greedy_agent,env_ICR_GreedyAgent
# Evaluate the incremental greedy agent with different initial reward
# estimation values (0, 0.1, 1 and 10).  For each initial value a fresh
# agent/environment pair is created NUM_RUNS times and driven for
# STEPS_PER_RUN steps; the mean of the per-run sum_rewards() is printed.

# First argument passed to ICR_greedy_agent.
# NOTE(review): presumably the number of actions (it equals the length of
# the reward vector below) -- confirm against Incremental_greedy_agent.
NUM_ACTIONS = 10
# Number of independent agent/environment pairs per initial value.
NUM_RUNS = 10
# Number of do_action/update_memory cycles in a single run.
STEPS_PER_RUN = 100
# Passed to the environment as ``binnary_reward_vector``.
# NOTE(review): looks like one binary-reward parameter per action -- confirm.
BINARY_REWARD_VECTOR = (0.1, 0.5, 0.9, 0.01, 0.99, 0.02, 0, 0.74, 0.3, 0.17)


def _run_experiment(initial_value, n_runs=NUM_RUNS, n_steps=STEPS_PER_RUN):
    """Run the greedy agent repeatedly for one initial reward estimate.

    Prints the ``initial rewards=...`` label, performs ``n_runs`` runs of
    ``n_steps`` steps each (a new agent and a new environment per run),
    prints the mean of the values returned by ``sum_rewards()`` and then
    waits for the user to press Enter.

    Args:
        initial_value: second argument handed to ``ICR_greedy_agent``.
        n_runs: how many agent/environment pairs to create.
        n_steps: how many action/update cycles each pair performs.

    Returns:
        A list holding the ``sum_rewards()`` result of every run.
    """
    print('initial rewards={}'.format(initial_value))
    run_totals = []
    for _ in range(n_runs):
        agent = ICR_greedy_agent(NUM_ACTIONS, initial_value)
        # A fresh list per environment, exactly as the original literals
        # did, so no environment can share state through the vector.
        environment = env_ICR_GreedyAgent(
            binnary_reward=True,
            binnary_reward_vector=list(BINARY_REWARD_VECTOR))
        for _ in range(n_steps):
            action = agent.do_action()
            agent.update_memory(environment.return_reward(action))
        run_totals.append(agent.sum_rewards())
    print('average reward: \t',
          sum(run_totals) / len(run_totals))
    input("Press Enter to continue...")
    return run_totals


print('Incremental Greedy agent with different Values for the reward:')
# first: initial value 0
agent_SumRewards_zero = _run_experiment(0)
# second: initial value 0.1
agent_SumRewards_p_one = _run_experiment(0.1)
# third: initial value 1
agent_SumRewards_one = _run_experiment(1)
# fourth: initial value 10
agent_SumRewards_ten = _run_experiment(10)