-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunner.py
41 lines (33 loc) · 1.26 KB
/
runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""
This is the machinnery that runs your agent in an environment.
This is not intented to be modified during the practical.
"""
import subprocess as sp
class Runner:
def __init__(self, environment, agent, name):
self.environment = environment
self.agent = agent
self.name = name
def step(self):
observation = self.environment.observe()
action = self.agent.action(observation)
reward, done = self.environment.act(action)
self.agent.update(observation, action, reward)
return (observation, action, reward, done)
def loop(self, n_episodes):
if self.name in ["RD", "QL", "SARSA"]:
list_sum_reward = []
for i in range(1, n_episodes + 1):
cumul_reward = 0.0
done = False
while not done:
(obs, act, rew, done) = self.step()
cumul_reward += rew
print("Episode ", i, " cumul reward ", cumul_reward)
self.environment.env.reset()
list_sum_reward.append(cumul_reward)
return list_sum_reward
elif self.name == "PI":
return self.agent.policy_iteration()
else:
return self.agent.value_iteration()