-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
52 lines (39 loc) · 1.77 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# run this file to see how my trained models work.
#
# Watch a pre-trained DQN agent play 5 episodes of the Unity "Banana"
# environment with a fully greedy policy (eps = 0) and print the
# average score over those episodes.
from unityagents import UnityEnvironment
import numpy as np
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from collections import namedtuple, deque

from model import QNetwork
from bufer import Replay_Buffer
from Agent import Agent

# please write the path of Banana.exe here)
env = UnityEnvironment(file_name="Banana_Windows_x86_64/Banana_Windows_x86_64/Banana.exe")

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Rebuild the agent (37-dim state, 4 discrete actions) and load the
# trained Q-network weights from disk.
agent = Agent(state_size=37, action_size=4, seed=0)
agent.qnetwork_local.load_state_dict(torch.load('Banana_saved_model.pth'))

eps = 0.      # greedy policy: always take the action with the highest Q-value
scores = []   # per-episode scores

for i in range(5):
    env_info = env.reset(train_mode=True)[brain_name]  # reset the environment
    state = env_info.vector_observations[0]            # get the current state
    score = 0                                          # initialize the score
    while True:
        action = agent.select_act(state, eps)          # select an action
        env_info = env.step(action)[brain_name]        # send the action to the environment
        next_state = env_info.vector_observations[0]   # get the next state
        reward = env_info.rewards[0]                   # get the reward
        done = env_info.local_done[0]                  # see if episode has finished
        score += reward                                # update the score
        state = next_state                             # roll over the state to next time step
        if done:                                       # exit loop if episode finished
            break
    scores.append(score)

print('Avg score:', np.mean(scores))
env.close()  # shut down the external Unity simulator process