#!/usr/bin/env python3
"""
2020.07.05
Reload trained q to add more training iterations
if on dkm desktop, use hash -r
2020.07.03
I'll initialize 2 types of training wherein
(1) Train for a fixed period of epoch moves
(2) Train until the 2048 tile is created OR the game is over.
To do: make a text file that prints the success of each episode.
2020.06.27
The following setup trains an RL agent
and observes the moves the agent takes
via a visualization (GameGrid).
"""
# Numerics
import numpy as np
import random
# RL packages
import gym
from ai.agent2048 import GameGrid
import ai.parameters as params
from ai.rl_model import QLearn, action_dict
# Save files
import pickle as pkl
def run_model(env, qlearn, all_frames, maxepoch=1e4, savetmp=None, tmpsave=500):
    """
    Training regime for models that train for a fixed number of moves per episode.
    env - the RL environment
    qlearn - the Q-learning agent
    all_frames - list accumulating the per-episode frame histories
    maxepoch - stop an episode after this many moves (if None, train until the game condition is met)
    savetmp - directory for the score log and periodic temporary saves (None disables file output)
    tmpsave - number of episodes between temporary saves
    """
    countr = 0
    score = 0
    outputtxt = None
    if savetmp is not None:
        scorefile = "_".join(["scores", str(params.Alpha), str(params.Gamma), str(params.Epsilon)])
        outputtxt = open(savetmp + scorefile + ".csv", "w")
        outputtxt.write("Episode\tMove\tScore\tGameStatus(W/L)\t\n")
    for eps in range(params.N_episodes):
        print("Training episode = ", eps + 1, "/", params.N_episodes)
        env.reset()
        # Frame 0 records the initial board before any move is taken.
        frames = {0: {"state": env._get_observation(),
                      "reward": 0,
                      "total_score": 0,
                      "action": "start",
                      "game_over": False}}
        done = False
        epoch = 0
        while not done:
            if outputtxt is not None:
                line = [eps + 1, epoch + 1, score, env.Game.game_over, "\n"]
                outputtxt.write("\t".join([str(i) for i in line]))
            currState = env._get_observation()
            action = qlearn.chooseAction(currState)
            # Updated state, reward, done (2048 reached or game over), new board score
            nextState, reward, done, score = env.step(action)
            qlearn.learn(currState, action, reward, nextState)
            # Store this move at epoch + 1 so frame 0 (the start state) is preserved.
            frames.update({epoch + 1: {
                'state': nextState,
                'action': params.action_dict[action],
                'reward': reward,
                'total_score': score,
                'game_over': env.Game.game_over}
            })
            # Iterate the counters
            epoch += 1
            if savetmp is not None and countr >= tmpsave:
                print("Saving temporary file.")
                countr = 0
                qtmp = "_".join(["qlearn_tmp", str(params.Alpha), str(params.Gamma), str(params.Epsilon)])
                ftmp = "_".join(["states_tmp", str(params.Alpha), str(params.Gamma), str(params.Epsilon)])
                with open(savetmp + qtmp + ".pkl", "wb") as f:
                    pkl.dump(qlearn, f)
                with open(savetmp + ftmp + ".pkl", "wb") as f:
                    pkl.dump(all_frames, f)
            if maxepoch is not None and epoch >= maxepoch:
                done = True
        all_frames.append(frames)
        countr += 1
        if outputtxt is not None:
            outputtxt.write("\n\n")
    print("All episodes completed")
    if savetmp is not None:
        qname = "_".join(["qlearn", str(params.N_episodes), str(params.Alpha), str(params.Gamma), str(params.Epsilon)])
        sname = "_".join(["states", str(params.N_episodes), str(params.Alpha), str(params.Gamma), str(params.Epsilon)])
        with open(savetmp + qname + ".pkl", "wb") as f:
            pkl.dump(qlearn, f)
        with open(savetmp + sname + ".pkl", "wb") as f:
            pkl.dump(all_frames, f)
        outputtxt.close()
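# Sketch of training regime (2) from the header notes: run each episode until the
# 2048 tile is created OR the game is over, with no fixed move cap. This is a
# minimal sketch, not the tested path; the function name run_model_until_done is
# hypothetical, and it assumes env.step's `done` flag covers both the win and the
# game-over condition, as run_model above already assumes. The same behaviour can
# also be reached through run_model(..., maxepoch=None) with file output enabled.
def run_model_until_done(env, qlearn, all_frames):
    """Train until each episode ends on its own (2048 reached or no moves left)."""
    for eps in range(params.N_episodes):
        print("Training episode = ", eps + 1, "/", params.N_episodes)
        env.reset()
        # Frame 0 records the initial board before any move is taken.
        frames = {0: {"state": env._get_observation(),
                      "reward": 0,
                      "total_score": 0,
                      "action": "start",
                      "game_over": False}}
        done = False
        epoch = 0
        while not done:
            currState = env._get_observation()
            action = qlearn.chooseAction(currState)
            nextState, reward, done, score = env.step(action)
            qlearn.learn(currState, action, reward, nextState)
            frames[epoch + 1] = {'state': nextState,
                                 'action': params.action_dict[action],
                                 'reward': reward,
                                 'total_score': score,
                                 'game_over': env.Game.game_over}
            epoch += 1
        all_frames.append(frames)
    return all_frames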
# --------------------- #
if __name__ == "__main__":
print("Learning Rate:", params.Alpha, " Reward scaling:", params.Gamma, " Exploration:", params.Epsilon)
#Save directory
savedir = "data/"
# Reload last qlearn, if None, restart.
qlearnfile = None
#qlearnfile = "/home/natasha/proj/2048/data/train_1000/qlearn_eps1000.pkl"
# Initialize a random seed for reproducibility
random.seed(314159)
np.random.seed(123)
# Create the environment
env = gym.make('gym_2048:game2048-v0')
env.seed(456)
env.action_space.seed(789)
# Set-up learning algorithm
if qlearnfile is None:
print("Learning with naive prior")
qlearn = QLearn(actions=range(env.action_space.n),
alpha=params.Alpha,
gamma=params.Gamma,
epsilon=params.Epsilon)
all_frames = []
else:
print("Learning from old file=", qlearnfile)
#Load the previous policy
with open(qlearnfile, 'rb') as f:
qlearn = pkl.load(f)
#Load the previous history
framefile = qlearnfile.split('qlearn')
framefile = "".join([framefile[0], 'states', framefile[1]])
with open(framefile, 'rb') as f:
all_frames = pkl.load(f)
run_model(env, qlearn, all_frames, maxepoch=1e4, savetmp=savedir)
#g = GameGrid(frames[0]['state'], traj, 1.5)
#g.mainloop()