forked from linchengweiii/reversi-pygame
-
Notifications
You must be signed in to change notification settings - Fork 0
/
env.py
287 lines (217 loc) · 8.1 KB
/
env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
import numpy as np
import sys
import pygame
from pygamewrapper import PyGameWrapper
from pygame.constants import MOUSEBUTTONDOWN, MOUSEMOTION
import copy
class Environment(object):
    """
    env.Environment(
        game, fps=30,
        frame_skip=1, num_steps=1,
        reward_values={}, force_fps=True,
        display_screen=False, add_noop_action=True,
        NOOP=K_F15, state_preprocessor=None
    )
    Main wrapper that interacts with games.
    Provides a similar interface to Arcade Learning Environment.
    Parameters
    ----------
    game: Class from ple.games.base
        The game the PLE environment manipulates and maintains.
    fps: int (default: 30)
        The desired frames per second we want to run our game at.
        Typical settings are 30 and 60 fps.
    frame_skip: int (default: 1)
        The number of times we skip getting observations while
        repeat an action.
    num_steps: int (default: 1)
        The number of times we repeat an action.
    reward_values: dict or None (default: None)
        Reserved for custom reward configuration; currently unused.
    force_fps: bool (default: True)
        If False PLE delays between game.step() calls to ensure the fps is
        specified. If not PLE passes an elapsed time delta to ensure the
        game steps by an amount of time consistent with the specified fps.
        This is usually set to True as it allows the game to run as fast as
        possible which speeds up training.
    display_screen: bool (default: False)
        If we draw updates to the screen. Disabling this speeds up
        interaction speed. This can be toggled to True during testing phases
        so you can observe the agents progress.
    add_noop_action: bool (default: True)
        This inserts the NOOP action specified as a valid move the agent
        can make.
    state_preprocessor: python function (default: None)
        Python function which takes a dict representing game state and
        returns a numpy array.
    """

    def __init__(self,
                 game, fps=30, frame_skip=1, num_steps=1,
                 reward_values=None, force_fps=True, display_screen=False,
                 add_noop_action=True, state_preprocessor=None):
        # NOTE: reward_values previously defaulted to a mutable {}; it is
        # never read, so a None default is safer and fully compatible.
        self.game = game
        self.fps = fps
        self.frame_skip = frame_skip
        self.NOOP = None  # sentinel appended to the action set when add_noop_action is True
        self.num_steps = num_steps
        self.force_fps = force_fps
        self.display_screen = display_screen
        self.add_noop_action = add_noop_action
        self.last_action = []       # most recent action passed to _set_action
        self.action = []
        self.previous_scores = {}   # per-player scores at the previous reward query
        self.frame_count = 0        # total frames stepped since init
        self.init()
        self.state_preprocessor = state_preprocessor
        self.state_dim = None
        if self.state_preprocessor is not None:
            self.state_dim = self.game.get_game_state()
            if self.state_dim is None:
                raise ValueError(
                    "Asked to return non-visual state on game that does not support it!")
            else:
                self.state_dim = self.state_preprocessor(self.state_dim).shape

    def _tick(self):
        """
        Calculates the elapsed time between frames or ticks.
        """
        if self.force_fps:
            # Pretend exactly one ideal frame elapsed (milliseconds).
            return 1000.0 / self.fps
        else:
            # Actually wait so that real time matches the requested fps.
            return self.game.clock.tick_busy_loop(self.fps)

    def init(self):
        """
        Initializes the game. This depends on the game and could include
        doing things such as setting up the display, clock etc.
        This method should be explicitly called.
        """
        self.game.setup()
        self.game.init()  # this is the games setup/init

    def get_action_set(self):
        """
        Gets the actions the game supports. Optionally inserts the NOOP
        action if PLE has add_noop_action set to True.
        Returns
        --------
        list of pygame.constants
            The agent can simply select the index of the action
            to perform.
        """
        actions = self.game.actions
        # Accept either a dict (use its values) or any iterable of actions.
        # The previous implementation referenced an undefined name
        # `dict_values`, raising NameError for non-dict action containers.
        if isinstance(actions, dict):
            actions = actions.values()
        actions = list(actions)
        if self.add_noop_action:
            actions.append(self.NOOP)
        return actions

    def get_frame_number(self):
        """
        Gets the current number of frames the agent has seen
        since PLE was initialized.
        Returns
        --------
        int
        """
        return self.frame_count

    def game_over(self):
        """
        Returns True if the game has reached a terminal state and
        False otherwise.
        This state is game dependent.
        Returns
        -------
        bool
        """
        return self.game.game_over()

    def score(self):
        """
        Gets the score the agent currently has in game.
        Returns
        -------
        int
        """
        return self.game.get_scores()

    def reset_game(self):
        """
        Performs a reset of the games to a clean initial state.
        """
        self.last_action = []
        self.action = []
        self.previous_scores = {}
        self.game.reset()

    def get_screen_dims(self):
        """
        Gets the games screen dimensions.
        Returns
        -------
        tuple of int
            Returns a tuple of the following format (screen_width, screen_height).
        """
        return self.game.get_screen_dims()

    def get_game_state(self):
        """
        Gets a non-visual state representation of the game.
        This can include items such as player position, velocity, ball location and velocity etc.
        Returns
        -------
        dict or None
            It returns a dict of game information. This greatly depends on the game in question and must be referenced against each game.
            If no state is available or supported None will be returned back.
        Raises
        ------
        ValueError
            If the game does not expose a non-visual state.
        """
        state = self.game.get_game_state()
        if state is not None:
            if self.state_preprocessor is not None:
                return self.state_preprocessor(state)
            return state
        else:
            raise ValueError(
                "Was asked to return state vector for game that does not support it!")

    def act(self, action, event_type):
        """
        Perform an action on the game. We lockstep frames with actions. If act is not called the game will not run.
        Parameters
        ----------
        action : (x, y)
            The index of the action we wish to perform. The index usually corresponds to the index item returned by getActionSet().
        event_type : int
            event type
        Returns
        -------
        list
            Returns the reward that the agent has accumulated while performing the action.
        """
        return self._one_step_act(action, event_type)  # for i in range(self.frame_skip)

    def _draw_frame(self):
        """
        Decides if the screen will be drawn to.
        """
        self.game.draw_frame(self.display_screen)

    def _one_step_act(self, action, event_type):
        """
        Performs an action on the game. Checks if the game is over or if the provided action is valid based on the allowed action set.
        """
        self._set_action(action, event_type)
        for i in range(self.num_steps):
            time_elapsed = self._tick()
            self.game.step(time_elapsed)
            self._draw_frame()
        self.frame_count += self.num_steps
        return self._get_reward()

    def _set_action(self, action, event_type):
        """
        Instructs the game to perform an action if its not a NOOP
        """
        if action is not None:
            self.game.set_action(action, self.last_action, event_type)
            self.last_action = action

    def _get_reward(self):
        """
        Returns the reward the agent has gained as the difference between the last action and the current one.
        """
        # deepcopy so later in-place mutation of the game's score dict cannot
        # retroactively change our snapshot.
        sc = copy.deepcopy(self.game.get_scores())
        # Default of 2: each Reversi player starts the game with 2 discs,
        # so an unseen key is measured against the initial disc count.
        # NOTE(review): assumes a standard 8x8 Reversi start — confirm for variants.
        reward = {key: sc[key] - self.previous_scores.get(key, 2) for key in sc}
        self.previous_scores = sc
        return reward