# game_simulator.py -- 86 lines (71 loc), 3.28 KB.
# (GitHub page chrome and gutter line numbers from the scrape removed; the
# actual module source follows.)
import argparse
import game
import logging
import numpy as np
import game_model
import random
import tensorflow as tf
# Command-line flags for the simulator.
parser = argparse.ArgumentParser()
# BUG FIX: const was logging.INFO, identical to the default, so passing
# -d/--debug had no effect.  The flag now actually lowers the level to DEBUG.
parser.add_argument(
    '-d', '--debug',
    help="Print lots of debugging statements",
    action="store_const", dest="loglevel", const=logging.DEBUG,
    default=logging.INFO)
parser.add_argument('--use_ai', help="choose to use ai", action="store_true", default=False)
parser.add_argument('--model_path', help="directory prefix where model checkpoints live")
parser.add_argument('--log_name', default='/tmp/game.log')
# Help text fixed: it was a copy-paste of --use_ai's description.
parser.add_argument('--exp_name', help="experiment name appended to model_path", default='d1')
args = parser.parse_args()
# Route all logging to the chosen file at the chosen verbosity.
logging.basicConfig(filename=args.log_name, level=args.loglevel)
class GameSimulator(object):
    """Drives a game instance move-by-move, scoring candidate moves either
    with a restored TensorFlow model (use_ai=True) or with the heuristic
    scores the game itself attaches to each next board.
    """
    def __init__(self, game_instance, model_path, use_ai=True, exp_name="d1"):
        """Builds the model graph and restores the latest checkpoint.

        Args:
            game_instance: the game object to drive (see the `game` module).
            model_path: directory prefix where checkpoints live.
            use_ai: if True, moves are picked from the model's Q-values.
            exp_name: experiment name appended to model_path to locate the
                checkpoint directory.
        """
        self._game_instance = game_instance
        # "train"-mode graph with batch size 1: we only score one board at a time.
        self._game_model = game_model.GameModel("train", 1)
        # Tensor holding the per-direction action Q-values to fetch in eval().
        self._target_board_q = self._game_model.get_internal_variable().get("action_q")
        self._sess = tf.Session()
        self._use_ai = use_ai
        # Load the existing model.
        saver = tf.train.Saver()
        # NOTE(review): plain string concatenation -- model_path presumably
        # must end with a path separator, or exp_name fuses onto its last
        # component; confirm against how callers pass --model_path.
        ckpt = tf.train.get_checkpoint_state(model_path + exp_name)
        saver.restore(self._sess, ckpt.model_checkpoint_path)
    def eval(self):
        """Runs one forward pass and returns the model's Q-values for the
        current board (one value per direction)."""
        # NOTE(review): in Python 2, max(board, None, None) compares the list
        # against None (None sorts below everything) and returns the list
        # itself, so the third feed value is the raw board -- confirm this is
        # intended rather than max(board) (the highest tile).
        feed_dict = self._game_model.create_feed_dict(
            1, self._game_instance.get_board(), max(self._game_instance.get_board(), None, None))
        board_q_value = self._sess.run([self._target_board_q], feed_dict=feed_dict)
        # sess.run returns one result per fetch; unwrap fetch 0, batch row 0.
        return board_q_value[0][0]
    def get_game(self):
        # Accessor for the underlying game instance.
        return self._game_instance;
    def move(self):
        """Performs one move: scores every direction, then plays one of the
        best-scoring valid directions (ties broken at random)."""
        raw_directions = self._game_instance.get_valid_directions(False)
        board_score_dict = self._game_instance.get_next_boards(raw_directions)
        if self._use_ai:
            # Q-value per direction from the model.
            next_score_list = self.eval()
        else:
            # Heuristic score: element 1 of each entry in board_score_dict.
            # NOTE(review): Python-2-only -- relies on map() returning a list
            # for the [0:4] slice; under Python 3 this would need list(map(...)).
            next_score_list = map(lambda k: board_score_dict[k][1], board_score_dict)[0:4]
        # max score must be in the set of valid direction.
        max_score = max(map(lambda i: next_score_list[i], self._game_instance.get_valid_directions()))
        max_direction = []
        for i in xrange(0, 4):
            # The direction must be valid direction.
            # Near-equality (tolerance 1e-4) collects all directions tied for best.
            if i in self._game_instance.get_valid_directions() and abs(next_score_list[i] - max_score) < 0.0001:
                max_direction.append(i)
        self._game_instance.move(random.choice(max_direction))
if __name__ == "__main__":
simulator = GameSimulator(game.Game(), args.model_path, args.use_ai, args.exp_name)
step_list = [] # For stat
score_list = [] # For stat
total_simulation_counter = 1000
for x in xrange(total_simulation_counter):
simulator.get_game().reset()
while len(simulator.get_game().get_valid_directions()) > 0:
simulator.move()
simulator.get_game().display()
log_content = "step=%d\tmax=%d\tscore=%d" % (simulator.get_game().get_action_counter(),
max(simulator.get_game().get_board()),
simulator.get_game().get_score())
logging.info("Record id:%05d info: %s" % (x, log_content))
print "Final result: %05d %s" % (x, log_content)
step_list.append(simulator.get_game().get_action_counter())
score_list.append(simulator.get_game().get_score())
print "Average step %d, Average score %d" % (sum(step_list) / total_simulation_counter,
sum(score_list) / total_simulation_counter)