-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgogym_play.py
64 lines (49 loc) · 2.06 KB
/
gogym_play.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gym
from gym_go import gogame
from utils import *
# from neuralnet import *
# Settings for self-play, MCTS and arena evaluation.
# These values are expected to be tuned as we go.
args = dotdict({
    "numIters": 1000,

    # Number of complete self-play games to simulate during a new iteration.
    "numEps": 100,

    # NOTE(review): undocumented in the original - confirm what this
    # threshold controls before relying on it.
    "tempThreshold": 15,

    # During arena playoff, the new neural net is accepted if this
    # threshold or more of the games are won.
    "updateThreshold": 0.6,

    # Number of game examples to train the neural networks.
    "maxlenOfQueue": 200_000,

    # Number of game moves for MCTS to simulate.
    "numMCTSSims": 25,

    # Number of games to play during arena play to determine whether the
    # new net will be accepted.
    "arenaCompare": 40,

    "cpuct": 1,

    "checkpoint": "./temp/",
    "load_model": False,
    "load_folder_file": ("/dev/models/8x100x50", "best.pth.tar"),
    "numItersForTrainExamplesHistory": 20,
})
def main():
    """Play a short scripted sequence of moves on a 19x19 Go board.

    Creates the ``gym_go:go-v0`` environment (size 19, komi 0, 'real'
    reward method), resets it, renders the empty board, and then applies
    each hard-coded action in order.  After every move it prints the action,
    the rendered board, the first four entries and shape of
    ``past_states_w_player``, the valid-move array with its shape, and whose
    turn it is.

    Raises:
        AssertionError: if the history returned by ``step`` differs from the
            environment's own ``past_states_with_player`` attribute.
    """
    go_env = gym.make('gym_go:go-v0', size=19, komi=0, reward_method='real')
    # print(go_env)

    # The environment has to be reset before the first step.
    go_env.reset()
    # nnet = AlphaGoLite_Network()

    print("go_env.render('terminal'):\n")
    go_env.render('terminal')

    # initial_state_copy = go_env.state()
    # # visual representation of board
    # print(gogame.str(initial_state_copy))

    actions = ((18, 18), (0, 0), (0, 2), (2, 17))

    # NOTE(review): everything down to the "turn" print is assumed to belong
    # to the loop body - confirm against the intended layout.
    for action in actions:
        # Per the original author's note, the returned state is meant to be
        # usable for MCTS and evaluation; this demo only inspects the
        # history, so the other values are deliberately discarded.
        (_state, past_states_w_player), _reward, done, _info = go_env.step(action)

        # Sanity check: the history handed back by step() must match the
        # one stored on the environment.
        assert (go_env.past_states_with_player == past_states_w_player).all()

        print(f"Action taken: {action}")
        go_env.render('terminal')

        print("past_states_w_player:\n")
        print(past_states_w_player[:4], past_states_w_player.shape)

        print('\nvalid moves:\n')
        # Query once and reuse for both the values and the shape.
        valid_moves = go_env.valid_moves()
        print(valid_moves, valid_moves.shape)

        print(f"It is {go_env.turn()}'s turn\n")

        # Do not keep stepping an episode the environment reports as over.
        if done:
            break

    # valid_moves = go_env.valid_moves()[:-1].reshape((19, 19))
    # assert valid_moves[18, 18] == 0.


if __name__ == "__main__":
    main()