-
Notifications
You must be signed in to change notification settings - Fork 0
/
Main.py
127 lines (102 loc) · 6.05 KB
/
Main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
'''
Train a DQN with MCTS.
Check whether the DQN agrees with MCTS (saving the best path),
and at what step the DQN starts to work with MCTS.
'''
# use both mcts trajectories and dqn trajectories in the dqn buffer
# use the selection path but with all children in the path
# rollout with dqn policy in mcts
import itertools
import threading
import time

import Utils as utils, Config as config
from Experiments.ExperimentObject import ExperimentObject
from Experiments.GridWorldExperiment import RunExperiment as GridWorld_RunExperiment
from Environments.GridWorldRooms import GridWorldRooms
from Agents.BaseDynaAgent import BaseDynaAgent
from Agents.RealBaseDynaAgent import RealBaseDynaAgent
from Agents.ImperfectDQNMCTSAgent import *
from Agents.MCTSAgent_Torch import *
# from Agents.MCTSAgent import MCTSAgent
# from Agents.DQNMCTSAgent import *
if __name__ == '__main__':
    # Agent under test. Alternatives are kept commented for quick switching
    # between experiment variants (the original author's workflow).
    # agent_class_list = [BaseDynaAgent]
    # agent_class_list = [DQNMCTSAgent_MCTSPolicy]
    # agent_class_list = [DQNMCTSAgent_InitialValue]
    # agent_class_list = [DQNMCTSAgent_BootstrapInitial]
    # agent_class_list = [DQNMCTSAgent_Bootstrap]
    # agent_class_list = [MCTSAgent]
    # agent_class_list = [DQNMCTSAgent_UseTreeExpansion]
    # agent_class_list = [DQNMCTSAgent_UseTreeSelection]
    # agent_class_list = [DQNMCTSAgent_Rollout]
    # agent_class_list = [DQNMCTSAgent_MCTSSelectedAction]
    # agent_class_list = [DQNMCTSAgent_UseSelectedAction]
    # agent_class_list = [DQNMCTSAgent_UseMCTSwPriority]
    # agent_class_list = [DQNMCTSAgent_InitialValue_offline]
    # agent_class_list = [DQNMCTSAgent_ReduceBreadth]
    # agent_class_list = [RealBaseDynaAgent]
    # agent_class_list = [ImperfectMCTSAgent]
    # agent_class_list = [ImperfectMCTSAgentUncertainty]
    agent_class_list = [ImperfectMCTSAgentIdeas]
    # agent_class_list = [MCTSAgent_Torch]

    # Hyperparameter grids. Each list currently holds a single value; widen
    # any list to sweep it (the full Cartesian product is run below).
    # s_vf_list = [2 ** -5, 2 ** -7, 2 ** -9, 2 ** -11]
    s_vf_list = [2 ** -12]                      # value-function step size
    # s_md_list = [2 ** -2, 2 ** -4, 2 ** -6, 2 ** -8, 2 ** -10, 2 ** -12, 2 ** -14, 2 ** -16]
    s_md_list = [2 ** -6]                       # model step size
    # c_list = [2 ** -1, 2 ** 0, 2 ** 0.5, 2 ** 1]
    c_list = [2 ** 0.5]                         # MCTS exploration constant
    num_iteration_list = [100]                  # [i for i in range(30, 40, 10)]
    simulation_depth_list = [75]
    # simulation_depth_list = [5, 10, 75]
    # num_simulation_list = [10]
    num_simulation_list = [1]

    # Network architecture specs for the (learned) model and value function.
    # model_list = [{'type':'forward', 'num_networks':1, 'layers_type':['fc'], 'layers_features':[128]},
    #               {'type': 'forward', 'num_networks': 2, 'layers_type': ['fc'], 'layers_features': [64]},
    #               {'type': 'forward', 'num_networks': 4, 'layers_type': ['fc'], 'layers_features': [32]}]
    model_list = [{'type': 'heter', 'layers_type': ['fc', 'fc'], 'layers_features': [32, 16], 'action_layer_num': 3}]
    vf_list = [
        # {'type': 'q', 'layers_type': ['fc', 'fc'], 'layers_features': [64, 64], 'action_layer_num': 3},
        # {'type': 'q', 'layers_type': ['fc', 'fc'], 'layers_features': [32, 32], 'action_layer_num': 3},
        # {'type': 'q', 'layers_type': ['fc', 'fc'], 'layers_features': [16, 16], 'action_layer_num': 3},
        # {'type': 'q', 'layers_type': ['fc', 'fc'], 'layers_features': [16, 8], 'action_layer_num': 3},
        # {'type': 'q', 'layers_type': ['fc', 'fc'], 'layers_features': [8, 8], 'action_layer_num': 3},
        {'type': 'q', 'layers_type': ['fc', 'fc'], 'layers_features': [4, 4], 'action_layer_num': 3}
    ]

    experiment = GridWorld_RunExperiment()

    # Build one ExperimentObject per point in the hyperparameter grid.
    # itertools.product replaces the original nine-level nested loop; the
    # argument order matches the original nesting, so iteration order (and
    # therefore experiment order) is unchanged.
    experiment_object_list = [
        ExperimentObject(agent_class, {'pre_trained': None,
                                       'vf_step_size': s_vf,
                                       'vf': vf,
                                       'model': model,
                                       'model_step_size': s_md,
                                       'c': c,
                                       'num_iteration': num_iteration,
                                       'simulation_depth': simulation_depth,
                                       'num_simulation': num_simulation})
        for (agent_class, s_vf, model, vf, s_md, c,
             num_iteration, simulation_depth, num_simulation)
        in itertools.product(agent_class_list, s_vf_list, model_list, vf_list,
                             s_md_list, c_list, num_iteration_list,
                             simulation_depth_list, num_simulation_list)
    ]

    # x = time.time()
    # detail = "Env = 4room - 4x4; Not keep subtree; max_episode = 100; Pretrained DQN - DQN VF: 16x8 dqn_vf_9.p"
    # detail = "Env = Empty Room; _n = 20; max_episode = 100; Pretrained DQN - DQN VF: 16x8 dqn_vf_5.p"
    # experiment.run_experiment(experiment_object_list, result_file_name="DQNMCTS_InitialValue_PretrainedDQN_AutoImperfect15", detail=detail)
    # experiment.run_experiment(experiment_object_list, result_file_name="DQNMCTS_BootstrapInitial_PretrainedDQN_AutoImperfect15", detail=detail)
    # experiment.run_experiment(experiment_object_list, result_file_name="DQNMCTS_BootstrapInitial_PretrainedDQN_AutoImperfect_prob=0.025_step=1", detail=detail)
    # detail = "Env = 4room - 4x4; Not keep subtree; max_episode = 100"
    # experiment.run_experiment(experiment_object_list, result_file_name="ddd", detail=detail)
    detail = "Env = 4room - 4x4; Not keep subtree; max_episode = 100"
    experiment.run_experiment(experiment_object_list,
                              result_file_name="ImperfectMCTSAgentIdeas_S7P25_SelectionDivLinear2m4_2",
                              detail=detail)
    # detail = "Env = Empty Room; _n = 20; max_episode = 100"
    # experiment.run_experiment(experiment_object_list, result_file_name="MCTS_BestParameter", detail=detail)
    # print(time.time() - x)