
Commit d3fb093

graph and checkpoint folder structure, md hyperlink
1 parent bfeeb3b commit d3fb093

14 files changed (+25 −29 lines)

DRLTP1PolicyGradient/REINFORCEplayingloop.py

+1 −1
@@ -60,7 +60,7 @@ def play_REINFORCE_agent_discrete(env='CartPole-v0'):
     saver = tf_cv1.train.Saver()

     with tf_cv1.Session() as sess:
-        saver.restore(sess, 'checkpoint_directory/REINFORCE_agent-49')
+        saver.restore(sess, 'graph/saved_training/REINFORCE_agent-39')

         while True: #keep playing
         # for run in range(3): #recorder version
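The restore path above follows the reorganized folder structure this commit introduces (checkpoints now live under graph/). Below is a minimal, self-contained sketch of the tf.compat.v1 save/restore round trip against such a prefix; the toy variable and checkpoint name are illustrative stand-ins, not the repo's REINFORCE graph.

```python
import os
import tensorflow as tf

tf_cv1 = tf.compat.v1  # same shortcut the repo uses
if hasattr(tf_cv1, "disable_eager_execution"):
    tf_cv1.disable_eager_execution()  # needed for graph-mode Sessions when running under TF2

# Toy parameter standing in for the policy variables (illustrative only).
theta = tf_cv1.get_variable("theta", shape=[4, 2], initializer=tf_cv1.zeros_initializer())

saver = tf_cv1.train.Saver()
os.makedirs("graph/saved_training", exist_ok=True)

with tf_cv1.Session() as sess:
    sess.run(tf_cv1.global_variables_initializer())
    # Writes checkpoint files under the prefix 'graph/saved_training/toy_agent-39'.
    saver.save(sess, "graph/saved_training/toy_agent", global_step=39)

with tf_cv1.Session() as sess:
    # restore() takes the same '<prefix>-<global_step>' string, as in the playing loop above.
    saver.restore(sess, "graph/saved_training/toy_agent-39")
    print(sess.run(theta).shape)  # (4, 2)
```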

DRLTP1PolicyGradient/REINFORCEtrainingloop.py

+9 −10
@@ -5,19 +5,18 @@
 # region ::Import statement ...
 import tensorflow as tf
 tf_cv1 = tf.compat.v1 # shortcut
+import tensorflow.python.util.deprecation as deprecation
+deprecation._PRINT_DEPRECATION_WARNINGS = False
+
 import numpy as np
 import matplotlib.pyplot as plt
 from datetime import datetime

+from REINFORCEbrain import REINFORCE_policy
 from blocAndTools import buildingbloc as bloc
 from blocAndTools.buildingbloc import ExperimentSpec, GymPlayground
-from REINFORCEbrain import REINFORCE_policy
 from blocAndTools.visualisationtools import ConsolPrintLearningStats
 from blocAndTools.samplecontainer import TrajectoryCollector, UniformBatchCollector
-
-# import tensorflow_weak_warning_supressor as no_cpu_compile_warn
-# no_cpu_compile_warn.execute()
-
 from blocAndTools.rl_vocabulary import rl_name
 vocab = rl_name()
 # endregion
@@ -30,7 +29,7 @@
 """ --- TensorBoard ----------------------------------------------------------------------------------------------------

 Start TensorBoard in terminal:
-    tensorboard --logdir=DRLTP1PolicyGradient/graph/
+    tensorboard --logdir=DRLTP1PolicyGradient/graph/runs

 In browser, go to:
     http://0.0.0.0:6006/
@@ -117,7 +116,7 @@ def train_REINFORCE_agent_discrete(render_env=None, discounted_reward_to_go=None
     if RENDER_ENV is not None:
         render_env = RENDER_ENV

-    print("\n\n:: Environment rendering: {}".format(render_env))
+    print("\n\n:: Environment rendering: {}\n\n".format(render_env))

     consol_print_learning_stats = ConsolPrintLearningStats(exp_spec, exp_spec.print_metric_every_what_epoch)

@@ -152,7 +151,7 @@ def train_REINFORCE_agent_discrete(render_env=None, discounted_reward_to_go=None


     """ ---- Setup parameters saving ---- """
-    saver = tf.train.Saver()
+    saver = tf_cv1.train.Saver()


     """ ---- Warm-up the computation graph and start learning! ---- """
@@ -258,8 +257,8 @@ def train_REINFORCE_agent_discrete(render_env=None, discounted_reward_to_go=None

             """ ---- Save learned model ---- """
             if batch_average_trjs_return == 200:
-                saver.save(sess, 'checkpoint_directory/REINFORCE_agent', global_step=epoch)
-                print("\n::Policy_theta parameters were saved\n")
+                saver.save(sess, 'graph/checkpoint_directory/REINFORCE_agent', global_step=epoch)
+                print("\n\n :: Policy_theta parameters were saved\n")

         consol_print_learning_stats.print_experiment_stats()
         writer.close()
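The logdir change above (graph/runs) pairs with the per-run folder naming used elsewhere in the repo (see the "Run--..." format in referenceimplementation.py below). Here is a minimal sketch of that layout, assuming a tf.compat.v1 graph; the folder names follow this commit's structure, while the placeholder op and the print are illustrative only.

```python
import os
from datetime import datetime

import tensorflow as tf

tf_cv1 = tf.compat.v1
if hasattr(tf_cv1, "disable_eager_execution"):
    tf_cv1.disable_eager_execution()  # graph mode, as in the training loop

# One subdirectory per run under graph/runs, so
# `tensorboard --logdir=DRLTP1PolicyGradient/graph/runs` lists every run side by side.
date_now = datetime.now()
run_str = "Run--{}h{}--{}-{}-{}".format(date_now.hour, date_now.minute,
                                        date_now.day, date_now.month, date_now.year)
log_dir = os.path.join("graph", "runs", run_str)

# Illustrative op so the exported graph is not empty.
dummy = tf_cv1.placeholder(tf.float32, shape=(), name="dummy")

writer = tf_cv1.summary.FileWriter(log_dir, tf_cv1.get_default_graph())
writer.close()
print("TensorBoard event file written under:", log_dir)
```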

DRLTP1PolicyGradient/blocAndTools/buildingbloc.py

+6 −9
@@ -7,11 +7,10 @@
 # import env_spec_pretty_printing
 import numpy as np
 import tensorflow as tf
-
 tf_cv1 = tf.compat.v1 # shortcut

-# import tensorflow_weak_warning_supressor as no_cpu_compile_warn
-# no_cpu_compile_warn.execute()
+import tensorflow.python.util.deprecation as deprecation
+deprecation._PRINT_DEPRECATION_WARNINGS = False

 from blocAndTools.rl_vocabulary import rl_name
 vocab = rl_name()
@@ -107,10 +106,9 @@ def set_experiment_spec(self, dict_param: dict):

         self._assert_param()

-        print("\n\n>>> Switching to parameter: {}".format(self.paramameter_set_name),
+        print("\n\n:: Switching to parameter: {}".format(self.paramameter_set_name),
               self.get_agent_training_spec(),
-              self.get_neural_net_spec(),
-              "\n")
+              self.get_neural_net_spec())
         return None


@@ -173,12 +171,12 @@ def __init__(self, environment_name='LunarLanderContinuous-v2', print_env_info=F

         info_str = ""
         if isinstance(self._env.action_space, gym.spaces.Box):
-            info_str += "\n\n>>> Action space is Contiuous"
+            info_str += "\n\n:: Action space is Contiuous\n"
             self.ACTION_SPACE = self._env.action_space
             dimension = self.ACTION_SPACE.shape
             self.ACTION_CHOICES = [*dimension][-1]
         else:
-            info_str += "\n\n>>> Action space is Discrete"
+            info_str += "\n\n:: Action space is Discrete\n"
             self.ACTION_SPACE = self._env.action_space
             self.ACTION_CHOICES = self.ACTION_SPACE.n

@@ -201,7 +199,6 @@ def __init__(self, environment_name='LunarLanderContinuous-v2', print_env_info=F
     def env(self) -> Union[TimeLimit, Any]:
         return self._env

-
     def get_environment_spec(self):
         """
         Return specification related to the gym environment
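The GymPlayground hunks above touch the branch that reports whether the action space is continuous (Box) or discrete and how many action choices it exposes. As a standalone illustration of that branch, assuming the gym package is installed; the helper name is hypothetical, not the repo's class.

```python
import gym

def action_choices(env) -> int:
    """Mirror the GymPlayground branch: last Box dimension, or Discrete.n."""
    if isinstance(env.action_space, gym.spaces.Box):
        # Continuous case, e.g. 'LunarLanderContinuous-v2' (requires Box2D) -> 2
        return env.action_space.shape[-1]
    # Discrete case, e.g. 'CartPole-v0' -> 2
    return env.action_space.n

if __name__ == "__main__":
    print(action_choices(gym.make("CartPole-v0")))  # -> 2
```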

DRLTP1PolicyGradient/blocAndTools/referenceimplementation.py

+1 −1
@@ -105,7 +105,7 @@ def train(env_name='CartPole-v0', hidden_sizes=[32], lr=1e-2, epochs=50, batch_s
     # \\\\\\ My bloc \\\\\\
     date_now = datetime.now()
     run_str = "Run--{}h{}--{}-{}-{}".format(date_now.hour, date_now.minute, date_now.day, date_now.month, date_now.year)
-    writer = tf_cv1.summary.FileWriter("./test_integration/graph/integration_test/{}".format(run_str), tf_cv1.get_default_graph())
+    writer = tf_cv1.summary.FileWriter("./test_integration/graph/{}".format(run_str), tf_cv1.get_default_graph())

     the_TRAJECTORY_COLLECTOR = TrajectoryCollector(exp_spec, playground)    # \\\\\\ My bloc \\\\\\
     the_UNI_BATCH_COLLECTOR = UniformBatchCollector(exp_spec.batch_size_in_ts)    # \\\\\\ My bloc \\\\\\

DRLTP1PolicyGradient/blocAndTools/visualisationtools.py

+5 −5
@@ -148,11 +148,11 @@ def epoch_training_stat(self, epoch_loss, epoch_average_trjs_return, epoch_avera
         smoothed_return = self.return_smoothing_buffer / self.print_metric_every
         smoothed_lenght = self.lenght_smoothing_buffer / self.print_metric_every
         print(
-            "\r ↳ {:^3}".format(self.epoch),
+            "\r ↳ {:^3}".format(self.epoch),
             ":: Collected {} trajectories for a total of {} timestep.".format(
                 self.number_of_trj_collected, self.total_timestep_collected),
-            "\n ↳ pseudo loss: {:>6.3f} ".format(self.epoch_loss),
-            "| average trj return: {:>6.3f} | average trj lenght: {:>6.3f}".format(
+            "\n ↳ pseudo loss: {:>6.2f} ".format(self.epoch_loss),
+            "| average trj return: {:>6.2f} | average trj lenght: {:>6.2f}".format(
                 self.average_trjs_return, self.average_trjs_lenght),
             end="\n", flush=True)

@@ -194,7 +194,7 @@ def trajectory_training_stat(self, the_trajectory_return, timestep) -> None:
         :return:
         :rtype: None
         """
-        print("\r ↳ {:^3} :: Trajectory {:>4} ".format(self.epoch, self.trj),
+        print("\r ↳ {:^3} :: Trajectory {:>4} ".format(self.epoch, self.trj),
               ">"*self.cycle_indexer.i, " "*self.cycle_indexer.j,
               " got return {:>8.2f} after {:>4} timesteps".format(
                   the_trajectory_return, timestep),
@@ -233,7 +233,7 @@ def ultra_basic_ploter(epoch_average_return: list, epoch_average_loss: list, epo

     x_axes = np.arange(0, len(epoch_average_return)) * metric_computed_every_what_epoch
     ax.plot(x_axes, epoch_average_return, label='Average Return')
-    ax.plot(x_axes, epoch_average_loss, label='Average loss')
+    ax.plot(x_axes, epoch_average_loss, label='Average pseudo loss')
     ax.plot(x_axes, epoch_average_lenght, label='Average lenght')

     plt.xlabel('Epoch')
3 binary files changed — not shown.

DRLTP1PolicyGradient/tests/test_integration/test_intergrationreinforce.py

+1 −1
@@ -5,7 +5,7 @@

 """
 Start TensorBoard in terminal:
-    tensorboard --logdir=tests/graph/integration_test
+    tensorboard --logdir=DRLTP1PolicyGradient/tests/graph/integration_test

 In browser, go to:
     http://0.0.0.0:6006/

README.md

+2 −2
@@ -67,13 +67,13 @@ python REINFORCEtrainingloop.py

 **To navigate trough the computation graph in TensorBoard**
 ```bash
-tensorboard --logdir=DRL-TP1-Policy-Gradient/graph/
+tensorboard --logdir=DRLTP1PolicyGradient/graph/runs
 ```

 To see [video example](video/)


-![Training run](video/training_run.png)
+![Training run](video/training_run_3.png)


 ---

video/cartpole_0.mp4

-18.6 KB
Binary file not shown.

video/cartpole_1.mp4

-19 KB
Binary file not shown.

video/training_run.png

-47.5 KB
Binary file not shown.

video/training_run_3.png

57.4 KB

0 commit comments
