
Commit d3fb093

graph and checkpoint folder structure, md hyperlink
1 parent bfeeb3b commit d3fb093

14 files changed (+25 −29 lines)

DRLTP1PolicyGradient/REINFORCEplayingloop.py

+1 −1
@@ -60,7 +60,7 @@ def play_REINFORCE_agent_discrete(env='CartPole-v0'):
     saver = tf_cv1.train.Saver()

     with tf_cv1.Session() as sess:
-        saver.restore(sess, 'checkpoint_directory/REINFORCE_agent-49')
+        saver.restore(sess, 'graph/saved_training/REINFORCE_agent-39')

         while True: #keep playing
         # for run in range(3): #recorder version
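The restore path above follows the reorganized folder structure this commit introduces (checkpoints now live under graph/). Below is a minimal, self-contained sketch of the tf.compat.v1 save/restore round trip against such a prefix; the toy variable and checkpoint name are illustrative stand-ins, not the repo's REINFORCE graph.

```python
import os
import tensorflow as tf

tf_cv1 = tf.compat.v1  # same shortcut the repo uses
if hasattr(tf_cv1, "disable_eager_execution"):
    tf_cv1.disable_eager_execution()  # needed for graph-mode Sessions when running under TF2

# Toy parameter standing in for the policy variables (illustrative only).
theta = tf_cv1.get_variable("theta", shape=[4, 2], initializer=tf_cv1.zeros_initializer())

saver = tf_cv1.train.Saver()
os.makedirs("graph/saved_training", exist_ok=True)

with tf_cv1.Session() as sess:
    sess.run(tf_cv1.global_variables_initializer())
    # Writes checkpoint files under the prefix 'graph/saved_training/toy_agent-39'.
    saver.save(sess, "graph/saved_training/toy_agent", global_step=39)

with tf_cv1.Session() as sess:
    # restore() takes the same '<prefix>-<global_step>' string, as in the playing loop above.
    saver.restore(sess, "graph/saved_training/toy_agent-39")
    print(sess.run(theta).shape)  # (4, 2)
```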

DRLTP1PolicyGradient/REINFORCEtrainingloop.py

+9 −10
@@ -5,19 +5,18 @@
 # region ::Import statement ...
 import tensorflow as tf
 tf_cv1 = tf.compat.v1 # shortcut
+import tensorflow.python.util.deprecation as deprecation
+deprecation._PRINT_DEPRECATION_WARNINGS = False
+
 import numpy as np
 import matplotlib.pyplot as plt
 from datetime import datetime

+from REINFORCEbrain import REINFORCE_policy
 from blocAndTools import buildingbloc as bloc
 from blocAndTools.buildingbloc import ExperimentSpec, GymPlayground
-from REINFORCEbrain import REINFORCE_policy
 from blocAndTools.visualisationtools import ConsolPrintLearningStats
 from blocAndTools.samplecontainer import TrajectoryCollector, UniformBatchCollector
-
-# import tensorflow_weak_warning_supressor as no_cpu_compile_warn
-# no_cpu_compile_warn.execute()
-
 from blocAndTools.rl_vocabulary import rl_name
 vocab = rl_name()
 # endregion
@@ -30,7 +29,7 @@
 """ --- TensorBoard ----------------------------------------------------------------------------------------------------

 Start TensorBoard in terminal:
-    tensorboard --logdir=DRLTP1PolicyGradient/graph/
+    tensorboard --logdir=DRLTP1PolicyGradient/graph/runs

 In browser, go to:
     http://0.0.0.0:6006/
@@ -117,7 +116,7 @@ def train_REINFORCE_agent_discrete(render_env=None, discounted_reward_to_go=None
     if RENDER_ENV is not None:
         render_env = RENDER_ENV

-    print("\n\n:: Environment rendering: {}".format(render_env))
+    print("\n\n:: Environment rendering: {}\n\n".format(render_env))

     consol_print_learning_stats = ConsolPrintLearningStats(exp_spec, exp_spec.print_metric_every_what_epoch)

@@ -152,7 +151,7 @@ def train_REINFORCE_agent_discrete(render_env=None, discounted_reward_to_go=None


     """ ---- Setup parameters saving ---- """
-    saver = tf.train.Saver()
+    saver = tf_cv1.train.Saver()


     """ ---- Warm-up the computation graph and start learning! ---- """
@@ -258,8 +257,8 @@ def train_REINFORCE_agent_discrete(render_env=None, discounted_reward_to_go=None

             """ ---- Save learned model ---- """
             if batch_average_trjs_return == 200:
-                saver.save(sess, 'checkpoint_directory/REINFORCE_agent', global_step=epoch)
-                print("\n::Policy_theta parameters were saved\n")
+                saver.save(sess, 'graph/checkpoint_directory/REINFORCE_agent', global_step=epoch)
+                print("\n\n :: Policy_theta parameters were saved\n")

         consol_print_learning_stats.print_experiment_stats()
         writer.close()
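The logdir change above (graph/runs) pairs with the per-run folder naming used elsewhere in the repo (see the "Run--..." format in referenceimplementation.py below). Here is a minimal sketch of that layout, assuming a tf.compat.v1 graph; the folder names follow this commit's structure, while the placeholder op and the print are illustrative only.

```python
import os
from datetime import datetime

import tensorflow as tf

tf_cv1 = tf.compat.v1
if hasattr(tf_cv1, "disable_eager_execution"):
    tf_cv1.disable_eager_execution()  # graph mode, as in the training loop

# One subdirectory per run under graph/runs, so
# `tensorboard --logdir=DRLTP1PolicyGradient/graph/runs` lists every run side by side.
date_now = datetime.now()
run_str = "Run--{}h{}--{}-{}-{}".format(date_now.hour, date_now.minute,
                                        date_now.day, date_now.month, date_now.year)
log_dir = os.path.join("graph", "runs", run_str)

# Illustrative op so the exported graph is not empty.
dummy = tf_cv1.placeholder(tf.float32, shape=(), name="dummy")

writer = tf_cv1.summary.FileWriter(log_dir, tf_cv1.get_default_graph())
writer.close()
print("TensorBoard event file written under:", log_dir)
```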

DRLTP1PolicyGradient/blocAndTools/buildingbloc.py

+6 −9
@@ -7,11 +7,10 @@
 # import env_spec_pretty_printing
 import numpy as np
 import tensorflow as tf
-
 tf_cv1 = tf.compat.v1 # shortcut

-# import tensorflow_weak_warning_supressor as no_cpu_compile_warn
-# no_cpu_compile_warn.execute()
+import tensorflow.python.util.deprecation as deprecation
+deprecation._PRINT_DEPRECATION_WARNINGS = False

 from blocAndTools.rl_vocabulary import rl_name
 vocab = rl_name()
@@ -107,10 +106,9 @@ def set_experiment_spec(self, dict_param: dict):

         self._assert_param()

-        print("\n\n>>> Switching to parameter: {}".format(self.paramameter_set_name),
+        print("\n\n:: Switching to parameter: {}".format(self.paramameter_set_name),
               self.get_agent_training_spec(),
-              self.get_neural_net_spec(),
-              "\n")
+              self.get_neural_net_spec())
         return None


@@ -173,12 +171,12 @@ def __init__(self, environment_name='LunarLanderContinuous-v2', print_env_info=F

         info_str = ""
         if isinstance(self._env.action_space, gym.spaces.Box):
-            info_str += "\n\n>>> Action space is Contiuous"
+            info_str += "\n\n:: Action space is Contiuous\n"
             self.ACTION_SPACE = self._env.action_space
             dimension = self.ACTION_SPACE.shape
             self.ACTION_CHOICES = [*dimension][-1]
         else:
-            info_str += "\n\n>>> Action space is Discrete"
+            info_str += "\n\n:: Action space is Discrete\n"
             self.ACTION_SPACE = self._env.action_space
             self.ACTION_CHOICES = self.ACTION_SPACE.n

@@ -201,7 +199,6 @@ def __init__(self, environment_name='LunarLanderContinuous-v2', print_env_info=F
     def env(self) -> Union[TimeLimit, Any]:
         return self._env

-
     def get_environment_spec(self):
         """
         Return specification related to the gym environment
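The GymPlayground hunks above touch the branch that reports whether the action space is continuous (Box) or discrete and how many action choices it exposes. As a standalone illustration of that branch, assuming the gym package is installed; the helper name is hypothetical, not the repo's class.

```python
import gym

def action_choices(env) -> int:
    """Mirror the GymPlayground branch: last Box dimension, or Discrete.n."""
    if isinstance(env.action_space, gym.spaces.Box):
        # Continuous case, e.g. 'LunarLanderContinuous-v2' (requires Box2D) -> 2
        return env.action_space.shape[-1]
    # Discrete case, e.g. 'CartPole-v0' -> 2
    return env.action_space.n

if __name__ == "__main__":
    print(action_choices(gym.make("CartPole-v0")))  # -> 2
```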

DRLTP1PolicyGradient/blocAndTools/referenceimplementation.py

+1 −1
@@ -105,7 +105,7 @@ def train(env_name='CartPole-v0', hidden_sizes=[32], lr=1e-2, epochs=50, batch_s
     # \\\\\\ My bloc \\\\\\
     date_now = datetime.now()
     run_str = "Run--{}h{}--{}-{}-{}".format(date_now.hour, date_now.minute, date_now.day, date_now.month, date_now.year)
-    writer = tf_cv1.summary.FileWriter("./test_integration/graph/integration_test/{}".format(run_str), tf_cv1.get_default_graph())
+    writer = tf_cv1.summary.FileWriter("./test_integration/graph/{}".format(run_str), tf_cv1.get_default_graph())

     the_TRAJECTORY_COLLECTOR = TrajectoryCollector(exp_spec, playground)    # \\\\\\ My bloc \\\\\\
     the_UNI_BATCH_COLLECTOR = UniformBatchCollector(exp_spec.batch_size_in_ts)    # \\\\\\ My bloc \\\\\\

DRLTP1PolicyGradient/blocAndTools/visualisationtools.py

+5 −5
@@ -148,11 +148,11 @@ def epoch_training_stat(self, epoch_loss, epoch_average_trjs_return, epoch_avera
         smoothed_return = self.return_smoothing_buffer / self.print_metric_every
         smoothed_lenght = self.lenght_smoothing_buffer / self.print_metric_every
         print(
-            "\r ↳ {:^3}".format(self.epoch),
+            "\r ↳ {:^3}".format(self.epoch),
             ":: Collected {} trajectories for a total of {} timestep.".format(
                 self.number_of_trj_collected, self.total_timestep_collected),
-            "\n ↳ pseudo loss: {:>6.3f} ".format(self.epoch_loss),
-            "| average trj return: {:>6.3f} | average trj lenght: {:>6.3f}".format(
+            "\n ↳ pseudo loss: {:>6.2f} ".format(self.epoch_loss),
+            "| average trj return: {:>6.2f} | average trj lenght: {:>6.2f}".format(
                 self.average_trjs_return, self.average_trjs_lenght),
             end="\n", flush=True)

@@ -194,7 +194,7 @@ def trajectory_training_stat(self, the_trajectory_return, timestep) -> None:
         :return:
         :rtype: None
         """
-        print("\r ↳ {:^3} :: Trajectory {:>4} ".format(self.epoch, self.trj),
+        print("\r ↳ {:^3} :: Trajectory {:>4} ".format(self.epoch, self.trj),
               ">"*self.cycle_indexer.i, " "*self.cycle_indexer.j,
               " got return {:>8.2f} after {:>4} timesteps".format(
                   the_trajectory_return, timestep),
@@ -233,7 +233,7 @@ def ultra_basic_ploter(epoch_average_return: list, epoch_average_loss: list, epo

     x_axes = np.arange(0, len(epoch_average_return)) * metric_computed_every_what_epoch
     ax.plot(x_axes, epoch_average_return, label='Average Return')
-    ax.plot(x_axes, epoch_average_loss, label='Average loss')
+    ax.plot(x_axes, epoch_average_loss, label='Average pseudo loss')
     ax.plot(x_axes, epoch_average_lenght, label='Average lenght')

     plt.xlabel('Epoch')
3 binary files changed — not shown.

DRLTP1PolicyGradient/tests/test_integration/test_intergrationreinforce.py

+1 −1
@@ -5,7 +5,7 @@

 """
 Start TensorBoard in terminal:
-    tensorboard --logdir=tests/graph/integration_test
+    tensorboard --logdir=DRLTP1PolicyGradient/tests/graph/integration_test

 In browser, go to:
     http://0.0.0.0:6006/

README.md

+2 −2
@@ -67,13 +67,13 @@ python REINFORCEtrainingloop.py

 **To navigate trough the computation graph in TensorBoard**
 ```bash
-tensorboard --logdir=DRL-TP1-Policy-Gradient/graph/
+tensorboard --logdir=DRLTP1PolicyGradient/graph/runs
 ```

 To see [video example](video/)


-![Training run](video/training_run.png)
+![Training run](video/training_run_3.png)


 ---

video/cartpole_0.mp4

-18.6 KB
Binary file not shown.

video/cartpole_1.mp4

-19 KB
Binary file not shown.

video/training_run.png

-47.5 KB
Binary file not shown.

video/training_run_3.png

57.4 KB

0 commit comments
