
Commit a27b63f

committed
mod to structure, md file & console printer
1 parent 1ac1ae1 commit a27b63f

22 files changed: +55 −1614 lines changed

DRLTP1PolicyGradient/REINFORCEplayingloop.py

+6 −11

@@ -1,28 +1,25 @@
 # coding=utf-8
 
-# coding=utf-8
 from __future__ import absolute_import, division, print_function, unicode_literals
 
-
 # region ::Import statement ...
 import tensorflow as tf
 tf_cv1 = tf.compat.v1   # shortcut
 
 from blocAndTools import buildingbloc as bloc
 from blocAndTools.buildingbloc import ExperimentSpec, GymPlayground
 from REINFORCEbrain import REINFORCE_policy
-
-# import tensorflow_weak_warning_supressor as no_cpu_compile_warn
-# no_cpu_compile_warn.execute()
-
 from blocAndTools.rl_vocabulary import rl_name
 vocab = rl_name()
 # endregion
 
 
 def play_REINFORCE_agent_discrete(env='CartPole-v0'):
-    exp_spec = ExperimentSpec()
+    """
+    Execute playing loop of a previously trained REINFORCE agent in the 'CartPole-v0' environment
 
+    """
+    exp_spec = ExperimentSpec()
 
     cartpole_param_dict_2 = {
         'prefered_environment': 'CartPole-v0',
@@ -47,7 +44,7 @@ def play_REINFORCE_agent_discrete(env='CartPole-v0'):
 
     # * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
     # *                                                                                           *
-    # *                          Build computation graph & data collector                         *
+    # *                                  Build computation graph                                  *
     # *                                                                                           *
     # * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 
@@ -60,9 +57,7 @@ def play_REINFORCE_agent_discrete(env='CartPole-v0'):
     reinforce_policy = REINFORCE_policy(observation_ph, action_ph, Q_values_ph, exp_spec, playground)
     (policy_action_sampler, theta_mlp, pseudo_loss) = reinforce_policy
 
-
-
-    saver = tf.train.Saver()
+    saver = tf_cv1.train.Saver()
 
     with tf_cv1.Session() as sess:
         saver.restore(sess, 'checkpoint_directory/REINFORCE_agent-49')
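
For context on what the playing loop converges to, here is a minimal, self-contained sketch (not the repo's code) of the TF1 save/restore pattern it relies on: build a small policy graph, save a checkpoint with `tf_cv1.train.Saver()`, then restore it in a fresh session and act in CartPole-v0. The names `observation_ph` and `policy_action_sampler` mirror the diff; the two-layer network, the temporary checkpoint directory, and the sampling op are illustrative stand-ins, and the sketch assumes TF 1.x as pinned by the repo's setup.

```python
# Illustrative sketch only, not DRLTP1PolicyGradient code. Assumes TensorFlow 1.x
# (eager execution off by default), gym installed, and CartPole-v0 available.
import os
import tempfile

import gym
import tensorflow as tf

tf_cv1 = tf.compat.v1  # same shortcut used in the diff

env = gym.make('CartPole-v0')
obs_dim = env.observation_space.shape[0]
n_actions = env.action_space.n

# Stand-in policy graph (the repo builds this through REINFORCE_policy instead)
observation_ph = tf_cv1.placeholder(tf.float32, shape=(None, obs_dim), name='observation')
hidden = tf_cv1.layers.dense(observation_ph, 32, activation=tf.nn.tanh)
logits = tf_cv1.layers.dense(hidden, n_actions)
policy_action_sampler = tf.squeeze(tf_cv1.random.categorical(logits, num_samples=1), axis=1)

saver = tf_cv1.train.Saver()  # tf_cv1 alias, as the commit changes it

ckpt_dir = tempfile.mkdtemp()  # stand-in for 'checkpoint_directory'
with tf_cv1.Session() as sess:
    sess.run(tf_cv1.global_variables_initializer())
    checkpoint_path = saver.save(sess, os.path.join(ckpt_dir, 'REINFORCE_sketch_agent'))

with tf_cv1.Session() as sess:
    saver.restore(sess, checkpoint_path)  # same call pattern as the playing loop
    obs = env.reset()
    done, episode_return = False, 0.0
    while not done:
        action = sess.run(policy_action_sampler, feed_dict={observation_ph: obs[None, :]})
        obs, reward, done, _ = env.step(int(action[0]))
        episode_return += reward
    print('episode return:', episode_return)
```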

DRLTP1PolicyGradient/blocAndTools/visualisationtools.py

+6 −6

@@ -148,20 +148,20 @@ def epoch_training_stat(self, epoch_loss, epoch_average_trjs_return, epoch_avera
         smoothed_return = self.return_smoothing_buffer / self.print_metric_every
         smoothed_lenght = self.lenght_smoothing_buffer / self.print_metric_every
         print(
-            "\r\t ↳ {:^3}".format(self.epoch),
+            "\r ↳ {:^3}".format(self.epoch),
             ":: Collected {} trajectories for a total of {} timestep.".format(
                 self.number_of_trj_collected, self.total_timestep_collected),
-            "\n\t\t↳ pseudo loss: {:>6.3f} ".format(self.epoch_loss),
+            "\n ↳ pseudo loss: {:>6.3f} ".format(self.epoch_loss),
             "| average trj return: {:>6.3f} | average trj lenght: {:>6.3f}".format(
                 self.average_trjs_return, self.average_trjs_lenght),
             end="\n", flush=True)
 
-        print("\n\t\t\t\t\t\tAverage return over the past {} epoch: {:>6.3f}".format(
+        print("\n Average return over the past {} epoch: {:>6.3f}".format(
             self.print_metric_every, smoothed_return))
         if abs(smoothed_return) < abs(self.last_batch_return):
-            print("\t\t\t\t\t\t\t↳ is lowering ⬊", end="", flush=True)
+            print(" ↳ is lowering ⬊", end="", flush=True)
         elif abs(smoothed_return) > abs(self.last_batch_return):
-            print("\t\t\t\t\t\t\t↳ is rising ⬈ ... goooood :)", end="", flush=True)
+            print(" ↳ is rising ⬈ ... goooood :)", end="", flush=True)
 
         self.collected_experiment_stats['smoothed_average_peusdo_loss'].append(smoothed_batch_loss)
         self.collected_experiment_stats['smoothed_average_return'].append(smoothed_return)
@@ -194,7 +194,7 @@ def trajectory_training_stat(self, the_trajectory_return, timestep) -> None:
         :return:
         :rtype: None
         """
-        print("\r\t ↳ {:^3} :: Trajectory {:>4} ".format(self.epoch, self.trj),
+        print("\r ↳ {:^3} :: Trajectory {:>4} ".format(self.epoch, self.trj),
               ">"*self.cycle_indexer.i, " "*self.cycle_indexer.j,
               " got return {:>8.2f} after {:>4} timesteps".format(
                   the_trajectory_return, timestep),
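
The console-printer changes above replace tab-based indentation with spaces in print calls that rely on a carriage return to redraw the same terminal line. As a quick stand-alone illustration (not the repo's visualisationtools code), the snippet below shows the same "\r" + end="" + flush=True pattern used by trajectory_training_stat to update the progress line in place; the loop bounds and the fake return values are made up for the demo.

```python
# Stand-alone demo of the in-place console progress pattern; values are fabricated.
import time

epoch = 1
for trj in range(1, 11):
    fake_return = 20.0 * trj
    print("\r ↳ {:^3} :: Trajectory {:>4} ".format(epoch, trj),
          ">" * trj, " " * (10 - trj),                      # crude progress bar, like cycle_indexer
          " got return {:>8.2f} after {:>4} timesteps".format(fake_return, trj * 20),
          end="", flush=True)                               # stay on the same line
    time.sleep(0.1)
print()  # move to a new line before the epoch summary would be printed
```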

README.md

+29 −2

@@ -13,8 +13,33 @@ Directeur du programme de baccalauréat en génie logiciel de l'Université Lava
 Québec, QC, Canada,
 
 
+
+---
+
+### Dependencies:
+'gym>=0.14.0'
+'tensorflow>=1.14.0,<2.0',
+'matplotlib>=3.1.0',
+'numpy>=1.16.4',
+'seaborn>=0.9.0',
+'pytest',
+
+### Install instructions:
+1) Create & activate a new virtual environment (I recommend using [conda](https://www.anaconda.com/distribution/), ... it's a walk in the park)
+```bash
+conda create --name myNewVirtualEnvironmentName python=3.7
+conda activate myNewVirtualEnvironmentName
+```
+2) Clone the GitHub repository & install
+```bash
+git clone git@github.com:RedLeader962/LectureDirigeDRLimplementation.git
+cd deep-reinforcement-learning-gym
+pip install -e .
+```
+3) Enjoy DRL
 ---
 
+
 ## [Basic policy gradient](https://github.com/RedLeader962/LectureDirigeDRLimplementation/tree/master/DRL-TP1-Policy-Gradient)
 Policy gradient is an on-policy method that seeks to directly optimize the policy using sampled trajectories as weights. Those weights indicate how well the policy performed. Based on that knowledge, the algorithm updates its policy parameters to make actions leading to similarly good trajectories more likely and similarly bad trajectories less likely. In Deep Reinforcement Learning, the policy is parameterized by a neural net. For this essay, I've studied and implemented the basic version of policy gradient, also known as REINFORCE. I've also complemented my reading with the following resources:
 
@@ -43,8 +68,10 @@ python REINFORCEtrainingloop.py
 tensorboard --logdir=DRL-TP1-Policy-Gradient/graph/
 ```
 
-To see the [video example](https://github.com/RedLeader962/LectureDirigeDRLimplementation/tree/IMPLEMENT-predict_loop/DRLTP1PolicyGradient/video)
+To see the [video example](video/)
+
 
+![Training run](video/training_run.png)
 
-![Training run](DRLTP1PolicyGradient/video/training_run.png)
+---
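
The README paragraph above describes weighting sampled trajectories so that actions from good trajectories become more likely; in code this usually reduces to the "pseudo-loss" named in the diff: the negative mean of the log-probabilities of the taken actions, weighted by the trajectory returns. Below is a minimal sketch of that loss in TF1 style, matching the repo's tensorflow<2.0 pin. The placeholder names observation_ph, action_ph and Q_values_ph mirror the diff, but the small network and optimizer settings are illustrative stand-ins, not the repo's REINFORCE_policy implementation.

```python
# Minimal REINFORCE pseudo-loss sketch (illustrative, not the repo's REINFORCE_policy).
import tensorflow as tf

tf_cv1 = tf.compat.v1

obs_dim, n_actions = 4, 2  # CartPole-v0 sizes

observation_ph = tf_cv1.placeholder(tf.float32, shape=(None, obs_dim))
action_ph = tf_cv1.placeholder(tf.int32, shape=(None,))      # actions actually taken
Q_values_ph = tf_cv1.placeholder(tf.float32, shape=(None,))  # per-timestep return used as weight

# Stand-in policy network (theta)
hidden = tf_cv1.layers.dense(observation_ph, 32, activation=tf.nn.tanh)
logits = tf_cv1.layers.dense(hidden, n_actions)

# log pi(a_t | s_t) for the taken actions
log_prob_all = tf.nn.log_softmax(logits)
log_prob_taken = tf.reduce_sum(tf.one_hot(action_ph, depth=n_actions) * log_prob_all, axis=1)

# Gradient ascent on E[log pi(a|s) * R] == gradient descent on this pseudo-loss
pseudo_loss = -tf.reduce_mean(log_prob_taken * Q_values_ph)
train_op = tf_cv1.train.AdamOptimizer(learning_rate=1e-2).minimize(pseudo_loss)
```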

requirements.txt

-5
This file was deleted.

setup.py

+14 −1

@@ -10,13 +10,26 @@
     version="0.0.1",
     author="Luc Coupal",
     author_email="[email protected]",
-    description="Directed rearing on Deep Reinforcement Learning",
+    description="Directed reading on Deep Reinforcement Learning",
     url="https://github.com/RedLeader962/LectureDirigeDRLimplementation",
     packages=setuptools.find_packages(),
     classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
     ],
+    install_requires=[
+        'gym[atari,box2d,classic_control]>=0.14.0',
+        'ipython',
+        'joblib',
+        'matplotlib>=3.1.0',
+        'numpy>=1.16.4',
+        'pandas',
+        'pytest',
+        'psutil',
+        'scipy',
+        'seaborn>=0.9.0',
+        'tensorflow>=1.14.0,<2.0',
+    ],
     python_requires='>=3.7',
 )
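
Since requirements.txt is deleted in this commit, the version pins now live only in install_requires and are resolved when the README's `pip install -e .` step runs. As a small illustrative check (not part of the repo), the snippet below verifies after installation that the environment satisfies the same pins, using pkg_resources from setuptools.

```python
# Illustrative post-install check, not part of the repository.
import pkg_resources

pins = [
    'gym>=0.14.0',
    'tensorflow>=1.14.0,<2.0',
    'matplotlib>=3.1.0',
    'numpy>=1.16.4',
    'seaborn>=0.9.0',
    'pytest',
]

for pin in pins:
    try:
        pkg_resources.require(pin)
        print('OK       ', pin)
    except (pkg_resources.DistributionNotFound, pkg_resources.VersionConflict) as err:
        print('PROBLEM  ', pin, '->', err)
```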

tests/__init__.py

Whitespace-only changes.

tests/test_exploration/__init__.py

-1
This file was deleted.

tests/test_exploration/test_python_myIncrement.py

-16
This file was deleted.

tests/test_integration/__init__.py

-1
This file was deleted.

tests/test_integration/test_intergrationreinforce.py

-115
This file was deleted.

tests/test_unit/__init__.py

-1
This file was deleted.

0 commit comments
