
Commit a27b63f

committed
mod to structure, md file & console printer
1 parent 1ac1ae1 commit a27b63f

22 files changed: +55 −1614 lines changed

DRLTP1PolicyGradient/REINFORCEplayingloop.py

+6 −11

@@ -1,28 +1,25 @@
 # coding=utf-8
 
-# coding=utf-8
 from __future__ import absolute_import, division, print_function, unicode_literals
 
-
 # region ::Import statement ...
 import tensorflow as tf
 tf_cv1 = tf.compat.v1   # shortcut
 
 from blocAndTools import buildingbloc as bloc
 from blocAndTools.buildingbloc import ExperimentSpec, GymPlayground
 from REINFORCEbrain import REINFORCE_policy
-
-# import tensorflow_weak_warning_supressor as no_cpu_compile_warn
-# no_cpu_compile_warn.execute()
-
 from blocAndTools.rl_vocabulary import rl_name
 vocab = rl_name()
 # endregion
 
 
 def play_REINFORCE_agent_discrete(env='CartPole-v0'):
-    exp_spec = ExperimentSpec()
+    """
+    Execute playing loop of a previously trained REINFORCE agent in the 'CartPole-v0' environment
 
+    """
+    exp_spec = ExperimentSpec()
 
     cartpole_param_dict_2 = {
         'prefered_environment': 'CartPole-v0',
@@ -47,7 +44,7 @@ def play_REINFORCE_agent_discrete(env='CartPole-v0'):
 
     # * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
     # *                                                                                           *
-    # *                          Build computation graph & data collector                         *
+    # *                                  Build computation graph                                  *
     # *                                                                                           *
     # * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 
@@ -60,9 +57,7 @@ def play_REINFORCE_agent_discrete(env='CartPole-v0'):
     reinforce_policy = REINFORCE_policy(observation_ph, action_ph, Q_values_ph, exp_spec, playground)
     (policy_action_sampler, theta_mlp, pseudo_loss) = reinforce_policy
 
-
-
-    saver = tf.train.Saver()
+    saver = tf_cv1.train.Saver()
 
     with tf_cv1.Session() as sess:
         saver.restore(sess, 'checkpoint_directory/REINFORCE_agent-49')
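
For context on what the playing loop converges to, here is a minimal, self-contained sketch (not the repo's code) of the TF1 save/restore pattern it relies on: build a small policy graph, save a checkpoint with `tf_cv1.train.Saver()`, then restore it in a fresh session and act in CartPole-v0. The names `observation_ph` and `policy_action_sampler` mirror the diff; the two-layer network, the temporary checkpoint directory, and the sampling op are illustrative stand-ins, and the sketch assumes TF 1.x as pinned by the repo's setup.

```python
# Illustrative sketch only, not DRLTP1PolicyGradient code. Assumes TensorFlow 1.x
# (eager execution off by default), gym installed, and CartPole-v0 available.
import os
import tempfile

import gym
import tensorflow as tf

tf_cv1 = tf.compat.v1  # same shortcut used in the diff

env = gym.make('CartPole-v0')
obs_dim = env.observation_space.shape[0]
n_actions = env.action_space.n

# Stand-in policy graph (the repo builds this through REINFORCE_policy instead)
observation_ph = tf_cv1.placeholder(tf.float32, shape=(None, obs_dim), name='observation')
hidden = tf_cv1.layers.dense(observation_ph, 32, activation=tf.nn.tanh)
logits = tf_cv1.layers.dense(hidden, n_actions)
policy_action_sampler = tf.squeeze(tf_cv1.random.categorical(logits, num_samples=1), axis=1)

saver = tf_cv1.train.Saver()  # tf_cv1 alias, as the commit changes it

ckpt_dir = tempfile.mkdtemp()  # stand-in for 'checkpoint_directory'
with tf_cv1.Session() as sess:
    sess.run(tf_cv1.global_variables_initializer())
    checkpoint_path = saver.save(sess, os.path.join(ckpt_dir, 'REINFORCE_sketch_agent'))

with tf_cv1.Session() as sess:
    saver.restore(sess, checkpoint_path)  # same call pattern as the playing loop
    obs = env.reset()
    done, episode_return = False, 0.0
    while not done:
        action = sess.run(policy_action_sampler, feed_dict={observation_ph: obs[None, :]})
        obs, reward, done, _ = env.step(int(action[0]))
        episode_return += reward
    print('episode return:', episode_return)
```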

DRLTP1PolicyGradient/blocAndTools/visualisationtools.py

+6 −6

@@ -148,20 +148,20 @@ def epoch_training_stat(self, epoch_loss, epoch_average_trjs_return, epoch_avera
         smoothed_return = self.return_smoothing_buffer / self.print_metric_every
         smoothed_lenght = self.lenght_smoothing_buffer / self.print_metric_every
         print(
-            "\r\t ↳ {:^3}".format(self.epoch),
+            "\r ↳ {:^3}".format(self.epoch),
             ":: Collected {} trajectories for a total of {} timestep.".format(
                 self.number_of_trj_collected, self.total_timestep_collected),
-            "\n\t\t↳ pseudo loss: {:>6.3f} ".format(self.epoch_loss),
+            "\n ↳ pseudo loss: {:>6.3f} ".format(self.epoch_loss),
             "| average trj return: {:>6.3f} | average trj lenght: {:>6.3f}".format(
                 self.average_trjs_return, self.average_trjs_lenght),
             end="\n", flush=True)
 
-        print("\n\t\t\t\t\t\tAverage return over the past {} epoch: {:>6.3f}".format(
+        print("\n Average return over the past {} epoch: {:>6.3f}".format(
             self.print_metric_every, smoothed_return))
         if abs(smoothed_return) < abs(self.last_batch_return):
-            print("\t\t\t\t\t\t\t↳ is lowering ⬊", end="", flush=True)
+            print(" ↳ is lowering ⬊", end="", flush=True)
         elif abs(smoothed_return) > abs(self.last_batch_return):
-            print("\t\t\t\t\t\t\t↳ is rising ⬈ ... goooood :)", end="", flush=True)
+            print(" ↳ is rising ⬈ ... goooood :)", end="", flush=True)
 
         self.collected_experiment_stats['smoothed_average_peusdo_loss'].append(smoothed_batch_loss)
         self.collected_experiment_stats['smoothed_average_return'].append(smoothed_return)
@@ -194,7 +194,7 @@ def trajectory_training_stat(self, the_trajectory_return, timestep) -> None:
         :return:
         :rtype: None
         """
-        print("\r\t ↳ {:^3} :: Trajectory {:>4} ".format(self.epoch, self.trj),
+        print("\r ↳ {:^3} :: Trajectory {:>4} ".format(self.epoch, self.trj),
               ">"*self.cycle_indexer.i, " "*self.cycle_indexer.j,
               " got return {:>8.2f} after {:>4} timesteps".format(
                   the_trajectory_return, timestep),
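
The console-printer changes above replace tab-based indentation with spaces in print calls that rely on a carriage return to redraw the same terminal line. As a quick stand-alone illustration (not the repo's visualisationtools code), the snippet below shows the same "\r" + end="" + flush=True pattern used by trajectory_training_stat to update the progress line in place; the loop bounds and the fake return values are made up for the demo.

```python
# Stand-alone demo of the in-place console progress pattern; values are fabricated.
import time

epoch = 1
for trj in range(1, 11):
    fake_return = 20.0 * trj
    print("\r ↳ {:^3} :: Trajectory {:>4} ".format(epoch, trj),
          ">" * trj, " " * (10 - trj),                      # crude progress bar, like cycle_indexer
          " got return {:>8.2f} after {:>4} timesteps".format(fake_return, trj * 20),
          end="", flush=True)                               # stay on the same line
    time.sleep(0.1)
print()  # move to a new line before the epoch summary would be printed
```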

README.md

+29 −2

@@ -13,8 +13,33 @@ Directeur du programme de baccalauréat en génie logiciel de l'Université Lava
 Québec, QC, Canada,
 
 
+
+---
+
+### Dependencies:
+'gym>=0.14.0'
+'tensorflow>=1.14.0,<2.0',
+'matplotlib>=3.1.0',
+'numpy>=1.16.4',
+'seaborn>=0.9.0',
+'pytest',
+
+### Install instructions:
+1) Create & activate a new virtual environment (I recommend using [conda](https://www.anaconda.com/distribution/), ... it's a walk in the park)
+```bash
+conda create --name myNewVirtualEnvironmentName python=3.7
+conda activate myNewVirtualEnvironmentName
+```
+2) Clone the GitHub repository & install
+```bash
+git clone git@github.com:RedLeader962/LectureDirigeDRLimplementation.git
+cd deep-reinforcement-learning-gym
+pip install -e .
+```
+3) Enjoy DRL
 ---
 
+
 ## [Basic policy gradient](https://github.com/RedLeader962/LectureDirigeDRLimplementation/tree/master/DRL-TP1-Policy-Gradient)
 Policy gradient is an on-policy method that seeks to directly optimize the policy using sampled trajectories as weights. Those weights indicate how well the policy performed. Based on that knowledge, the algorithm updates its policy parameters to make actions leading to similarly good trajectories more likely and similarly bad trajectories less likely. In Deep Reinforcement Learning, the policy is parameterized by a neural net. For this essay, I've studied and implemented the basic version of policy gradient, also known as REINFORCE. I've also complemented my reading with the following resources:
 
@@ -43,8 +68,10 @@ python REINFORCEtrainingloop.py
 tensorboard --logdir=DRL-TP1-Policy-Gradient/graph/
 ```
 
-To see the [video example](https://github.com/RedLeader962/LectureDirigeDRLimplementation/tree/IMPLEMENT-predict_loop/DRLTP1PolicyGradient/video)
+To see the [video example](video/)
+
 
+![Training run](video/training_run.png)
 
-![Training run](DRLTP1PolicyGradient/video/training_run.png)
+---
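
The README paragraph above describes weighting sampled trajectories so that actions from good trajectories become more likely; in code this usually reduces to the "pseudo-loss" named in the diff: the negative mean of the log-probabilities of the taken actions, weighted by the trajectory returns. Below is a minimal sketch of that loss in TF1 style, matching the repo's tensorflow<2.0 pin. The placeholder names observation_ph, action_ph and Q_values_ph mirror the diff, but the small network and optimizer settings are illustrative stand-ins, not the repo's REINFORCE_policy implementation.

```python
# Minimal REINFORCE pseudo-loss sketch (illustrative, not the repo's REINFORCE_policy).
import tensorflow as tf

tf_cv1 = tf.compat.v1

obs_dim, n_actions = 4, 2  # CartPole-v0 sizes

observation_ph = tf_cv1.placeholder(tf.float32, shape=(None, obs_dim))
action_ph = tf_cv1.placeholder(tf.int32, shape=(None,))      # actions actually taken
Q_values_ph = tf_cv1.placeholder(tf.float32, shape=(None,))  # per-timestep return used as weight

# Stand-in policy network (theta)
hidden = tf_cv1.layers.dense(observation_ph, 32, activation=tf.nn.tanh)
logits = tf_cv1.layers.dense(hidden, n_actions)

# log pi(a_t | s_t) for the taken actions
log_prob_all = tf.nn.log_softmax(logits)
log_prob_taken = tf.reduce_sum(tf.one_hot(action_ph, depth=n_actions) * log_prob_all, axis=1)

# Gradient ascent on E[log pi(a|s) * R] == gradient descent on this pseudo-loss
pseudo_loss = -tf.reduce_mean(log_prob_taken * Q_values_ph)
train_op = tf_cv1.train.AdamOptimizer(learning_rate=1e-2).minimize(pseudo_loss)
```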

requirements.txt

-5
This file was deleted.

setup.py

+14 −1

@@ -10,13 +10,26 @@
     version="0.0.1",
     author="Luc Coupal",
     author_email="[email protected]",
-    description="Directed rearing on Deep Reinforcement Learning",
+    description="Directed reading on Deep Reinforcement Learning",
     url="https://github.com/RedLeader962/LectureDirigeDRLimplementation",
     packages=setuptools.find_packages(),
     classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
     ],
+    install_requires=[
+        'gym[atari,box2d,classic_control]>=0.14.0',
+        'ipython',
+        'joblib',
+        'matplotlib>=3.1.0',
+        'numpy>=1.16.4',
+        'pandas',
+        'pytest',
+        'psutil',
+        'scipy',
+        'seaborn>=0.9.0',
+        'tensorflow>=1.14.0,<2.0',
+    ],
     python_requires='>=3.7',
 )
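
Since requirements.txt is deleted in this commit, the version pins now live only in install_requires and are resolved when the README's `pip install -e .` step runs. As a small illustrative check (not part of the repo), the snippet below verifies after installation that the environment satisfies the same pins, using pkg_resources from setuptools.

```python
# Illustrative post-install check, not part of the repository.
import pkg_resources

pins = [
    'gym>=0.14.0',
    'tensorflow>=1.14.0,<2.0',
    'matplotlib>=3.1.0',
    'numpy>=1.16.4',
    'seaborn>=0.9.0',
    'pytest',
]

for pin in pins:
    try:
        pkg_resources.require(pin)
        print('OK       ', pin)
    except (pkg_resources.DistributionNotFound, pkg_resources.VersionConflict) as err:
        print('PROBLEM  ', pin, '->', err)
```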

tests/__init__.py

Whitespace-only changes.

tests/test_exploration/__init__.py

-1
This file was deleted.

tests/test_exploration/test_python_myIncrement.py

-16
This file was deleted.

tests/test_integration/__init__.py

-1
This file was deleted.

tests/test_integration/test_intergrationreinforce.py

-115
This file was deleted.

tests/test_unit/__init__.py

-1
This file was deleted.

0 commit comments
