Add tf.stop_gradient to the TD error in the A3C actor loss

starry-sky6688 · Jan 18, 2018 · e6e59bc
1 parent 639902b
commit e6e59bc
Show file tree

Hide file tree

Showing 4 changed files with 6 additions and 6 deletions.
diff --git a/contents/10_A3C/A3C_RNN.py b/contents/10_A3C/A3C_RNN.py
@@ -67,7 +67,7 @@ def __init__(self, scope, globalAC=None):
 
                 with tf.name_scope('a_loss'):
                     log_prob = normal_dist.log_prob(self.a_his)
-                    exp_v = log_prob * td
+                    exp_v = log_prob * tf.stop_gradient(td)
                     entropy = normal_dist.entropy()  # encourage exploration
                     self.exp_v = ENTROPY_BETA * entropy + exp_v
                     self.a_loss = tf.reduce_mean(-self.exp_v)

diff --git a/contents/10_A3C/A3C_continuous_action.py b/contents/10_A3C/A3C_continuous_action.py
@@ -67,7 +67,7 @@ def __init__(self, scope, globalAC=None):
 
                 with tf.name_scope('a_loss'):
                     log_prob = normal_dist.log_prob(self.a_his)
-                    exp_v = log_prob * td
+                    exp_v = log_prob * tf.stop_gradient(td)
                     entropy = normal_dist.entropy()  # encourage exploration
                     self.exp_v = ENTROPY_BETA * entropy + exp_v
                     self.a_loss = tf.reduce_mean(-self.exp_v)
@@ -124,8 +124,8 @@ def work(self):
             s = self.env.reset()
             ep_r = 0
             for ep_t in range(MAX_EP_STEP):
-                if self.name == 'W_0':
-                    self.env.render()
+                # if self.name == 'W_0':
+                #     self.env.render()
                 a = self.AC.choose_action(s)
                 s_, r, done, info = self.env.step(a)
                 done = True if ep_t == MAX_EP_STEP - 1 else False

diff --git a/contents/10_A3C/A3C_discrete_action.py b/contents/10_A3C/A3C_discrete_action.py
@@ -60,7 +60,7 @@ def __init__(self, scope, globalAC=None):
 
                 with tf.name_scope('a_loss'):
                     log_prob = tf.reduce_sum(tf.log(self.a_prob) * tf.one_hot(self.a_his, N_A, dtype=tf.float32), axis=1, keep_dims=True)
-                    exp_v = log_prob * td
+                    exp_v = log_prob * tf.stop_gradient(td)
                     entropy = -tf.reduce_sum(self.a_prob * tf.log(self.a_prob + 1e-5),
                                              axis=1, keep_dims=True)  # encourage exploration
                     self.exp_v = ENTROPY_BETA * entropy + exp_v

diff --git a/contents/10_A3C/A3C_distributed_tf.py b/contents/10_A3C/A3C_distributed_tf.py
@@ -49,7 +49,7 @@ def __init__(self, scope, opt_a=None, opt_c=None, global_net=None):
                     log_prob = tf.reduce_sum(
                         tf.log(self.a_prob) * tf.one_hot(self.a_his, N_A, dtype=tf.float32),
                         axis=1, keep_dims=True)
-                    exp_v = log_prob * td
+                    exp_v = log_prob * tf.stop_gradient(td)
                     entropy = -tf.reduce_sum(self.a_prob * tf.log(self.a_prob + 1e-5),
                                              axis=1, keep_dims=True)  # encourage exploration
                     self.exp_v = ENTROPY_BETA * entropy + exp_v