diff --git a/contents/10_A3C/A3C_RNN.py b/contents/10_A3C/A3C_RNN.py
index fbb72ed..9a42100 100644
--- a/contents/10_A3C/A3C_RNN.py
+++ b/contents/10_A3C/A3C_RNN.py
@@ -68,7 +68,7 @@ def __init__(self, scope, globalAC=None):
                 with tf.name_scope('a_loss'):
                     log_prob = normal_dist.log_prob(self.a_his)
                     exp_v = log_prob * td
-                    entropy = tf.stop_gradient(normal_dist.entropy())  # encourage exploration
+                    entropy = normal_dist.entropy()  # encourage exploration
                     self.exp_v = ENTROPY_BETA * entropy + exp_v
                     self.a_loss = tf.reduce_mean(-self.exp_v)
 
diff --git a/contents/10_A3C/A3C_continuous_action.py b/contents/10_A3C/A3C_continuous_action.py
index 60043d7..61b1b9f 100644
--- a/contents/10_A3C/A3C_continuous_action.py
+++ b/contents/10_A3C/A3C_continuous_action.py
@@ -68,7 +68,7 @@ def __init__(self, scope, globalAC=None):
                 with tf.name_scope('a_loss'):
                     log_prob = normal_dist.log_prob(self.a_his)
                     exp_v = log_prob * td
-                    entropy = tf.stop_gradient(normal_dist.entropy())  # encourage exploration
+                    entropy = normal_dist.entropy()  # encourage exploration
                     self.exp_v = ENTROPY_BETA * entropy + exp_v
                     self.a_loss = tf.reduce_mean(-self.exp_v)
 
diff --git a/contents/10_A3C/A3C_discrete_action.py b/contents/10_A3C/A3C_discrete_action.py
index 2ba7bce..489657f 100644
--- a/contents/10_A3C/A3C_discrete_action.py
+++ b/contents/10_A3C/A3C_discrete_action.py
@@ -61,8 +61,8 @@ def __init__(self, scope, globalAC=None):
                 with tf.name_scope('a_loss'):
                     log_prob = tf.reduce_sum(tf.log(self.a_prob) * tf.one_hot(self.a_his, N_A, dtype=tf.float32), axis=1, keep_dims=True)
                     exp_v = log_prob * td
-                    entropy = tf.stop_gradient(-tf.reduce_sum(self.a_prob * tf.log(self.a_prob + 1e-5),
-                                               axis=1, keep_dims=True))  # encourage exploration
+                    entropy = -tf.reduce_sum(self.a_prob * tf.log(self.a_prob + 1e-5),
+                                             axis=1, keep_dims=True)  # encourage exploration
                     self.exp_v = ENTROPY_BETA * entropy + exp_v
                     self.a_loss = tf.reduce_mean(-self.exp_v)
 
diff --git a/contents/12_Proximal_Policy_Optimization/simply_PPO.py b/contents/12_Proximal_Policy_Optimization/simply_PPO.py
index 25464e9..bcb03e1 100644
--- a/contents/12_Proximal_Policy_Optimization/simply_PPO.py
+++ b/contents/12_Proximal_Policy_Optimization/simply_PPO.py
@@ -64,7 +64,7 @@ def __init__(self):
                 surr = ratio * self.tfadv
             if METHOD['name'] == 'kl_pen':
                 self.tflam = tf.placeholder(tf.float32, None, 'lambda')
-                kl = tf.stop_gradient(tf.distributions.kl_divergence(oldpi, pi))
+                kl = tf.distributions.kl_divergence(oldpi, pi)
                 self.kl_mean = tf.reduce_mean(kl)
                 self.aloss = -(tf.reduce_mean(surr - self.tflam * kl))
             else:   # clipping method, find this is better
diff --git a/contents/8_Actor_Critic_Advantage/AC_continue_Pendulum.py b/contents/8_Actor_Critic_Advantage/AC_continue_Pendulum.py
index 0de0ac7..c52c6d1 100644
--- a/contents/8_Actor_Critic_Advantage/AC_continue_Pendulum.py
+++ b/contents/8_Actor_Critic_Advantage/AC_continue_Pendulum.py
@@ -65,7 +65,7 @@ def __init__(self, sess, n_features, action_bound, lr=0.0001):
             log_prob = self.normal_dist.log_prob(self.a)  # loss without advantage
             self.exp_v = log_prob * self.td_error  # advantage (TD_error) guided loss
             # Add cross entropy cost to encourage exploration
-            self.exp_v += tf.stop_gradient(0.1*self.normal_dist.entropy())
+            self.exp_v += 0.01*self.normal_dist.entropy()
 
         with tf.name_scope('train'):
            self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v, global_step)    # min(v) = max(-v)
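
Note on the change: with tf.stop_gradient removed, the entropy bonus contributes gradients, so maximising ENTROPY_BETA * entropy actively resists the policy's sigma collapsing instead of merely shifting the loss by a constant. The snippet below is a minimal, standalone sketch, not part of this patch: TensorFlow 1.x is assumed, and mu, sigma, a_his and td are illustrative placeholders rather than the repo's exact graph. It only shows that the entropy term now produces a non-zero gradient on sigma.

# Minimal sketch (illustrative, not the repo's graph); assumes TensorFlow 1.x.
import tensorflow as tf

ENTROPY_BETA = 0.01

a_his = tf.placeholder(tf.float32, [None, 1], 'a_his')    # actions taken
td = tf.placeholder(tf.float32, [None, 1], 'td_error')    # advantage estimate
mu = tf.Variable([[0.0]], name='mu')                       # policy mean
sigma = tf.Variable([[1.0]], name='sigma')                 # policy std

normal_dist = tf.distributions.Normal(mu, sigma + 1e-4)
log_prob = normal_dist.log_prob(a_his)
exp_v = log_prob * td

# Without tf.stop_gradient, d(entropy)/d(sigma) = 1/sigma flows into the loss,
# so minimising a_loss also pushes sigma away from zero (more exploration).
entropy = normal_dist.entropy()
a_loss = tf.reduce_mean(-(ENTROPY_BETA * entropy + exp_v))

grad_mu, grad_sigma = tf.gradients(a_loss, [mu, sigma])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    g_sigma = sess.run(grad_sigma, {a_his: [[0.5]], td: [[1.0]]})
    print('d(a_loss)/d(sigma):', g_sigma)   # non-zero entropy pressure on sigma

The same reasoning applies to the KL-penalty branch in simply_PPO.py: the penalty term can only pull pi back towards oldpi if its gradient is allowed to flow into the actor loss.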