Skip to content

Commit

Permalink
add stop gradient
Browse files Browse the repository at this point in the history
  • Loading branch information
MorvanZhou committed Jan 18, 2018
1 parent 639902b commit e6e59bc
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion contents/10_A3C/A3C_RNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def __init__(self, scope, globalAC=None):

with tf.name_scope('a_loss'):
log_prob = normal_dist.log_prob(self.a_his)
- exp_v = log_prob * td
+ exp_v = log_prob * tf.stop_gradient(td)
entropy = normal_dist.entropy() # encourage exploration
self.exp_v = ENTROPY_BETA * entropy + exp_v
self.a_loss = tf.reduce_mean(-self.exp_v)
Expand Down
6 changes: 3 additions & 3 deletions contents/10_A3C/A3C_continuous_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def __init__(self, scope, globalAC=None):

with tf.name_scope('a_loss'):
log_prob = normal_dist.log_prob(self.a_his)
- exp_v = log_prob * td
+ exp_v = log_prob * tf.stop_gradient(td)
entropy = normal_dist.entropy() # encourage exploration
self.exp_v = ENTROPY_BETA * entropy + exp_v
self.a_loss = tf.reduce_mean(-self.exp_v)
Expand Down Expand Up @@ -124,8 +124,8 @@ def work(self):
s = self.env.reset()
ep_r = 0
for ep_t in range(MAX_EP_STEP):
- if self.name == 'W_0':
- self.env.render()
+ # if self.name == 'W_0':
+ # self.env.render()
a = self.AC.choose_action(s)
s_, r, done, info = self.env.step(a)
done = True if ep_t == MAX_EP_STEP - 1 else False
Expand Down
2 changes: 1 addition & 1 deletion contents/10_A3C/A3C_discrete_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def __init__(self, scope, globalAC=None):

with tf.name_scope('a_loss'):
log_prob = tf.reduce_sum(tf.log(self.a_prob) * tf.one_hot(self.a_his, N_A, dtype=tf.float32), axis=1, keep_dims=True)
- exp_v = log_prob * td
+ exp_v = log_prob * tf.stop_gradient(td)
entropy = -tf.reduce_sum(self.a_prob * tf.log(self.a_prob + 1e-5),
axis=1, keep_dims=True) # encourage exploration
self.exp_v = ENTROPY_BETA * entropy + exp_v
Expand Down
2 changes: 1 addition & 1 deletion contents/10_A3C/A3C_distributed_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(self, scope, opt_a=None, opt_c=None, global_net=None):
log_prob = tf.reduce_sum(
tf.log(self.a_prob) * tf.one_hot(self.a_his, N_A, dtype=tf.float32),
axis=1, keep_dims=True)
- exp_v = log_prob * td
+ exp_v = log_prob * tf.stop_gradient(td)
entropy = -tf.reduce_sum(self.a_prob * tf.log(self.a_prob + 1e-5),
axis=1, keep_dims=True) # encourage exploration
self.exp_v = ENTROPY_BETA * entropy + exp_v
Expand Down

0 comments on commit e6e59bc

Please sign in to comment.