
Commit 017072e

Committed Apr 9, 2017

take mean of a_grads

1 parent a4877b8 commit 017072e

File tree

1 file changed: +2 −3 lines changed
  • Reinforcement_learning_TUT/9_Deep_Deterministic_Policy_Gradient_DDPG


‎Reinforcement_learning_TUT/9_Deep_Deterministic_Policy_Gradient_DDPG/DDPG.py

+2 −3

@@ -21,7 +21,7 @@

 MAX_EPISODES = 70
 MAX_EP_STEPS = 400
-LR_A = 0.001    # learning rate for actor
+LR_A = 0.01     # learning rate for actor
 LR_C = 0.01     # learning rate for critic
 GAMMA = 0.9     # reward discount
 TAU = 0.01      # soft update for target params, but this is computationally expensive
@@ -90,11 +90,10 @@ def add_grad_to_graph(self, a_grads):
             # xs = policy's parameters;
             # self.a_grads = the gradients of the policy to get more Q
             # tf.gradients will calculate dys/dxs with an initial gradient for ys, so this is dq/da * da/dparams
-            a_grads = tf.div(a_grads, tf.cast(tf.shape(a_grads)[0], tf.float32), name='take_mean')
             self.policy_grads = tf.gradients(ys=self.a, xs=self.e_params, grad_ys=a_grads)

         with tf.variable_scope('A_train'):
-            opt = tf.train.AdamOptimizer(-self.lr)  # (- learning rate) for ascent policy
+            opt = tf.train.AdamOptimizer(-self.lr / BATCH_SIZE)  # (- learning rate) for ascent policy, divide to take mean
             self.train_op = opt.apply_gradients(zip(self.policy_grads, self.e_params))
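For readers following the change: tf.gradients sums the per-sample contributions of grad_ys over the batch, so after this commit the 1/BATCH_SIZE factor of the batch mean enters through the optimizer's step size rather than through an explicit tf.div on a_grads. Below is a minimal standalone sketch of that wiring in TF 1.x graph mode; it is not the author's file. The single dense layer, the s_dim/a_dim values, and the placeholder standing in for the critic's dQ/da are illustrative assumptions, while BATCH_SIZE, LR_A, and the gradient/optimizer calls mirror DDPG.py.

# Toy sketch of the actor-gradient wiring changed by this commit (TF 1.x).
import tensorflow as tf

BATCH_SIZE = 32          # mirrors DDPG.py
LR_A = 0.01              # learning rate for actor, as set in this commit
s_dim, a_dim = 3, 1      # illustrative dimensions, not from the repo

S = tf.placeholder(tf.float32, [None, s_dim], name='s')
a = tf.layers.dense(S, a_dim, activation=tf.nn.tanh, name='actor_eval')
e_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='actor_eval')

# dQ/da would normally come from the critic; a placeholder stands in here.
a_grads = tf.placeholder(tf.float32, [None, a_dim], name='dq_da')

# grad_ys seeds tf.gradients with dQ/da and is summed over the batch,
# giving sum_i dQ/da_i * da_i/dparams for each actor parameter.
policy_grads = tf.gradients(ys=a, xs=e_params, grad_ys=a_grads)

# The negative learning rate turns Adam's descent into ascent on Q;
# dividing by BATCH_SIZE folds the 1/N of the batch mean into the step
# size, which is what replaces the removed tf.div(a_grads, ...) line.
opt = tf.train.AdamOptimizer(-LR_A / BATCH_SIZE)
train_op = opt.apply_gradients(zip(policy_grads, e_params))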
