
Commit 017072e

Committed Apr 9, 2017

take mean of a_grads

1 parent a4877b8 commit 017072e

File tree

1 file changed: +2 −3 lines changed
  • Reinforcement_learning_TUT/9_Deep_Deterministic_Policy_Gradient_DDPG


‎Reinforcement_learning_TUT/9_Deep_Deterministic_Policy_Gradient_DDPG/DDPG.py

+2 −3

@@ -21,7 +21,7 @@

 MAX_EPISODES = 70
 MAX_EP_STEPS = 400
-LR_A = 0.001    # learning rate for actor
+LR_A = 0.01     # learning rate for actor
 LR_C = 0.01     # learning rate for critic
 GAMMA = 0.9     # reward discount
 TAU = 0.01      # soft update for target params, but this is computationally expensive
@@ -90,11 +90,10 @@ def add_grad_to_graph(self, a_grads):
             # xs = policy's parameters;
             # self.a_grads = the gradients of the policy to get more Q
             # tf.gradients will calculate dys/dxs with an initial gradient for ys, so this is dq/da * da/dparams
-            a_grads = tf.div(a_grads, tf.cast(tf.shape(a_grads)[0], tf.float32), name='take_mean')
             self.policy_grads = tf.gradients(ys=self.a, xs=self.e_params, grad_ys=a_grads)

         with tf.variable_scope('A_train'):
-            opt = tf.train.AdamOptimizer(-self.lr)  # (- learning rate) for ascent policy
+            opt = tf.train.AdamOptimizer(-self.lr / BATCH_SIZE)  # (- learning rate) for ascent policy, divide to take mean
             self.train_op = opt.apply_gradients(zip(self.policy_grads, self.e_params))
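For readers following the change: tf.gradients sums the per-sample contributions of grad_ys over the batch, so after this commit the 1/BATCH_SIZE factor of the batch mean enters through the optimizer's step size rather than through an explicit tf.div on a_grads. Below is a minimal standalone sketch of that wiring in TF 1.x graph mode; it is not the author's file. The single dense layer, the s_dim/a_dim values, and the placeholder standing in for the critic's dQ/da are illustrative assumptions, while BATCH_SIZE, LR_A, and the gradient/optimizer calls mirror DDPG.py.

# Toy sketch of the actor-gradient wiring changed by this commit (TF 1.x).
import tensorflow as tf

BATCH_SIZE = 32          # mirrors DDPG.py
LR_A = 0.01              # learning rate for actor, as set in this commit
s_dim, a_dim = 3, 1      # illustrative dimensions, not from the repo

S = tf.placeholder(tf.float32, [None, s_dim], name='s')
a = tf.layers.dense(S, a_dim, activation=tf.nn.tanh, name='actor_eval')
e_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='actor_eval')

# dQ/da would normally come from the critic; a placeholder stands in here.
a_grads = tf.placeholder(tf.float32, [None, a_dim], name='dq_da')

# grad_ys seeds tf.gradients with dQ/da and is summed over the batch,
# giving sum_i dQ/da_i * da_i/dparams for each actor parameter.
policy_grads = tf.gradients(ys=a, xs=e_params, grad_ys=a_grads)

# The negative learning rate turns Adam's descent into ascent on Q;
# dividing by BATCH_SIZE folds the 1/N of the batch mean into the step
# size, which is what replaces the removed tf.div(a_grads, ...) line.
opt = tf.train.AdamOptimizer(-LR_A / BATCH_SIZE)
train_op = opt.apply_gradients(zip(policy_grads, e_params))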
