1 file changed (+2, −3 lines):
Reinforcement_learning_TUT/9_Deep_Deterministic_Policy_Gradient_DDPG
@@ -21,7 +21,7 @@

 MAX_EPISODES = 70
 MAX_EP_STEPS = 400
-LR_A = 0.001    # learning rate for actor
+LR_A = 0.01     # learning rate for actor
 LR_C = 0.01     # learning rate for critic
 GAMMA = 0.9     # reward discount
 TAU = 0.01      # Soft update for target param, but this is computationally expansive
@@ -90,11 +90,10 @@ def add_grad_to_graph(self, a_grads):
         # xs = policy's parameters;
         # self.a_grads = the gradients of the policy to get more Q
         # tf.gradients will calculate dys/dxs with a initial gradients for ys, so this is dq/da * da/dparams
-        a_grads = tf.div(a_grads, tf.cast(tf.shape(a_grads)[0], tf.float32), name='take_mean')
         self.policy_grads = tf.gradients(ys=self.a, xs=self.e_params, grad_ys=a_grads)

         with tf.variable_scope('A_train'):
-            opt = tf.train.AdamOptimizer(-self.lr)    # (- learning rate) for ascent policy
+            opt = tf.train.AdamOptimizer(-self.lr / BATCH_SIZE)    # (- learning rate) for ascent policy, div to take mean
             self.train_op = opt.apply_gradients(zip(self.policy_grads, self.e_params))

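For reference, below is a minimal, self-contained sketch of the actor-update wiring this commit touches, written against the TF1 graph API. The toy network, the placeholder names (state, dq_da), and BATCH_SIZE = 32 are illustrative assumptions rather than the tutorial's exact code; only the tf.gradients / AdamOptimizer pattern mirrors the diff above.

import tensorflow as tf

BATCH_SIZE = 32   # assumed for illustration; the tutorial defines its own value
LR_A = 0.01       # learning rate for actor, as set by this commit

# Toy actor: state in, action out (stand-ins for the tutorial's network).
s = tf.placeholder(tf.float32, [None, 3], name='state')
with tf.variable_scope('Actor'):
    a = tf.layers.dense(s, 1, name='a')
e_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Actor')

# dQ/da as supplied by the critic; a plain placeholder here for illustration.
a_grads = tf.placeholder(tf.float32, [None, 1], name='dq_da')

# tf.gradients sums over the batch: each entry is sum_b dQ/da_b * da_b/dparams.
policy_grads = tf.gradients(ys=a, xs=e_params, grad_ys=a_grads)

with tf.variable_scope('A_train'):
    # The old code divided a_grads by the batch size before tf.gradients; the new
    # code folds that division into the (negative, gradient-ascent) learning rate.
    opt = tf.train.AdamOptimizer(-LR_A / BATCH_SIZE)
    train_op = opt.apply_gradients(zip(policy_grads, e_params))

In both versions the intent stated in the comments is the same, namely to average the summed batch gradient; the commit simply moves that averaging out of the graph (the removed tf.div) and into the optimizer's step size.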