-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathSillyAgent.py
37 lines (30 loc) · 909 Bytes
/
SillyAgent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import numpy as np
class SillyAgent:
    """Baseline agent that picks actions uniformly at random and never learns.

    The agent stores the problem dimensions it is given but keeps no value
    function: ``update`` is a no-op and ``choose_action`` ignores the state.
    """

    def __init__(self, num_state, num_actions, action_space):
        """Store the environment description.

        Args:
            num_state: The number of states.
            num_actions: The number of actions; actions are the integers
                ``0 .. num_actions - 1``.
            action_space: The environment's action space object. It is only
                stored here; this class never calls it.
        """
        self.num_state = num_state
        self.num_actions = num_actions
        self.action_space = action_space

    def update(self, state, state2, reward, action, action2) -> None:
        """Accept a transition and do nothing with it.

        The method takes a full transition so the agent can be driven by a
        training loop that calls ``update`` on every step, but this agent has
        nothing to learn, so all arguments are ignored.

        Args:
            state: The state before the action was taken.
            state2: The state after the action was taken.
            reward: The reward received for the transition.
            action: The action taken in ``state``.
            action2: The action selected for ``state2``.

        Returns:
            None
        """
        # Intentionally empty: a random agent keeps no value estimates.
        return None

    def choose_action(self, state) -> int:
        """Return an action index drawn uniformly from all available actions.

        Args:
            state: The current state; ignored, since the choice is random.

        Returns:
            An integer in ``[0, num_actions - 1]``, each equally likely.

        Raises:
            ValueError: If ``num_actions`` is less than 1 (raised by NumPy).
        """
        # randint's upper bound is exclusive, so pass num_actions itself;
        # using num_actions - 1 would make the last action unreachable and
        # fail outright when there is only one action.
        return np.random.randint(0, self.num_actions)