-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathReplayBuffer.py
45 lines (36 loc) · 1.39 KB
/
ReplayBuffer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from collections import deque
import random
class ReplayBuffer(object):
    """Fixed-capacity FIFO store of experience tuples with random sampling.

    Each stored item is a ``(state, action, reward, new_state, done)`` tuple.
    Once ``buffer_size`` items are held, adding another one evicts the oldest.
    ``mean_reward`` is the average reward of the items currently stored.
    """

    def __init__(self, buffer_size):
        """Create an empty buffer that holds at most ``buffer_size`` items.

        ``buffer_size`` is expected to be positive; with a capacity of 0 (or
        less) ``add`` has nothing to evict and raises ``IndexError``.
        """
        self.buffer_size = buffer_size
        self.num_experiences = 0
        self.buffer = deque()
        self.mean_reward = 0.0

    def getBatch(self, batch_size):
        """Return a list of randomly sampled experiences (no replacement).

        At most ``batch_size`` items are returned; when fewer are stored,
        every stored experience is returned (in random order).
        """
        sample_count = min(batch_size, self.num_experiences)
        return random.sample(self.buffer, sample_count)

    def size(self):
        """Return the capacity of the buffer (not the number of items held)."""
        return self.buffer_size

    def add(self, state, action, reward, new_state, done):
        """Store one experience, evicting the oldest one if the buffer is full.

        ``mean_reward`` is updated so that it stays equal to the average
        reward of the experiences held after this call.
        """
        experience = (state, action, reward, new_state, done)
        if self.num_experiences < self.buffer_size:
            self.buffer.append(experience)
            self.num_experiences += 1
            # Still filling up: fold the new reward into the running mean.
            self.mean_reward = (
                self.mean_reward * (self.num_experiences - 1) + reward
            ) / float(self.num_experiences)
        else:
            # Full: the oldest experience leaves, so its reward (index 2 of
            # the tuple) must be removed from the mean as the new one enters.
            evicted_reward = self.buffer.popleft()[2]
            self.buffer.append(experience)
            self.mean_reward += (
                (reward - evicted_reward) / float(self.num_experiences)
            )

    def count(self):
        """Return the number of experiences currently stored.

        This equals ``buffer_size`` once the buffer is full.
        """
        return self.num_experiences

    def getMeanReward(self):
        """Return the mean reward of the stored experiences (0.0 if empty)."""
        return self.mean_reward

    def erase(self):
        """Discard all stored experiences and reset the statistics."""
        self.buffer = deque()
        self.num_experiences = 0
        # Without this reset the mean of the discarded data would still be
        # reported for an empty buffer.
        self.mean_reward = 0.0