# myversion.py
# Importing the libraries
import numpy as np
import random
import os
import torch
import torch.nn as nn
import torch.nn.functional as f
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable
# Creating the architecture of the neural network
# Implementing the init function
class Network(nn.Module):
    # 3 actions - right, left, forward
    # Input size is 5 - 2 for orientation, and one each for the forward, left
    # and right sensor signals. If you want a 360-degree view, add more inputs.
    def __init__(self, input_size, nb_action):
        super(Network, self).__init__()
        self.input_size = input_size
        self.nb_action = nb_action
        self.fc1 = nn.Linear(input_size, 30)
        self.fc2 = nn.Linear(30, nb_action)
        # Here I've taken only 1 hidden layer with 30 neurons; add more fully
        # connected layers with any number of neurons if you want.

    def forward(self, state):
        # x represents the hidden neurons
        x = f.relu(self.fc1(state))
        q_values = self.fc2(x)
        return q_values
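
# A quick sanity-check sketch, not part of the original file: the sizes
# (input_size = 5, nb_action = 3) come from the comments above, and the dummy
# state is assumed. It shows that a state of shape (1, input_size) comes out
# of the network as one Q-value per action.
def _demo_network():
    net = Network(input_size = 5, nb_action = 3)
    state = torch.rand(1, 5)  # a batch of one 5-dimensional state
    q_values = net(state)     # calls forward(); shape (1, 3)
    return q_values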
# Implementing Experience Replay
class ReplayMemory(object):
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        # The memory always holds at most a fixed number of events => the push
        # function below enforces that.
        # The capacity can be 100, 1000, 100000 or whatever; we cap it because
        # a bounded memory is computationally easier to work with.

    def push(self, event):
        # An event has 4 parts - s(t), s(t+1), a(t) and r(t)
        self.memory.append(event)
        if len(self.memory) > self.capacity:
            del self.memory[0]

    # Sampling random batches from past memory greatly improves the model
    def sample(self, batch_size):
        samples = zip(*random.sample(self.memory, batch_size))
        return map(lambda x: Variable(torch.cat(x, 0)), samples)
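
# A small usage sketch, again assumed rather than original: push a few
# (s(t), s(t+1), a(t), r(t)) events, then sample a batch. torch.cat in
# sample() stacks each component of the batch along dimension 0.
def _demo_memory():
    memory = ReplayMemory(100)
    for _ in range(10):
        memory.push((torch.rand(1, 5),                            # s(t)
                     torch.rand(1, 5),                            # s(t+1)
                     torch.LongTensor([[random.randint(0, 2)]]),  # a(t)
                     torch.Tensor([random.random()])))            # r(t)
    batch_state, batch_next_state, batch_action, batch_reward = memory.sample(5)
    return batch_state.size()  # torch.Size([5, 5])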
# Implementing Deep Q-Learning
class Dqn():
    def __init__(self, input_size, nb_action, gamma):
        self.gamma = gamma
        self.reward_window = []
        self.model = Network(input_size, nb_action)
        self.memory = ReplayMemory(100000)
        self.optimiser = optim.Adam(self.model.parameters(), lr = 0.001)
        self.last_state = torch.Tensor(input_size).unsqueeze(0)
        self.last_action = 0
        self.last_reward = 0

    # Function which selects the right action at each time step
    def select_action(self, state):
        # 7 is the temperature parameter: scaling the Q-values up makes the
        # softmax distribution sharper, so the agent is more confident about
        # the action it plays
        with torch.no_grad():
            probs = f.softmax(self.model(state) * 7, dim = 1)
        action = probs.multinomial(num_samples = 1)
        return action.data[0, 0]
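
    # A minimal usage sketch (values assumed, not from the original app; in
    # the full project the map environment supplies the state):
    #   brain = Dqn(input_size = 5, nb_action = 3, gamma = 0.9)
    #   state = torch.Tensor([0.2, -0.2, 0.0, 0.5, 0.5]).unsqueeze(0)
    #   action = brain.select_action(state)  # returns 0, 1 or 2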