layers.py
import numpy as np
from scipy.special import expit, logsumexp


class ReLU:
    """ReLU layer simply applies elementwise rectified linear unit to all inputs"""

    def __init__(self):
        self.params = []  # ReLU has no parameters

    def forward(self, input):
        """Input shape: (batch, num_units)"""
        self.mask = input > 0
        return np.where(self.mask, input, 0)

    def backward(self, grad_output):
        """
        Compute gradient of loss w.r.t. ReLU input
        grad_output shape: (batch, num_units)
        output 1 shape: (batch, num_units)
        output 2: []
        """
        # Derivative is 1 where the input was positive and 0 elsewhere
        # (the subgradient at exactly 0 is taken to be 0).
        return self.mask * grad_output, []

    def __repr__(self):
        return 'ReLU()'


class Tanh:
    """
    tanh(y) = (e^y - e^(-y)) / (e^y + e^(-y))
    """

    def __init__(self):
        self.params = []  # Tanh has no parameters

    def forward(self, input):
        """
        Apply elementwise Tanh to [batch, num_units] matrix
        """
        self.input = input.copy()
        return np.tanh(input)

    def backward(self, grad_output):
        """
        Compute gradient of loss w.r.t. Tanh input
        grad_output shape: [batch, num_units]
        output 1 shape: [batch, num_units]
        output 2: []
        """
        # d tanh(x)/dx = 1 - tanh(x)^2 = 4 * sigmoid(2x) * (1 - sigmoid(2x))
        s = expit(2 * self.input)
        return 4 * s * (1 - s) * grad_output, []

    def __repr__(self):
        return 'Tanh()'
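
# Derivation note (added here for clarity; not in the original file): writing
# s = expit(2x) = 1 / (1 + e^(-2x)), we have tanh(x) = 2*s - 1, so
#     d tanh(x)/dx = 1 - tanh(x)**2 = 1 - (2*s - 1)**2 = 4*s*(1 - s),
# which is the expression used in Tanh.backward above.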


class Sigmoid:
    """
    sigmoid(y) = 1 / (1 + e^(-y))
    """

    def __init__(self):
        self.params = []  # Sigmoid has no parameters

    def forward(self, input):
        """
        Apply elementwise Sigmoid to [batch, num_units] matrix
        """
        self.input = input.copy()
        return expit(input)

    def backward(self, grad_output):
        """
        Compute gradient of loss w.r.t. Sigmoid input
        grad_output shape: [batch, num_units]
        output 1 shape: [batch, num_units]
        output 2: []
        """
        # d sigmoid(x)/dx = sigmoid(x) * (1 - sigmoid(x))
        s = expit(self.input)
        return s * (1 - s) * grad_output, []

    def __repr__(self):
        return 'Sigmoid()'


class Dense:
    def __init__(self, input_units, output_units):
        """
        A dense layer is a layer which performs a learned affine transformation:
        f(x) = W x + b
        """
        # initialize weights with small random numbers from normal distribution
        self.weights = np.random.randn(output_units, input_units) * 0.01
        self.biases = np.zeros(output_units)
        self.params = [self.weights, self.biases]

    def forward(self, input):
        """
        Perform an affine transformation:
        f(x) = W x + b
        input shape: (batch, input_units)
        output shape: (batch, output_units)
        """
        self.input = input.copy()
        return input @ self.weights.T + self.biases[None, :]

    def backward(self, grad_output):
        """
        Compute gradients
        grad_output shape: (batch, output_units)
        output shapes: (batch, input_units), (num_params,)
        """
        grad_input = grad_output @ self.weights
        grad_weights = grad_output.T @ self.input
        grad_biases = grad_output.sum(axis=0)
        # Parameter gradients are returned as a single flat vector:
        # raveled weight gradients followed by the bias gradients.
        return grad_input, [np.r_[grad_weights.ravel(), grad_biases]]

    def __repr__(self):
        return f'Dense({self.weights.shape[1]}, {self.weights.shape[0]})'
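
# Shape bookkeeping for Dense (added note, not in the original file): with
# weights W of shape (output_units, input_units), input x of shape
# (batch, input_units) and upstream gradient g of shape (batch, output_units):
#     forward:   x @ W.T + b                  -> (batch, output_units)
#     backward:  grad_input   = g @ W         -> (batch, input_units)
#                grad_weights = g.T @ x       -> (output_units, input_units)
#                grad_biases  = g.sum(axis=0) -> (output_units,)
# The weight and bias gradients are then raveled into one flat (num_params,)
# vector, matching the shape stated in the backward docstring.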


class LogSoftmax:
    def __init__(self, n_in):
        self.params = []
        self.n_in = n_in

    def forward(self, input):
        """
        Applies softmax to each row and then applies component-wise log
        Input shape: (batch, num_units)
        Output shape: (batch, num_units)
        """
        # Computed stably as input - logsumexp(input) per row.
        self.output = input - logsumexp(input, axis=1)[:, None]
        return self.output

    def backward(self, grad_output):
        """
        Input shape: (batch, num_units)
        Output shape: (batch, num_units), []
        """
        # dL/dx_i = g_i - softmax(x)_i * sum_j g_j, where softmax(x) = exp(self.output)
        grad_input = (grad_output
                      - grad_output.sum(axis=1, keepdims=True) * np.exp(self.output))
        return grad_input, []

    def __repr__(self):
        return f'LogSoftmax({self.n_in})'
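

# Minimal self-test sketch (added; not part of the original module). It stacks
# the layers into a small network, runs a forward and backward pass, and checks
# Tanh.backward against a central finite-difference estimate. The layer sizes
# and variable names below are illustrative assumptions, not fixed by the file.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    x = rng.normal(size=(5, 4))

    # Forward pass through a small stack of layers.
    network = [Dense(4, 8), ReLU(), Dense(8, 3), LogSoftmax(3)]
    out = x
    for layer in network:
        out = layer.forward(out)
    print("forward output shape:", out.shape)  # expected: (5, 3)

    # Backward pass with a dummy upstream gradient of ones.
    grad = np.ones_like(out)
    for layer in reversed(network):
        grad, _ = layer.backward(grad)
    print("input gradient shape:", grad.shape)  # expected: (5, 4)

    # Finite-difference check of the Tanh derivative.
    tanh = Tanh()
    tanh.forward(x)  # caches the input inside the layer
    analytic, _ = tanh.backward(np.ones_like(x))
    eps = 1e-6
    numeric = (np.tanh(x + eps) - np.tanh(x - eps)) / (2 * eps)
    print("max |analytic - numeric| for Tanh:", np.abs(analytic - numeric).max())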