-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlayers.py
74 lines (54 loc) · 2.19 KB
/
layers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from typing import Callable, Tuple
from mathtypes import Vector, Matrix, Tensor
import numpy as np
class Layer:
    """Base class for network layers.

    Subclasses are expected to override :meth:`forward` and
    :meth:`backward`; the implementations here only raise
    ``NotImplementedError``.
    """

    def forward(self, input):
        """Compute the layer's output for *input*.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        # `self` is required so that calling this on an instance reaches the
        # NotImplementedError instead of failing with an arity TypeError.
        raise NotImplementedError

    def backward(self, input):
        """Propagate a gradient back through the layer.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError
class LinearLayer(Layer):
    """Fully connected layer computing ``W @ input + b``.

    ``W`` is whatever ``weight_init((output_size, input_size))`` returns and
    ``b`` starts as a zero vector of length ``output_size``.  ``forward``
    remembers its argument in ``self.input`` so that ``backward`` can build
    the weight gradient; ``backward`` stores ``W_grad`` and ``b_grad`` for a
    later ``gradient_step``.
    """

    def __init__(self, output_size: int, input_size: int, weight_init: Callable):
        """Create the parameters.

        Args:
            output_size: number of output units (rows requested for ``W``).
            input_size: number of input units (columns requested for ``W``).
            weight_init: callable taking a shape tuple and returning the
                initial weight array.
        """
        self.W = weight_init((output_size, input_size))
        self.b = np.zeros(output_size)

    def forward(self, input: Vector) -> Vector:
        """Return ``W @ input + b`` and cache *input* for ``backward``."""
        self.input = input
        return self.W @ input + self.b

    def backward(self, grad: Vector) -> Vector:
        """Store parameter gradients and return the gradient w.r.t. the input.

        Args:
            grad: gradient of the loss with respect to this layer's output.

        Returns:
            ``W.T @ grad``, the gradient with respect to the cached input.
        """
        self.b_grad = grad
        # dL/dW[i, j] = grad[i] * input[j]: an outer product of the two vectors.
        self.W_grad = np.outer(grad, self.input)
        return self.W.T @ grad

    def __repr__(self):
        return f"<LinearLayer {self.W.shape}>"

    def has_params(self):
        """Report that this layer has trainable parameters."""
        return True

    def gradient_step(self, learning_rate: float):
        """Update ``W`` and ``b`` in place by one gradient-descent step.

        Args:
            learning_rate: step size multiplied into the stored gradients.
        """
        self.W -= learning_rate * self.W_grad
        self.b -= learning_rate * self.b_grad
class ConvLayer(Layer):
    """Layer holding a bank of filters; the forward pass is not implemented yet."""

    def __init__(
        self,
        filter_count: int,
        filter_height: int,
        filter_width: int,
        weight_init: Callable[[Tuple[int, int, int]], Tensor],
    ):
        """Create the filter bank.

        Args:
            filter_count: number of filters.
            filter_height: height of each filter.
            filter_width: width of each filter.
            weight_init: callable that receives the single shape tuple
                ``(filter_count, filter_height, filter_width)`` and returns
                the initial filters.
        """
        self.filters = weight_init((filter_count, filter_height, filter_width))

    def forward(self, input: Tensor) -> Tensor:
        """Apply the filters to *input*.

        Raises:
            NotImplementedError: always; the convolution is not written yet.
        """
        # Fail loudly rather than silently handing ``None`` to the next layer.
        raise NotImplementedError("ConvLayer.forward is not implemented")
class ActivationLayer(Layer):
    """Parameter-free layer that applies ``self.f`` to its input.

    This class calls ``self.f`` and ``self.derivative_f`` but does not define
    them; a concrete subclass has to provide both.
    """

    def forward(self, input: Tensor) -> Tensor:
        """Cache *input* and return ``self.f(input)``."""
        self.input = input
        activated = self.f(input)
        return activated

    def backward(self, grad: Tensor) -> Tensor:
        """Scale *grad* by ``self.derivative_f`` evaluated at the cached input."""
        local_slope = self.derivative_f(self.input)
        return grad * local_slope

    def has_params(self):
        """Report that this layer has no trainable parameters."""
        return False
class SigmoidLayer(ActivationLayer):
    """Activation layer using the logistic sigmoid ``1 / (1 + exp(-x))``."""

    def __repr__(self):
        return "<SigmoidLayer>"

    def f(self, input: Tensor) -> Tensor:
        """Return the sigmoid of *input*."""
        return 1 / (1 + np.exp(-input))

    def derivative_f(self, input: Tensor) -> Tensor:
        """Return the sigmoid's derivative at *input*: ``s * (1 - s)``."""
        # Evaluate the sigmoid once and reuse it; calling self.f twice would
        # repeat the exponential for no benefit.
        s = self.f(input)
        return s * (1 - s)
def random(shape: Tuple[int, ...]) -> Tensor:
    """Return an array of the given *shape* drawn with ``np.random.randn``."""
    draw = np.random.randn
    # randn takes the dimensions as separate positional arguments.
    return draw(*shape)
def he(shape: Tuple[int, ...]) -> Tensor:
    """Return ``randn`` samples of *shape* scaled by ``sqrt(2 / fan_in)``.

    ``fan_in`` is the product of every dimension after the first, so a 2-D
    ``(fan_out, fan_in)`` shape uses ``shape[1]`` and a filter-bank shape
    such as ``(count, height, width)`` uses ``height * width``.

    Raises:
        IndexError: if *shape* has fewer than two dimensions, because no
            fan-in can be derived from it.
    """
    if len(shape) < 2:
        raise IndexError("he() needs a shape with at least two dimensions")
    # int() keeps this a plain Python integer, so a zero-sized dimension
    # still raises ZeroDivisionError rather than producing inf.
    fan_in = int(np.prod(shape[1:]))
    return np.random.randn(*shape) * np.sqrt(2 / fan_in)
def xavier(shape: Tuple[int, ...]) -> Tensor:
    """Return ``randn`` samples of *shape* scaled by ``sqrt(1 / fan_in)``.

    ``fan_in`` is the product of every dimension after the first, so a 2-D
    ``(fan_out, fan_in)`` shape uses ``shape[1]`` and a filter-bank shape
    such as ``(count, height, width)`` uses ``height * width``.

    Raises:
        IndexError: if *shape* has fewer than two dimensions, because no
            fan-in can be derived from it.
    """
    if len(shape) < 2:
        raise IndexError("xavier() needs a shape with at least two dimensions")
    # int() keeps this a plain Python integer, so a zero-sized dimension
    # still raises ZeroDivisionError rather than producing inf.
    fan_in = int(np.prod(shape[1:]))
    return np.random.randn(*shape) * np.sqrt(1 / fan_in)
def zero(shape: Tuple[int, ...]) -> Tensor:
    """Return an all-zero float64 array of the given *shape*."""
    # float64 is np.zeros' default dtype; it is only spelled out here.
    zeros = np.zeros(shape, dtype=np.float64)
    return zeros