####
# different variants of LeNet v5 & friends
import torch
import torch.nn as nn
import torch.nn.functional as F
###
# note: expects an input_size of 32x32 for the image!!
# if you use 28x28 for the image you MUST add padding=2 to conv1
# so that conv1's output shape stays 28x28!! (see LeNet5_28x28 below)
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        # Layer 1: Convolutional layer with 6 filters of size 5x5
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)   # Subsampling (average pooling)
        # Layer 2: Convolutional layer with 16 filters of size 5x5
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)   # Subsampling (average pooling)
        # Fully connected layers
        self.fc1 = nn.Linear(16 * 5 * 5, 120)   # Flattened output from previous layer
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)            # Output layer with 10 classes

    def forward(self, x):
        # Forward pass through the network
        x = self.pool1(torch.tanh(self.conv1(x)))   # Conv1 -> Tanh -> Pooling
        x = self.pool2(torch.tanh(self.conv2(x)))   # Conv2 -> Tanh -> Pooling
        x = x.view(-1, 16 * 5 * 5)                  # Flatten the output for fully connected layers
        x = torch.tanh(self.fc1(x))                 # FC1 -> Tanh
        x = torch.tanh(self.fc2(x))                 # FC2 -> Tanh
        x = self.fc3(x)                             # FC3 (Output layer)
        return x
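###
# quick shape-check sketch (optional helper, name is just illustrative) -
# traces why fc1 expects 16*5*5 = 400 features for a 32x32 input:
#   conv1 5x5 (no padding): 32 -> 28, pool1: 28 -> 14
#   conv2 5x5 (no padding): 14 -> 10, pool2: 10 -> 5  => 16 channels x 5 x 5
def _check_lenet5_shapes():
    model = LeNet5()
    x = torch.randn(1, 1, 32, 32)   # one dummy grayscale 32x32 image (batched!)
    print(model(x).shape)           # expected: torch.Size([1, 10])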
###############
# alt. version with nn.Sequential
# note - input x MUST always be batched, e.g. (1,1,32,32) NOT (1,32,32)
# nn.Flatten will NOT work on single (unbatched) inputs,
# resulting in
# RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x25 and 400x120)
class LeNet5Seq(nn.Module):
    def __init__(self):
        super(LeNet5Seq, self).__init__()
        self.layers = nn.Sequential(
            # Layer 1: Convolutional layer with 6 filters of size 5x5
            nn.Conv2d(1, 6, kernel_size=5), nn.Tanh(),
            nn.AvgPool2d(kernel_size=2, stride=2),   # Subsampling (avg pooling)
            # Layer 2: Convolutional layer with 16 filters of size 5x5
            nn.Conv2d(6, 16, kernel_size=5), nn.Tanh(),
            nn.AvgPool2d(kernel_size=2, stride=2),   # Subsampling (avg pooling)
            nn.Flatten(),
            # Fully connected layers
            nn.Linear(16 * 5 * 5, 120), nn.Tanh(),   # Flattened output from previous layer
            nn.Linear(120, 84), nn.Tanh(),
            nn.Linear(84, 10)                        # Output layer with 10 classes
        )

    def forward(self, x):
        # Forward pass through the network
        return self.layers( x )
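###
# small demo sketch for the batch-dim gotcha noted above (helper name is illustrative):
def _demo_lenet5seq_batch_dim():
    model = LeNet5Seq()
    single  = torch.randn(1, 32, 32)   # a single UNbatched image (1,32,32)
    batched = single.unsqueeze(0)      # add the batch dim -> (1,1,32,32)
    print(model(batched).shape)        # works: torch.Size([1, 10])
    # model(single) fails - nn.Flatten keeps dim 0, leaving a (16, 25) matrix
    # for the first nn.Linear and raising the mat1/mat2 RuntimeError above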
###
# what's different?
# 1) change activation from tanh to relu
# 2) change avg pooling to max pooling
class LeNet5v2(nn.Module):
    def __init__(self):
        super(LeNet5v2, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)          # 1 input channel (grayscale), 6 output channels
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)   # Subsampling (max pooling)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)         # 6 input channels, 16 output channels
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)   # Subsampling (max pooling)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)                        # Fully connected layer
        self.fc3 = nn.Linear(84, 10)                         # 10 output classes (for example, MNIST digits)

    def forward(self, x):
        # Apply layers with activation functions
        x = self.pool1(F.relu(self.conv1(x)))   # Convolution + ReLU
        x = self.pool2(F.relu(self.conv2(x)))   # Convolution + ReLU
        x = x.view(-1, 16 * 5 * 5)              # Flatten the output for fully connected layers
        x = F.relu(self.fc1(x))                 # Fully connected + ReLU
        x = F.relu(self.fc2(x))
        x = self.fc3(x)                         # Output layer (logits)
        return x
###
# what's different?
# - add a third conv2d layer WITHOUT pooling;
#   it takes the 16-channel 5x5 feature map and
#   outputs 120 filters with kernel 5x5 => (120,1,1)
# why? let's ask / prompt the a.i. for an answer / reason (see the shape sketch below)
class LeNet5v2b(nn.Module):
    def __init__(self):
        super(LeNet5v2b, self).__init__()
        # Define the layers
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)          # 1 input channel (grayscale), 6 output channels
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)   # Subsampling (max pooling)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)         # 6 input channels, 16 output channels
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)   # Subsampling (max pooling)
        self.conv3 = nn.Conv2d(16, 120, kernel_size=5)       # 16 input channels, 120 output channels
        self.fc1 = nn.Linear(120, 84)                        # Fully connected layer
        self.fc2 = nn.Linear(84, 10)                         # 10 output classes (for example, MNIST digits)

    def forward(self, x):
        # Apply layers with activation functions
        x = self.pool1(F.relu(self.conv1(x)))   # Convolution + ReLU
        x = self.pool2(F.relu(self.conv2(x)))   # Convolution + ReLU
        x = F.relu(self.conv3(x))               # Convolution + ReLU
        x = x.view(-1, 120)                     # Flatten the tensor
        x = F.relu(self.fc1(x))                 # Fully connected + ReLU
        x = self.fc2(x)                         # Output layer (logits)
        return x
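###
# shape sketch answering the "why?" above - conv3 slides a 5x5 kernel over the 5x5 map
# left by pool2, so each of the 120 filters yields a single value: (120,1,1).
# it therefore acts like a fully connected layer over the 16*5*5 features
# (the classic LeNet-5 "C5" layer); quick check (helper name is illustrative):
def _check_lenet5v2b_conv3():
    m = LeNet5v2b()
    x = torch.randn(1, 1, 32, 32)
    feats = m.pool2(F.relu(m.conv2(m.pool1(F.relu(m.conv1(x))))))
    print(feats.shape)                   # torch.Size([1, 16, 5, 5])
    print(F.relu(m.conv3(feats)).shape)  # torch.Size([1, 120, 1, 1])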
###############
# alt. version with padding=2 in conv1 for a 28x28 input_size
#
# note - input x MUST be batched, e.g. (1,1,28,28) NOT (1,28,28)
# nn.Flatten will NOT work on single (unbatched) inputs,
# resulting in
# RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x25 and 400x120)
class LeNet5_28x28(nn.Module):
    def __init__(self):
        super(LeNet5_28x28, self).__init__()
        self.layers = nn.Sequential(
            # Layer 1: Convolutional layer with 6 filters of size 5x5
            nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),   # Subsampling (max pooling)
            # Layer 2: Convolutional layer with 16 filters of size 5x5
            nn.Conv2d(6, 16, kernel_size=5), nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),   # Subsampling (max pooling)
            nn.Flatten(),
            # Fully connected layers
            nn.Linear(16 * 5 * 5, 120), nn.ReLU(),   # Flattened output from previous layer
            nn.Linear(120, 84), nn.ReLU(),
            nn.Linear(84, 10)                        # Output layer with 10 classes
        )

    def forward(self, x):
        # Forward pass through the network
        return self.layers( x )
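###
# shape sketch for the padding=2 trick - a 28x28 input keeps the same math as 32x32:
#   conv1 5x5 pad 2: 28 -> 28, pool: -> 14, conv2 5x5: -> 10, pool: -> 5  => 16*5*5 = 400
# quick check (helper name is illustrative):
def _check_lenet5_28x28():
    model = LeNet5_28x28()
    x = torch.randn(1, 1, 28, 28)   # one batched 28x28 (MNIST-style) dummy image
    print(model(x).shape)           # expected: torch.Size([1, 10])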
if __name__ == '__main__':
    # Print the model summaries
    from torchsummary import summary

    def print_model( model, input_size ):
        print( "="*20,
               f"\n= {model.__class__.__name__} input_size={input_size}" )
        print()
        print(model)
        num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print("Total number of trainable model parameters:", num_params)
        total_bytes = num_params * 4   # assume float32 (4 bytes)
        print( f"about {total_bytes / (1024 * 1024):.2f} MBs, {total_bytes / 1024:.2f} KBs" )
        # Print model summary for a batch of 1 grayscale image (1x32x32 size)
        ## or put the channel last e.g. (32, 32, 1) - why? why not?
        print( "\nsummary:")
        summary(model, input_size)

    print_model( LeNet5(),       input_size=(1,32,32) )
    print_model( LeNet5Seq(),    input_size=(1,32,32) )
    print_model( LeNet5v2(),     input_size=(1,32,32) )
    print_model( LeNet5v2b(),    input_size=(1,32,32) )
    print_model( LeNet5_28x28(), input_size=(1,28,28) )

    print("bye")