55 changes: 55 additions & 0 deletions build_dataset.py
@@ -0,0 +1,55 @@
# %%
# Abstracts away the dataset loading and unloading.
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.transforms import AutoAugment, AutoAugmentPolicy

CLASS_REPR = ('plane', 'car', 'bird', 'cat',
              'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

def collate_fn(data):
    """Pass this collation function to the DataLoader to batch images
    into a single tensor."""
    images = [d[0] for d in data]
    labels = [d[1] for d in data]
    return torch.stack(images, dim=0), torch.tensor(labels)

def get_dataset(batch_size, augment=True):
    # some dataset transforms, to introduce invariances into the model,
    # such as scale invariance, rotation invariance, etc.
    # finally, normalize the images for better training dynamics
    transform = [AutoAugment(AutoAugmentPolicy.CIFAR10)] if augment else []
    transform = transforms.Compose(
        transform + [transforms.ToTensor(),
                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # we don't want augmentations on the validation set
    transform_val = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    trainset = torchvision.datasets.CIFAR10(root='data', train=True,
                                            download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=2,
                                              collate_fn=collate_fn)

    # do not train on the test dataset, to gauge generalization error
    # instead of training error; the test dataset must not include any
    # training images
    testset = torchvision.datasets.CIFAR10(root='data', train=False,
                                           download=True, transform=transform_val)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=2,
                                             collate_fn=collate_fn)

    return trainloader, testloader


# a quick smoke test to see that it all works
if __name__ == '__main__':
    from matplotlib import pyplot as plt
    train_loader, test_loader = get_dataset(4, augment=False)
    images, labels = next(iter(train_loader))
    # un-normalize from [-1, 1] back to [0, 1] for display
    plt.imshow(images[0].permute(1, 2, 0) * 0.5 + 0.5)
    plt.show()
    print("class:", CLASS_REPR[labels[0]])
1 change: 1 addition & 0 deletions final_accuracy.txt
@@ -0,0 +1 @@
Accuracy of the network on the 10000 test images: 87.0 %
18 changes: 18 additions & 0 deletions losses.txt
@@ -0,0 +1,18 @@
[1, 500] train loss: 1.907
[1, 500] eval loss: 1.870
[1, 1000] train loss: 1.123
[1, 1000] eval loss: 1.019
[1, 1500] train loss: 0.806
[1, 1500] eval loss: 0.681
[2, 500] train loss: 0.652
[2, 500] eval loss: 0.552
[2, 1000] train loss: 0.600
[2, 1000] eval loss: 0.481
[2, 1500] train loss: 0.571
[2, 1500] eval loss: 0.452
[3, 500] train loss: 0.509
[3, 500] eval loss: 0.426
[3, 1000] train loss: 0.497
[3, 1000] eval loss: 0.395
[3, 1500] train loss: 0.484
[3, 1500] eval loss: 0.387
149 changes: 137 additions & 12 deletions main.py
@@ -1,17 +1,142 @@
"""
This is a starter file to get you going. You may also include other files if you feel it's necessary.
# %%
from itertools import cycle

Make sure to follow the code convention described here:
https://github.com/UWARG/computer-vision-python/blob/main/README.md#naming-and-typing-conventions
import numpy as np
import torch
from torch import nn
import torch.optim as optim

Hints:
* The internet is your friend! Don't be afraid to search for tutorials/intros/etc.
* We suggest using a convolutional neural network.
* TensorFlow Keras has the CIFAR-10 dataset as a module, so you don't need to manually download and unpack it.
"""
from matplotlib import pyplot as plt

# Import whatever libraries/modules you need
# I decided I liked convnext better, being a newer architecture
# released with weights
# although model.py still has my comments, I did not bother with
# modelv2.py, since the architectural changes are already listed
# in their paper
from modelv2 import convnext_small
from build_dataset import get_dataset, CLASS_REPR

import numpy as np
# parameters for training
epochs = 3
loss_file = 'losses.txt'
plot_file = 'plots.png'

batch_size = 32
lr = 1e-5
architecture = 'resnet18'
num_classes = len(CLASS_REPR)

# I want to fine-tune the model, since this results in higher
# total accuracy and is more energy efficient to train
model = convnext_small(pretrained=True, num_classes=10).cuda()

# build the dataset; see build_dataset.py for details
train_loader, test_loader = get_dataset(batch_size, augment=True)
# wrap the test_loader in an infinite cycle so StopIteration never occurs
# (note: itertools.cycle caches the first pass, so later passes replay
# the same batches)
test_loader_t = cycle(iter(test_loader))
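# A tiny hedged illustration (my addition): cycle remembers what it has
# seen, so the sequence repeats once the underlying iterator is exhausted.
_demo_cycle = cycle(iter([1, 2]))
assert [next(_demo_cycle) for _ in range(5)] == [1, 2, 1, 2, 1]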

# construct the loss: the negative log-likelihood loss used for
# classification tasks. It is the negative log of the softmax probability
# assigned to the true class. I don't implement label smoothing here,
# since it's overkill for this task.
criterion = nn.CrossEntropyLoss()
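# A tiny hedged sanity check (my illustration, not part of the training
# logic): for logits [2.0, 1.0, 0.1] and target class 0, the softmax
# probability is p(0) = e^2 / (e^2 + e^1 + e^0.1) ≈ 0.659, and the
# cross-entropy is -log(0.659) ≈ 0.417.
_demo_loss = criterion(torch.tensor([[2.0, 1.0, 0.1]]), torch.tensor([0]))
assert abs(float(_demo_loss) - 0.417) < 1e-3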

# use the Adam optimizer, which has fewer hyper-parameters to tune
optimizer = optim.Adam(model.parameters(), lr=lr)

# log the loss every log_step steps
log_step = 500
val_losses = []
train_losses = []
loss_msg = []
# start the training loop
# in a larger project, I would factor this out into a separate
# function or class

total_steps = 0
for epoch in range(epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    running_val_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # set the model to train mode, since some layers
        # (e.g. dropout) behave differently during training
        model.train()
        # get the inputs; data is a tuple of (inputs, labels)
        inputs, labels = data
        # move the data to the GPU for acceleration
        inputs = inputs.cuda()
        labels = labels.cuda()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # also compute the evaluation loss on one held-out batch
        with torch.no_grad():
            e_x, e_y = next(test_loader_t)
            model.eval()
            eval_loss = criterion(model(e_x.cuda()), e_y.cuda())
            # transfer the scalar back to the cpu
            running_val_loss += float(eval_loss)

        # print statistics
        running_loss += loss.item()
        if i % log_step == (log_step - 1):  # print every log_step mini-batches
            train_msg = f'[{epoch + 1}, {i + 1:5d}] train loss: {running_loss / log_step:.3f} \n'
            eval_msg = f'[{epoch + 1}, {i + 1:5d}] eval loss: {running_val_loss / log_step:.3f} \n'
            print(train_msg)
            print(eval_msg)

            loss_msg.append(train_msg)
            loss_msg.append(eval_msg)
            train_losses.append(running_loss / log_step)
            val_losses.append(running_val_loss / log_step)

            running_loss = 0.0
            running_val_loss = 0.0
        total_steps += 1


print('Finished Training')

# plot losses over time
plt.plot(np.array(train_losses), label='train-loss')
plt.plot(np.array(val_losses), label='val-loss')
plt.xlabel("steps")
plt.ylabel('mean-crossentropy-loss')
plt.title('mean-crossentropy-loss over steps')
plt.legend()
plt.savefig(plot_file)
plt.show()

# %%
# save losses in a file
with open(loss_file, 'w') as f:
    for t_msg in loss_msg:
        f.write(t_msg)

correct = 0
total = 0
# make sure the model is in eval mode for the final accuracy pass
model.eval()
# since we're not training, we don't need to calculate gradients for the outputs
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images.cuda())
        # the class with the highest energy is what we choose as the prediction
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += float((predicted == labels.cuda()).sum().item())


print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

# save the accuracy to a file
with open('final_accuracy.txt', 'w') as f:
    f.write(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
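
# %%
# A hedged extra (my addition, not in the original script): per-class
# accuracy, to see which CIFAR-10 classes the model confuses most.
class_correct = [0] * num_classes
class_total = [0] * num_classes
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images.cuda())
        _, predicted = torch.max(outputs, 1)
        for label, pred in zip(labels, predicted.cpu()):
            class_total[label] += 1
            class_correct[label] += int(pred == label)
for idx, name in enumerate(CLASS_REPR):
    print(f'accuracy for {name}: {100 * class_correct[idx] / class_total[idx]:.1f} %')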
124 changes: 124 additions & 0 deletions model.py
@@ -0,0 +1,124 @@
# %%
# Mostly copy-paste from https://github.com/facebookresearch/detr

import torch
import torch.nn.functional as F
import torchvision
from torch import nn
from torchvision.models._utils import IntermediateLayerGetter
import torchvision.models.resnet

# We freeze batch norm statistics for better fine-tuning.
class FrozenBatchNorm2d(torch.nn.Module):
    """
    BatchNorm2d where the batch statistics and the affine parameters are fixed.

    Copy-paste from torchvision.misc.ops with added eps before rsqrt,
    without which any models other than torchvision.models.resnet[18,34,50,101]
    produce NaNs.
    """

    def __init__(self, n):
        super(FrozenBatchNorm2d, self).__init__()
        self.register_buffer("weight", torch.ones(n))
        self.register_buffer("bias", torch.zeros(n))
        self.register_buffer("running_mean", torch.zeros(n))
        self.register_buffer("running_var", torch.ones(n))

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        num_batches_tracked_key = prefix + 'num_batches_tracked'
        if num_batches_tracked_key in state_dict:
            del state_dict[num_batches_tracked_key]

        super(FrozenBatchNorm2d, self)._load_from_state_dict(
            state_dict, prefix, local_metadata, strict,
            missing_keys, unexpected_keys, error_msgs)

    def forward(self, x):
        # move reshapes to the beginning
        # to make it fuser-friendly
        w = self.weight.reshape(1, -1, 1, 1)
        b = self.bias.reshape(1, -1, 1, 1)
        rv = self.running_var.reshape(1, -1, 1, 1)
        rm = self.running_mean.reshape(1, -1, 1, 1)
        eps = 1e-5
        # fold (x - rm) / sqrt(rv + eps) * w + b into x * scale + bias
        scale = w * (rv + eps).rsqrt()
        bias = b - rm * scale
        return x * scale + bias


# This class restitches the pretrained resnet models, removing the
# classification head in favor of a custom one. It was copied from an
# object detection model I have been working on, so some parameters are
# not really necessary for classification.
class BackboneBase(nn.Module):
    def __init__(self, num_classes: int, backbone: nn.Module, train_backbone: bool,
                 num_channels: int, return_interm_layers: bool):
        super().__init__()
        for name, parameter in backbone.named_parameters():
            if not train_backbone or ('layer2' not in name and 'layer3' not in name
                                      and 'layer4' not in name):
                parameter.requires_grad_(False)
        # the resnet family of models has 4 stages, with each stage shrinking
        # the spatial dimensions by a factor of 2 (strided convolution).
        # probably overkill for CIFAR-10, though.
        if return_interm_layers:
            return_layers = {"layer1": "0", "layer2": "1", "layer3": "2", "layer4": "3"}
        else:
            return_layers = {'layer4': "0"}
        # IntermediateLayerGetter hooks into the named child modules and
        # returns their outputs under the given keys
        self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
        self.num_channels = num_channels
        # finally, remove the spatial dimensions
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        # and project the activations onto the output classes
        self.out_proj = nn.Linear(num_channels, num_classes)

    def forward(self, input):
        xs = self.body(input)
        act = xs["0"]
        act = self.avg(act)
        # now act.shape == [batch_size, num_channels, 1, 1]
        act = act.flatten(1, 3)
        return self.out_proj(act)


class Backbone(BackboneBase):
    """ResNet backbone with frozen BatchNorm."""
    def __init__(self,
                 num_classes: int,
                 name: str,
                 train_backbone: bool,
                 return_interm_layers: bool,
                 dilation: bool):
        backbone = getattr(torchvision.models, name)(
            replace_stride_with_dilation=[False, False, dilation],
            pretrained=True, norm_layer=FrozenBatchNorm2d)
        num_channels = 512 if name in ('resnet18', 'resnet34') else 2048
        super().__init__(num_classes, backbone, train_backbone, num_channels, return_interm_layers)


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


if __name__ == '__main__':
    x = torch.rand(4, 3, 32, 32)
    model = Backbone(10, 'resnet18', True, False, False)
    y = model(x)
    print(y.shape)
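
    # Hedged extra checks (my additions, not in the original file).
    # The small baseline Net accepts the same CIFAR-10-sized input:
    baseline = Net()
    print(baseline(x).shape)  # expected: torch.Size([4, 10])
    # FrozenBatchNorm2d with default statistics should match
    # nn.BatchNorm2d in eval mode, since both compute
    # (x - running_mean) / sqrt(running_var + eps) * weight + bias:
    frozen = FrozenBatchNorm2d(3)
    reference = nn.BatchNorm2d(3).eval()
    assert torch.allclose(frozen(x), reference(x), atol=1e-5)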