55 changes: 55 additions & 0 deletions build_dataset.py
@@ -0,0 +1,55 @@
# %%
# Abstracts away the dataset loading and unloading.
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.transforms import AutoAugment, AutoAugmentPolicy

CLASS_REPR = ('plane', 'car', 'bird', 'cat',
              'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

def collate_fn(data):
    """Pass this collation function to the DataLoader to batch images
    into a single tensor."""
    images = [d[0] for d in data]
    labels = [d[1] for d in data]
    return torch.stack(images, dim=0), torch.tensor(labels)

def get_dataset(batch_size, augment=True):
    # some dataset transforms, to introduce invariances into the model,
    # such as scale invariance, rotation invariance, etc.
    # finally, normalize the images for better training dynamics
    transform = [AutoAugment(AutoAugmentPolicy.CIFAR10)] if augment else []
    transform = transforms.Compose(
        transform + [transforms.ToTensor(),
                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # we don't want augmentations on the validation set
    transform_val = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    trainset = torchvision.datasets.CIFAR10(root='data', train=True,
                                            download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=2,
                                              collate_fn=collate_fn)

    # do not train on the test dataset, to gauge generalization error
    # instead of training error; the test dataset must not include any
    # training images
    testset = torchvision.datasets.CIFAR10(root='data', train=False,
                                           download=True, transform=transform_val)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=2,
                                             collate_fn=collate_fn)

    return trainloader, testloader


# a quick smoke test to see that it all works
if __name__ == '__main__':
    from matplotlib import pyplot as plt
    train_loader, test_loader = get_dataset(4, augment=False)
    images, labels = next(iter(train_loader))
    # un-normalize from [-1, 1] back to [0, 1] for display
    plt.imshow(images[0].permute(1, 2, 0) * 0.5 + 0.5)
    plt.show()
    print("class:", CLASS_REPR[labels[0]])
1 change: 1 addition & 0 deletions final_accuracy.txt
@@ -0,0 +1 @@
Accuracy of the network on the 10000 test images: 87.0 %
18 changes: 18 additions & 0 deletions losses.txt
@@ -0,0 +1,18 @@
[1, 500] train loss: 1.907
[1, 500] eval loss: 1.870
[1, 1000] train loss: 1.123
[1, 1000] eval loss: 1.019
[1, 1500] train loss: 0.806
[1, 1500] eval loss: 0.681
[2, 500] train loss: 0.652
[2, 500] eval loss: 0.552
[2, 1000] train loss: 0.600
[2, 1000] eval loss: 0.481
[2, 1500] train loss: 0.571
[2, 1500] eval loss: 0.452
[3, 500] train loss: 0.509
[3, 500] eval loss: 0.426
[3, 1000] train loss: 0.497
[3, 1000] eval loss: 0.395
[3, 1500] train loss: 0.484
[3, 1500] eval loss: 0.387
149 changes: 137 additions & 12 deletions main.py
@@ -1,17 +1,142 @@
"""
This is a starter file to get you going. You may also include other files if you feel it's necessary.
# %%
from itertools import cycle

Make sure to follow the code convention described here:
https://github.com/UWARG/computer-vision-python/blob/main/README.md#naming-and-typing-conventions
import numpy as np
import torch
from torch import nn
import torch.optim as optim

Hints:
* The internet is your friend! Don't be afraid to search for tutorials/intros/etc.
* We suggest using a convolutional neural network.
* TensorFlow Keras has the CIFAR-10 dataset as a module, so you don't need to manually download and unpack it.
"""
from matplotlib import pyplot as plt

# Import whatever libraries/modules you need
# I decided I liked convnext better, being a newer architecture
# released with weights
# although model.py still has my comments, I did not bother with
# modelv2.py, since the architectural changes are already listed
# in their paper
from modelv2 import convnext_small
from build_dataset import get_dataset, CLASS_REPR

import numpy as np
# parameters for training
epochs = 3
loss_file = 'losses.txt'
plot_file = 'plots.png'

batch_size = 32
lr = 1e-5
architecture = 'resnet18'
num_classes = len(CLASS_REPR)

# I want to fine-tune the model, since this results in higher
# total accuracy and is more energy efficient to train
model = convnext_small(pretrained=True, num_classes=10).cuda()

# build the dataset; see build_dataset.py for details
train_loader, test_loader = get_dataset(batch_size, augment=True)
# wrap the test_loader in an infinite cycle so StopIteration never occurs
# (note: itertools.cycle caches the first pass, so later passes replay
# the same batches)
test_loader_t = cycle(iter(test_loader))
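# A tiny hedged illustration (my addition): cycle remembers what it has
# seen, so the sequence repeats once the underlying iterator is exhausted.
_demo_cycle = cycle(iter([1, 2]))
assert [next(_demo_cycle) for _ in range(5)] == [1, 2, 1, 2, 1]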

# construct the loss: the negative log-likelihood loss used for
# classification tasks. It is the negative log of the softmax probability
# assigned to the true class. I don't implement label smoothing here,
# since it's overkill for this task.
criterion = nn.CrossEntropyLoss()
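# A tiny hedged sanity check (my illustration, not part of the training
# logic): for logits [2.0, 1.0, 0.1] and target class 0, the softmax
# probability is p(0) = e^2 / (e^2 + e^1 + e^0.1) ≈ 0.659, and the
# cross-entropy is -log(0.659) ≈ 0.417.
_demo_loss = criterion(torch.tensor([[2.0, 1.0, 0.1]]), torch.tensor([0]))
assert abs(float(_demo_loss) - 0.417) < 1e-3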

# use the Adam optimizer, which has fewer hyper-parameters to tune
optimizer = optim.Adam(model.parameters(), lr=lr)

# log the loss every log_step steps
log_step = 500
val_losses = []
train_losses = []
loss_msg = []
# start the training loop
# in a larger project, I would factor this out into a separate
# function or class

total_steps = 0
for epoch in range(epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    running_val_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # set the model to train mode, since some layers
        # (e.g. dropout) behave differently during training
        model.train()
        # get the inputs; data is a tuple of (inputs, labels)
        inputs, labels = data
        # move the data to the GPU for acceleration
        inputs = inputs.cuda()
        labels = labels.cuda()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # also compute the evaluation loss on one held-out batch
        with torch.no_grad():
            e_x, e_y = next(test_loader_t)
            model.eval()
            eval_loss = criterion(model(e_x.cuda()), e_y.cuda())
            # transfer the scalar back to the cpu
            running_val_loss += float(eval_loss)

        # print statistics
        running_loss += loss.item()
        if i % log_step == (log_step - 1):  # print every log_step mini-batches
            train_msg = f'[{epoch + 1}, {i + 1:5d}] train loss: {running_loss / log_step:.3f} \n'
            eval_msg = f'[{epoch + 1}, {i + 1:5d}] eval loss: {running_val_loss / log_step:.3f} \n'
            print(train_msg)
            print(eval_msg)

            loss_msg.append(train_msg)
            loss_msg.append(eval_msg)
            train_losses.append(running_loss / log_step)
            val_losses.append(running_val_loss / log_step)

            running_loss = 0.0
            running_val_loss = 0.0
        total_steps += 1


print('Finished Training')

# plot losses over time
plt.plot(np.array(train_losses), label='train-loss')
plt.plot(np.array(val_losses), label='val-loss')
plt.xlabel("steps")
plt.ylabel('mean-crossentropy-loss')
plt.title('mean-crossentropy-loss over steps')
plt.legend()
plt.savefig(plot_file)
plt.show()

# %%
# save losses in a file
with open(loss_file, 'w') as f:
    for t_msg in loss_msg:
        f.write(t_msg)

correct = 0
total = 0
# make sure the model is in eval mode for the final accuracy pass
model.eval()
# since we're not training, we don't need to calculate gradients for the outputs
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images.cuda())
        # the class with the highest energy is what we choose as the prediction
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += float((predicted == labels.cuda()).sum().item())


print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

# save the accuracy to a file
with open('final_accuracy.txt', 'w') as f:
    f.write(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
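
# %%
# A hedged extra (my addition, not in the original script): per-class
# accuracy, to see which CIFAR-10 classes the model confuses most.
class_correct = [0] * num_classes
class_total = [0] * num_classes
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images.cuda())
        _, predicted = torch.max(outputs, 1)
        for label, pred in zip(labels, predicted.cpu()):
            class_total[label] += 1
            class_correct[label] += int(pred == label)
for idx, name in enumerate(CLASS_REPR):
    print(f'accuracy for {name}: {100 * class_correct[idx] / class_total[idx]:.1f} %')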
124 changes: 124 additions & 0 deletions model.py
@@ -0,0 +1,124 @@
# %%
# Mostly copy-paste from https://github.com/facebookresearch/detr

import torch
import torch.nn.functional as F
import torchvision
from torch import nn
from torchvision.models._utils import IntermediateLayerGetter
import torchvision.models.resnet

# We freeze batch norm statistics for better fine-tuning.
class FrozenBatchNorm2d(torch.nn.Module):
    """
    BatchNorm2d where the batch statistics and the affine parameters are fixed.

    Copy-paste from torchvision.misc.ops with added eps before rsqrt,
    without which any models other than torchvision.models.resnet[18,34,50,101]
    produce NaNs.
    """

    def __init__(self, n):
        super(FrozenBatchNorm2d, self).__init__()
        self.register_buffer("weight", torch.ones(n))
        self.register_buffer("bias", torch.zeros(n))
        self.register_buffer("running_mean", torch.zeros(n))
        self.register_buffer("running_var", torch.ones(n))

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        num_batches_tracked_key = prefix + 'num_batches_tracked'
        if num_batches_tracked_key in state_dict:
            del state_dict[num_batches_tracked_key]

        super(FrozenBatchNorm2d, self)._load_from_state_dict(
            state_dict, prefix, local_metadata, strict,
            missing_keys, unexpected_keys, error_msgs)

    def forward(self, x):
        # move reshapes to the beginning
        # to make it fuser-friendly
        w = self.weight.reshape(1, -1, 1, 1)
        b = self.bias.reshape(1, -1, 1, 1)
        rv = self.running_var.reshape(1, -1, 1, 1)
        rm = self.running_mean.reshape(1, -1, 1, 1)
        eps = 1e-5
        # fold (x - rm) / sqrt(rv + eps) * w + b into x * scale + bias
        scale = w * (rv + eps).rsqrt()
        bias = b - rm * scale
        return x * scale + bias


# This class restitches the pretrained resnet models, removing the
# classification head in favor of a custom one. It was copied from an
# object detection model I have been working on, so some parameters are
# not really necessary for classification.
class BackboneBase(nn.Module):
    def __init__(self, num_classes: int, backbone: nn.Module, train_backbone: bool,
                 num_channels: int, return_interm_layers: bool):
        super().__init__()
        for name, parameter in backbone.named_parameters():
            if not train_backbone or ('layer2' not in name and 'layer3' not in name
                                      and 'layer4' not in name):
                parameter.requires_grad_(False)
        # the resnet family of models has 4 stages, with each stage shrinking
        # the spatial dimensions by a factor of 2 (strided convolution).
        # probably overkill for CIFAR-10, though.
        if return_interm_layers:
            return_layers = {"layer1": "0", "layer2": "1", "layer3": "2", "layer4": "3"}
        else:
            return_layers = {'layer4': "0"}
        # IntermediateLayerGetter hooks into the named child modules and
        # returns their outputs under the given keys
        self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
        self.num_channels = num_channels
        # finally, remove the spatial dimensions
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        # and project the activations onto the output classes
        self.out_proj = nn.Linear(num_channels, num_classes)

    def forward(self, input):
        xs = self.body(input)
        act = xs["0"]
        act = self.avg(act)
        # now act.shape == [batch_size, num_channels, 1, 1]
        act = act.flatten(1, 3)
        return self.out_proj(act)


class Backbone(BackboneBase):
    """ResNet backbone with frozen BatchNorm."""
    def __init__(self,
                 num_classes: int,
                 name: str,
                 train_backbone: bool,
                 return_interm_layers: bool,
                 dilation: bool):
        backbone = getattr(torchvision.models, name)(
            replace_stride_with_dilation=[False, False, dilation],
            pretrained=True, norm_layer=FrozenBatchNorm2d)
        num_channels = 512 if name in ('resnet18', 'resnet34') else 2048
        super().__init__(num_classes, backbone, train_backbone, num_channels, return_interm_layers)


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


if __name__ == '__main__':
    x = torch.rand(4, 3, 32, 32)
    model = Backbone(10, 'resnet18', True, False, False)
    y = model(x)
    print(y.shape)
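
    # Hedged extra checks (my additions, not in the original file).
    # The small baseline Net accepts the same CIFAR-10-sized input:
    baseline = Net()
    print(baseline(x).shape)  # expected: torch.Size([4, 10])
    # FrozenBatchNorm2d with default statistics should match
    # nn.BatchNorm2d in eval mode, since both compute
    # (x - running_mean) / sqrt(running_var + eps) * weight + bias:
    frozen = FrozenBatchNorm2d(3)
    reference = nn.BatchNorm2d(3).eval()
    assert torch.allclose(frozen(x), reference(x), atol=1e-5)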