diff --git a/MNIST_VAE.ipynb b/MNIST_VAE.ipynb new file mode 100644 index 0000000..55f2167 --- /dev/null +++ b/MNIST_VAE.ipynb @@ -0,0 +1,236 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "V100", + "authorship_tag": "ABX9TyNFv7cc7urUaP8u9gPrfY3s", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "from torchvision import datasets, transforms\n", + "from torch.autograd import Variable\n", + "from torchvision.utils import save_image\n", + "\n", + "class VAE(nn.Module):\n", + " def __init__(self, x_dim, h_dim1, h_dim2, z_dim):\n", + " super(VAE, self).__init__()\n", + "\n", + " # encoder part\n", + " self.fc1 = nn.Linear(x_dim, h_dim1)\n", + " self.fc2 = nn.Linear(h_dim1, h_dim2)\n", + " self.fc31 = nn.Linear(h_dim2, z_dim)\n", + " self.fc32 = nn.Linear(h_dim2, z_dim)\n", + " # decoder part\n", + " self.fc4 = nn.Linear(z_dim, h_dim2)\n", + " self.fc5 = nn.Linear(h_dim2, h_dim1)\n", + " self.fc6 = nn.Linear(h_dim1, x_dim)\n", + "\n", + " def encoder(self, x):\n", + " h = F.relu(self.fc1(x))\n", + " h = F.relu(self.fc2(h))\n", + " return self.fc31(h), self.fc32(h) # mu, log_var\n", + "\n", + " def sampling(self, mu, log_var):\n", + " std = torch.exp(0.5*log_var)\n", + " eps = torch.randn_like(std)\n", + " return eps.mul(std).add_(mu) # return z sample\n", + "\n", + " def decoder(self, z):\n", + " h = F.relu(self.fc4(z))\n", + " h = F.relu(self.fc5(h))\n", + " return F.sigmoid(self.fc6(h))\n", + "\n", + " def forward(self, x):\n", + " mu, log_var = self.encoder(x.view(-1, 784))\n", + " z = self.sampling(mu, log_var)\n", + " return self.decoder(z), mu, log_var\n", + "\n", + "# build model\n", + "vae = VAE(x_dim=784, h_dim1= 512, h_dim2=256, z_dim=2)\n", + "if torch.cuda.is_available():\n", + " vae.cuda()" + ], + "metadata": { + "id": "aUrAwRsuczhT" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "optimizer = optim.Adam(vae.parameters())\n", + "# return reconstruction error + KL divergence losses\n", + "def loss_function(recon_x, x, mu, log_var):\n", + " BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')\n", + " KLD = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())\n", + " return BCE + KLD" + ], + "metadata": { + "id": "wjm2uRBuCwRw" + }, + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def train(epoch):\n", + " vae.train()\n", + " train_loss = 0\n", + " for batch_idx, (data, _) in enumerate(train_loader):\n", + " data = data.cuda()\n", + " optimizer.zero_grad()\n", + "\n", + " recon_batch, mu, log_var = vae(data)\n", + " loss = loss_function(recon_batch, data, mu, log_var)\n", + "\n", + " loss.backward()\n", + " train_loss += loss.item()\n", + " optimizer.step()\n", + "\n", + " if batch_idx % 100 == 0:\n", + " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", + " epoch, batch_idx * len(data), len(train_loader.dataset),\n", + " 100. * batch_idx / len(train_loader), loss.item() / len(data)))\n", + " print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset)))" + ], + "metadata": { + "id": "uEW-KyH9CyRl" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def test():\n", + " vae.eval()\n", + " test_loss= 0\n", + " with torch.no_grad():\n", + " for data, _ in test_loader:\n", + " data = data.cuda()\n", + " recon, mu, log_var = vae(data)\n", + "\n", + " # sum up batch loss\n", + " test_loss += loss_function(recon, data, mu, log_var).item()\n", + "\n", + " test_loss /= len(test_loader.dataset)\n", + " print('====> Test set loss: {:.4f}'.format(test_loss))" + ], + "metadata": { + "id": "KzRp7-ejC3-c" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "bs = 100\n", + "# MNIST Dataset\n", + "train_dataset = datasets.MNIST(root='./mnist_data/', train=True, transform=transforms.ToTensor(), download=True)\n", + "test_dataset = datasets.MNIST(root='./mnist_data/', train=False, transform=transforms.ToTensor(), download=False)\n", + "\n", + "# Data Loader (Input Pipeline)\n", + "train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=bs, shuffle=True)\n", + "test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=bs, shuffle=False)\n", + "\n", + "for epoch in range(1, 5):\n", + " train(epoch)\n", + " test()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZZw1jZc6C5ej", + "outputId": "6313b1c8-1b6d-4ab3-f6aa-a96b893bcbe4" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Train Epoch: 1 [0/60000 (0%)]\tLoss: 132.513184\n", + "Train Epoch: 1 [10000/60000 (17%)]\tLoss: 145.551279\n", + "Train Epoch: 1 [20000/60000 (33%)]\tLoss: 143.799668\n", + "Train Epoch: 1 [30000/60000 (50%)]\tLoss: 144.562568\n", + "Train Epoch: 1 [40000/60000 (67%)]\tLoss: 133.205791\n", + "Train Epoch: 1 [50000/60000 (83%)]\tLoss: 134.887256\n", + "====> Epoch: 1 Average loss: 141.1480\n", + "====> Test set loss: 142.4788\n", + "Train Epoch: 2 [0/60000 (0%)]\tLoss: 141.568389\n", + "Train Epoch: 2 [10000/60000 (17%)]\tLoss: 131.814111\n", + "Train Epoch: 2 [20000/60000 (33%)]\tLoss: 131.404941\n", + "Train Epoch: 2 [30000/60000 (50%)]\tLoss: 135.604023\n", + "Train Epoch: 2 [40000/60000 (67%)]\tLoss: 138.471006\n", + "Train Epoch: 2 [50000/60000 (83%)]\tLoss: 143.901602\n", + "====> Epoch: 2 Average loss: 140.6990\n", + "====> Test set loss: 141.7708\n", + "Train Epoch: 3 [0/60000 (0%)]\tLoss: 151.875791\n", + "Train Epoch: 3 [10000/60000 (17%)]\tLoss: 143.209922\n", + "Train Epoch: 3 [20000/60000 (33%)]\tLoss: 136.077578\n", + "Train Epoch: 3 [30000/60000 (50%)]\tLoss: 137.678623\n", + "Train Epoch: 3 [40000/60000 (67%)]\tLoss: 132.750098\n", + "Train Epoch: 3 [50000/60000 (83%)]\tLoss: 138.461543\n", + "====> Epoch: 3 Average loss: 140.3149\n", + "====> Test set loss: 141.3095\n", + "Train Epoch: 4 [0/60000 (0%)]\tLoss: 141.891445\n", + "Train Epoch: 4 [10000/60000 (17%)]\tLoss: 136.609824\n", + "Train Epoch: 4 [20000/60000 (33%)]\tLoss: 138.829805\n", + "Train Epoch: 4 [30000/60000 (50%)]\tLoss: 139.648555\n", + "Train Epoch: 4 [40000/60000 (67%)]\tLoss: 140.374023\n", + "Train Epoch: 4 [50000/60000 (83%)]\tLoss: 132.872568\n", + "====> Epoch: 4 Average loss: 140.0112\n", + "====> Test set loss: 141.5408\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "with torch.no_grad():\n", + " z = torch.randn(64, 2).cuda()\n", + " sample = vae.decoder(z).cuda()\n", + "\n", + " save_image(sample.view(64, 1, 28, 28), 'a.png')" + ], + "metadata": { + "id": "11Vv5N75g1ND" + }, + "execution_count": 20, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/detection/backbone.py b/detection/backbone.py index 77b0069..cbf0a7a 100644 --- a/detection/backbone.py +++ b/detection/backbone.py @@ -8,9 +8,8 @@ import itertools from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD -from timm.models.layers import DropPath, trunc_normal_ +from timm.models.layers import DropPath, trunc_normal_, to_2tuple from timm.models.registry import register_model -from timm.models.layers.helpers import to_2tuple try: from mmdet.models.builder import BACKBONES as det_BACKBONES diff --git a/detection/backbonev2.py b/detection/backbonev2.py index 7a35cb4..f11669e 100644 --- a/detection/backbonev2.py +++ b/detection/backbonev2.py @@ -8,9 +8,8 @@ import itertools from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD -from timm.models.layers import DropPath, trunc_normal_ +from timm.models.layers import DropPath, trunc_normal_, to_2tuple from timm.models.registry import register_model -from timm.models.layers.helpers import to_2tuple try: from mmdet.models.builder import BACKBONES as det_BACKBONES diff --git a/models/efficientformer.py b/models/efficientformer.py index a379823..342c9b3 100644 --- a/models/efficientformer.py +++ b/models/efficientformer.py @@ -10,9 +10,8 @@ import itertools from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD -from timm.models.layers import DropPath, trunc_normal_ +from timm.models.layers import DropPath, trunc_normal_, to_2tuple from timm.models.registry import register_model -from timm.models.layers.helpers import to_2tuple EfficientFormer_width = { 'l1': [48, 96, 224, 448], diff --git a/models/efficientformer_v2.py b/models/efficientformer_v2.py index 48234a4..d2899e2 100644 --- a/models/efficientformer_v2.py +++ b/models/efficientformer_v2.py @@ -11,9 +11,8 @@ import itertools from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD -from timm.models.layers import DropPath, trunc_normal_ +from timm.models.layers import DropPath, trunc_normal_, to_2tuple from timm.models.registry import register_model -from timm.models.layers.helpers import to_2tuple EfficientFormer_width = { 'L': [40, 80, 192, 384], # 26m 83.3% 6attn diff --git a/segmentation/backbone.py b/segmentation/backbone.py index 3a3648e..f61323d 100644 --- a/segmentation/backbone.py +++ b/segmentation/backbone.py @@ -11,9 +11,8 @@ import itertools from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD -from timm.models.layers import DropPath, trunc_normal_ +from timm.models.layers import DropPath, trunc_normal_, to_2tuple from timm.models.registry import register_model -from timm.models.layers.helpers import to_2tuple try: from mmseg.models.builder import BACKBONES as seg_BACKBONES diff --git a/segmentation/backbonev2.py b/segmentation/backbonev2.py index d4a91cc..bed22f3 100644 --- a/segmentation/backbonev2.py +++ b/segmentation/backbonev2.py @@ -8,9 +8,8 @@ import itertools from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD -from timm.models.layers import DropPath, trunc_normal_ +from timm.models.layers import DropPath, trunc_normal_, to_2tuple from timm.models.registry import register_model -from timm.models.layers.helpers import to_2tuple try: from mmseg.models.builder import BACKBONES as seg_BACKBONES