Ewha-Euron · YoonseoChu · Dec 23, 2025
diff --git a/Week15_LDM.ipynb b/Week15_LDM.ipynb
@@ -0,0 +1,164 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "https://github.com/CompVis/latent-diffusion/blob/main/ldm/modules/diffusionmodules/model.py\n",
+        "\n",
+        "위 링크의 model.py 파일 중, 아래 부분을 필사하고 주석을 달아 ipynb 파일로 제출해 주시면 됩니다.\n",
+        "\n",
+        "1. ResnetBlock.forward (121 ~ 141줄)\n",
+        "2. Model.forward (316 ~ 356줄)\n"
+      ],
+      "metadata": {
+        "id": "jSYHgQUtu0_H"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Residual Block\n",
+        "\n",
+        "def forward(self, x, temb):\n",
+        "    \"\"\"Residual block: two norm -> nonlinearity -> conv stages added to a shortcut of x.\n",
+        "\n",
+        "    Args:\n",
+        "        x: input feature map; it also feeds the shortcut branch.\n",
+        "        temb: timestep embedding, or None to skip the embedding injection.\n",
+        "\n",
+        "    Returns:\n",
+        "        The shortcut branch plus the transformed branch.\n",
+        "    \"\"\"\n",
+        "    # Stage 1: normalize, activate, convolve.\n",
+        "    # (norm1 is presumably a GroupNorm and nonlinearity a SiLU/Swish;\n",
+        "    # neither definition is visible here - TODO confirm.)\n",
+        "    out = self.conv1(nonlinearity(self.norm1(x)))\n",
+        "\n",
+        "    # Inject the timestep embedding when one is given. The projection is\n",
+        "    # indexed as 2-D and gets two trailing singleton axes, so it broadcasts\n",
+        "    # over the remaining (presumably spatial) dims of the feature map.\n",
+        "    if temb is not None:\n",
+        "        temb_bias = self.temb_proj(nonlinearity(temb))\n",
+        "        out = out + temb_bias[:, :, None, None]\n",
+        "\n",
+        "    # Stage 2: normalize, activate, dropout, convolve.\n",
+        "    out = self.norm2(out)\n",
+        "    out = nonlinearity(out)\n",
+        "    out = self.conv2(self.dropout(out))\n",
+        "\n",
+        "    # Shortcut branch: identity when the channel counts agree, otherwise\n",
+        "    # x goes through one of the two shortcut layers so the sum below is valid.\n",
+        "    if self.in_channels == self.out_channels:\n",
+        "        shortcut = x\n",
+        "    elif self.use_conv_shortcut:\n",
+        "        shortcut = self.conv_shortcut(x)\n",
+        "    else:\n",
+        "        # nin = network-in-network; presumably a 1x1 convolution.\n",
+        "        shortcut = self.nin_shortcut(x)\n",
+        "\n",
+        "    # Residual connection.\n",
+        "    return shortcut + out\n"
+      ],
+      "metadata": {
+        "id": "ctZ_YhB_vwza"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Diffusion U-Net 전체 forward\n",
+        "\n",
+        "def forward(self, x, t=None, context=None):\n",
+        "    \"\"\"Run the U-Net: down path, middle block, up path with skips, then the output head.\n",
+        "\n",
+        "    Args:\n",
+        "        x: input tensor; channels are on dim 1 (see the concat below).\n",
+        "        t: timesteps; required when self.use_timestep is true.\n",
+        "        context: optional conditioning tensor concatenated to x along dim 1.\n",
+        "\n",
+        "    Returns:\n",
+        "        self.conv_out applied to the normalized, activated features that\n",
+        "        come out of the up path.\n",
+        "\n",
+        "    Raises:\n",
+        "        AssertionError: if self.use_timestep is true and t is None.\n",
+        "    \"\"\"\n",
+        "    # Conditioning by concatenation: context is stacked onto x along dim 1.\n",
+        "    # Assumes context matches x on every other dim - TODO confirm with callers.\n",
+        "    if context is not None:\n",
+        "        x = torch.cat((x, context), dim=1)\n",
+        "\n",
+        "    if self.use_timestep:\n",
+        "        # Timestep conditioning cannot work without t.\n",
+        "        assert t is not None\n",
+        "\n",
+        "        # Embed t (get_timestep_embedding is defined elsewhere; presumably a\n",
+        "        # sinusoidal embedding of width self.ch), then pass it through the\n",
+        "        # two dense layers with a nonlinearity in between.\n",
+        "        temb = get_timestep_embedding(t, self.ch)\n",
+        "        temb = self.temb.dense[0](temb)\n",
+        "        temb = nonlinearity(temb)\n",
+        "        temb = self.temb.dense[1](temb)\n",
+        "    else:\n",
+        "        temb = None\n",
+        "\n",
+        "    # Down path. hs keeps every intermediate feature so the up path can pop\n",
+        "    # them back off as skip connections.\n",
+        "    hs = [self.conv_in(x)]\n",
+        "\n",
+        "    for i_level in range(self.num_resolutions):\n",
+        "        for i_block in range(self.num_res_blocks):\n",
+        "            # Residual block conditioned on the timestep embedding.\n",
+        "            h = self.down[i_level].block[i_block](hs[-1], temb)\n",
+        "\n",
+        "            # Attention only exists on some levels; an empty list means none.\n",
+        "            if len(self.down[i_level].attn) > 0:\n",
+        "                h = self.down[i_level].attn[i_block](h)\n",
+        "\n",
+        "            hs.append(h)\n",
+        "\n",
+        "        # Every level except the last one ends with a downsample, which is\n",
+        "        # stored as a skip feature too.\n",
+        "        if i_level != self.num_resolutions - 1:\n",
+        "            hs.append(self.down[i_level].downsample(hs[-1]))\n",
+        "\n",
+        "    # Middle block on the last stored feature: block -> attention -> block.\n",
+        "    h = hs[-1]\n",
+        "    h = self.mid.block_1(h, temb)\n",
+        "    h = self.mid.attn_1(h)\n",
+        "    h = self.mid.block_2(h, temb)\n",
+        "\n",
+        "    # Up path. Each level runs num_res_blocks + 1 blocks because the down\n",
+        "    # path stored num_res_blocks + 1 features per level (conv_in or the\n",
+        "    # downsample output, plus one per block), so hs is empty at the end.\n",
+        "    for i_level in reversed(range(self.num_resolutions)):\n",
+        "        for i_block in range(self.num_res_blocks + 1):\n",
+        "            # Skip connection: concatenate the stored feature along dim 1.\n",
+        "            h = self.up[i_level].block[i_block](\n",
+        "                torch.cat([h, hs.pop()], dim=1), temb\n",
+        "            )\n",
+        "\n",
+        "            if len(self.up[i_level].attn) > 0:\n",
+        "                h = self.up[i_level].attn[i_block](h)\n",
+        "\n",
+        "        # Level 0 is the final level of the up path, so it is not upsampled.\n",
+        "        if i_level != 0:\n",
+        "            h = self.up[i_level].upsample(h)\n",
+        "\n",
+        "    # Output head, as in the upstream Model.forward: normalize, activate,\n",
+        "    # then the final convolution. Without it the function would return the\n",
+        "    # raw up-path features instead of the model output.\n",
+        "    h = self.norm_out(h)\n",
+        "    h = nonlinearity(h)\n",
+        "    h = self.conv_out(h)\n",
+        "    return h\n"
+      ],
+      "metadata": {
+        "id": "5tcN0lLnvs8y"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}