178 changes: 178 additions & 0 deletions Week15_복습과제_김지은.ipynb
@@ -0,0 +1,178 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"### **Week 15 복습과제**\n",
"\n",
"https://github.com/CompVis/latent-diffusion/blob/main/ldm/modules/diffusionmodules/model.py"
],
"metadata": {
"id": "fJCHLiImgVtD"
}
},
{
"cell_type": "markdown",
"source": [
"`ResnetBlock.forward`"
],
"metadata": {
"id": "sFD35zGig3aM"
}
},
{
"cell_type": "code",
"source": [
"def forward(self, x, temb):\n",
" # x: 현재 레이어의 입력 feature map (latent space에서의 representation)\n",
" # temb: diffusion timestep t를 embedding한 벡터, 논문에서 말하는 \"time conditioning\"에 해당\n",
"\n",
" h = x\n",
"\n",
" # 첫 번째 normalization: latent feature를 안정화하여 diffusion step마다 분포 변화에 강건하게 만듦\n",
" h = self.norm1(h)\n",
"\n",
" # SiLU / Swish 계열 nonlinearity - score-based diffusion model에서 표준적으로 사용\n",
" h = nonlinearity(h)\n",
"\n",
" # 첫 번째 convolution: local spatial feature 추출 (UNet backbone의 기본 연산)\n",
" h = self.conv1(h)\n",
"\n",
" if temb is not None:\n",
" # timestep embedding을 projection하여 feature map에 더함\n",
" h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None]\n",
"\n",
" # 두 번째 normalization\n",
" h = self.norm2(h)\n",
"\n",
" # nonlinearity\n",
" h = nonlinearity(h)\n",
"\n",
" # dropout (diffusion 학습 시 과적합 방지)\n",
" h = self.dropout(h)\n",
"\n",
" # 두 번째 convolution: residual block의 main transformation\n",
" h = self.conv2(h)\n",
"\n",
" # shortcut (residual connection) 경로\n",
" # 입력과 출력 channel 수가 다를 경우 dimension matching 필요\n",
" if self.in_channels != self.out_channels:\n",
" if self.use_conv_shortcut:\n",
" # convolution shortcut\n",
" x = self.conv_shortcut(x)\n",
" else:\n",
" # nin (1x1 convolution) shortcut\n",
" x = self.nin_shortcut(x)\n",
"\n",
" # residual connection\n",
" return x + h"
],
"metadata": {
"id": "VGWSYvv3gf1R"
},
"execution_count": null,
"outputs": []
},
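{
"cell_type": "markdown",
"source": [
"The line `h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None]` injects the timestep embedding as a per-channel bias that is broadcast over the spatial dimensions. Below is a minimal sketch of that broadcast with made-up shapes (batch 2, 512-dim `temb`, 64 output channels, 8x8 feature map); a plain `nn.Linear` stands in for `self.temb_proj` and `F.silu` for `nonlinearity`."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
"# hypothetical shapes, for illustration only\n",
"B, temb_ch, out_ch, H, W = 2, 512, 64, 8, 8\n",
"\n",
"temb_proj = nn.Linear(temb_ch, out_ch)  # plays the role of self.temb_proj\n",
"h = torch.randn(B, out_ch, H, W)        # feature map after conv1\n",
"temb = torch.randn(B, temb_ch)          # timestep embedding\n",
"\n",
"# (B, out_ch) -> (B, out_ch, 1, 1), then broadcast-add over H and W:\n",
"# every spatial position of a channel receives the same timestep-dependent shift\n",
"h = h + temb_proj(F.silu(temb))[:, :, None, None]\n",
"print(h.shape)  # torch.Size([2, 64, 8, 8])"
],
"metadata": {},
"execution_count": null,
"outputs": []
},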
{
"cell_type": "markdown",
"source": [
"`Model.forward`"
],
"metadata": {
"id": "OcbCIshOg6ru"
}
},
{
"cell_type": "code",
"source": [
"def forward(self, x, t=None, context=None):\n",
" # x: 현재 diffusion step에서의 latent feature map\n",
" # t: diffusion timestep (정수 index)\n",
" # context: conditioning input (e.g., concat conditioning, spatial conditioning)\n",
"\n",
" # context가 주어지면 channel dimension으로 concat\n",
" if context is not None:\n",
" x = torch.cat((x, context), dim=1)\n",
"\n",
" # timestep conditioning 사용 여부\n",
" if self.use_timestep:\n",
" assert t is not None\n",
"\n",
" # diffusion timestep t를 sinusoidal embedding으로 변환\n",
" temb = get_timestep_embedding(t, self.ch)\n",
"\n",
" # MLP를 통해 timestep embedding을 네트워크 내부 차원으로 projection\n",
" temb = self.temb.dense[0](temb)\n",
" temb = nonlinearity(temb)\n",
" temb = self.temb.dense[1](temb)\n",
" else:\n",
" temb = None\n",
"\n",
" # 입력을 첫 convolution으로 latent feature로 변환\n",
" # hs: skip connection을 저장하기 위한 리스트\n",
" hs = [self.conv_in(x)]\n",
"\n",
" for i_level in range(self.num_resolutions):\n",
" for i_block in range(self.num_res_blocks):\n",
" # Residual block + timestep conditioning\n",
" h = self.down[i_level].block[i_block](hs[-1], temb)\n",
"\n",
" # 해당 resolution에서 attention을 사용하는 경우\n",
" if len(self.down[i_level].attn) > 0:\n",
" h = self.down[i_level].attn[i_block](h)\n",
"\n",
" # skip connection으로 저장\n",
" hs.append(h)\n",
"\n",
" # 마지막 resolution이 아니면 downsample 수행\n",
" if i_level != self.num_resolutions - 1:\n",
" hs.append(self.down[i_level].downsample(hs[-1]))\n",
"\n",
" # UNet의 가장 깊은 부분: 가장 global한 semantic representation\n",
" h = hs[-1]\n",
"\n",
" # residual block + timestep conditioning\n",
" h = self.mid.block_1(h, temb)\n",
"\n",
" # self-attention\n",
" h = self.mid.attn_1(h)\n",
"\n",
" # 두 번째 residual block\n",
" h = self.mid.block_2(h, temb)\n",
"\n",
" for i_level in reversed(range(self.num_resolutions)):\n",
" for i_block in range(self.num_res_blocks + 1):\n",
" # skip connection과 concat\n",
" h = self.up[i_level].block[i_block](\n",
" torch.cat([h, hs.pop()], dim=1), temb)\n",
"\n",
" # 해당 resolution에서 attention 사용\n",
" if len(self.up[i_level].attn) > 0:\n",
" h = self.up[i_level].attn[i_block](h)\n",
"\n",
" # 마지막 resolution이 아니면 upsample\n",
" if i_level != 0:\n",
" h = self.up[i_level].upsample(h)"
],
"metadata": {
"id": "6kLb35hzgxG-"
},
"execution_count": null,
"outputs": []
}
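,
{
"cell_type": "markdown",
"source": [
"Both forwards rely on two module-level helpers defined in the referenced model.py: `nonlinearity` (Swish, x * sigmoid(x)) and `get_timestep_embedding` (the sinusoidal embedding from DDPM). A paraphrased sketch of the two, close to the original file but trimmed to the essentials:"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"import math\n",
"import torch\n",
"\n",
"def nonlinearity(x):\n",
"    # Swish / SiLU: x * sigmoid(x)\n",
"    return x * torch.sigmoid(x)\n",
"\n",
"def get_timestep_embedding(timesteps, embedding_dim):\n",
"    # sinusoidal timestep embedding, as in DDPM / Transformer positional encodings\n",
"    # timesteps: 1-D tensor of integer timesteps, one per batch element\n",
"    assert len(timesteps.shape) == 1\n",
"    half_dim = embedding_dim // 2\n",
"    emb = math.log(10000) / (half_dim - 1)\n",
"    emb = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb)\n",
"    emb = emb.to(device=timesteps.device)\n",
"    emb = timesteps.float()[:, None] * emb[None, :]\n",
"    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)\n",
"    if embedding_dim % 2 == 1:  # zero-pad odd dimensions\n",
"        emb = torch.nn.functional.pad(emb, (0, 1, 0, 0))\n",
"    return emb\n",
"\n",
"# e.g. a batch of 4 timesteps embedded into ch = 128 dimensions\n",
"print(get_timestep_embedding(torch.tensor([0, 10, 100, 999]), 128).shape)  # torch.Size([4, 128])"
],
"metadata": {},
"execution_count": null,
"outputs": []
}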
]
}
1 change: 1 addition & 0 deletions Week15_예습과제_김지은.md
@@ -0,0 +1 @@
https://equatorial-chard-0cb.notion.site/LDM-High-Resolution-Image-Synthesis-with-Latent-Diffusion-Model-2cac71118dac80f98e3dde7e80d66095?source=copy_link