Update finetune.yaml

OSU-NLP-Group · Mar 27, 2024 · 7fea960
1 parent 8d7e2ee
commit 7fea960
Showing 1 changed file with 29 additions and 40 deletions.
diff --git a/finetune.yaml b/finetune.yaml
@@ -1,53 +1,59 @@
-# File modified by authors of InstructPix2Pix from original (https://github.com/CompVis/stable-diffusion).
-# See more details in LICENSE.
-
 model:
-  base_learning_rate: 1e-04
+  base_learning_rate: 5.0e-05
   target: ldm.models.diffusion.ddpm_edit.LatentDiffusion
   params:
     ckpt_path: [TODO]
     linear_start: 0.00085
-    linear_end: 0.0120
+    linear_end: 0.012
     num_timesteps_cond: 1
     log_every_t: 50
     timesteps: 1000
     first_stage_key: edited
     cond_stage_key: edit
     image_size: 32
     channels: 4
-    cond_stage_trainable: false   # Note: different from the one we trained before
+    cond_stage_trainable: false
     conditioning_key: hybrid
     monitor: val/loss_simple_ema
     scale_factor: 0.18215
     use_ema: true
-    load_ema: false
-
-    scheduler_config: # 10000 warmup steps
+    load_ema: true
+    scheduler_config:
       target: ldm.lr_scheduler.LambdaLinearScheduler
       params:
-        warm_up_steps: [ 0 ]
-        cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-        f_start: [ 1.e-6 ]
-        f_max: [ 1. ]
-        f_min: [ 1. ]
-
+        warm_up_steps:
+        - 1000
+        cycle_lengths:
+        - 10000000000000
+        f_start:
+        - 1.0e-06
+        f_max:
+        - 1.0
+        f_min:
+        - 1.0
     unet_config:
       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
       params:
-        image_size: 32 # unused
+        image_size: 32
         in_channels: 8
         out_channels: 4
         model_channels: 320
-        attention_resolutions: [ 4, 2, 1 ]
+        attention_resolutions:
+        - 4
+        - 2
+        - 1
         num_res_blocks: 2
-        channel_mult: [ 1, 2, 4, 4 ]
+        channel_mult:
+        - 1
+        - 2
+        - 4
+        - 4
         num_heads: 8
-        use_spatial_transformer: True
+        use_spatial_transformer: true
         transformer_depth: 1
         context_dim: 768
-        use_checkpoint: True
-        legacy: False
-
+        use_checkpoint: true
+        legacy: false
     first_stage_config:
       target: ldm.models.autoencoder.AutoencoderKL
       params:
@@ -70,14 +76,12 @@ model:
           dropout: 0.0
         lossconfig:
           target: torch.nn.Identity
-
     cond_stage_config:
       target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
-
 data:
   target: main.DataModuleFromConfig
   params:
-    batch_size: 32
+    batch_size: 16
     num_workers: 2
     train:
       target: edit_dataset.EditDataset
@@ -96,18 +100,3 @@ data:
         min_resize_res: 256
         max_resize_res: 256
         crop_res: 256
-
-lightning:
-  callbacks:
-    image_logger:
-      target: main.ImageLogger
-      params:
-        batch_frequency: 2000
-        max_images: 2
-        increase_log_steps: False
-
-  trainer:
-    max_epochs: 1000
-    benchmark: True
-    accumulate_grad_batches: 1
-    check_val_every_n_epoch: 4