CarperAI · shahbuland · Sep 17, 2023 · Sep 27, 2023 · Sep 27, 2023 · Jan 23, 2024
diff --git a/configs/dpo_pickapic.yml b/configs/dpo_pickapic.yml
@@ -0,0 +1,41 @@
+# Configuration for the DPO example run on PickAPic data.
+method:
+  name : "DPO" # presumably selects the method config registered under "DPO" - TODO confirm
+
+# Denoiser to fine-tune and its memory-related switches
+model:
+  model_path: "stabilityai/stable-diffusion-2-1-base"
+  model_arch_type: "LDMUnet"
+  attention_slicing: True
+  xformers_memory_efficient: False
+  gradient_checkpointing: True
+
+# Settings used when sampling images
+sampler:
+  guidance_scale: 7.5
+  num_inference_steps: 50
+
+optimizer:
+  name: "adamw"
+  kwargs:
+    lr: 1.0e-5
+    weight_decay: 1.0e-4
+    betas: [0.9, 0.999]
+
+scheduler:
+  name: "linear" # Name of learning rate scheduler
+  kwargs:
+    # NOTE(review): start and end factors are equal, so the learning rate
+    # presumably stays constant - confirm against the scheduler implementation
+    start_factor: 1.0
+    end_factor: 1.0
+
+logging:
+  run_name: 'dpo_pickapic'
+  # Uncomment and fill in to set the wandb entity/project
+  #wandb_entity: None
+  #wandb_project: None
+
+train:
+  num_epochs: 500
+  num_samples_per_epoch: 256
+  batch_size: 4
+  sample_batch_size: 32
+  grad_clip: 1.0
+  checkpoint_interval: 50
+  tf32: True
+  # Comma-separated keywords; presumably log output containing them is suppressed - TODO confirm
+  suppress_log_keywords: "diffusers.pipelines,transformers"
diff --git a/examples/DPO/download_pickapic_wds.py b/examples/DPO/download_pickapic_wds.py
@@ -0,0 +1,54 @@
+from datasets import load_dataset
+import requests
+import os
+from tqdm import tqdm
+import tarfile
+from multiprocessing import Pool, cpu_count
+
+"""
+This script takes the filtered version of the PickAPic prompt dataset,
+downloads the associated images, and then tars them. The resulting tar file
+can be moved to S3 or loaded directly if needed. The number of samples to
+download is set with `n_samples`.
+"""
+
+# Number of samples to download
+n_samples = 1000
+# Directory the prompt and image files are written to
+data_root = "./pickapic_sample"
+# Identifier of the filtered PickAPic prompt dataset
+url = "CarperAI/pickapic_v1_no_images_training_sfw"
+n_cpus = cpu_count()  # Detect the number of CPUs
+
+# Name of the output tar (without extension): last component of data_root
+# with any '.' and '/' characters removed
+base_name = os.path.basename(data_root).replace('.', '').replace('/', '')
+
+def make_tarfile(output_filename, source_dir):
+    """
+    Pack a directory into an uncompressed tar archive.
+
+    :param output_filename: Path of the tar file to create
+    :type output_filename: str
+
+    :param source_dir: Directory to add. It is stored under its own name at the archive root.
+    :type source_dir: str
+    """
+    # normpath drops a trailing separator, which would otherwise make basename
+    # return "" and flatten the directory's contents into the archive root
+    archive_root = os.path.basename(os.path.normpath(source_dir))
+    with tarfile.open(output_filename, "w") as tar:
+        tar.add(source_dir, arcname=archive_root)
+
+def download_image(args):
+    """
+    Download one image to disk. Takes a single tuple so it can be mapped over a Pool.
+
+    A failed request (connection error, timeout or HTTP error status) is
+    reported and skipped, so no file is written for it.
+
+    :param args: Pair of (image URL, destination file path)
+    :type args: Tuple[str, str]
+    """
+    url, filename = args
+    try:
+        # Without a timeout a stalled server would block this worker forever
+        response = requests.get(url, timeout=30)
+        # Raise on HTTP errors instead of saving the error page as an image
+        response.raise_for_status()
+    except requests.RequestException as e:
+        print(f"Failed to download {url}: {e}")
+        return
+    with open(filename, 'wb') as f:
+        f.write(response.content)
+
+if __name__ == "__main__":
+    # Use the module-level dataset identifier instead of repeating the literal
+    ds = load_dataset(url)['train']
+    os.makedirs(data_root, exist_ok=True)
+
+    # First pass: write the prompt files and queue every image download.
+    # Downloads are queued rather than issued per row so that the pool can
+    # work on many of them at once instead of two at a time.
+    download_jobs = []
+    id_counter = 0
+    for row in ds:
+        if id_counter >= n_samples:
+            break
+        if not row['has_label']:
+            continue
+        id_str = str(id_counter).zfill(8)
+        # The file is opened as UTF-8, so the caption needs no extra re-encoding
+        with open(os.path.join(data_root, f'{id_str}.prompt.txt'), 'w', encoding='utf-8') as f:
+            f.write(row['caption'])
+        # A truthy label_0 means image_0 was preferred
+        if row['label_0']:
+            chosen_url, rejected_url = row['image_0_url'], row['image_1_url']
+        else:
+            chosen_url, rejected_url = row['image_1_url'], row['image_0_url']
+        download_jobs.append((chosen_url, os.path.join(data_root, f'{id_str}.chosen.png')))
+        download_jobs.append((rejected_url, os.path.join(data_root, f'{id_str}.rejected.png')))
+        id_counter += 1
+
+    # Second pass: fetch all images in parallel; the bar tracks finished downloads
+    with Pool(n_cpus) as p:
+        for _ in tqdm(p.imap_unordered(download_image, download_jobs), total=len(download_jobs)):
+            pass
+
+    make_tarfile(f"{base_name}.tar", data_root)
diff --git a/examples/DPO/train_pickapic.py b/examples/DPO/train_pickapic.py
@@ -0,0 +1,14 @@
+# Example script: train with DPO using the configs/dpo_pickapic.yml config.
+import sys
+# Must run before the drlx imports below; presumably makes the in-repo
+# package under ./src importable when launched from the repository root
+sys.path.append("./src")
+
+from drlx.pipeline.pickapic_wds import PickAPicPipeline
+from drlx.trainer.dpo_trainer import DPOTrainer
+from drlx.configs import DRLXConfig
+
+pipe = PickAPicPipeline()
+# NOTE(review): `resume` is not referenced anywhere below - confirm whether it is still needed
+resume = False
+
+config = DRLXConfig.load_yaml("configs/dpo_pickapic.yml")
+trainer = DPOTrainer(config)
+
+trainer.train(pipe)
diff --git a/src/drlx/configs.py b/src/drlx/configs.py
@@ -91,6 +91,22 @@ class DDPOConfig(MethodConfig):
     buffer_size: int = 32 # Set to None to avoid using per prompt stat tracker
     min_count: int = 16
 
+@register_method("DPO")
+@dataclass
+class DPOConfig(MethodConfig):
+    """
+    Config for DPO-related hyperparams
+
+    :param name: Name of the method. Defaults to "DPO".
+    :type name: str
+
+    :param beta: Deviation from initial model
+    :type beta: float
+
+    :param ref_mem_strategy: Strategy for managing reference model on memory. Either None (the default) or "half"; "half" presumably keeps the reference model in 16 bit (TODO confirm against the trainer).
+    :type ref_mem_strategy: Optional[str]
+    """
+    name : str = "DPO"
+    beta : float = 0.9
+    ref_mem_strategy : str = None # None or "half"
+
 @dataclass
 class TrainConfig(ConfigClass):
     """

diff --git a/src/drlx/denoisers/ldm_unet.py b/src/drlx/denoisers/ldm_unet.py
@@ -165,5 +165,16 @@ def forward(
             encoder_hidden_states = text_embeds
         ).sample
 
+    @property
+    def device(self):
+        """
+        Device of the wrapped UNet, as reported by ``self.unet.device``.
+        """
+        unet = self.unet
+        return unet.device
+
+    def enable_adapters(self):
+        """
+        Enable the UNet's adapters. Does nothing when ``config.lora_rank`` is falsy.
+        """
+        if not self.config.lora_rank:
+            return
+        self.unet.enable_adapters()
+
+    def disable_adapters(self):
+        """
+        Disable the UNet's adapters. Does nothing when ``config.lora_rank`` is falsy.
+        """
+        if not self.config.lora_rank:
+            return
+        self.unet.disable_adapters()
 
 
diff --git a/src/drlx/pipeline/dpo_pipeline.py b/src/drlx/pipeline/dpo_pipeline.py
@@ -0,0 +1,30 @@
+from abc import abstractmethod
+from typing import Tuple, Callable
+
+from PIL import Image
+
+from drlx.pipeline import Pipeline
+
+class DPOPipeline(Pipeline):
+    """
+    Pipeline for training with DPO. Returns prompts, chosen images, and rejected images
+    """
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    @abstractmethod
+    def __getitem__(self, index : int) -> Tuple[str, Image.Image, Image.Image]:
+        """
+        Fetch one sample.
+
+        :param index: Position of the sample
+        :type index: int
+
+        :return: Tuple of (prompt, chosen image, rejected image)
+        :rtype: Tuple[str, Image.Image, Image.Image]
+        """
+        pass
+
+    def make_default_collate(self, prep : Callable):
+        """
+        Build a collate function that splits a batch of samples into prompts,
+        chosen images and rejected images, then hands them to ``prep``.
+
+        :param prep: Called as ``prep(prompts, chosen, rejected)`` with three lists; its result is what the collate function returns
+        :type prep: Callable
+
+        :return: The collate function
+        :rtype: Callable
+        """
+        # The annotation on `collate` is evaluated when the inner def runs, so
+        # Iterable has to be resolvable here; the module only imports Tuple
+        # and Callable from typing.
+        from typing import Iterable
+
+        def collate(batch : Iterable[Tuple[str, Image.Image, Image.Image]]):
+            # Materialize once so a one-shot iterable survives the three passes
+            samples = list(batch)
+            prompts = [d[0] for d in samples]
+            chosen = [d[1] for d in samples]
+            rejected = [d[2] for d in samples]
+
+            return prep(prompts, chosen, rejected)
+
+        return collate
+
+
+
diff --git a/src/drlx/pipeline/pickapic_dpo.py b/src/drlx/pipeline/pickapic_dpo.py
@@ -0,0 +1,65 @@
+from datasets import load_dataset
+import io
+
+from drlx.pipeline.dpo_pipeline import DPOPipeline
+
+import torch
+from torchvision import transforms
+from torch.utils.data import Dataset, DataLoader
+from PIL import Image
+
+def convert_bytes_to_image(image_bytes, id):
+    """
+    Decode encoded image bytes into a 512x512 RGB PIL image.
+
+    :param image_bytes: Contents of an encoded image file
+    :type image_bytes: bytes
+
+    :param id: Identifier of the image. Currently unused.
+
+    :return: The decoded image, or None if decoding failed (the error is printed)
+    :rtype: Optional[Image.Image]
+    """
+    try:
+        image = Image.open(io.BytesIO(image_bytes))
+        # Force three channels: grayscale, RGBA or CMYK inputs would otherwise
+        # produce tensors with a different channel count that cannot be stacked
+        image = image.convert("RGB")
+        image = image.resize((512, 512))
+        return image
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return None
+
+def create_train_dataset():
+    """
+    Load the train split of "yuvalkirstain/pickapic_v2" and keep only the rows
+    that have a label and whose ``label_0`` is not 0.5.
+
+    :return: The filtered dataset
+    """
+    def keep_row(example):
+        # label_0 == 0.5 presumably marks a tie between the two images - TODO confirm
+        return example['has_label'] == True and example['label_0'] != 0.5
+
+    full_train = load_dataset("yuvalkirstain/pickapic_v2",split='train')
+    return full_train.filter(keep_row)
+
+class Collator:
+    """
+    Collate function that turns dataset rows into batched chosen/rejected
+    image tensors plus their prompts.
+    """
+    def __call__(self, batch):
+        """
+        :param batch: List of rows (dicts) with the keys 'jpg_0', 'jpg_1', 'image_0_uid', 'image_1_uid', 'label_0' and 'caption'
+        :type batch: List[dict]
+
+        :return: Dict with "chosen_pixel_values" and "rejected_pixel_values" (stacked image tensors rescaled with ``x * 2 - 1``) and "prompts" (list of captions)
+        :rtype: dict
+        """
+        chosen = []
+        rejected = []
+        for row in batch:
+            first = (row['jpg_0'], row['image_0_uid'])
+            second = (row['jpg_1'], row['image_1_uid'])
+            # A falsy label_0 means jpg_1 is the chosen one. Bytes and uid are
+            # kept together so the uid always names the image being decoded.
+            if row['label_0']:
+                chosen.append(first)
+                rejected.append(second)
+            else:
+                chosen.append(second)
+                rejected.append(first)
+
+        prompts = [row['caption'] for row in batch]
+
+        return {
+            "chosen_pixel_values" : self._to_tensor_batch(chosen),
+            "rejected_pixel_values" : self._to_tensor_batch(rejected),
+            "prompts" : prompts
+        }
+
+    @staticmethod
+    def _to_tensor_batch(items):
+        """Decode (bytes, uid) pairs and stack them into one tensor rescaled with ``x * 2 - 1``."""
+        to_tensor = transforms.ToTensor()
+        images = [convert_bytes_to_image(image_bytes, uid) for (image_bytes, uid) in items]
+        return torch.stack([to_tensor(image) for image in images]) * 2 - 1
+
+class PickAPicDPOPipeline(DPOPipeline):
+    """
+    DPO pipeline for LDM training backed by the filtered PickAPic train set
+    """
+    def __init__(self):
+        # NOTE(review): the parent __init__ is not invoked here - confirm that is intended
+        self.train_ds = create_train_dataset()
+        self.dc = Collator()
+
+    def create_loader(self, **kwargs):
+        """
+        Build a DataLoader over the training set that batches rows with this
+        pipeline's Collator.
+
+        :param kwargs: Extra keyword arguments forwarded to DataLoader
+
+        :return: The data loader
+        :rtype: DataLoader
+        """
+        loader = DataLoader(self.train_ds, collate_fn=self.dc, **kwargs)
+        return loader