easydiffusion
diff --git a/sdkit/__init__.py b/sdkit/__init__.py
Lines changed: 28 additions & 2 deletions
diff --git a/sdkit/filter/codeformer/__init__.py b/sdkit/filter/codeformer/__init__.py
Lines changed: 5 additions & 4 deletions
diff --git a/sdkit/filter/gfpgan.py b/sdkit/filter/gfpgan.py
Lines changed: 1 addition & 1 deletion
diff --git a/sdkit/generate/image_generator.py b/sdkit/generate/image_generator.py
Lines changed: 7 additions & 10 deletions
diff --git a/sdkit/generate/sampler/sampler_main.py b/sdkit/generate/sampler/sampler_main.py
Lines changed: 1 addition & 1 deletion
diff --git a/sdkit/models/model_loader/codeformer/__init__.py b/sdkit/models/model_loader/codeformer/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/sdkit/models/model_loader/controlnet.py b/sdkit/models/model_loader/controlnet.py
Lines changed: 4 additions & 4 deletions
diff --git a/sdkit/models/model_loader/controlnet_filters.py b/sdkit/models/model_loader/controlnet_filters.py
Lines changed: 1 addition & 1 deletion
diff --git a/sdkit/models/model_loader/embeddings.py b/sdkit/models/model_loader/embeddings.py
Lines changed: 1 addition & 1 deletion
diff --git a/sdkit/models/model_loader/gfpgan.py b/sdkit/models/model_loader/gfpgan.py
Lines changed: 2 additions & 2 deletions
@@ -1,9 +1,22 @@
+import sys
 from threading import local
 
+if sys.version_info < (3, 10):
+    # polyfill: staticmethod objects only became directly callable in Python 3.10. required for pytorch-directml
+    class CallableStaticMethod(staticmethod):
+        def __call__(self, *args, **kwargs):
+            return self.__func__(*args, **kwargs)
+
+    # Patch the built-in staticmethod, so classes defined after this point get the callable version
+    import builtins
+
+    builtins.staticmethod = CallableStaticMethod
+
 
 class Context(local):
     def __init__(self) -> None:
-        self._device: str = "cuda:0"
+        self._device: str = ""
+        self._torch_device = None
         self._half_precision: bool = True
         self._vram_usage_level = None
 
@@ -45,6 +58,10 @@ def __init__(self) -> None:
         https://github.com/sczhou/CodeFormer/blob/master/LICENSE
         """
 
+        from sdkit.utils import get_torch_platform
+
+        self.device = get_torch_platform()[0]
+
     # hacky approach, but we need to enforce full precision for some devices
     # we also need to force full precision for these devices (haven't implemented this yet):
     # (('nvidia' in device_name or 'geforce' in device_name) and (' 1660' in device_name or ' 1650' in device_name)) or ('Quadro T2000' in device_name)
@@ -55,12 +72,21 @@ def device(self):
     @device.setter
     def device(self, d):
         self._device = d
-        if "cuda" not in d:
+
+        from sdkit.utils import get_device
+
+        if d.split(":")[0] in ("cpu", "mps"):
             from sdkit.utils import log
 
             log.info(f"forcing full precision for device: {d}")
             self._half_precision = False
 
+        self._torch_device = get_device(d)
+
+    @property
+    def torch_device(self):
+        return self._torch_device
+
     @property
     def half_precision(self):
         return self._half_precision
 
@@ -3,6 +3,7 @@
 
 from sdkit import Context
 from sdkit.models import load_model, unload_model
+from sdkit.utils import empty_cache
 
 from torchvision.transforms.functional import normalize
 from threading import Lock
@@ -16,7 +17,7 @@
 
 
 def inference(context: Context, image, upscale_bg, upscale_faces, upscale_factor, codeformer_fidelity, codeformer_net):
-    device = torch.device(context.device)
+    device = context.torch_device
     face_helper = FaceRestoreHelper(upscale_factor=upscale_factor, use_parse=True, device=device)
     face_helper.clean_all()
     face_helper.read_image(image)
@@ -37,7 +38,7 @@ def inference(context: Context, image, upscale_bg, upscale_faces, upscale_factor
                 output = codeformer_net(cropped_face_t, w=codeformer_fidelity, adain=True)[0]
                 restored_face = tensor2img(output.squeeze(0), rgb2bgr=True, min_max=(-1, 1))
             del output
-            torch.cuda.empty_cache()
+            empty_cache()
         except RuntimeError as error:
             print(f"Failed inference for CodeFormer: {error}")
             restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1))
@@ -72,7 +73,7 @@ def apply(
     if (upscale_background or upscale_faces) and "realesrgan" not in context.models:
         raise Exception("realesrgan not loaded in context.models! Required for upscaling in CodeFormer.")
 
-    device = torch.device(context.device)
+    device = context.torch_device
     codeformer_net = context.models["codeformer"]
 
     # Convert PIL Image to numpy array and ensure it's in BGR format for OpenCV
@@ -84,7 +85,7 @@ def apply(
         # hack for a bug in facexlib: https://github.com/xinntao/facexlib/pull/19/files
         from facexlib.detection import retinaface
 
-        retinaface.device = torch.device(context.device)
+        retinaface.device = context.torch_device
 
         result = inference(
             context, input_img, upscale_background, upscale_faces, upscale_factor, codeformer_fidelity, codeformer_net
 
@@ -15,7 +15,7 @@ def apply(context: Context, image, **kwargs):
         # hack for a bug in facexlib: https://github.com/xinntao/facexlib/pull/19/files
         from facexlib.detection import retinaface
 
-        retinaface.device = torch.device(context.device)
+        retinaface.device = context.torch_device
 
         image = image.convert("RGB")
         image = np.array(image, dtype=np.uint8)[..., ::-1]
 
@@ -57,9 +57,6 @@ def generate_images(
     try:
         images = []
 
-        seed_everything(seed)
-        precision_scope = torch.autocast if context.half_precision else nullcontext
-
         if "stable-diffusion" not in context.models:
             raise RuntimeError(
                 "The model for Stable Diffusion has not been loaded yet! If you've tried to load it, please check the logs above this message for errors (while loading the model)."
@@ -96,7 +93,10 @@ def generate_images(
         if "hypernetwork" in context.models:
             context.models["hypernetwork"]["hypernetwork_strength"] = hypernetwork_strength
 
-        with precision_scope("cuda"):
+        seed_everything(seed)
+        precision_scope = torch.autocast if context.half_precision else nullcontext
+
+        with precision_scope(context.torch_device.type):
             cond, uncond = get_cond_and_uncond(prompt, negative_prompt, num_outputs, model)
 
         generate_fn = txt2img if init_image is None else img2img
@@ -113,7 +113,7 @@ def generate_images(
             "callback": callback,
         }
 
-        with torch.no_grad(), precision_scope("cuda"):
+        with torch.no_grad(), precision_scope(context.torch_device.type):
             for _ in trange(1, desc="Sampling"):
                 images += generate_fn(common_sampler_params.copy(), **req_args)
                 gc(context)
@@ -229,10 +229,7 @@ def make_with_diffusers(
 
     model = context.models["stable-diffusion"]
     default_pipe = model["default"]
-    if context.device == "mps" and hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-        generator = torch.Generator().manual_seed(seed)
-    else:
-        generator = torch.Generator(context.device).manual_seed(seed)
+    generator = torch.Generator(context.torch_device).manual_seed(seed)
 
     is_sd_xl = isinstance(
         default_pipe,
@@ -462,7 +459,7 @@ def lora_conv_forward(self, hidden_states, scale=1.0):
     if hasattr(operation_to_apply.unet, "_allocate_trt_buffers"):
         dtype = torch.float16 if context.half_precision else torch.float32
         operation_to_apply.unet._allocate_trt_buffers(
-            operation_to_apply, context.device, dtype, num_outputs, width, height
+            operation_to_apply, context.torch_device, dtype, num_outputs, width, height
         )
 
     # apply
 
@@ -50,7 +50,7 @@ def make_samples(
     if sampler_module is None:
         raise RuntimeError(f'Unknown sampler "{sampler_name}"!')
 
-    noise = make_some_noise(seed, batch_size, shape, context.device)
+    noise = make_some_noise(seed, batch_size, shape, context.torch_device)
 
     return sampler_module.sample(
         context, sampler_name, noise, batch_size, shape, steps, cond, uncond, guidance_scale, callback, **kwargs
 
@@ -26,7 +26,7 @@ def load_model(context: Context, **kwargs):
     sd = sd["params_ema"]
 
     model = CodeFormer(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=["32", "64", "128", "256"])
-    model = model.to(context.device)
+    model = model.to(context.torch_device)
 
     model.load_state_dict(sd)
     model.eval()
 
@@ -18,7 +18,7 @@ def load_controlnet(context, controlnet_path):
     import torch
     from sdkit.models import get_model_info_from_db
     from sdkit.models import models_db
-    from sdkit.utils import load_tensor_file
+    from sdkit.utils import load_tensor_file, is_cpu_device
 
     from accelerate import cpu_offload
 
@@ -76,13 +76,13 @@ def load_controlnet(context, controlnet_path):
 
     # memory optimizations
 
-    if context.vram_usage_level == "low" and "cuda" in context.device:
+    if context.vram_usage_level == "low" and not is_cpu_device(context.torch_device):
         controlnet = controlnet.to("cpu", torch.float16 if context.half_precision else torch.float32)
 
         offload_buffers = len(controlnet._parameters) > 0
-        cpu_offload(controlnet, context.device, offload_buffers=offload_buffers)
+        cpu_offload(controlnet, context.torch_device, offload_buffers=offload_buffers)
     else:
-        controlnet = controlnet.to(context.device, torch.float16 if context.half_precision else torch.float32)
+        controlnet = controlnet.to(context.torch_device, torch.float16 if context.half_precision else torch.float32)
 
     controlnet.set_attention_slice(1)
 
 
@@ -41,7 +41,7 @@ def load_model(context: Context, **kwargs):
             model = Processor(model_type)
 
             if hasattr(model.processor, "to"):
-                model.processor = model.processor.to(context.device)
+                model.processor = model.processor.to(context.torch_device)
 
         return model
 
 
@@ -105,7 +105,7 @@ def attach_hooks(context, components):
     from accelerate import cpu_offload
 
     for _, te in components:
-        cpu_offload(te, context.device, offload_buffers=len(te._parameters) > 0)
+        cpu_offload(te, context.torch_device, offload_buffers=len(te._parameters) > 0)
 
 
 def get_embedding(embedding):
 
@@ -30,10 +30,10 @@ def load_model(context: Context, **kwargs):
         # hack for a bug in facexlib: https://github.com/xinntao/facexlib/pull/19/files
         from facexlib.detection import retinaface
 
-        retinaface.device = torch.device(context.device)
+        retinaface.device = context.torch_device
 
         return GFPGANer(
-            device=torch.device(context.device),
+            device=context.torch_device,
             model_path=model_path,
             upscale=1,
             arch="clean",