[tests] make tests device-agnostic (part 4) #10508

Merged (48 commits, Mar 4, 2025)
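This PR swaps CUDA-specific test plumbing (`torch.cuda.empty_cache()`, the `require_torch_gpu` marker, hard-coded `"cuda"` devices) for backend-dispatching helpers from `diffusers.utils.testing_utils`, so the same tests can run on CUDA, XPU, or MPS machines. A minimal sketch of the dispatch pattern such a helper relies on (an illustration only, not the actual diffusers implementation; the XPU/MPS branches assume a torch build that ships those backends):

```python
# Minimal sketch of a device-agnostic cache-clearing helper. Illustration of
# the dispatch pattern only, not the actual diffusers code.
import gc

import torch


def backend_empty_cache(device: str) -> None:
    """Free cached allocator memory on whichever accelerator the tests use."""
    device_type = torch.device(device).type
    if device_type == "cuda":
        torch.cuda.empty_cache()
    elif device_type == "xpu" and hasattr(torch, "xpu"):
        torch.xpu.empty_cache()
    elif device_type == "mps" and hasattr(torch, "mps"):
        torch.mps.empty_cache()
    # CPU has no allocator cache to clear, so it is a no-op there.


# Typical setUp/tearDown usage mirrored throughout this PR:
gc.collect()
backend_empty_cache("cuda")  # or "xpu" / "mps", depending on torch_device
```

The `require_torch_accelerator` marker plays the same role as `require_torch_gpu`, but skips a test only when no supported accelerator is available rather than when CUDA specifically is missing.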
Commits (48)
8d0f387  initial comit (faaany, Jan 3, 2025)
88919c0  fix empty cache (faaany, Jan 3, 2025)
e32a9ac  fix one more (faaany, Jan 3, 2025)
cb7d9d5  fix style (faaany, Jan 3, 2025)
a393860  update device functions (faaany, Jan 6, 2025)
2f3ad32  update (faaany, Jan 6, 2025)
f3a519f  update (faaany, Jan 6, 2025)
b402629  Merge branch 'main' into xpu-enabling (faaany, Jan 6, 2025)
d1532d2  Update src/diffusers/utils/testing_utils.py (faaany, Jan 7, 2025)
16cca22  Update src/diffusers/utils/testing_utils.py (faaany, Jan 7, 2025)
3420e1f  Update src/diffusers/utils/testing_utils.py (faaany, Jan 7, 2025)
d15618b  Update tests/pipelines/controlnet/test_controlnet.py (faaany, Jan 7, 2025)
e814635  Update src/diffusers/utils/testing_utils.py (faaany, Jan 7, 2025)
e799516  Update src/diffusers/utils/testing_utils.py (faaany, Jan 7, 2025)
d3e8678  Update tests/pipelines/controlnet/test_controlnet.py (faaany, Jan 7, 2025)
fed282b  with gc.collect (faaany, Jan 7, 2025)
8577a14  update (faaany, Jan 7, 2025)
f08a849  Merge branch 'huggingface:main' into xpu-enabling (faaany, Jan 7, 2025)
35d7a7a  make style (hlky, Jan 7, 2025)
736cc7c  Merge branch 'main' into xpu-enabling (hlky, Jan 7, 2025)
c8661f0  check_torch_dependencies (hlky, Jan 7, 2025)
d4266a7  Merge branch 'main' into xpu-enabling (faaany, Jan 8, 2025)
d820f75  add mps empty cache (faaany, Jan 8, 2025)
6ed4523  add changes (faaany, Jan 9, 2025)
b813f16  bug fix (faaany, Jan 9, 2025)
c091bcc  Merge branch 'xpu-enabling' into xpu-enabling2 (faaany, Jan 9, 2025)
f6ae056  enable on xpu (faaany, Jan 9, 2025)
013f555  Merge branch 'main' into xpu-enabling2 (faaany, Jan 21, 2025)
bb49cab  update more cases (faaany, Jan 22, 2025)
ae92d53  Merge branch 'main' into xpu-enabling2 (faaany, Jan 22, 2025)
1d1c13d  revert (faaany, Jan 22, 2025)
5226094  revert back (faaany, Jan 22, 2025)
6238972  Merge branch 'main' into xpu-enabling2 (faaany, Feb 5, 2025)
b07c6a4  Merge branch 'main' into xpu-enabling2 (hlky, Feb 7, 2025)
5efca96  Merge branch 'main' into xpu-enabling2 (faaany, Feb 11, 2025)
58a3cb0  Merge branch 'main' into xpu-enabling2 (faaany, Feb 25, 2025)
faa1615  Update test_stable_diffusion_xl.py (hlky, Feb 27, 2025)
7fcb0da  Merge branch 'main' into xpu-enabling2 (hlky, Feb 27, 2025)
fc57898  Update tests/pipelines/stable_diffusion/test_stable_diffusion.py (faaany, Feb 27, 2025)
55f9658  Update tests/pipelines/stable_diffusion/test_stable_diffusion.py (faaany, Feb 27, 2025)
d647900  Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py (faaany, Feb 27, 2025)
cfbf601  Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py (faaany, Feb 27, 2025)
88263e8  Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py (faaany, Feb 27, 2025)
2e181a3  Apply suggestions from code review (faaany, Feb 27, 2025)
486f7cc  add test marker (faaany, Feb 27, 2025)
74c53c8  Merge branch 'main' into xpu-enabling2 (faaany, Feb 28, 2025)
2d79c4e  Merge branch 'main' into xpu-enabling2 (hlky, Mar 3, 2025)
3873465  Merge branch 'main' into xpu-enabling2 (faaany, Mar 4, 2025)

Changes from all commits
19 changes: 10 additions & 9 deletions tests/lora/test_lora_layers_sd.py
@@ -33,11 +33,12 @@
)
from diffusers.utils.import_utils import is_accelerate_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
load_image,
nightly,
numpy_cosine_similarity_distance,
require_peft_backend,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -101,7 +102,7 @@ def tearDown(self):
# Keeping this test here makes sense because it doesn't look any integration
# (value assertions on logits).
@slow
@require_torch_gpu
@require_torch_accelerator
def test_integration_move_lora_cpu(self):
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
@@ -158,7 +159,7 @@ def test_integration_move_lora_cpu(self):
self.assertTrue(m.weight.device != torch.device("cpu"))

@slow
@require_torch_gpu
@require_torch_accelerator
def test_integration_move_lora_dora_cpu(self):
from peft import LoraConfig

@@ -209,18 +210,18 @@ def test_integration_move_lora_dora_cpu(self):

@slow
@nightly
@require_torch_gpu
@require_torch_accelerator
@require_peft_backend
class LoraIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_integration_logits_with_scale(self):
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
@@ -378,7 +379,7 @@ def test_a1111_with_model_cpu_offload(self):
generator = torch.Generator().manual_seed(0)

pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
@@ -400,7 +401,7 @@ def test_a1111_with_sequential_cpu_offload(self):
generator = torch.Generator().manual_seed(0)

pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
@@ -656,7 +657,7 @@ def test_sd_load_civitai_empty_network_alpha(self):
See: https://github.com/huggingface/diffusers/issues/5606
"""
pipeline = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
pipeline.enable_sequential_cpu_offload()
pipeline.enable_sequential_cpu_offload(device=torch_device)
civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
pipeline.load_lora_weights(civitai_path, adapter_name="ahri")

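The offload calls above now receive the active test device explicitly instead of relying on the CUDA default. A hedged usage sketch (requires accelerate; the device string is hard-coded here purely to keep the sketch self-contained):

```python
from diffusers import StableDiffusionPipeline

# In the test suite this value comes from diffusers.utils.testing_utils;
# it is hard-coded here only for illustration.
torch_device = "xpu"

pipe = StableDiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
)
# Without an explicit device both offload helpers default to CUDA; passing
# the active accelerator keeps the test usable on XPU (or other) machines.
pipe.enable_model_cpu_offload(device=torch_device)
# pipe.enable_sequential_cpu_offload(device=torch_device)  # alternative mode
```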
11 changes: 6 additions & 5 deletions tests/lora/test_lora_layers_sd3.py
@@ -30,12 +30,13 @@
from diffusers.utils import load_image
from diffusers.utils.import_utils import is_accelerate_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
is_flaky,
nightly,
numpy_cosine_similarity_distance,
require_big_gpu_with_torch_cuda,
require_peft_backend,
require_torch_gpu,
require_torch_accelerator,
torch_device,
)

@@ -93,7 +94,7 @@ class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
def output_shape(self):
return (1, 32, 32, 3)

@require_torch_gpu
@require_torch_accelerator
def test_sd3_lora(self):
"""
Test loading the loras that are saved with the diffusers and peft formats.
@@ -135,7 +136,7 @@ def test_multiple_wrong_adapter_name_raises_error(self):


@nightly
@require_torch_gpu
@require_torch_accelerator
@require_peft_backend
@require_big_gpu_with_torch_cuda
@pytest.mark.big_gpu_with_torch_cuda
@@ -146,12 +147,12 @@ class SD3LoraIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, seed=0):
init_image = load_image(
55 changes: 29 additions & 26 deletions tests/models/unets/test_models_unet_2d_condition.py
@@ -36,6 +36,9 @@
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
floats_tensor,
is_peft_available,
@@ -1002,7 +1005,7 @@ def test_load_sharded_checkpoint_from_hub_subfolder(self, repo_id, variant):
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_from_hub_local(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy")
@@ -1013,7 +1016,7 @@ def test_load_sharded_checkpoint_from_hub_local(self):
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_from_hub_local_subfolder(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy-subfolder")
@@ -1024,7 +1027,7 @@ def test_load_sharded_checkpoint_from_hub_local_subfolder(self):
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
@parameterized.expand(
[
("hf-internal-testing/unet2d-sharded-dummy", None),
@@ -1039,7 +1042,7 @@ def test_load_sharded_checkpoint_device_map_from_hub(self, repo_id, variant):
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
@parameterized.expand(
[
("hf-internal-testing/unet2d-sharded-dummy-subfolder", None),
@@ -1054,7 +1057,7 @@ def test_load_sharded_checkpoint_device_map_from_hub_subfolder(self, repo_id, va
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_device_map_from_hub_local(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy")
@@ -1064,7 +1067,7 @@ def test_load_sharded_checkpoint_device_map_from_hub_local(self):
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_device_map_from_hub_local_subfolder(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy-subfolder")
@@ -1164,11 +1167,11 @@ def get_unet_model(self, fp16=False, model_id="CompVis/stable-diffusion-v1-4"):

return model

@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_auto(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

unet = self.get_unet_model()
unet.set_attention_slice("auto")
@@ -1180,15 +1183,15 @@ def test_set_attention_slice_auto(self):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)

assert mem_bytes < 5 * 10**9

@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_max(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

unet = self.get_unet_model()
unet.set_attention_slice("max")
@@ -1200,15 +1203,15 @@ def test_set_attention_slice_max(self):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)

assert mem_bytes < 5 * 10**9

@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_int(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

unet = self.get_unet_model()
unet.set_attention_slice(2)
@@ -1220,15 +1223,15 @@ def test_set_attention_slice_int(self):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)

assert mem_bytes < 5 * 10**9

@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_list(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

# there are 32 sliceable layers
slice_list = 16 * [2, 3]
@@ -1242,7 +1245,7 @@ def test_set_attention_slice_list(self):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)

assert mem_bytes < 5 * 10**9

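The attention-slicing tests above replace the raw `torch.cuda` memory-statistics calls with backend-prefixed helpers. A minimal sketch of the dispatch they imply (illustrative only; the XPU branch assumes a torch build that exposes XPU memory statistics):

```python
import torch


def backend_reset_peak_memory_stats(device: str) -> None:
    device_type = torch.device(device).type
    if device_type == "cuda":
        torch.cuda.reset_peak_memory_stats()
    elif device_type == "xpu" and hasattr(torch, "xpu"):
        # Present on recent torch builds with XPU support.
        torch.xpu.reset_peak_memory_stats()


def backend_max_memory_allocated(device: str) -> int:
    device_type = torch.device(device).type
    if device_type == "cuda":
        return torch.cuda.max_memory_allocated()
    if device_type == "xpu" and hasattr(torch, "xpu"):
        return torch.xpu.max_memory_allocated()
    return 0  # backends without allocator statistics


# The tests then assert on the recorded peak, e.g.:
# assert backend_max_memory_allocated(torch_device) < 5 * 10**9
```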
2 changes: 1 addition & 1 deletion tests/pipelines/controlnet/test_controlnet.py
@@ -79,7 +79,7 @@ def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.to("cuda")
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)

pipe.unet.to(memory_format=torch.channels_last)
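The hard-coded `pipe.to("cuda")` above becomes `pipe.to(torch_device)`, where `torch_device` is the suite-wide device exported by `diffusers.utils.testing_utils`. A rough sketch of how such a value can be resolved (the environment override name and the fallback order are assumptions, not the exact diffusers logic):

```python
import os

import torch

# Illustrative resolution of a suite-wide test device.
if "DIFFUSERS_TEST_DEVICE" in os.environ:
    torch_device = os.environ["DIFFUSERS_TEST_DEVICE"]
elif torch.cuda.is_available():
    torch_device = "cuda"
elif hasattr(torch, "xpu") and torch.xpu.is_available():
    torch_device = "xpu"
elif torch.backends.mps.is_available():
    torch_device = "mps"
else:
    torch_device = "cpu"
```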
8 changes: 4 additions & 4 deletions tests/pipelines/controlnet/test_controlnet_inpaint_sdxl.py
@@ -40,7 +40,7 @@
from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
require_torch_gpu,
require_torch_accelerator,
torch_device,
)

@@ -245,7 +245,7 @@ def test_xformers_attention_forwardGenerator_pass(self):
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(expected_max_diff=2e-3)

@require_torch_gpu
@require_torch_accelerator
def test_stable_diffusion_xl_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -254,12 +254,12 @@ def test_stable_diffusion_xl_offloads(self):

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

image_slices = []
4 changes: 2 additions & 2 deletions tests/pipelines/controlnet/test_controlnet_sdxl.py
@@ -223,12 +223,12 @@ def test_stable_diffusion_xl_offloads(self):

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

image_slices = []
5 changes: 3 additions & 2 deletions tests/pipelines/controlnet_flux/test_controlnet_flux.py
@@ -31,6 +31,7 @@
from diffusers.models import FluxControlNetModel
from diffusers.utils import load_image
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
nightly,
numpy_cosine_similarity_distance,
@@ -217,12 +218,12 @@ class FluxControlNetPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_canny(self):
controlnet = FluxControlNetModel.from_pretrained(
2 changes: 1 addition & 1 deletion tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
@@ -239,7 +239,7 @@ def test_canny(self):
pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
"stabilityai/stable-diffusion-3-medium-diffusers", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)

generator = torch.Generator(device="cpu").manual_seed(0)
5 changes: 3 additions & 2 deletions tests/pipelines/flux/test_pipeline_flux.py
@@ -9,6 +9,7 @@

from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
from diffusers.utils.testing_utils import (
backend_empty_cache,
nightly,
numpy_cosine_similarity_distance,
require_big_gpu_with_torch_cuda,
@@ -212,12 +213,12 @@ class FluxPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, seed=0):
generator = torch.Generator(device="cpu").manual_seed(seed)
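
Taken together, the integration-test boilerplate after this PR follows a single device-agnostic pattern. A condensed sketch (the imports and decorators mirror the diffs above; the class name is illustrative and the test bodies are omitted):

```python
import gc
import unittest

from diffusers.utils.testing_utils import (
    backend_empty_cache,
    require_torch_accelerator,
    slow,
    torch_device,
)


@slow
@require_torch_accelerator
class ExamplePipelineIntegrationTests(unittest.TestCase):
    """Illustrative test class showing the shared setUp/tearDown pattern."""

    def setUp(self):
        super().setUp()
        gc.collect()
        backend_empty_cache(torch_device)

    def tearDown(self):
        super().tearDown()
        gc.collect()
        backend_empty_cache(torch_device)
```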