[tests] make tests device-agnostic (part 4) #10508

Merged on Mar 4, 2025 (48 commits).
Changes from 43 commits

Commits (48)
8d0f387 initial comit (faaany, Jan 3, 2025)
88919c0 fix empty cache (faaany, Jan 3, 2025)
e32a9ac fix one more (faaany, Jan 3, 2025)
cb7d9d5 fix style (faaany, Jan 3, 2025)
a393860 update device functions (faaany, Jan 6, 2025)
2f3ad32 update (faaany, Jan 6, 2025)
f3a519f update (faaany, Jan 6, 2025)
b402629 Merge branch 'main' into xpu-enabling (faaany, Jan 6, 2025)
d1532d2 Update src/diffusers/utils/testing_utils.py (faaany, Jan 7, 2025)
16cca22 Update src/diffusers/utils/testing_utils.py (faaany, Jan 7, 2025)
3420e1f Update src/diffusers/utils/testing_utils.py (faaany, Jan 7, 2025)
d15618b Update tests/pipelines/controlnet/test_controlnet.py (faaany, Jan 7, 2025)
e814635 Update src/diffusers/utils/testing_utils.py (faaany, Jan 7, 2025)
e799516 Update src/diffusers/utils/testing_utils.py (faaany, Jan 7, 2025)
d3e8678 Update tests/pipelines/controlnet/test_controlnet.py (faaany, Jan 7, 2025)
fed282b with gc.collect (faaany, Jan 7, 2025)
8577a14 update (faaany, Jan 7, 2025)
f08a849 Merge branch 'huggingface:main' into xpu-enabling (faaany, Jan 7, 2025)
35d7a7a make style (hlky, Jan 7, 2025)
736cc7c Merge branch 'main' into xpu-enabling (hlky, Jan 7, 2025)
c8661f0 check_torch_dependencies (hlky, Jan 7, 2025)
d4266a7 Merge branch 'main' into xpu-enabling (faaany, Jan 8, 2025)
d820f75 add mps empty cache (faaany, Jan 8, 2025)
6ed4523 add changes (faaany, Jan 9, 2025)
b813f16 bug fix (faaany, Jan 9, 2025)
c091bcc Merge branch 'xpu-enabling' into xpu-enabling2 (faaany, Jan 9, 2025)
f6ae056 enable on xpu (faaany, Jan 9, 2025)
013f555 Merge branch 'main' into xpu-enabling2 (faaany, Jan 21, 2025)
bb49cab update more cases (faaany, Jan 22, 2025)
ae92d53 Merge branch 'main' into xpu-enabling2 (faaany, Jan 22, 2025)
1d1c13d revert (faaany, Jan 22, 2025)
5226094 revert back (faaany, Jan 22, 2025)
6238972 Merge branch 'main' into xpu-enabling2 (faaany, Feb 5, 2025)
b07c6a4 Merge branch 'main' into xpu-enabling2 (hlky, Feb 7, 2025)
5efca96 Merge branch 'main' into xpu-enabling2 (faaany, Feb 11, 2025)
58a3cb0 Merge branch 'main' into xpu-enabling2 (faaany, Feb 25, 2025)
faa1615 Update test_stable_diffusion_xl.py (hlky, Feb 27, 2025)
7fcb0da Merge branch 'main' into xpu-enabling2 (hlky, Feb 27, 2025)
fc57898 Update tests/pipelines/stable_diffusion/test_stable_diffusion.py (faaany, Feb 27, 2025)
55f9658 Update tests/pipelines/stable_diffusion/test_stable_diffusion.py (faaany, Feb 27, 2025)
d647900 Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py (faaany, Feb 27, 2025)
cfbf601 Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py (faaany, Feb 27, 2025)
88263e8 Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py (faaany, Feb 27, 2025)
2e181a3 Apply suggestions from code review (faaany, Feb 27, 2025)
486f7cc add test marker (faaany, Feb 27, 2025)
74c53c8 Merge branch 'main' into xpu-enabling2 (faaany, Feb 28, 2025)
2d79c4e Merge branch 'main' into xpu-enabling2 (hlky, Mar 3, 2025)
3873465 Merge branch 'main' into xpu-enabling2 (faaany, Mar 4, 2025)
19 changes: 10 additions & 9 deletions tests/lora/test_lora_layers_sd.py
@@ -33,11 +33,12 @@
)
from diffusers.utils.import_utils import is_accelerate_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
load_image,
nightly,
numpy_cosine_similarity_distance,
require_peft_backend,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -101,7 +102,7 @@ def tearDown(self):
# Keeping this test here makes sense because it doesn't look any integration
# (value assertions on logits).
@slow
@require_torch_gpu
@require_torch_accelerator
def test_integration_move_lora_cpu(self):
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
@@ -158,7 +159,7 @@ def test_integration_move_lora_cpu(self):
self.assertTrue(m.weight.device != torch.device("cpu"))

@slow
@require_torch_gpu
@require_torch_accelerator
def test_integration_move_lora_dora_cpu(self):
from peft import LoraConfig

@@ -209,18 +210,18 @@ def test_integration_move_lora_dora_cpu(self):

@slow
@nightly
@require_torch_gpu
@require_torch_accelerator
@require_peft_backend
class LoraIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_integration_logits_with_scale(self):
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
@@ -378,7 +379,7 @@ def test_a1111_with_model_cpu_offload(self):
generator = torch.Generator().manual_seed(0)

pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
@@ -400,7 +401,7 @@ def test_a1111_with_sequential_cpu_offload(self):
generator = torch.Generator().manual_seed(0)

pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
@@ -656,7 +657,7 @@ def test_sd_load_civitai_empty_network_alpha(self):
See: https://github.com/huggingface/diffusers/issues/5606
"""
pipeline = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
pipeline.enable_sequential_cpu_offload()
pipeline.enable_sequential_cpu_offload(device=torch_device)
civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
pipeline.load_lora_weights(civitai_path, adapter_name="ahri")
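For reference, the `backend_empty_cache(torch_device)` calls that replace `torch.cuda.empty_cache()` above dispatch cache clearing to whichever accelerator the suite is running on. The sketch below illustrates the idea only; it is not the actual implementation in `diffusers.utils.testing_utils`, and it assumes `torch_device` is a plain device string such as "cuda", "xpu", "mps", or "cpu".

```python
import torch


def empty_device_cache(device: str) -> None:
    """Clear the allocator cache on the given backend (illustrative sketch only)."""
    if device == "cuda" and torch.cuda.is_available():
        torch.cuda.empty_cache()
    elif device == "xpu" and hasattr(torch, "xpu") and torch.xpu.is_available():
        torch.xpu.empty_cache()
    elif device == "mps" and torch.backends.mps.is_available():
        torch.mps.empty_cache()
    # "cpu" has no allocator cache, so nothing to do.
```

With a helper like this, the same `setUp`/`tearDown` bodies run unchanged on CUDA, XPU, and MPS machines.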

11 changes: 6 additions & 5 deletions tests/lora/test_lora_layers_sd3.py
@@ -30,12 +30,13 @@
from diffusers.utils import load_image
from diffusers.utils.import_utils import is_accelerate_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
is_flaky,
nightly,
numpy_cosine_similarity_distance,
require_big_gpu_with_torch_cuda,
require_peft_backend,
require_torch_gpu,
require_torch_accelerator,
torch_device,
)

@@ -93,7 +94,7 @@ class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
def output_shape(self):
return (1, 32, 32, 3)

@require_torch_gpu
@require_torch_accelerator
def test_sd3_lora(self):
"""
Test loading the loras that are saved with the diffusers and peft formats.
@@ -135,7 +136,7 @@ def test_multiple_wrong_adapter_name_raises_error(self):


@nightly
@require_torch_gpu
@require_torch_accelerator
@require_peft_backend
@require_big_gpu_with_torch_cuda
@pytest.mark.big_gpu_with_torch_cuda
@@ -146,12 +147,12 @@ class SD3LoraIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, seed=0):
init_image = load_image(
55 changes: 29 additions & 26 deletions tests/models/unets/test_models_unet_2d_condition.py
@@ -36,6 +36,9 @@
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
floats_tensor,
is_peft_available,
@@ -1002,7 +1005,7 @@ def test_load_sharded_checkpoint_from_hub_subfolder(self, repo_id, variant):
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_from_hub_local(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy")
@@ -1013,7 +1016,7 @@ def test_load_sharded_checkpoint_from_hub_local(self):
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_from_hub_local_subfolder(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy-subfolder")
@@ -1024,7 +1027,7 @@ def test_load_sharded_checkpoint_from_hub_local_subfolder(self):
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
@parameterized.expand(
[
("hf-internal-testing/unet2d-sharded-dummy", None),
@@ -1039,7 +1042,7 @@ def test_load_sharded_checkpoint_device_map_from_hub(self, repo_id, variant):
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
@parameterized.expand(
[
("hf-internal-testing/unet2d-sharded-dummy-subfolder", None),
@@ -1054,7 +1057,7 @@ def test_load_sharded_checkpoint_device_map_from_hub_subfolder(self, repo_id, va
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_device_map_from_hub_local(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy")
Expand All @@ -1064,7 +1067,7 @@ def test_load_sharded_checkpoint_device_map_from_hub_local(self):
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)

@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_device_map_from_hub_local_subfolder(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy-subfolder")
@@ -1164,11 +1167,11 @@ def get_unet_model(self, fp16=False, model_id="CompVis/stable-diffusion-v1-4"):

return model

@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_auto(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

unet = self.get_unet_model()
unet.set_attention_slice("auto")
@@ -1180,15 +1183,15 @@ def test_set_attention_slice_max(self):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)

assert mem_bytes < 5 * 10**9

@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_max(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

unet = self.get_unet_model()
unet.set_attention_slice("max")
@@ -1200,15 +1203,15 @@ def test_set_attention_slice_int(self):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)

assert mem_bytes < 5 * 10**9

@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_int(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

unet = self.get_unet_model()
unet.set_attention_slice(2)
@@ -1220,15 +1223,15 @@ def test_set_attention_slice_list(self):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)

assert mem_bytes < 5 * 10**9

@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_list(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

# there are 32 sliceable layers
slice_list = 16 * [2, 3]
@@ -1242,7 +1245,7 @@ def test_set_attention_slice_list(self):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)

assert mem_bytes < 5 * 10**9
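The `backend_max_memory_allocated`, `backend_reset_max_memory_allocated`, and `backend_reset_peak_memory_stats` helpers imported at the top of this file apply the same dispatch idea to memory accounting. The following is a hedged sketch, not the real `testing_utils` code; it assumes the XPU backend exposes the same memory-stat function names as CUDA (recent PyTorch releases do) and guards the calls accordingly.

```python
import torch


def max_memory_allocated(device: str) -> int:
    """Peak bytes allocated on the backend since the last reset (sketch)."""
    if device == "cuda":
        return torch.cuda.max_memory_allocated()
    if device == "xpu" and hasattr(torch, "xpu") and hasattr(torch.xpu, "max_memory_allocated"):
        return torch.xpu.max_memory_allocated()
    return 0  # backends without allocator statistics


def reset_peak_memory_stats(device: str) -> None:
    """Reset the peak-memory counters before a measurement (sketch)."""
    if device == "cuda":
        torch.cuda.reset_peak_memory_stats()
    elif device == "xpu" and hasattr(torch, "xpu") and hasattr(torch.xpu, "reset_peak_memory_stats"):
        torch.xpu.reset_peak_memory_stats()
```

The attention-slicing tests above then assert on `backend_max_memory_allocated(torch_device)` instead of a CUDA-only counter.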

2 changes: 1 addition & 1 deletion tests/pipelines/controlnet/test_controlnet.py
@@ -79,7 +79,7 @@ def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.to("cuda")
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)

pipe.unet.to(memory_format=torch.channels_last)
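Swapping the hard-coded `pipe.to("cuda")` for `pipe.to(torch_device)` is what lets this compile test run on non-CUDA accelerators. Below is a rough sketch of how a `torch_device` string and an accelerator-gating decorator could be derived; the real `diffusers` test utilities resolve this differently (including an environment-variable override), so treat the names and fallback order here as assumptions.

```python
import unittest

import torch

# Hypothetical device resolution; the real testing_utils also honors an
# explicit override such as a DIFFUSERS_TEST_DEVICE-style environment variable.
if torch.cuda.is_available():
    torch_device = "cuda"
elif hasattr(torch, "xpu") and torch.xpu.is_available():
    torch_device = "xpu"
elif torch.backends.mps.is_available():
    torch_device = "mps"
else:
    torch_device = "cpu"


def require_accelerator(test_case):
    """Skip a test unless some non-CPU accelerator is available (sketch)."""
    return unittest.skipUnless(torch_device != "cpu", "test requires an accelerator")(test_case)
```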
8 changes: 4 additions & 4 deletions tests/pipelines/controlnet/test_controlnet_inpaint_sdxl.py
@@ -40,7 +40,7 @@
from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
require_torch_gpu,
require_torch_accelerator,
torch_device,
)

@@ -245,7 +245,7 @@ def test_xformers_attention_forwardGenerator_pass(self):
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(expected_max_diff=2e-3)

@require_torch_gpu
@require_torch_accelerator
def test_stable_diffusion_xl_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -254,12 +254,12 @@ def test_stable_diffusion_xl_offloads(self):

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

image_slices = []
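Passing `device=torch_device` to the offloading helpers matters because `enable_model_cpu_offload` and `enable_sequential_cpu_offload` otherwise default to CUDA as the onload device; on an XPU or MPS machine the explicit argument keeps the offload hooks targeting the active accelerator. A minimal usage sketch follows, with the device string and prompt as placeholder assumptions:

```python
import torch

from diffusers import StableDiffusionPipeline

device = "xpu"  # stand-in for whatever torch_device resolves to on this machine
pipe = StableDiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)
# Weights stay on the CPU and each sub-model is moved to `device` only while it runs.
pipe.enable_model_cpu_offload(device=device)
image = pipe("an astronaut riding a horse", num_inference_steps=2).images[0]
```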
4 changes: 2 additions & 2 deletions tests/pipelines/controlnet/test_controlnet_sdxl.py
@@ -223,12 +223,12 @@ def test_stable_diffusion_xl_offloads(self):

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

image_slices = []
5 changes: 3 additions & 2 deletions tests/pipelines/controlnet_flux/test_controlnet_flux.py
@@ -31,6 +31,7 @@
from diffusers.models import FluxControlNetModel
from diffusers.utils import load_image
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
nightly,
numpy_cosine_similarity_distance,
@@ -215,12 +216,12 @@ class FluxControlNetPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_canny(self):
controlnet = FluxControlNetModel.from_pretrained(
2 changes: 1 addition & 1 deletion tests/pipelines/controlnet_sd3/test_controlnet_sd3.py
@@ -239,7 +239,7 @@ def test_canny(self):
pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
"stabilityai/stable-diffusion-3-medium-diffusers", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)

generator = torch.Generator(device="cpu").manual_seed(0)
5 changes: 3 additions & 2 deletions tests/pipelines/flux/test_pipeline_flux.py
@@ -9,6 +9,7 @@

from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
from diffusers.utils.testing_utils import (
backend_empty_cache,
nightly,
numpy_cosine_similarity_distance,
require_big_gpu_with_torch_cuda,
@@ -236,12 +237,12 @@ class FluxPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, seed=0):
generator = torch.Generator(device="cpu").manual_seed(seed)