1 parent 9c3a460 commit fff53ba
QEfficient/cloud/finetune.py
@@ -96,8 +96,8 @@ def setup_distributed_training(train_config: TrainConfig) -> None:

     dist.init_process_group(backend=train_config.dist_backend)
     if train_config.enable_pp:
-        assert dist.get_world_size() % train_config.num_pp_stages == 0, (
-            "total available devices should be multiple of number of pipeline stages"
+        assert dist.get_world_size() * train_config.num_pp_stages == getattr(torch, torch_device.type).device_count(), (
+            "Total available devices should be multiple of number of pipeline stages."
         )
     else:
         # from here onward "qaic/cuda" will automatically map to "qaic:i/cuda:i", where i = process rank
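For context, the revised assertion ties process count and pipeline depth to the hardware: each of the `world_size` processes is expected to drive `num_pp_stages` devices, so their product must equal the backend's total device count. Below is a minimal standalone sketch of that invariant, with hypothetical values standing in for the real runtime lookups (dist.get_world_size(), train_config.num_pp_stages, and getattr(torch, torch_device.type).device_count()); it is an illustration, not the QEfficient implementation.

    # Hypothetical stand-ins for the values read at runtime.
    world_size = 4      # number of spawned training processes
    num_pp_stages = 2   # pipeline stages handled by each process
    device_count = 8    # e.g. torch.cuda.device_count() on a CUDA host

    # The new check: every process drives `num_pp_stages` devices, so the
    # product must account for all available devices exactly.
    assert world_size * num_pp_stages == device_count, (
        "Total available devices should be multiple of number of pipeline stages."
    )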