diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py
index 91006258efa..56fb754b9f1 100755
--- a/src/accelerate/accelerator.py
+++ b/src/accelerate/accelerator.py
@@ -310,7 +310,7 @@ def __init__(
         if os.environ.get("ACCELERATE_USE_FSDP", "false") == "true" or isinstance(
             fsdp_plugin, FullyShardedDataParallelPlugin
         ):
-            if is_torch_version("<", FSDP_PYTORCH_VERSION):
+            if not is_torch_version(">=", FSDP_PYTORCH_VERSION):
                 raise ValueError(f"FSDP requires PyTorch >= {FSDP_PYTORCH_VERSION}")

             if fsdp_plugin is None:  # init from env variables
diff --git a/src/accelerate/utils/constants.py b/src/accelerate/utils/constants.py
index 11485252095..a5f33279a41 100644
--- a/src/accelerate/utils/constants.py
+++ b/src/accelerate/utils/constants.py
@@ -37,7 +37,9 @@
 FSDP_AUTO_WRAP_POLICY = ["TRANSFORMER_BASED_WRAP", "SIZE_BASED_WRAP", "NO_WRAP"]
 FSDP_BACKWARD_PREFETCH = ["BACKWARD_PRE", "BACKWARD_POST", "NO_PREFETCH"]
 FSDP_STATE_DICT_TYPE = ["FULL_STATE_DICT", "LOCAL_STATE_DICT", "SHARDED_STATE_DICT"]
-FSDP_PYTORCH_VERSION = "2.1.0"
+FSDP_PYTORCH_VERSION = (
+    "2.1.0.a0+32f93b1"  # Technically should be 2.1.0, but MS-AMP uses this specific prerelease in their Docker image.
+)
 FSDP_MODEL_NAME = "pytorch_model_fsdp"
 DEEPSPEED_MULTINODE_LAUNCHERS = ["pdsh", "standard", "openmpi", "mvapich", "mpich"]
 TORCH_DYNAMO_MODES = ["default", "reduce-overhead", "max-autotune"]
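For context on the constant change, here is a minimal sketch of why the old `"2.1.0"` pin rejected the MS-AMP build, assuming `is_torch_version` ultimately compares `packaging.version.parse` results (as the helpers in `accelerate.utils.versions` do); `msamp_torch` is an illustrative name, not part of the diff:

```python
from packaging.version import parse

# The torch build shipped in MS-AMP's Docker image (from the comment above).
msamp_torch = parse("2.1.0a0+32f93b1")

# Under PEP 440, a prerelease sorts *before* the final release, so the old
# check `is_torch_version("<", "2.1.0")` was True on that build and raised.
print(msamp_torch < parse("2.1.0"))  # True -> FSDP refused MS-AMP's torch

# With the prerelease itself pinned as the minimum, the same build passes.
print(msamp_torch >= parse("2.1.0.a0+32f93b1"))  # True -> check succeeds
```

The operator flip in the first hunk (`<` to `not >=`) should not change the outcome for PEP 440 `Version` objects, where `a < b` is equivalent to `not a >= b`; it restates the check in the positive form the error message describes.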