Skip to content

Commit

Permalink
Edge-casing for multi-GPU HF-to-NeoX conversion (#1065)
Browse files Browse the repository at this point in the history
* edge-casing for multi-GPU HF to sequential case

* cleanup whitespace

* Update NeoXArgs docs automatically

* Update NeoXArgs docs automatically

---------

Co-authored-by: github-actions <[email protected]>
Co-authored-by: Quentin Anthony <[email protected]>
  • Loading branch information
3 people authored Nov 1, 2023
1 parent f574f22 commit fcc5af5
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion configs/neox_arguments.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ Logging Arguments

- **git_hash**: str

Default = 2ff807d
Default = 7c50e77

current git hash of repository

Expand Down
6 changes: 3 additions & 3 deletions tools/ckpts/convert_hf_to_sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,15 +519,15 @@ def get_non_existing_dir(tmp_dir):
model, optimizer, _, lr_scheduler = deepspeed.initialize(
model=model,
optimizer=optimizer,
args=neox_args,
# args=neox_args,
lr_scheduler=lr_scheduler,
dist_init_required=False,
model_parameters=None,
config_params=neox_args.deepspeed_config,
mpu=mpu if not neox_args.is_pipe_parallel else None,
)

if os.environ["OMPI_COMM_WORLD_RANK"] == "0":
if os.environ.get("OMPI_COMM_WORLD_RANK", "1") == "0":
os.makedirs(f"{tmp_cache_dir}", exist_ok=True)

torch.distributed.barrier()
Expand Down Expand Up @@ -566,7 +566,7 @@ def get_non_existing_dir(tmp_dir):
print("==========================================")
convert(hf_model, ckpt_dir=ckpt_dir, output_dir=args.output_dir)

if os.environ["OMPI_COMM_WORLD_RANK"] == "0":
if os.environ.get("OMPI_COMM_WORLD_RANK", "1") == "0":
# cleanup temp dir
os.system(f"rm -r {tmp_cache_dir}")

Expand Down

0 comments on commit fcc5af5

Please sign in to comment.