From fc17ada6040c1b3da3969a64c916f940ce9d443d Mon Sep 17 00:00:00 2001 From: haileyschoelkopf Date: Mon, 30 Oct 2023 16:13:32 +0000 Subject: [PATCH 1/4] edge-casing for multiGPU hf to sequential case --- tools/ckpts/convert_hf_to_sequential.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/ckpts/convert_hf_to_sequential.py b/tools/ckpts/convert_hf_to_sequential.py index 8a3902bce..53dea6e66 100644 --- a/tools/ckpts/convert_hf_to_sequential.py +++ b/tools/ckpts/convert_hf_to_sequential.py @@ -57,7 +57,7 @@ "--output-dir checkpoints/neox_converted/pythia/70m", "--cache-dir checkpoints/HF", "--config configs/pythia/70M.yml configs/local_setup.yml", - "--test", + "--test", ] ) @@ -519,7 +519,7 @@ def get_non_existing_dir(tmp_dir): model, optimizer, _, lr_scheduler = deepspeed.initialize( model=model, optimizer=optimizer, - args=neox_args, + # args=neox_args, lr_scheduler=lr_scheduler, dist_init_required=False, model_parameters=None, @@ -527,7 +527,7 @@ def get_non_existing_dir(tmp_dir): mpu=mpu if not neox_args.is_pipe_parallel else None, ) - if os.environ["OMPI_COMM_WORLD_RANK"] == "0": + if os.environ.get("OMPI_COMM_WORLD_RANK", "1") == "0": os.makedirs(f"{tmp_cache_dir}", exist_ok=True) torch.distributed.barrier() @@ -566,7 +566,7 @@ def get_non_existing_dir(tmp_dir): print("==========================================") convert(hf_model, ckpt_dir=ckpt_dir, output_dir=args.output_dir) - if os.environ["OMPI_COMM_WORLD_RANK"] == "0": + if os.environ.get("OMPI_COMM_WORLD_RANK", "1") == "0": # cleanup temp dir os.system(f"rm -r {tmp_cache_dir}") From 4579a33fc58b13a096f8b288f852b0c8beb67e12 Mon Sep 17 00:00:00 2001 From: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Date: Mon, 30 Oct 2023 12:16:10 -0400 Subject: [PATCH 2/4] cleanup whitespace --- tools/ckpts/convert_hf_to_sequential.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ckpts/convert_hf_to_sequential.py b/tools/ckpts/convert_hf_to_sequential.py index 53dea6e66..be445ec72 100644 --- a/tools/ckpts/convert_hf_to_sequential.py +++ b/tools/ckpts/convert_hf_to_sequential.py @@ -57,7 +57,7 @@ "--output-dir checkpoints/neox_converted/pythia/70m", "--cache-dir checkpoints/HF", "--config configs/pythia/70M.yml configs/local_setup.yml", - "--test", + "--test", ] ) From 71ca6cf6bf3059355841d93f81fe2e9bd5ef9576 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 30 Oct 2023 16:16:25 +0000 Subject: [PATCH 3/4] Update NeoXArgs docs automatically --- configs/neox_arguments.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/neox_arguments.md b/configs/neox_arguments.md index 1f3511456..43f9c08d8 100644 --- a/configs/neox_arguments.md +++ b/configs/neox_arguments.md @@ -111,7 +111,7 @@ Logging Arguments - **git_hash**: str - Default = a97bd1f + Default = 4579a33 current git hash of repository @@ -605,7 +605,7 @@ Optimizer Arguments Default = adam - Type of optimizer to use. Choose from ['adam', 'onebitadam', 'cpu_adam', 'cpu_torch_adam', 'sm3', 'madgrad_wd', 'sgd', 'lion'] + Type of optimizer to use. Choose from ['adam', 'onebitadam', 'cpu_adam', 'cpu_torch_adam', 'sm3', 'madgrad_wd', 'sgd'] NOTE: sgd will use MuSGD from Mup. Mup must be enabled for this optimizer. From 6cb40776191fc6b81c93015fae9f5d5e2514e420 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 31 Oct 2023 01:51:09 +0000 Subject: [PATCH 4/4] Update NeoXArgs docs automatically --- configs/neox_arguments.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/neox_arguments.md b/configs/neox_arguments.md index b0de4b637..2b23f2207 100644 --- a/configs/neox_arguments.md +++ b/configs/neox_arguments.md @@ -111,7 +111,7 @@ Logging Arguments - **git_hash**: str - Default = 2ff807d + Default = 7c50e77 current git hash of repository @@ -605,7 +605,7 @@ Optimizer Arguments Default = adam - Type of optimizer to use. Choose from ['adam', 'onebitadam', 'cpu_adam', 'cpu_torch_adam', 'sm3', 'madgrad_wd', 'sgd'] + Type of optimizer to use. Choose from ['adam', 'onebitadam', 'cpu_adam', 'cpu_torch_adam', 'sm3', 'madgrad_wd', 'sgd', 'lion'] NOTE: sgd will use MuSGD from Mup. Mup must be enabled for this optimizer.