Skip to content

Commit 870751f

Browse files
Fix for MVAPICH-PLUS support for MCR-DL (#14)
* Fix for MVAPICH-PLUS
  Signed-off-by: Radha Guhane <[email protected]>
* Rename config to build config for clarity
* Changes required for config.yml -> build_config.yml, revert testing changes
  Signed-off-by: Radha Guhane <[email protected]>
* Remove build_config.yml cached file
  Signed-off-by: Radha Guhane <[email protected]>
---------
Signed-off-by: Radha Guhane <[email protected]>
Co-authored-by: Quentin Anthony <[email protected]>
1 parent dbb4acf commit 870751f

File tree

5 files changed

+6
-18
lines changed

5 files changed

+6
-18
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
__pycache__
22
mcr_dl/git_version_info_installed.py
33
mcr_dl.egg-info/
4-
mcr_dl/config.yml
4+
mcr_dl/build_config.yml

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ python setup.py install
3030
```
3131

3232
### Update Configurations
33-
Update mpi, cuda, and nccl paths appropriately in [mcr_dl/config.yml](/mcr_dl/config.yml)
33+
Update mpi, cuda, and nccl paths appropriately in [mcr_dl/build_config.yml](/mcr_dl/build_config.yml)
3434

3535
### The MCR-DL Communication Benchmarking Suite
3636

mcr_dl/config.yml

-11
This file was deleted.

mcr_dl/ops/op_builder/config.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@
2222

2323
class ConfigPath():
2424
def __init__(self, file_path = None):
25-
self.file_path = os.path.join(os.path.dirname(mcr_dl.__file__), "config.yml") if file_path is None else file_path
26-
print(self.file_path)
25+
self.file_path = os.path.join(os.path.dirname(mcr_dl.__file__), "build_config.yml") if file_path is None else file_path
2726
self.config_data = self.load_config()
2827
self.mpi_path = self.config_data.get("mpi", {}).get("path")
2928
self.mpi_include = self.config_data.get("mpi", {}).get("include")

mcr_dl/utils/dist.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,12 @@ def set_mpi_dist_environemnt(master_addr = None):
156156
if master_addr is not None:
157157
os.environ['MASTER_ADDR'] = master_addr
158158
local_rank = env2int(
159-
['LOCAL_RANK', 'MPI_LOCALRANKID', 'OMPI_COMM_WORLD_LOCAL_RANK', 'MV2_COMM_WORLD_LOCAL_RANK', 'SLURM_LOCALID'])
159+
['LOCAL_RANK', 'MPI_LOCALRANKID', 'OMPI_COMM_WORLD_LOCAL_RANK', 'MV2_COMM_WORLD_LOCAL_RANK', 'SLURM_LOCALID', 'MVP_COMM_WORLD_LOCAL_RANK'])
160160
if 'LOCAL_RANK' not in os.environ:
161161
os.environ['LOCAL_RANK'] = str(local_rank)
162-
rank = env2int(['RANK', 'MPI_RANKID', 'OMPI_COMM_WORLD_RANK', 'MV2_COMM_WORLD_RANK', 'SLURM_PROCID'])
162+
rank = env2int(['RANK', 'MPI_RANKID', 'OMPI_COMM_WORLD_RANK', 'MV2_COMM_WORLD_RANK', 'SLURM_PROCID', 'MVP_COMM_WORLD_LOCAL_RANK'])
163163
if 'RANK' not in os.environ:
164164
os.environ['RANK'] = str(rank)
165-
world_size = env2int(['WORLD_SIZE', 'OMPI_COMM_WORLD_SIZE', 'MV2_COMM_WORLD_SIZE', 'SLURM_NPROCS'])
165+
world_size = env2int(['WORLD_SIZE', 'OMPI_COMM_WORLD_SIZE', 'MV2_COMM_WORLD_SIZE', 'SLURM_NPROCS', 'MVP_COMM_WORLD_LOCAL_RANK'])
166166
if 'WORLD_SIZE' not in os.environ:
167167
os.environ['WORLD_SIZE'] = str(world_size)

0 commit comments

Comments
 (0)