Skip to content

Commit 983e275

Browse files
author
root
committed
Quentin - fix format issues in mpi run
1 parent 68e3da7 commit 983e275

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

deepspeed/launcher/multinode_runner.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,8 @@ def get_cmd(self, environment, active_resources):
124124
btl_tcp_opt = []
125125
break
126126

127+
print("===================== MPI =====================")
128+
127129
# Custom from previous PI cluster
128130
mpirun_cmd = [
129131
'mpirun',
@@ -133,11 +135,17 @@ def get_cmd(self, environment, active_resources):
133135
'-hostfile',
134136
f'{self.args.hostfile}',
135137
'-mca',
136-
'btl tcp,self',
138+
'btl',
139+
'tcp,self',
140+
'-mca',
141+
'coll_hcoll_enable',
142+
'0',
137143
'-mca',
138-
'coll_hcoll_enable 0',
144+
'plm_rsh_agent',
145+
'ssh',
139146
'-mca',
140-
'plm_rsh_args "-p 2222"',
147+
'plm_rsh_args',
148+
'-p 2222',
141149
'-x',
142150
'PATH',
143151
'-x',

0 commit comments

Comments
 (0)