forked from plasmasim/sceptic
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsor2dmpi.f
328 lines (257 loc) · 9.54 KB
/
sor2dmpi.f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
c***************************************************
subroutine sorparinit(myid2,sor_comm)
c Creates a new communicator, sor_comm, who contains a subset of
c MPI_COMM_WORLD, because we don't need all the processes for the
c bloc SOR
c Not to be confused with mpicommcart, which is a reorganisation of
c sor_comm
c myid2 is the process id in the new communicator. If myid2 is negative,
c the process does not belong to the communicator
integer myid2,nprocsor
integer members(0:100)
integer group_world,sor_group,sor_comm
include 'piccom.f'
include 'mpif.h'
idim1=nint(sqrt(numprocs*NRUSED/NTHUSED*1.))
idim2=nint(numprocs/idim1*1.)
if((idim1.ge.numprocs)) then
idim1=numprocs
idim2=1
elseif((idim2.ge.numprocs)) then
idim2=numprocs
idim1=1
endif
nprocsor=idim1*idim2
do i=0,nprocsor-1
members(i)=i
enddo
call MPI_COMM_GROUP(MPI_COMM_WORLD,group_world,ierr)
call MPI_GROUP_INCL(group_world,nprocsor,members,sor_group,ierr)
call MPI_GROUP_RANK(sor_group,myid2,ierr)
call MPI_COMM_CREATE(MPI_COMM_WORLD,sor_group,sor_comm,ierr)
if (myid2.ge.0) then
call MPI_COMM_RANK(sor_comm,myid2,ierr)
endif
if (myid2.eq.0) then
c write(*,*) "sorp : ",idim1,idim2,nprocsor,numprocs
write(*,510) idim1,idim2,nprocsor
510 format('BlocX=',i2,' BlocY=',i2,' SorProc=',i2)
endif
end
c ------------------------------------------------------------------
c Solve an elliptical problem in 2d by simultaneous overrelaxation
c L(u) + f(u) = q(x,y),
c where L is a second order elliptical differential operator
c represented by a difference stencil of specified coefficients,
c f is some additional function, and q is the "charge density".
c Ian Hutchinson, January 2006.
c
subroutine sor2dmpi(sor_comm,Li,Lj,ni,nj,debyelen,bcphi,u,q,ictl,
$ ierr,mpiid,idim1,idim2,apc,bpc,cpc,dpc,fpc,gpc)
integer sor_comm,mpiid
c The number of dimensions, 2 here.
integer nd
parameter (nd=2,nd2=nd*2)
c Li leading dimension of i
c ni active dimension of i
c nj active dimension of j
integer Li,Lj,ni,nj
c cij coefficients of the finite difference scheme, in the order
c east,west,north,south [regarding i,j, as x,y].
c real cij(nd2,Li,nj)
c In this mpi routine we must use linear addressing for cij,u,q
c so that the pointers can be used for each block.
real apc(*),bpc(*),cpc(*),dpc(*),fpc(*)
real gpc(0:Lj-1,1:5)
c u potential to be solved for (initialized on entry).
c its boundaries are at 1,ni; 1,nj;
c real u(Li,nj)
real u(*)
c q "charge density" input
c real q(Li,nj)
real q(*)
c User supplied functions which should be declared external in the
c calling routine.
c mpiid Returns my MPI process number for this mpi version.
integer ictl,ierr
c idim1,2 The number of blocks in dimensions 1 and 2.
integer idim1,idim2
c Other things that we might want control over include the maximum
c number of iterations, the Jacobi radius, and the convergence size.
common /sor2dctl/isor_mi,sor_jac,sor_eps,sor_del,isor_k
real delta
logical lconverged
c Debyelength
real debyelen
c BC
integer bcphi
c lflag, decide if we call bbdy for the first time or not
logical lflag
include 'piccomsor.f'
c Whether the topology dimension is periodic or not
logical lperiod(ndims)
data lperiod/ndims*.false./
c The origin of blocks structure may be considered
c integer iorig(idim1+1,idim2+1)
c we declare it as a 1-d array. This is the first time here:
c It must be of dimension greater than the number of processes (blocks)
c parameter (norigmax=1000)
c integer iorig(norigmax)
c parameter (ndims=2)
c bbdydecl declares most things for bbdy, using parameter ndims.
c-------------------------------------------------------------------
save lflag
idims(1)=idim1
idims(2)=idim2
iuds(1)=ni
iuds(2)=nj
ifull(1)=Li
ifull(2)=nj
if((idims(1)+1)*(idims(2)+1).gt.norigmax)then
write(*,*)'Too many processes',idims(1),'x',idims(2),
$ ' for norigmax=',norigmax
stop
endif
c Define mpi block structure.
if(.not.lflag)
$ call bbdydefine(ndims,idims,ifull,iuds,iorig,iLs)
c End of control functions.
c-------------------------------------------------------------------
ierr=0
omega=1.
lconverged=.false.
c Main iteration
do isor_k=1,isor_mi
delta=0.
umin=1.e20
umax=0.
relax=(omega*debyelen**2+1)/(debyelen**2+1)
c Do block boundary communications, returns block info icoords...myid.
c if (mod(isor_k+2,3).eq.0) then
call bbdy(sor_comm,iLs,iuds,u,isor_k,iorig,ndims,idims,lperiod,
$ icoords,iLcoords,myside,myorig,myorig1,myorig2,icommcart
$ ,mpiid,lflag,out)
c endif
c Do a relaxation.
call sorrelax(isor_k,Li,Lj,myside(1),myside(2),
$ u(myorig),q(myorig),apc(myorig1),bpc(myorig1),
$ cpc(myorig),dpc(myorig),fpc(myorig),gpc(myorig2,1),out,
$ relax,delta,bcphi)
c Test convergence
if (mod(isor_k+2,30).eq.0) then
call testifconverged(sor_eps,delta, lconverged,sor_comm)
endif
if(lconverged.and.isor_k.ge.2)goto 11
c Chebychev acceleration:
if(isor_k.eq.1)then
omega=1./(1.-0.5*sor_jac**2)
else
omega=1./(1.-0.25*sor_jac**2*omega)
endif
enddo
c We finished the loop, implies we did not converge.
c isor_k=-isor_mi
c-------------------------------------------------------------------
11 continue
c Do the final mpi_gather [or allgather if all processes need
c the result].
nk=-1
call bbdy(sor_comm,iLs,iuds,u,nk,iorig,ndims,idims,lperiod,
$ icoords,iLcoords,myside,myorig,myorig1,myorig2, icommcart
$ ,mpiid,lflag,out)
sor_del=delta
ierr=isor_k
c Indirection is needed here because otherwise the finalize call
c seems to cause the return to fail. Probably unnecessary.
c mpiid=myid
end
c**********************************************************************
c Do a single relaxation.
subroutine sorrelax(isor_k,Li,Lj,ni,nj,
$ u,q,a,b,c,d,f,g,out,
$ relax,rdelta,bcphi)
integer isor_k,Li,Lj,ni,nj,bcphi
logical out
real u(Li,nj),q(Li,nj)
real a(Li),b(Li),c(Li,nj),d(Li,nj),f(Li,nj),g(Lj,5)
parameter (nd2=4)
real relax,delta,rdelta
rdelta=0.
c Hard-wired red-black relaxation order. isor_k odd => block is odd;
c odd meaning that (1,1) and hence (2,2) is active.
km=mod(isor_k+1,2)
c (Remember that ni=rshield+1)
do j=2,nj-1,1
do i=2+mod(j+km,2),ni-1,2
c Handle BC
if(out.and.i.eq.ni-1) then
delta=g(j,1)*u(i-1,j)+g(j,2)*u(i,j-1)+
$ g(j,3)*u(i,j+1)+g(j,4)+g(j,5)*u(i,j)
u(i,j)=u(i,j)+relax*delta
delta=abs(delta)
if(delta.gt.rdelta)rdelta=delta
c Handle the bulk calculations
else
expu=exp(u(i,j))
dnum= a(i)*u(i+1,j)+b(i)*u(i-1,j) + c(i,j)
$ *u(i,j+1)+d(i,j)*u(i,j-1) -f(i,j)*u(i,j)
$ + q(i,j) - expu
dden=f(i,j) + expu
delta=relax*dnum/dden
u(i,j)=u(i,j)+delta
delta=abs(delta)
if(delta.gt.rdelta)rdelta=delta
endif
enddo
enddo
end
c***********************************************************************
c The challenge here is to ensure that all processes decide to end
c at the same time. If not then a process will hang waiting for message.
c So we have to universalize the convergence test. All block must be
c converged, but the total spread depends on multiple blocks.
subroutine testifconverged(sor_eps,delta,lconverged,sor_comm)
include 'mpif.h'
logical lconverged
real delta,deltaR,sor_eps
integer sor_comm,id
c Here we need to allreduce the data, selecting the maximum values,
c doing it in place.
c Loki's version of mpich seems not to like MPI_IN_PLACE
call MPI_ALLREDUCE(delta,deltaR,1,MPI_REAL,MPI_MAX
$ ,sor_comm,ierr)
delta=deltaR
if(delta.lt.sor_eps) then
lconverged=.true.
else
lconverged=.false.
endif
end
c***********************************************************************
c Cut here and throw the rest away for the basic routines
c***********************************************************************
subroutine fcalc_sor(Li,Lj,ni,nj,jac,mi,dconverge,k,sor_comm,myid2
$ )
include 'piccom.f'
integer Li,Lj,ni,nj
integer sor_comm,myid2
integer nd
parameter (nd=2,nd2=nd*2)
real sor_jac,sor_eps,sor_del,jac,dconverge
integer isor_k,isor_mi,mi
common /sor2dctl/isor_mi,sor_jac,sor_eps,sor_del,isor_k
c testing arrays
integer iuds(nd),ifull(nd)
isor_mi=mi
sor_jac=jac
sor_eps=dconverge
ifull(1)=Li
ifull(2)=Lj
iuds(1)=ni
iuds(2)=nj
ictl=1
call sor2dmpi(sor_comm,Li,Lj,ni,nj,debyelen,bcphi, phi(1,0) ,rho(1
$ ,0),ictl,ierr,myid2,idim1,idim2, apc(1),bpc(1),cpc(1,0)
$ ,dpc(1,0),fpc(1,0),gpc(0,1))
k=isor_k
end