7 #define STANDARD_ERROR 2
9 #define STANDARD_OUTPUT 1
16 #define UP_EVEN(A) ((A)+abs((A)%2))
17 #define DOWN_EVEN(A) ((A) - abs((A)%2))
18 #define UP_ODD(A) ((A) + abs(((A)+1)%2))
19 #define DOWN_ODD(A) ((A) - abs(((A)+1)%2))
20 #define MIN(A,B) ((A)<(B)?(A):(B))
21 #define MAX(A,B) ((A)>(B)?(A):(B))
23 static int *y_curs
= NULL
;
24 static int *x_curs
= NULL
;
25 static int *x_peermask
= NULL
;
26 static int *nbytes
= NULL
;
28 static MPI_Request
*x_recv
= NULL
, *x_send
= NULL
;
31 void RSL_LITE_INIT_SWAP (
34 int * n3dR0
, int *n2dR0
, int * typesizeR0
,
35 int * n3dI0
, int *n2dI0
, int * typesizeI0
,
36 int * n3dD0
, int *n2dD0
, int * typesizeD0
,
37 int * n3dL0
, int *n2dL0
, int * typesizeL0
,
38 int * me0
, int * np0
, int * np_x0
, int * np_y0
,
40 int * ids0
, int * ide0
, int * jds0
, int * jde0
, int * kds0
, int * kde0
,
41 int * ips0
, int * ipe0
, int * jps0
, int * jpe0
, int * kps0
, int * kpe0
)
44 int n3dR
, n2dR
, typesizeR
;
45 int n3dI
, n2dI
, typesizeI
;
46 int n3dD
, n2dD
, typesizeD
;
47 int n3dL
, n2dL
, typesizeL
;
49 int me
, np
, np_x
, np_y
;
50 int ids
, ide
, jds
, jde
, kds
, kde
;
51 int ips
, ipe
, jps
, jpe
, kps
, kpe
;
52 int ips_send
, ipe_send
;
53 int npts
, i
, ii
, j
, m
, n
, ps
, pe
, ops
, ope
;
54 int Px
, Py
, P
, coords
[2] ;
55 int ips_swap
, ipe_swap
;
56 MPI_Comm
*comm
, dummy_comm
;
60 *comm
= MPI_Comm_f2c( *Fcomm
) ;
63 n3dR
= *n3dR0
; n2dR
= *n2dR0
; typesizeR
= *typesizeR0
;
64 n3dI
= *n3dI0
; n2dI
= *n2dI0
; typesizeI
= *typesizeI0
;
65 n3dD
= *n3dD0
; n2dD
= *n2dD0
; typesizeD
= *typesizeD0
;
66 n3dL
= *n3dL0
; n2dL
= *n2dL0
; typesizeL
= *typesizeL0
;
67 me
= *me0
; np
= *np0
; np_x
= *np_x0
; np_y
= *np_y0
;
68 ids
= *ids0
-1 ; ide
= *ide0
-1 ; jds
= *jds0
-1 ; jde
= *jde0
-1 ; kds
= *kds0
-1 ; kde
= *kde0
-1 ;
69 ips
= *ips0
-1 ; ipe
= *ipe0
-1 ; jps
= *jps0
-1 ; jpe
= *jpe0
-1 ; kps
= *kps0
-1 ; kpe
= *kpe0
-1 ;
71 if ( nbytes
== NULL
) nbytes
= RSL_MALLOC ( int , np
) ;
72 if ( x_curs
== NULL
) x_curs
= RSL_MALLOC ( int , np
) ;
73 if ( x_peermask
== NULL
) x_peermask
= RSL_MALLOC ( int , np
) ;
74 if ( x_recv
== NULL
) x_recv
= RSL_MALLOC ( MPI_Request
, np
) ;
75 if ( x_send
== NULL
) x_send
= RSL_MALLOC ( MPI_Request
, np
) ;
76 for ( i
= 0 ; i
< np
; i
++ ) { nbytes
[i
] = 0 ; x_curs
[i
] = 0 ; x_peermask
[i
] = 0 ; }
78 if ( xy
== 1 ) { /* xy = 1, swap in X, otherwise Y */
94 for ( i
= UP_ODD( ps
) ; i
<= MIN(pe
,m
) ; i
+=2 ) {
97 TASK_FOR_POINT ( &ii
, &jps
, &ids
, &ide
, &jds
, &jde
, &np_x
, &np_y
, &Px
, &Py
,
98 min_subdomain
, min_subdomain
, &ierr
) ;
99 coords
[1] = Px
; coords
[0] = Py
;
100 MPI_Cart_rank( *comm
, coords
, &P
) ;
102 TASK_FOR_POINT ( &ips
, &ii
, &ids
, &ide
, &jds
, &jde
, &np_x
, &np_y
, &Px
, &Py
,
103 min_subdomain
, min_subdomain
, &ierr
) ;
104 coords
[1] = Px
; coords
[0] = Py
;
105 MPI_Cart_rank( *comm
, coords
, &P
) ;
107 nbytes
[P
] += typesizeR
*(ope
-ops
+1)*(n3dR
*(kpe
-kps
+1)+n2dR
) +
108 typesizeI
*(ope
-ops
+1)*(n3dI
*(kpe
-kps
+1)+n2dI
) +
109 typesizeD
*(ope
-ops
+1)*(n3dD
*(kpe
-kps
+1)+n2dD
) +
110 typesizeL
*(ope
-ops
+1)*(n3dL
*(kpe
-kps
+1)+n2dL
) ;
114 for ( P
= 0 ; P
< np
; P
++ ) {
115 if ( x_peermask
[P
] ) {
116 buffer_for_proc ( P
, nbytes
[P
], RSL_RECVBUF
) ;
117 buffer_for_proc ( P
, nbytes
[P
], RSL_SENDBUF
) ;
123 void RSL_LITE_PACK_SWAP ( int * Fcomm
, char * buf
, int * odd0
, int * typesize0
, int * xy0
, int * pu0
, char * memord
, int * xstag0
,
124 int *me0
, int * np0
, int * np_x0
, int * np_y0
,
125 int * min_subdomain
,
126 int * ids0
, int * ide0
, int * jds0
, int * jde0
, int * kds0
, int * kde0
,
127 int * ims0
, int * ime0
, int * jms0
, int * jme0
, int * kms0
, int * kme0
,
128 int * ips0
, int * ipe0
, int * jps0
, int * jpe0
, int * kps0
, int * kpe0
)
131 int me
, np
, np_x
, np_y
;
133 int ids
, ide
, jds
, jde
, kds
, kde
;
134 int ims
, ime
, jms
, jme
, kms
, kme
;
135 int ips
, ipe
, jps
, jpe
, kps
, kpe
;
136 int xstag
; /* 0 not stag, 1 stag */
137 int xy
; /* y = 0 , x = 1 */
138 int pu
; /* pack = 0 , unpack = 1 */
139 int i
, ii
, j
, jj
, m
, n
;
140 int ps
, pe
, ops
, ope
;
143 register int i2
,i3
,i4
,i_offset
;
147 int Px
, Py
, P
, coords
[2] ;
149 register int *pi
, *qi
;
151 MPI_Comm
*comm
, dummy_comm
;
154 *comm
= MPI_Comm_f2c( *Fcomm
) ;
156 me
= *me0
; np
= *np0
; np_x
= *np_x0
; np_y
= *np_y0
;
158 odd
= *odd0
; typesize
= *typesize0
;
159 ids
= *ids0
-1 ; ide
= *ide0
-1 ; jds
= *jds0
-1 ; jde
= *jde0
-1 ; kds
= *kds0
-1 ; kde
= *kde0
-1 ;
160 ims
= *ims0
-1 ; ime
= *ime0
-1 ; jms
= *jms0
-1 ; jme
= *jme0
-1 ; kms
= *kms0
-1 ; kme
= *kme0
-1 ;
161 ips
= *ips0
-1 ; ipe
= *ipe0
-1 ; jps
= *jps0
-1 ; jpe
= *jpe0
-1 ; kps
= *kps0
-1 ; kpe
= *kpe0
-1 ;
165 /* need to adapt for other memory orders */
166 #define RANGE(S1,E1,S2,E2,S3,E3,S4,E4) (((E1)-(S1)+1)*((E2)-(S2)+1)*(((E3)-(S3)+1)/2)*((E4)-(S4)+1))
167 #define IMAX(A) (((A)>ids)?(A):ids)
168 #define IMIN(A) (((A)<ide)?(A):ide)
169 #define JMAX(A) (((A)>jds)?(A):jds)
170 #define JMIN(A) (((A)<jde)?(A):jde)
172 da_buf
= ( pu
== 0 ) ? RSL_SENDBUF
: RSL_RECVBUF
;
175 if ( xy
== 1 ) { /* xy = 1, swap in X, otherwise Y */
176 n
= (ide
-ids
+1)/4*2 ;
179 n
= (jde
-jds
+1)/4*2 ;
183 if ( np_x
> 1 && xy
== 1 ) {
185 for ( i
= UP_ODD(ips
) ; i
<= MIN(ipe
,m
) ; i
+=2 ) {
187 TASK_FOR_POINT ( &ii
, &jps
, &ids
, &ide
, &jds
, &jde
, &np_x
, &np_y
, &Px
, &Py
,
188 min_subdomain
, min_subdomain
, &ierr
) ;
189 coords
[1] = Px
; coords
[0] = Py
;
190 MPI_Cart_rank( *comm
, coords
, &P
) ;
191 p
= buffer_for_proc( P
, 0 , da_buf
) ;
193 if ( typesize
== sizeof(int) ) {
194 for ( j
= JMAX(jps
) ; j
<= JMIN(jpe
) ; j
++ ) {
195 for ( k
= kps
; k
<= kpe
; k
++ ) {
196 pi
= (int *)(p
+x_curs
[P
]) ;
197 qi
= (int *)((buf
+ typesize
*( (i
-ims
) + (ime
-ims
+1)*(
198 (k
-kms
) + (j
-jms
)*(kme
-kms
+1))))) ;
200 x_curs
[P
] += typesize
;
205 for ( j
= JMAX(jps
) ; j
<= JMIN(jpe
) ; j
++ ) {
206 for ( k
= kps
; k
<= kpe
; k
++ ) {
207 for ( t
= 0 ; t
< typesize
; t
++ ) {
209 *(buf
+ t
+ typesize
*(
210 (i
-ims
) + (ime
-ims
+1)*(
211 (k
-kms
) + (j
-jms
)*(kme
-kms
+1))) ) ;
218 if ( typesize
== sizeof(int) ) {
219 for ( j
= JMAX(jps
) ; j
<= JMIN(jpe
) ; j
++ ) {
220 for ( k
= kps
; k
<= kpe
; k
++ ) {
221 pi
= (int *)(p
+x_curs
[P
]) ;
222 qi
= (int *)((buf
+ typesize
*( (i
-ims
) + (ime
-ims
+1)*(
223 (k
-kms
) + (j
-jms
)*(kme
-kms
+1))))) ;
225 x_curs
[P
] += typesize
;
230 for ( j
= JMAX(jps
) ; j
<= JMIN(jpe
) ; j
++ ) {
231 for ( k
= kps
; k
<= kpe
; k
++ ) {
232 for ( t
= 0 ; t
< typesize
; t
++ ) {
233 *(buf
+ t
+ typesize
*(
234 (i
-ims
) + (ime
-ims
+1)*(
235 (k
-kms
) + (j
-jms
)*(kme
-kms
+1))) ) =
244 } else if ( np_y
> 1 && xy
== 0 ) {
245 for ( j
= UP_ODD(jps
) ; j
<= MIN(jpe
,m
) ; j
+=2 ) {
247 TASK_FOR_POINT ( &ips
, &jj
, &ids
, &ide
, &jds
, &jde
, &np_x
, &np_y
, &Px
, &Py
,
248 min_subdomain
, min_subdomain
, &ierr
) ;
249 coords
[1] = Px
; coords
[0] = Py
;
250 MPI_Cart_rank( *comm
, coords
, &P
) ;
251 p
= buffer_for_proc( P
, 0 , da_buf
) ;
253 if ( typesize
== sizeof(int) ) {
254 for ( i
= IMAX(ips
) ; i
<= IMIN(ipe
) ; i
++ ) {
255 for ( k
= kps
; k
<= kpe
; k
++ ) {
256 pi
= (int *)(p
+x_curs
[P
]) ;
257 qi
= (int *)((buf
+ typesize
*( (i
-ims
) + (ime
-ims
+1)*(
258 (k
-kms
) + (j
-jms
)*(kme
-kms
+1))))) ;
260 x_curs
[P
] += typesize
;
265 for ( i
= IMAX(ips
) ; i
<= IMIN(ipe
) ; i
++ ) {
266 for ( k
= kps
; k
<= kpe
; k
++ ) {
267 for ( t
= 0 ; t
< typesize
; t
++ ) {
269 *(buf
+ t
+ typesize
*(
270 (i
-ims
) + (ime
-ims
+1)*(
271 (k
-kms
) + (j
-jms
)*(kme
-kms
+1))) ) ;
278 if ( typesize
== sizeof(int) ) {
279 for ( i
= IMAX(ips
) ; i
<= IMIN(ipe
) ; i
++ ) {
280 for ( k
= kps
; k
<= kpe
; k
++ ) {
281 pi
= (int *)(p
+x_curs
[P
]) ;
282 qi
= (int *)((buf
+ typesize
*( (i
-ims
) + (ime
-ims
+1)*(
283 (k
-kms
) + (j
-jms
)*(kme
-kms
+1))))) ;
285 x_curs
[P
] += typesize
;
290 for ( i
= IMAX(ips
) ; i
<= IMIN(ipe
) ; i
++ ) {
291 for ( k
= kps
; k
<= kpe
; k
++ ) {
292 for ( t
= 0 ; t
< typesize
; t
++ ) {
293 *(buf
+ t
+ typesize
*(
294 (i
-ims
) + (ime
-ims
+1)*(
295 (k
-kms
) + (j
-jms
)*(kme
-kms
+1))) ) =
308 void RSL_LITE_SWAP ( int * Fcomm0
, int *me0
, int * np0
, int * np_x0
, int * np_y0
)
311 int me
, np
, np_x
, np_y
;
312 int yp
, ym
, xp
, xm
, nb
;
314 MPI_Comm comm
, *comm0
, dummy_comm
;
317 comm0
= &dummy_comm
;
318 *comm0
= MPI_Comm_f2c( *Fcomm0
) ;
321 comm
= *comm0
; me
= *me0
; np
= *np0
; np_x
= *np_x0
; np_y
= *np_y0
;
323 /* fprintf(stderr,"RSL_LITE_SWAP\n") ; */
325 for ( P
= 0 ; P
< np
; P
++ ) {
326 if ( x_peermask
[P
] ) {
327 nb
= buffer_size_for_proc( P
, RSL_RECVBUF
) ;
328 /* fprintf(stderr,"posting irecv from %d, nb = %d\n",P,nb) ; */
329 MPI_Irecv ( buffer_for_proc( P
, x_curs
[P
], RSL_RECVBUF
), nb
, MPI_CHAR
, P
, me
, comm
, &(x_recv
[P
]) ) ;
330 /* fprintf(stderr,"sending to %d, nb = %d\n",P,x_curs[P]) ; */
331 MPI_Isend ( buffer_for_proc( P
, 0, RSL_SENDBUF
), x_curs
[P
], MPI_CHAR
, P
, P
, comm
, &(x_send
[P
]) ) ;
334 for ( P
= 0 ; P
< np
; P
++ ) {
335 if ( x_peermask
[P
] ) {
336 MPI_Wait( &x_recv
[P
], &stat
) ;
337 MPI_Wait( &x_send
[P
], &stat
) ;
342 fprintf(stderr
,"RSL_LITE_SWAP disabled\n") ;
345 for ( i
= 0 ; i
< np
; i
++ ) { x_curs
[i
] = 0 ; }