1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * Gromacs Copyright (c) 1991-2005
5 * David van der Spoel, Erik Lindahl, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
34 #include "gmx_parallel_3dfft.h"
36 #include "gmxcomplex.h"
37 #include "gmx_fatal.h"
41 struct gmx_parallel_3dfft
{
46 static int *copy_int_array(int n
,int *src
)
50 dest
= (int*)malloc(n
*sizeof(int));
57 static int *make_slab2grid(int nnodes
,int ngrid
)
61 s2g
= (int*)malloc((nnodes
+1)*sizeof(int));
62 for(i
=0; i
<nnodes
+1; i
++) {
63 /* We always round up */
64 s2g
[i
] = (i
*ngrid
+ nnodes
- 1)/nnodes
;
71 gmx_parallel_3dfft_init (gmx_parallel_3dfft_t
* pfft_setup
,
74 t_complex
** complex_data
,
76 int * slab2index_major
,
77 int * slab2index_minor
,
78 gmx_bool bReproducible
)
80 int rN
=ndata
[2],M
=ndata
[1],K
=ndata
[0];
81 int flags
= FFT5D_REALCOMPLEX
| FFT5D_ORDER_YZ
; /* FFT5D_DEBUG */
82 MPI_Comm rcomm
[]={comm
[1],comm
[0]};
83 int Nb
,Mb
,Kb
; /* dimension for backtransform (in starting order) */
86 if (bReproducible
) flags
|= FFT5D_NOMEASURE
;
88 if (!(flags
&FFT5D_ORDER_YZ
)) {
91 Nb
=K
;Mb
=rN
;Kb
=M
; /* currently always true because ORDER_YZ always set */
94 (*pfft_setup
)->p1
= fft5d_plan_3d(rN
,M
,K
,rcomm
, flags
, (t_complex
**)real_data
, complex_data
);
96 (*pfft_setup
)->p2
= fft5d_plan_3d(Nb
,Mb
,Kb
,rcomm
,
97 (flags
|FFT5D_BACKWARD
|FFT5D_NOMALLOC
)^FFT5D_ORDER_YZ
, complex_data
, (t_complex
**)real_data
);
99 return (*pfft_setup
)->p1
!= 0 && (*pfft_setup
)->p2
!=0;
104 fft5d_limits(fft5d_plan p
,
109 int N1
,M0
,K0
,K1
,*coor
;
110 fft5d_local_size(p
,&N1
,&M0
,&K0
,&K1
,&coor
); /* M0=MG/P[0], K1=KG/P[1], NG,MG,KG global sizes */
113 local_offset
[1]=p
->oM
[0]; /*=p->coor[0]*p->MG/p->P[0]; */
114 local_offset
[0]=p
->oK
[0]; /*=p->coor[1]*p->KG/p->P[1]; */
116 local_ndata
[2]=p
->rC
[0];
117 local_ndata
[1]=p
->pM
[0];
118 local_ndata
[0]=p
->pK
[0];
120 if ((!(p
->flags
&FFT5D_BACKWARD
)) && (p
->flags
&FFT5D_REALCOMPLEX
)) {
121 local_size
[2]=p
->C
[0]*2;
123 local_size
[2]=p
->C
[0];
125 local_size
[1]=p
->pM
[0];
126 local_size
[0]=p
->pK
[0];
131 gmx_parallel_3dfft_real_limits(gmx_parallel_3dfft_t pfft_setup
,
135 return fft5d_limits(pfft_setup
->p1
,local_ndata
,local_offset
,local_size
);
138 static void reorder_ivec_yzx(ivec v
)
149 gmx_parallel_3dfft_complex_limits(gmx_parallel_3dfft_t pfft_setup
,
157 /* For now everything is in-order, but prepare to save communication by avoiding transposes */
158 complex_order
[0] = 0;
159 complex_order
[1] = 1;
160 complex_order
[2] = 2;
162 ret
= fft5d_limits(pfft_setup
->p2
,local_ndata
,local_offset
,local_size
);
164 reorder_ivec_yzx(local_ndata
);
165 reorder_ivec_yzx(local_offset
);
166 reorder_ivec_yzx(local_size
);
173 gmx_parallel_3dfft_execute(gmx_parallel_3dfft_t pfft_setup
,
174 enum gmx_fft_direction dir
,
177 if ((!(pfft_setup
->p1
->flags
&FFT5D_REALCOMPLEX
)) ^ (dir
==GMX_FFT_FORWARD
||dir
==GMX_FFT_BACKWARD
)) {
178 gmx_fatal(FARGS
,"Invalid transform. Plan and execution don't match regarding reel/complex");
180 if (dir
==GMX_FFT_FORWARD
|| dir
==GMX_FFT_REAL_TO_COMPLEX
) {
181 fft5d_execute(pfft_setup
->p1
,0);
183 fft5d_execute(pfft_setup
->p2
,0);
189 gmx_parallel_3dfft_destroy(gmx_parallel_3dfft_t pfft_setup
) {
190 fft5d_destroy(pfft_setup
->p2
);
191 fft5d_destroy(pfft_setup
->p1
);