2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2005 David van der Spoel, Erik Lindahl, University of Groningen.
5 * Copyright (c) 2013,2014,2017, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "parallel_3dfft.h"
44 #include "gromacs/fft/fft.h"
45 #include "gromacs/fft/fft5d.h"
46 #include "gromacs/math/gmxcomplex.h"
47 #include "gromacs/utility/fatalerror.h"
48 #include "gromacs/utility/gmxmpi.h"
49 #include "gromacs/utility/smalloc.h"
51 struct gmx_parallel_3dfft
{
56 gmx_parallel_3dfft_init (gmx_parallel_3dfft_t
* pfft_setup
,
59 t_complex
** complex_data
,
61 gmx_bool bReproducible
,
63 gmx::PinningPolicy realGridAllocation
)
65 int rN
= ndata
[2], M
= ndata
[1], K
= ndata
[0];
66 int flags
= FFT5D_REALCOMPLEX
| FFT5D_ORDER_YZ
; /* FFT5D_DEBUG */
67 MPI_Comm rcomm
[] = {comm
[1], comm
[0]};
68 int Nb
, Mb
, Kb
; /* dimension for backtransform (in starting order) */
69 t_complex
*buf1
, *buf2
; /*intermediate buffers - used internally.*/
74 flags
|= FFT5D_NOMEASURE
;
77 if (!(flags
&FFT5D_ORDER_YZ
))
79 Nb
= M
; Mb
= K
; Kb
= rN
;
83 Nb
= K
; Mb
= rN
; Kb
= M
; /* currently always true because ORDER_YZ always set */
86 (*pfft_setup
)->p1
= fft5d_plan_3d(rN
, M
, K
, rcomm
, flags
, (t_complex
**)real_data
, complex_data
, &buf1
, &buf2
, nthreads
, realGridAllocation
);
88 (*pfft_setup
)->p2
= fft5d_plan_3d(Nb
, Mb
, Kb
, rcomm
,
89 (flags
|FFT5D_BACKWARD
|FFT5D_NOMALLOC
)^FFT5D_ORDER_YZ
, complex_data
, (t_complex
**)real_data
, &buf1
, &buf2
, nthreads
);
91 return (*pfft_setup
)->p1
!= nullptr && (*pfft_setup
)->p2
!= nullptr;
96 fft5d_limits(fft5d_plan p
,
102 local_offset
[1] = p
->oM
[0]; /*=p->coor[0]*p->MG/p->P[0]; */
103 local_offset
[0] = p
->oK
[0]; /*=p->coor[1]*p->KG/p->P[1]; */
105 local_ndata
[2] = p
->rC
[0];
106 local_ndata
[1] = p
->pM
[0];
107 local_ndata
[0] = p
->pK
[0];
109 if ((!(p
->flags
&FFT5D_BACKWARD
)) && (p
->flags
&FFT5D_REALCOMPLEX
))
111 //C is length in multiples of complex local_size in multiples of real
112 local_size
[2] = p
->C
[0]*2;
116 local_size
[2] = p
->C
[0];
118 local_size
[1] = p
->pM
[0];
119 local_size
[0] = p
->pK
[0];
124 gmx_parallel_3dfft_real_limits(gmx_parallel_3dfft_t pfft_setup
,
129 return fft5d_limits(pfft_setup
->p1
, local_ndata
, local_offset
, local_size
);
132 static void reorder_ivec_yzx(ivec v
)
143 gmx_parallel_3dfft_complex_limits(gmx_parallel_3dfft_t pfft_setup
,
151 /* For now everything is in-order, but prepare to save communication by avoiding transposes */
152 complex_order
[0] = 0;
153 complex_order
[1] = 1;
154 complex_order
[2] = 2;
156 ret
= fft5d_limits(pfft_setup
->p2
, local_ndata
, local_offset
, local_size
);
158 reorder_ivec_yzx(local_ndata
);
159 reorder_ivec_yzx(local_offset
);
160 reorder_ivec_yzx(local_size
);
167 gmx_parallel_3dfft_execute(gmx_parallel_3dfft_t pfft_setup
,
168 enum gmx_fft_direction dir
,
170 gmx_wallcycle_t wcycle
)
172 if ((!(pfft_setup
->p1
->flags
&FFT5D_REALCOMPLEX
)) ^ (dir
== GMX_FFT_FORWARD
|| dir
== GMX_FFT_BACKWARD
))
174 gmx_fatal(FARGS
, "Invalid transform. Plan and execution don't match regarding reel/complex");
176 if (dir
== GMX_FFT_FORWARD
|| dir
== GMX_FFT_REAL_TO_COMPLEX
)
178 fft5d_execute(pfft_setup
->p1
, thread
, wcycle
);
182 fft5d_execute(pfft_setup
->p2
, thread
, wcycle
);
188 gmx_parallel_3dfft_destroy(gmx_parallel_3dfft_t pfft_setup
)
192 fft5d_destroy(pfft_setup
->p2
);
193 fft5d_destroy(pfft_setup
->p1
);