2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/fft/fft.h"
46 #include "gromacs/utility/exceptions.h"
47 #include "gromacs/utility/fatalerror.h"
48 #include "gromacs/utility/mutex.h"
51 #define FFTWPREFIX(name) fftw_ ## name
53 #define FFTWPREFIX(name) fftwf_ ## name
56 /* none of the fftw3 calls, except execute(), are thread-safe, so
57 we need to serialize them with this mutex. */
58 static gmx::Mutex big_fftw_mutex
;
59 #define FFTW_LOCK try { big_fftw_mutex.lock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
60 #define FFTW_UNLOCK try { big_fftw_mutex.unlock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
62 /* We assume here that aligned memory starts at multiple of 16 bytes and unaligned memory starts at multiple of 8 bytes. The later is guranteed for all malloc implementation.
64 - It is not allowed to use these FFT plans from memory which doesn't have a starting address as a multiple of 8 bytes.
65 This is OK as long as the memory directly comes from malloc and is not some subarray within alloated memory.
66 - This has to be fixed if any future architecute requires memory to be aligned to multiples of 32 bytes.
70 * Contents of the FFTW3 fft datatype.
72 * Note that this is one of several possible implementations of gmx_fft_t.
83 * Three alternatives (unaligned/aligned, out-of-place/in-place, forward/backward)
84 * results in 8 different FFTW plans. Keep track of them with 3 array indices:
85 * first index: 0=unaligned, 1=aligned
86 * second index: 0=out-of-place, 1=in-place
87 * third index: 0=backward, 1=forward
89 FFTWPREFIX(plan
) plan
[2][2][2];
90 /** Used to catch user mistakes */
92 /** Number of dimensions in the FFT */
97 gmx_fft_init_1d(gmx_fft_t
* pfft
,
101 return gmx_fft_init_many_1d(pfft
, nx
, 1, flags
);
106 gmx_fft_init_many_1d(gmx_fft_t
* pfft
,
112 FFTWPREFIX(complex) *p1
, *p2
, *up1
, *up2
;
117 #if GMX_DISABLE_FFTW_MEASURE
118 flags
|= GMX_FFT_FLAG_CONSERVATIVE
;
121 fftw_flags
= (flags
& GMX_FFT_FLAG_CONSERVATIVE
) ? FFTW_ESTIMATE
: FFTW_MEASURE
;
125 gmx_fatal(FARGS
, "Invalid opaque FFT datatype pointer.");
131 if ( (fft
= static_cast<gmx_fft_t
>(FFTWPREFIX(malloc
)(sizeof(struct gmx_fft
)))) == nullptr)
137 /* allocate aligned, and extra memory to make it unaligned */
138 p1
= static_cast<FFTWPREFIX(complex) *>(FFTWPREFIX(malloc
)(sizeof(FFTWPREFIX(complex))*(nx
+2)*howmany
));
141 FFTWPREFIX(free
)(fft
);
146 p2
= static_cast<FFTWPREFIX(complex) *>(FFTWPREFIX(malloc
)(sizeof(FFTWPREFIX(complex))*(nx
+2)*howmany
));
149 FFTWPREFIX(free
)(p1
);
150 FFTWPREFIX(free
)(fft
);
155 /* make unaligned pointers.
156 * In double precision the actual complex datatype will be 16 bytes,
157 * so go to a char pointer and force an offset of 8 bytes instead.
159 pc
= reinterpret_cast<char*>(p1
);
161 up1
= reinterpret_cast<FFTWPREFIX(complex) *>(pc
);
163 pc
= reinterpret_cast<char*>(p2
);
165 up2
= reinterpret_cast<FFTWPREFIX(complex) *>(pc
);
167 /* int rank, const int *n, int howmany,
168 fftw_complex *in, const int *inembed,
169 int istride, int idist,
170 fftw_complex *out, const int *onembed,
171 int ostride, int odist,
172 int sign, unsigned flags */
173 fft
->plan
[0][0][0] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, up1
, &nx
, 1, nx
, up2
, &nx
, 1, nx
, FFTW_BACKWARD
, fftw_flags
);
174 fft
->plan
[0][0][1] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, up1
, &nx
, 1, nx
, up2
, &nx
, 1, nx
, FFTW_FORWARD
, fftw_flags
);
175 fft
->plan
[0][1][0] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, up1
, &nx
, 1, nx
, up1
, &nx
, 1, nx
, FFTW_BACKWARD
, fftw_flags
);
176 fft
->plan
[0][1][1] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, up1
, &nx
, 1, nx
, up1
, &nx
, 1, nx
, FFTW_FORWARD
, fftw_flags
);
177 fft
->plan
[1][0][0] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, p1
, &nx
, 1, nx
, p2
, &nx
, 1, nx
, FFTW_BACKWARD
, fftw_flags
);
178 fft
->plan
[1][0][1] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, p1
, &nx
, 1, nx
, p2
, &nx
, 1, nx
, FFTW_FORWARD
, fftw_flags
);
179 fft
->plan
[1][1][0] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, p1
, &nx
, 1, nx
, p1
, &nx
, 1, nx
, FFTW_BACKWARD
, fftw_flags
);
180 fft
->plan
[1][1][1] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, p1
, &nx
, 1, nx
, p1
, &nx
, 1, nx
, FFTW_FORWARD
, fftw_flags
);
182 for (i
= 0; i
< 2; i
++)
184 for (j
= 0; j
< 2; j
++)
186 for (k
= 0; k
< 2; k
++)
188 if (fft
->plan
[i
][j
][k
] == nullptr)
190 gmx_fatal(FARGS
, "Error initializing FFTW3 plan.");
192 gmx_fft_destroy(fft
);
194 FFTWPREFIX(free
)(p1
);
195 FFTWPREFIX(free
)(p2
);
203 FFTWPREFIX(free
)(p1
);
204 FFTWPREFIX(free
)(p2
);
206 fft
->real_transform
= 0;
215 gmx_fft_init_1d_real(gmx_fft_t
* pfft
,
219 return gmx_fft_init_many_1d_real(pfft
, nx
, 1, flags
);
223 gmx_fft_init_many_1d_real(gmx_fft_t
* pfft
,
229 real
*p1
, *p2
, *up1
, *up2
;
234 #if GMX_DISABLE_FFTW_MEASURE
235 flags
|= GMX_FFT_FLAG_CONSERVATIVE
;
238 fftw_flags
= (flags
& GMX_FFT_FLAG_CONSERVATIVE
) ? FFTW_ESTIMATE
: FFTW_MEASURE
;
242 gmx_fatal(FARGS
, "Invalid opaque FFT datatype pointer.");
248 if ( (fft
= static_cast<gmx_fft_t
>(FFTWPREFIX(malloc
)(sizeof(struct gmx_fft
)))) == nullptr)
254 /* allocate aligned, and extra memory to make it unaligned */
255 p1
= static_cast<real
*>(FFTWPREFIX(malloc
)(sizeof(real
)*(nx
/2+1)*2*howmany
+ 8));
258 FFTWPREFIX(free
)(fft
);
263 p2
= static_cast<real
*>(FFTWPREFIX(malloc
)(sizeof(real
)*(nx
/2+1)*2*howmany
+ 8));
266 FFTWPREFIX(free
)(p1
);
267 FFTWPREFIX(free
)(fft
);
272 /* make unaligned pointers.
273 * In double precision the actual complex datatype will be 16 bytes,
274 * so go to a char pointer and force an offset of 8 bytes instead.
276 pc
= reinterpret_cast<char*>(p1
);
278 up1
= reinterpret_cast<real
*>(pc
);
280 pc
= reinterpret_cast<char*>(p2
);
282 up2
= reinterpret_cast<real
*>(pc
);
284 /* int rank, const int *n, int howmany,
285 double *in, const int *inembed,
286 int istride, int idist,
287 fftw_complex *out, const int *onembed,
288 int ostride, int odist,
290 fft
->plan
[0][0][1] = FFTWPREFIX(plan_many_dft_r2c
)(1, &nx
, howmany
, up1
, nullptr, 1, (nx
/2+1) *2, reinterpret_cast<FFTWPREFIX(complex) *>(up2
), nullptr, 1, (nx
/2+1), fftw_flags
);
291 fft
->plan
[0][1][1] = FFTWPREFIX(plan_many_dft_r2c
)(1, &nx
, howmany
, up1
, nullptr, 1, (nx
/2+1) *2, reinterpret_cast<FFTWPREFIX(complex) *>(up1
), nullptr, 1, (nx
/2+1), fftw_flags
);
292 fft
->plan
[1][0][1] = FFTWPREFIX(plan_many_dft_r2c
)(1, &nx
, howmany
, p1
, nullptr, 1, (nx
/2+1) *2, reinterpret_cast<FFTWPREFIX(complex) *>(p2
), nullptr, 1, (nx
/2+1), fftw_flags
);
293 fft
->plan
[1][1][1] = FFTWPREFIX(plan_many_dft_r2c
)(1, &nx
, howmany
, p1
, nullptr, 1, (nx
/2+1) *2, reinterpret_cast<FFTWPREFIX(complex) *>(p1
), nullptr, 1, (nx
/2+1), fftw_flags
);
295 fft
->plan
[0][0][0] = FFTWPREFIX(plan_many_dft_c2r
)(1, &nx
, howmany
, reinterpret_cast<FFTWPREFIX(complex) *>(up1
), nullptr, 1, (nx
/2+1), up2
, nullptr, 1, (nx
/2+1) *2, fftw_flags
);
296 fft
->plan
[0][1][0] = FFTWPREFIX(plan_many_dft_c2r
)(1, &nx
, howmany
, reinterpret_cast<FFTWPREFIX(complex) *>(up1
), nullptr, 1, (nx
/2+1), up1
, nullptr, 1, (nx
/2+1) *2, fftw_flags
);
297 fft
->plan
[1][0][0] = FFTWPREFIX(plan_many_dft_c2r
)(1, &nx
, howmany
, reinterpret_cast<FFTWPREFIX(complex) *>(p1
), nullptr, 1, (nx
/2+1), p2
, nullptr, 1, (nx
/2+1) *2, fftw_flags
);
298 fft
->plan
[1][1][0] = FFTWPREFIX(plan_many_dft_c2r
)(1, &nx
, howmany
, reinterpret_cast<FFTWPREFIX(complex) *>(p1
), nullptr, 1, (nx
/2+1), p1
, nullptr, 1, (nx
/2+1) *2, fftw_flags
);
300 for (i
= 0; i
< 2; i
++)
302 for (j
= 0; j
< 2; j
++)
304 for (k
= 0; k
< 2; k
++)
306 if (fft
->plan
[i
][j
][k
] == nullptr)
308 gmx_fatal(FARGS
, "Error initializing FFTW3 plan.");
310 gmx_fft_destroy(fft
);
312 FFTWPREFIX(free
)(p1
);
313 FFTWPREFIX(free
)(p2
);
321 FFTWPREFIX(free
)(p1
);
322 FFTWPREFIX(free
)(p2
);
324 fft
->real_transform
= 1;
334 gmx_fft_init_2d_real(gmx_fft_t
* pfft
,
340 real
*p1
, *p2
, *up1
, *up2
;
345 #if GMX_DISABLE_FFTW_MEASURE
346 flags
|= GMX_FFT_FLAG_CONSERVATIVE
;
349 fftw_flags
= (flags
& GMX_FFT_FLAG_CONSERVATIVE
) ? FFTW_ESTIMATE
: FFTW_MEASURE
;
353 gmx_fatal(FARGS
, "Invalid opaque FFT datatype pointer.");
359 if ( (fft
= static_cast<gmx_fft_t
>(FFTWPREFIX(malloc
)(sizeof(struct gmx_fft
)))) == nullptr)
365 /* allocate aligned, and extra memory to make it unaligned */
366 p1
= static_cast<real
*>(FFTWPREFIX(malloc
)(sizeof(real
) *( nx
*(ny
/2+1)*2 + 2) ));
369 FFTWPREFIX(free
)(fft
);
374 p2
= static_cast<real
*>(FFTWPREFIX(malloc
)(sizeof(real
) *( nx
*(ny
/2+1)*2 + 2) ));
377 FFTWPREFIX(free
)(p1
);
378 FFTWPREFIX(free
)(fft
);
383 /* make unaligned pointers.
384 * In double precision the actual complex datatype will be 16 bytes,
385 * so go to a char pointer and force an offset of 8 bytes instead.
387 pc
= reinterpret_cast<char*>(p1
);
389 up1
= reinterpret_cast<real
*>(pc
);
391 pc
= reinterpret_cast<char*>(p2
);
393 up2
= reinterpret_cast<real
*>(pc
);
396 fft
->plan
[0][0][0] = FFTWPREFIX(plan_dft_c2r_2d
)(nx
, ny
, reinterpret_cast<FFTWPREFIX(complex) *>(up1
), up2
, fftw_flags
);
397 fft
->plan
[0][0][1] = FFTWPREFIX(plan_dft_r2c_2d
)(nx
, ny
, up1
, reinterpret_cast<FFTWPREFIX(complex) *>(up2
), fftw_flags
);
398 fft
->plan
[0][1][0] = FFTWPREFIX(plan_dft_c2r_2d
)(nx
, ny
, reinterpret_cast<FFTWPREFIX(complex) *>(up1
), up1
, fftw_flags
);
399 fft
->plan
[0][1][1] = FFTWPREFIX(plan_dft_r2c_2d
)(nx
, ny
, up1
, reinterpret_cast<FFTWPREFIX(complex) *>(up1
), fftw_flags
);
401 fft
->plan
[1][0][0] = FFTWPREFIX(plan_dft_c2r_2d
)(nx
, ny
, reinterpret_cast<FFTWPREFIX(complex) *>(p1
), p2
, fftw_flags
);
402 fft
->plan
[1][0][1] = FFTWPREFIX(plan_dft_r2c_2d
)(nx
, ny
, p1
, reinterpret_cast<FFTWPREFIX(complex) *>(p2
), fftw_flags
);
403 fft
->plan
[1][1][0] = FFTWPREFIX(plan_dft_c2r_2d
)(nx
, ny
, reinterpret_cast<FFTWPREFIX(complex) *>(p1
), p1
, fftw_flags
);
404 fft
->plan
[1][1][1] = FFTWPREFIX(plan_dft_r2c_2d
)(nx
, ny
, p1
, reinterpret_cast<FFTWPREFIX(complex) *>(p1
), fftw_flags
);
407 for (i
= 0; i
< 2; i
++)
409 for (j
= 0; j
< 2; j
++)
411 for (k
= 0; k
< 2; k
++)
413 if (fft
->plan
[i
][j
][k
] == nullptr)
415 gmx_fatal(FARGS
, "Error initializing FFTW3 plan.");
417 gmx_fft_destroy(fft
);
419 FFTWPREFIX(free
)(p1
);
420 FFTWPREFIX(free
)(p2
);
428 FFTWPREFIX(free
)(p1
);
429 FFTWPREFIX(free
)(p2
);
431 fft
->real_transform
= 1;
440 gmx_fft_1d (gmx_fft_t fft
,
441 enum gmx_fft_direction dir
,
445 int aligned
= (((size_t(in_data
) | size_t(out_data
)) & 0xf) == 0);
446 int inplace
= (in_data
== out_data
);
447 int isforward
= (dir
== GMX_FFT_FORWARD
);
450 if ( (fft
->real_transform
== 1) || (fft
->ndim
!= 1) ||
451 ((dir
!= GMX_FFT_FORWARD
) && (dir
!= GMX_FFT_BACKWARD
)) )
453 gmx_fatal(FARGS
, "FFT plan mismatch - bad plan or direction.");
457 FFTWPREFIX(execute_dft
)(fft
->plan
[aligned
][inplace
][isforward
],
458 static_cast<FFTWPREFIX(complex) *>(in_data
),
459 static_cast<FFTWPREFIX(complex) *>(out_data
));
465 gmx_fft_many_1d (gmx_fft_t fft
,
466 enum gmx_fft_direction dir
,
470 return gmx_fft_1d(fft
, dir
, in_data
, out_data
);
474 gmx_fft_1d_real (gmx_fft_t fft
,
475 enum gmx_fft_direction dir
,
479 int aligned
= (((size_t(in_data
) | size_t(out_data
)) & 0xf) == 0);
480 int inplace
= (in_data
== out_data
);
481 int isforward
= (dir
== GMX_FFT_REAL_TO_COMPLEX
);
484 if ( (fft
->real_transform
!= 1) || (fft
->ndim
!= 1) ||
485 ((dir
!= GMX_FFT_REAL_TO_COMPLEX
) && (dir
!= GMX_FFT_COMPLEX_TO_REAL
)) )
487 gmx_fatal(FARGS
, "FFT plan mismatch - bad plan or direction.");
493 FFTWPREFIX(execute_dft_r2c
)(fft
->plan
[aligned
][inplace
][isforward
],
494 static_cast<real
*>(in_data
), static_cast<FFTWPREFIX(complex) *>(out_data
));
498 FFTWPREFIX(execute_dft_c2r
)(fft
->plan
[aligned
][inplace
][isforward
],
499 static_cast<FFTWPREFIX(complex) *>(in_data
), static_cast<real
*>(out_data
));
506 gmx_fft_many_1d_real (gmx_fft_t fft
,
507 enum gmx_fft_direction dir
,
511 return gmx_fft_1d_real(fft
, dir
, in_data
, out_data
);
515 gmx_fft_2d_real (gmx_fft_t fft
,
516 enum gmx_fft_direction dir
,
520 int aligned
= (((size_t(in_data
) | size_t(out_data
)) & 0xf) == 0);
521 int inplace
= (in_data
== out_data
);
522 int isforward
= (dir
== GMX_FFT_REAL_TO_COMPLEX
);
525 if ( (fft
->real_transform
!= 1) || (fft
->ndim
!= 2) ||
526 ((dir
!= GMX_FFT_REAL_TO_COMPLEX
) && (dir
!= GMX_FFT_COMPLEX_TO_REAL
)) )
528 gmx_fatal(FARGS
, "FFT plan mismatch - bad plan or direction.");
534 FFTWPREFIX(execute_dft_r2c
)(fft
->plan
[aligned
][inplace
][isforward
],
535 static_cast<real
*>(in_data
),
536 static_cast<FFTWPREFIX(complex) *>(out_data
));
540 FFTWPREFIX(execute_dft_c2r
)(fft
->plan
[aligned
][inplace
][isforward
],
541 static_cast<FFTWPREFIX(complex) *>(in_data
),
542 static_cast<real
*>(out_data
));
550 gmx_fft_destroy(gmx_fft_t fft
)
556 for (i
= 0; i
< 2; i
++)
558 for (j
= 0; j
< 2; j
++)
560 for (k
= 0; k
< 2; k
++)
562 if (fft
->plan
[i
][j
][k
] != nullptr)
565 FFTWPREFIX(destroy_plan
)(fft
->plan
[i
][j
][k
]);
567 fft
->plan
[i
][j
][k
] = nullptr;
573 FFTWPREFIX(free
)(fft
);
580 gmx_many_fft_destroy(gmx_fft_t fft
)
582 gmx_fft_destroy(fft
);
585 void gmx_fft_cleanup()
587 FFTWPREFIX(cleanup
)();