2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
42 #include <mkl_service.h>
44 #include "gromacs/fft/fft.h"
45 #include "gromacs/utility/fatalerror.h"
48 /* For MKL version (<10.0), we should define MKL_LONG. */
50 #define MKL_LONG long int
55 #define GMX_DFTI_PREC DFTI_DOUBLE
57 #define GMX_DFTI_PREC DFTI_SINGLE
62 * Contents of the Intel MKL FFT datatype.
64 * Note that this is one of several possible implementations of gmx_fft_t.
66 * The MKL _API_ supports 1D, 2D, and 3D transforms, including real-to-complex.
67 * Unfortunately the actual library implementation does not support 3D real
68 * transforms as of version 7.2, and versions before 7.0 don't support 2D real
69 * either. In addition, the multi-dimensional storage format for real data
70 * is not compatible with our padding.
72 * To work around this we roll our own 2D and 3D real-to-complex transforms,
73 * using separate X/Y/Z handles defined to perform (ny*nz), (nx*nz), and
74 * (nx*ny) transforms at once when necessary. To perform strided multiple
75 * transforms out-of-place (i.e., without padding in the last dimension)
76 * on the fly we also need to separate the forward and backward
77 * handles for real-to-complex/complex-to-real data permutation.
79 * This makes it necessary to define 3 handles for in-place FFTs, and 4 for
80 * the out-of-place transforms. Still, whenever possible we try to use
81 * a single 3D-transform handle instead.
83 * So, the handles are enumerated as follows:
85 * 1D FFT (real too): Index 0 is the handle for the entire FFT
86 * 2D complex FFT: Index 0 is the handle for the entire FFT
87 * 3D complex FFT: Index 0 is the handle for the entire FFT
88 * 2D, inplace real FFT: 0=FFTx, 1=FFTy handle
89 * 2D, ooplace real FFT: 0=FFTx, 1=real-to-complex FFTy, 2=complex-to-real FFTy
90 * 3D, inplace real FFT: 0=FFTx, 1=FFTy, 2=FFTz handle
91 * 3D, ooplace real FFT: 0=FFTx, 1=FFTy, 2=r2c FFTz, 3=c2r FFTz
93 * Intel people reading this: Learn from FFTW what a good interface looks like :-)
/* Data members of the MKL-backed FFT plan (struct gmx_fft): the transform
 * geometry, a real/complex flag, and the DFTI descriptor handles used for
 * in-place (3 slots) and out-of-place (4 slots) execution, plus a work array.
 * NOTE(review): the struct header and closing brace are not part of this
 * listing.
 */
101 int ndim
; /**< Number of dimensions in FFT */
102 int nx
; /**< Length of X transform */
103 int ny
; /**< Length of Y transform */
104 int nz
; /**< Length of Z transform */
105 int real_fft
; /**< 1 if real FFT, otherwise 0 */
106 DFTI_DESCRIPTOR
* inplace
[3]; /**< in-place FFT */
107 DFTI_DESCRIPTOR
* ooplace
[4]; /**< out-of-place FFT */
108 t_complex
* work
; /**< Work array (allocated by the 2D real setup); enables out-of-place c2r FFT */
/* Set up a 1D complex FFT plan of length nx and hand it back through pfft.
 *
 * Visible steps:
 *  - an invalid pfft is reported through gmx_fatal;
 *  - a struct gmx_fft is obtained with malloc and the result checked for NULL;
 *  - every descriptor slot (inplace[0..2], ooplace[0..3]) is set to NULL;
 *  - inplace[0] is created as a 1D DFTI_COMPLEX descriptor of length nx,
 *    set to DFTI_INPLACE and committed;
 *  - ooplace[0] is created the same way, set to DFTI_NOT_INPLACE and committed;
 *  - on error, gmx_fatal reports the DFTI status and gmx_fft_destroy is called.
 *
 * flags is marked gmx_unused.
 *
 * NOTE(review): this listing has gaps; the conditions guarding each step, the
 * remaining parameter(s), and the return values are not visible here.
 */
114 gmx_fft_init_1d(gmx_fft_t
* pfft
,
116 gmx_fft_flag gmx_unused flags
)
124 gmx_fatal(FARGS
, "Invalid opaque FFT datatype pointer.");
129 if ( (fft
= (gmx_fft_t
)malloc(sizeof(struct gmx_fft
))) == NULL
)
134 /* Mark all handles invalid */
135 for (d
= 0; d
< 3; d
++)
137 fft
->inplace
[d
] = fft
->ooplace
[d
] = NULL
;
/* ooplace has a fourth slot that the loop above does not reach */
139 fft
->ooplace
[3] = NULL
;
/* In-place descriptor: 1D complex transform of length nx */
142 status
= DftiCreateDescriptor(&fft
->inplace
[0], GMX_DFTI_PREC
, DFTI_COMPLEX
, 1, (MKL_LONG
)nx
);
146 status
= DftiSetValue(fft
->inplace
[0], DFTI_PLACEMENT
, DFTI_INPLACE
);
151 status
= DftiCommitDescriptor(fft
->inplace
[0]);
/* Out-of-place descriptor: same geometry, DFTI_NOT_INPLACE placement */
157 status
= DftiCreateDescriptor(&fft
->ooplace
[0], GMX_DFTI_PREC
, DFTI_COMPLEX
, 1, (MKL_LONG
)nx
);
/* NOTE(review): the return values of the next two calls are discarded, unlike
 * the in-place path above and gmx_fft_init_1d_real, so a failure in either
 * would not be reflected in status - confirm whether that is intended. */
162 DftiSetValue(fft
->ooplace
[0], DFTI_PLACEMENT
, DFTI_NOT_INPLACE
);
167 DftiCommitDescriptor(fft
->ooplace
[0]);
/* Error path: report the DFTI status, then release the partially built plan */
173 gmx_fatal(FARGS
, "Error initializing Intel MKL FFT; status=%d", status
);
174 gmx_fft_destroy(fft
);
/* Set up a 1D real FFT plan of length nx and hand it back through pfft.
 *
 * Visible steps:
 *  - an invalid pfft is reported through gmx_fatal;
 *  - a struct gmx_fft is obtained with malloc and the result checked for NULL;
 *  - every descriptor slot (inplace[0..2], ooplace[0..3]) is set to NULL;
 *  - inplace[0] and ooplace[0] are created as 1D DFTI_REAL descriptors of
 *    length nx, given DFTI_INPLACE / DFTI_NOT_INPLACE placement respectively,
 *    and committed; each call's result is stored in status;
 *  - a status of DFTI_UNIMPLEMENTED gets a dedicated message about the linked
 *    MKL version before gmx_fft_destroy is called;
 *  - other errors are reported through gmx_fatal with the DFTI status,
 *    followed by gmx_fft_destroy.
 *
 * flags is marked gmx_unused.
 *
 * NOTE(review): this listing has gaps; the conditions guarding each step, the
 * remaining parameter(s), and the return values are not visible here.
 */
190 gmx_fft_init_1d_real(gmx_fft_t
* pfft
,
192 gmx_fft_flag gmx_unused flags
)
200 gmx_fatal(FARGS
, "Invalid opaque FFT datatype pointer.");
205 if ( (fft
= (gmx_fft_t
)malloc(sizeof(struct gmx_fft
))) == NULL
)
210 /* Mark all handles invalid */
211 for (d
= 0; d
< 3; d
++)
213 fft
->inplace
[d
] = fft
->ooplace
[d
] = NULL
;
/* ooplace has a fourth slot that the loop above does not reach */
215 fft
->ooplace
[3] = NULL
;
/* In-place descriptor: 1D real transform of length nx */
217 status
= DftiCreateDescriptor(&fft
->inplace
[0], GMX_DFTI_PREC
, DFTI_REAL
, 1, (MKL_LONG
)nx
);
221 status
= DftiSetValue(fft
->inplace
[0], DFTI_PLACEMENT
, DFTI_INPLACE
);
226 status
= DftiCommitDescriptor(fft
->inplace
[0]);
/* Out-of-place descriptor: same geometry, DFTI_NOT_INPLACE placement */
232 status
= DftiCreateDescriptor(&fft
->ooplace
[0], GMX_DFTI_PREC
, DFTI_REAL
, 1, (MKL_LONG
)nx
);
237 status
= DftiSetValue(fft
->ooplace
[0], DFTI_PLACEMENT
, DFTI_NOT_INPLACE
);
242 status
= DftiCommitDescriptor(fft
->ooplace
[0]);
/* Real transforms not provided by the linked library: specific diagnostic */
246 if (status
== DFTI_UNIMPLEMENTED
)
249 "The linked Intel MKL version (<6.0?) cannot do real FFTs.");
250 gmx_fft_destroy(fft
);
/* Any other failure: report the DFTI status, then release the plan */
257 gmx_fatal(FARGS
, "Error initializing Intel MKL FFT; status=%d", status
);
258 gmx_fft_destroy(fft
);
/* Set up a 2D real FFT plan, built from batched 1D MKL transforms, and hand
 * it back through pfft.
 *
 * Descriptors configured in the visible code:
 *  - inplace[0]: 1D DFTI_COMPLEX, length nx, DFTI_INPLACE, nyc transforms,
 *    input/output distance 1, input/output strides taken from `stride`;
 *  - ooplace[0]: same settings as inplace[0] but DFTI_NOT_INPLACE;
 *  - inplace[1]: 1D DFTI_REAL, length ny, DFTI_INPLACE, nx transforms,
 *    input and output distance 2*nyc;
 *  - ooplace[1]: 1D DFTI_REAL, length ny, DFTI_NOT_INPLACE, nx transforms,
 *    input distance ny, output distance 2*nyc (real-to-complex layout);
 *  - ooplace[2]: 1D DFTI_REAL, length ny, DFTI_NOT_INPLACE, nx transforms,
 *    input distance 2*nyc, output distance ny (complex-to-real layout).
 * Finally a work array of nx*(ny/2+1) t_complex elements is allocated with
 * malloc and stored in fft->work. Errors are reported through gmx_fatal with
 * the DFTI status, followed by gmx_fft_destroy.
 *
 * flags is marked gmx_unused.
 *
 * NOTE(review): this listing has gaps; the definitions of nyc and stride, the
 * variables receiving the chained DftiSetValue results, the guards around
 * each step, and the return values are not visible here. nyc is presumably
 * ny/2+1 (matching the work-array size) - confirm.
 */
274 gmx_fft_init_2d_real(gmx_fft_t
* pfft
,
277 gmx_fft_flag gmx_unused flags
)
287 gmx_fatal(FARGS
, "Invalid opaque FFT datatype pointer.");
292 if ( (fft
= (gmx_fft_t
)malloc(sizeof(struct gmx_fft
))) == NULL
)
299 /* Mark all handles invalid */
300 for (d
= 0; d
< 3; d
++)
302 fft
->inplace
[d
] = fft
->ooplace
[d
] = NULL
;
304 fft
->ooplace
[3] = NULL
;
306 /* Roll our own 2D real transform using multiple transforms in MKL,
307 * since the current MKL versions do not support our storage format,
308 * and all but the most recent don't even have 2D real FFTs.
312 status
= DftiCreateDescriptor(&fft
->inplace
[0], GMX_DFTI_PREC
, DFTI_COMPLEX
, 1, (MKL_LONG
)nx
);
320 (DftiSetValue(fft
->inplace
[0], DFTI_PLACEMENT
, DFTI_INPLACE
) ||
321 DftiSetValue(fft
->inplace
[0], DFTI_NUMBER_OF_TRANSFORMS
, nyc
) ||
322 DftiSetValue(fft
->inplace
[0], DFTI_INPUT_DISTANCE
, 1) ||
323 DftiSetValue(fft
->inplace
[0], DFTI_INPUT_STRIDES
, stride
) ||
324 DftiSetValue(fft
->inplace
[0], DFTI_OUTPUT_DISTANCE
, 1) ||
325 DftiSetValue(fft
->inplace
[0], DFTI_OUTPUT_STRIDES
, stride
));
330 status
= DftiCommitDescriptor(fft
->inplace
[0]);
333 /* Out-of-place X FFT */
336 status
= DftiCreateDescriptor(&(fft
->ooplace
[0]), GMX_DFTI_PREC
, DFTI_COMPLEX
, 1, (MKL_LONG
)nx
);
345 (DftiSetValue(fft
->ooplace
[0], DFTI_PLACEMENT
, DFTI_NOT_INPLACE
) ||
346 DftiSetValue(fft
->ooplace
[0], DFTI_NUMBER_OF_TRANSFORMS
, nyc
) ||
347 DftiSetValue(fft
->ooplace
[0], DFTI_INPUT_DISTANCE
, 1) ||
348 DftiSetValue(fft
->ooplace
[0], DFTI_INPUT_STRIDES
, stride
) ||
349 DftiSetValue(fft
->ooplace
[0], DFTI_OUTPUT_DISTANCE
, 1) ||
350 DftiSetValue(fft
->ooplace
[0], DFTI_OUTPUT_STRIDES
, stride
));
355 status
= DftiCommitDescriptor(fft
->ooplace
[0]);
/* In-place Y FFT: real-domain descriptor of length ny, run as nx transforms
 * with input and output distance 2*nyc; the commit is part of the same
 * short-circuit chain as the DftiSetValue calls. */
362 status
= DftiCreateDescriptor(&fft
->inplace
[1], GMX_DFTI_PREC
, DFTI_REAL
, 1, (MKL_LONG
)ny
);
371 (DftiSetValue(fft
->inplace
[1], DFTI_PLACEMENT
, DFTI_INPLACE
) ||
372 DftiSetValue(fft
->inplace
[1], DFTI_NUMBER_OF_TRANSFORMS
, (MKL_LONG
)nx
) ||
373 DftiSetValue(fft
->inplace
[1], DFTI_INPUT_DISTANCE
, 2*nyc
) ||
374 DftiSetValue(fft
->inplace
[1], DFTI_INPUT_STRIDES
, stride
) ||
375 DftiSetValue(fft
->inplace
[1], DFTI_OUTPUT_DISTANCE
, 2*nyc
) ||
376 DftiSetValue(fft
->inplace
[1], DFTI_OUTPUT_STRIDES
, stride
) ||
377 DftiCommitDescriptor(fft
->inplace
[1]));
381 /* Out-of-place real-to-complex (affects output distance) Y FFT */
384 status
= DftiCreateDescriptor(&fft
->ooplace
[1], GMX_DFTI_PREC
, DFTI_REAL
, 1, (MKL_LONG
)ny
);
393 (DftiSetValue(fft
->ooplace
[1], DFTI_PLACEMENT
, DFTI_NOT_INPLACE
) ||
394 DftiSetValue(fft
->ooplace
[1], DFTI_NUMBER_OF_TRANSFORMS
, (MKL_LONG
)nx
) ||
395 DftiSetValue(fft
->ooplace
[1], DFTI_INPUT_DISTANCE
, (MKL_LONG
)ny
) ||
396 DftiSetValue(fft
->ooplace
[1], DFTI_INPUT_STRIDES
, stride
) ||
397 DftiSetValue(fft
->ooplace
[1], DFTI_OUTPUT_DISTANCE
, 2*nyc
) ||
398 DftiSetValue(fft
->ooplace
[1], DFTI_OUTPUT_STRIDES
, stride
) ||
399 DftiCommitDescriptor(fft
->ooplace
[1]));
403 /* Out-of-place complex-to-real (affects output distance) Y FFT */
406 status
= DftiCreateDescriptor(&fft
->ooplace
[2], GMX_DFTI_PREC
, DFTI_REAL
, 1, (MKL_LONG
)ny
);
415 (DftiSetValue(fft
->ooplace
[2], DFTI_PLACEMENT
, DFTI_NOT_INPLACE
) ||
416 DftiSetValue(fft
->ooplace
[2], DFTI_NUMBER_OF_TRANSFORMS
, (MKL_LONG
)nx
) ||
417 DftiSetValue(fft
->ooplace
[2], DFTI_INPUT_DISTANCE
, 2*nyc
) ||
418 DftiSetValue(fft
->ooplace
[2], DFTI_INPUT_STRIDES
, stride
) ||
419 DftiSetValue(fft
->ooplace
[2], DFTI_OUTPUT_DISTANCE
, (MKL_LONG
)ny
) ||
420 DftiSetValue(fft
->ooplace
[2], DFTI_OUTPUT_STRIDES
, stride
) ||
421 DftiCommitDescriptor(fft
->ooplace
[2]));
/* Work array holding nx*(ny/2+1) complex elements; the malloc result is
 * checked against nullptr before being stored in fft->work. */
427 void *memory
= malloc(sizeof(t_complex
)*(nx
*(ny
/2+1)));
428 if (nullptr == memory
)
432 fft
->work
= static_cast<t_complex
*>(memory
);
/* Error path: report the DFTI status, then release the partially built plan */
437 gmx_fatal(FARGS
, "Error initializing Intel MKL FFT; status=%d", status
);
438 gmx_fft_destroy(fft
);
/* Execute a 1D complex FFT with a previously initialised plan.
 *
 * The transform is treated as in-place when in_data and out_data are the same
 * pointer. The plan is rejected through gmx_fatal if it is a real-FFT plan,
 * is not one-dimensional, or dir is neither GMX_FFT_FORWARD nor
 * GMX_FFT_BACKWARD. Forward transforms use DftiComputeForward and the other
 * direction DftiComputeBackward, on inplace[0] (one data argument) or
 * ooplace[0] (in_data, out_data). A failed execution is reported through
 * gmx_fatal.
 *
 * NOTE(review): this listing has gaps; the data parameters' declarations, the
 * branch selecting in-place vs. out-of-place, and the return values are not
 * visible here.
 */
452 gmx_fft_1d(gmx_fft_t fft
,
453 enum gmx_fft_direction dir
,
/* Same buffer for input and output selects the in-place descriptor */
457 int inplace
= (in_data
== out_data
);
/* Sanity check: must be a complex 1D plan with a complex-transform direction */
460 if ( (fft
->real_fft
== 1) || (fft
->ndim
!= 1) ||
461 ((dir
!= GMX_FFT_FORWARD
) && (dir
!= GMX_FFT_BACKWARD
)) )
463 gmx_fatal(FARGS
, "FFT plan mismatch - bad plan or direction.");
467 if (dir
== GMX_FFT_FORWARD
)
471 status
= DftiComputeForward(fft
->inplace
[0], in_data
);
475 status
= DftiComputeForward(fft
->ooplace
[0], in_data
, out_data
);
482 status
= DftiComputeBackward(fft
->inplace
[0], in_data
);
486 status
= DftiComputeBackward(fft
->ooplace
[0], in_data
, out_data
);
492 gmx_fatal(FARGS
, "Error executing Intel MKL FFT.");
/* Execute a 1D real FFT with a previously initialised plan.
 *
 * The transform is treated as in-place when in_data and out_data are the same
 * pointer. The plan is rejected through gmx_fatal unless it is a real-FFT
 * plan, is one-dimensional, and dir is GMX_FFT_REAL_TO_COMPLEX or
 * GMX_FFT_COMPLEX_TO_REAL. Real-to-complex uses DftiComputeForward and the
 * other direction DftiComputeBackward, on inplace[0] (one data argument) or
 * ooplace[0] (in_data, out_data). A failed execution is reported through
 * gmx_fatal.
 *
 * NOTE(review): this listing has gaps; the data parameters' declarations, the
 * branch selecting in-place vs. out-of-place, and the return values are not
 * visible here.
 */
502 gmx_fft_1d_real(gmx_fft_t fft
,
503 enum gmx_fft_direction dir
,
/* Same buffer for input and output selects the in-place descriptor */
507 int inplace
= (in_data
== out_data
);
/* Sanity check: must be a real 1D plan with a real-transform direction */
510 if ( (fft
->real_fft
!= 1) || (fft
->ndim
!= 1) ||
511 ((dir
!= GMX_FFT_REAL_TO_COMPLEX
) && (dir
!= GMX_FFT_COMPLEX_TO_REAL
)) )
513 gmx_fatal(FARGS
, "FFT plan mismatch - bad plan or direction.");
517 if (dir
== GMX_FFT_REAL_TO_COMPLEX
)
521 status
= DftiComputeForward(fft
->inplace
[0], in_data
);
525 status
= DftiComputeForward(fft
->ooplace
[0], in_data
, out_data
);
532 status
= DftiComputeBackward(fft
->inplace
[0], in_data
);
536 status
= DftiComputeBackward(fft
->ooplace
[0], in_data
, out_data
);
542 gmx_fatal(FARGS
, "Error executing Intel MKL FFT.");
/* Execute a 2D real FFT with a previously initialised plan.
 *
 * The transform is treated as in-place when in_data and out_data are the same
 * pointer. The plan is rejected through gmx_fatal unless it is a real-FFT
 * plan, is two-dimensional, and dir is GMX_FFT_REAL_TO_COMPLEX or
 * GMX_FFT_COMPLEX_TO_REAL.
 *
 * Real-to-complex runs as two passes:
 *  - in-place: Y pass with inplace[1] on in_data, then X pass with inplace[0]
 *    on in_data;
 *  - out-of-place: Y pass with ooplace[1] from in_data to out_data, then X
 *    pass with inplace[0] operating on out_data.
 * The remaining direction is not implemented: the visible code calls
 * gmx_incons with "Complex -> Real is not supported by MKL.". A failed
 * execution is reported through gmx_fatal.
 *
 * NOTE(review): this listing has gaps; the data parameters' declarations, the
 * guards between the two passes, and the return values are not visible here.
 */
551 gmx_fft_2d_real(gmx_fft_t fft
,
552 enum gmx_fft_direction dir
,
/* Same buffer for input and output selects the in-place path */
556 int inplace
= (in_data
== out_data
);
/* Sanity check: must be a real 2D plan with a real-transform direction */
559 if ( (fft
->real_fft
!= 1) || (fft
->ndim
!= 2) ||
560 ((dir
!= GMX_FFT_REAL_TO_COMPLEX
) && (dir
!= GMX_FFT_COMPLEX_TO_REAL
)) )
562 gmx_fatal(FARGS
, "FFT plan mismatch - bad plan or direction.");
566 if (dir
== GMX_FFT_REAL_TO_COMPLEX
)
570 /* real-to-complex in Y dimension, in-place */
571 status
= DftiComputeForward(fft
->inplace
[1], in_data
);
573 /* complex-to-complex in X dimension, in-place */
576 status
= DftiComputeForward(fft
->inplace
[0], in_data
);
581 /* real-to-complex in Y dimension, in_data to out_data */
582 status
= DftiComputeForward(fft
->ooplace
[1], in_data
, out_data
);
584 /* complex-to-complex in X dimension, in-place to out_data */
587 status
= DftiComputeForward(fft
->inplace
[0], out_data
);
593 /* prior implementation was incorrect. See fft.cpp unit test */
594 gmx_incons("Complex -> Real is not supported by MKL.");
599 gmx_fatal(FARGS
, "Error executing Intel MKL FFT.");
/* Release the resources held by an FFT plan.
 *
 * Every non-NULL descriptor in inplace[0..2] and ooplace[0..2] is released
 * with DftiFreeDescriptor inside the loop; ooplace[3], which the loop does
 * not reach, is handled separately afterwards. fft->work is then tested
 * against NULL.
 *
 * NOTE(review): this listing has gaps; any NULL check on fft itself, the
 * statement executed when fft->work is non-NULL, and the release of the plan
 * structure are not visible here.
 */
607 gmx_fft_destroy(gmx_fft_t fft
)
613 for (d
= 0; d
< 3; d
++)
615 if (fft
->inplace
[d
] != NULL
)
617 DftiFreeDescriptor(&fft
->inplace
[d
]);
619 if (fft
->ooplace
[d
] != NULL
)
621 DftiFreeDescriptor(&fft
->ooplace
[d
]);
/* Fourth out-of-place slot, outside the range of the loop above */
624 if (fft
->ooplace
[3] != NULL
)
626 DftiFreeDescriptor(&fft
->ooplace
[3]);
628 if (fft
->work
!= NULL
)
636 void gmx_fft_cleanup()