2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2016,2017,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #ifndef GMX_SIMD_SCALAR_MATH_H
36 #define GMX_SIMD_SCALAR_MATH_H
40 #include "gromacs/math/functions.h"
41 #include "gromacs/math/utilities.h"
42 #include "gromacs/simd/scalar/scalar.h"
44 /*! \libinternal \file
46 * \brief Scalar math functions mimicking GROMACS SIMD math functions
48 * These versions make it possible to write functions that are templated with
49 * either a SIMD or scalar type. While some of these functions might not appear
50 * SIMD-specific, we have placed them here because the only reason to use these
51 * instead of generic function is in templated combined SIMD/non-SIMD code.
52 * It is important that these functions match the SIMD versions exactly in their
53 * arguments and template arguments so that overload resolution works correctly.
55 * \author Erik Lindahl <erik.lindahl@gmail.com>
58 * \ingroup module_simd
64 /*****************************************************************************
65 * Single-precision floating point math functions mimicking SIMD versions *
66 *****************************************************************************/
69 /*! \brief Composes single value with the magnitude of x and the sign of y.
71 * \param x Value to set sign for
72 * \param y Value used to set sign
73 * \return Magnitude of x, sign of y
75 * \note This function might be superficially meaningless, but it helps us to
76 * write templated SIMD/non-SIMD code. For clarity it should not be used
79 static inline float copysign(float x
, float y
)
81 return std::copysign(x
, y
);
84 // invsqrt(x) is already defined in math/functions.h
86 /*! \brief Calculate 1/sqrt(x) for two floats.
88 * \param x0 First argument, x0 must be positive - no argument checking.
89 * \param x1 Second argument, x1 must be positive - no argument checking.
90 * \param[out] out0 Result 1/sqrt(x0)
91 * \param[out] out1 Result 1/sqrt(x1)
93 * \note This function might be superficially meaningless, but it helps us to
94 * write templated SIMD/non-SIMD code. For clarity it should not be used
97 static inline void invsqrtPair(float x0
, float x1
, float* out0
, float* out1
)
103 /*! \brief Calculate 1/x for float.
105 * \param x Argument that must be nonzero. This routine does not check arguments.
106 * \return 1/x. Result is undefined if your argument was invalid.
108 * \note This function might be superficially meaningless, but it helps us to
109 * write templated SIMD/non-SIMD code. For clarity it should not be used
112 static inline float inv(float x
)
117 /*! \brief Calculate 1/sqrt(x) for masked entry of float.
119 * This routine only evaluates 1/sqrt(x) if mask is true.
120 * Illegal values for a masked-out float will not lead to
121 * floating-point exceptions.
123 * \param x Argument that must be >0 if masked-in.
125 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
126 * entry was not masked, and 0.0 for masked-out entries.
128 * \note This function might be superficially meaningless, but it helps us to
129 * write templated SIMD/non-SIMD code. For clarity it should not be used
132 static inline float maskzInvsqrt(float x
, bool m
)
134 return m
? invsqrt(x
) : 0.0F
;
137 /*! \brief Calculate 1/x for masked entry of float.
139 * This routine only evaluates 1/x if mask is true.
140 * Illegal values for a masked-out float will not lead to
141 * floating-point exceptions.
143 * \param x Argument that must be nonzero if masked-in.
145 * \return 1/x. Result is undefined if your argument was invalid or
146 * entry was not masked, and 0.0 for masked-out entries.
148 * \note This function might be superficially meaningless, but it helps us to
149 * write templated SIMD/non-SIMD code. For clarity it should not be used
152 static inline float maskzInv(float x
, bool m
)
154 return m
? inv(x
) : 0.0F
;
157 /*! \brief Float sqrt(x). This is the square root.
159 * \param x Argument, should be >= 0.
160 * \result The square root of x. Undefined if argument is invalid.
162 * \note This function might be superficially meaningless, but it helps us to
163 * write templated SIMD/non-SIMD code. For clarity it should not be used
166 template<MathOptimization opt
= MathOptimization::Safe
>
167 static inline float sqrt(float x
)
172 /*! \brief Float cbrt(x). This is the cubic root.
174 * \param x Argument, should be >= 0.
175 * \result The cubic root of x. Undefined if argument is invalid.
177 * \note This function might be superficially meaningless, but it helps us to
178 * write templated SIMD/non-SIMD code. For clarity it should not be used
181 template<MathOptimization opt
= MathOptimization::Safe
>
182 static inline float cbrt(float x
)
187 /*! \brief Float log(x). This is the natural logarithm.
189 * \param x Argument, should be >0.
190 * \result The natural logarithm of x. Undefined if argument is invalid.
192 * \note This function might be superficially meaningless, but it helps us to
193 * write templated SIMD/non-SIMD code. For clarity it should not be used
196 static inline float log(float x
)
201 /*! \brief Float 2^x.
204 * \result 2^x. Undefined if input argument caused overflow.
206 * \note This function might be superficially meaningless, but it helps us to
207 * write templated SIMD/non-SIMD code. For clarity it should not be used
210 template<MathOptimization opt
= MathOptimization::Safe
>
211 static inline float exp2(float x
)
216 /*! \brief Float exp(x).
219 * \result exp(x). Undefined if input argument caused overflow.
221 * \note This function might be superficially meaningless, but it helps us to
222 * write templated SIMD/non-SIMD code. For clarity it should not be used
225 template<MathOptimization opt
= MathOptimization::Safe
>
226 static inline float exp(float x
)
231 /*! \brief Float erf(x).
236 * \note This function might be superficially meaningless, but it helps us to
237 * write templated SIMD/non-SIMD code. For clarity it should not be used
240 static inline float erf(float x
)
245 /*! \brief Float erfc(x).
250 * \note This function might be superficially meaningless, but it helps us to
251 * write templated SIMD/non-SIMD code. For clarity it should not be used
254 static inline float erfc(float x
)
260 /*! \brief Float sin \& cos.
262 * \param x The argument to evaluate sin/cos for
263 * \param[out] sinval Sin(x)
264 * \param[out] cosval Cos(x)
266 * \note This function might be superficially meaningless, but it helps us to
267 * write templated SIMD/non-SIMD code. For clarity it should not be used
270 static inline void sincos(float x
, float* sinval
, float* cosval
)
272 *sinval
= std::sin(x
);
273 *cosval
= std::cos(x
);
276 /*! \brief Float sin.
278 * \param x The argument to evaluate sin for
281 * \note This function might be superficially meaningless, but it helps us to
282 * write templated SIMD/non-SIMD code. For clarity it should not be used
285 static inline float sin(float x
)
290 /*! \brief Float cos.
292 * \param x The argument to evaluate cos for
295 * \note This function might be superficially meaningless, but it helps us to
296 * write templated SIMD/non-SIMD code. For clarity it should not be used
299 static inline float cos(float x
)
304 /*! \brief Float tan.
306 * \param x The argument to evaluate tan for
309 * \note This function might be superficially meaningless, but it helps us to
310 * write templated SIMD/non-SIMD code. For clarity it should not be used
313 static inline float tan(float x
)
318 /*! \brief float asin.
320 * \param x The argument to evaluate asin for
323 * \note This function might be superficially meaningless, but it helps us to
324 * write templated SIMD/non-SIMD code. For clarity it should not be used
327 static inline float asin(float x
)
332 /*! \brief Float acos.
334 * \param x The argument to evaluate acos for
337 * \note This function might be superficially meaningless, but it helps us to
338 * write templated SIMD/non-SIMD code. For clarity it should not be used
341 static inline float acos(float x
)
346 /*! \brief Float atan.
348 * \param x The argument to evaluate atan for
351 * \note This function might be superficially meaningless, but it helps us to
352 * write templated SIMD/non-SIMD code. For clarity it should not be used
355 static inline float atan(float x
)
360 /*! \brief Float atan2(y,x).
362 * \param y Y component of vector, any quartile
363 * \param x X component of vector, any quartile
366 * \note This function might be superficially meaningless, but it helps us to
367 * write templated SIMD/non-SIMD code. For clarity it should not be used
370 static inline float atan2(float y
, float x
)
372 return std::atan2(y
, x
);
375 /*! \brief Calculate the force correction due to PME analytically in float.
377 * See the SIMD version of this function for details.
379 * \param z2 input parameter
380 * \returns Correction to use on force
382 * \note This function might be superficially meaningless, but it helps us to
383 * write templated SIMD/non-SIMD code. For clarity it should not be used
386 static inline float pmeForceCorrection(float z2
)
388 const float FN6(-1.7357322914161492954e-8F
);
389 const float FN5(1.4703624142580877519e-6F
);
390 const float FN4(-0.000053401640219807709149F
);
391 const float FN3(0.0010054721316683106153F
);
392 const float FN2(-0.019278317264888380590F
);
393 const float FN1(0.069670166153766424023F
);
394 const float FN0(-0.75225204789749321333F
);
396 const float FD4(0.0011193462567257629232F
);
397 const float FD3(0.014866955030185295499F
);
398 const float FD2(0.11583842382862377919F
);
399 const float FD1(0.50736591960530292870F
);
400 const float FD0(1.0F
);
403 float polyFN0
, polyFN1
, polyFD0
, polyFD1
;
407 polyFD0
= fma(FD4
, z4
, FD2
);
408 polyFD1
= fma(FD3
, z4
, FD1
);
409 polyFD0
= fma(polyFD0
, z4
, FD0
);
410 polyFD0
= fma(polyFD1
, z2
, polyFD0
);
412 polyFN0
= fma(FN6
, z4
, FN4
);
413 polyFN1
= fma(FN5
, z4
, FN3
);
414 polyFN0
= fma(polyFN0
, z4
, FN2
);
415 polyFN1
= fma(polyFN1
, z4
, FN1
);
416 polyFN0
= fma(polyFN0
, z4
, FN0
);
417 polyFN0
= fma(polyFN1
, z2
, polyFN0
);
419 return polyFN0
/ polyFD0
;
422 /*! \brief Calculate the potential correction due to PME analytically in float.
424 * See the SIMD version of this function for details.
426 * \param z2 input parameter
427 * \returns Correction to use on potential.
429 * \note This function might be superficially meaningless, but it helps us to
430 * write templated SIMD/non-SIMD code. For clarity it should not be used
433 static inline float pmePotentialCorrection(float z2
)
435 const float VN6(1.9296833005951166339e-8F
);
436 const float VN5(-1.4213390571557850962e-6F
);
437 const float VN4(0.000041603292906656984871F
);
438 const float VN3(-0.00013134036773265025626F
);
439 const float VN2(0.038657983986041781264F
);
440 const float VN1(0.11285044772717598220F
);
441 const float VN0(1.1283802385263030286F
);
443 const float VD3(0.0066752224023576045451F
);
444 const float VD2(0.078647795836373922256F
);
445 const float VD1(0.43336185284710920150F
);
446 const float VD0(1.0F
);
449 float polyVN0
, polyVN1
, polyVD0
, polyVD1
;
453 polyVD1
= fma(VD3
, z4
, VD1
);
454 polyVD0
= fma(VD2
, z4
, VD0
);
455 polyVD0
= fma(polyVD1
, z2
, polyVD0
);
457 polyVN0
= fma(VN6
, z4
, VN4
);
458 polyVN1
= fma(VN5
, z4
, VN3
);
459 polyVN0
= fma(polyVN0
, z4
, VN2
);
460 polyVN1
= fma(polyVN1
, z4
, VN1
);
461 polyVN0
= fma(polyVN0
, z4
, VN0
);
462 polyVN0
= fma(polyVN1
, z2
, polyVN0
);
464 return polyVN0
/ polyVD0
;
467 /*****************************************************************************
468 * Double-precision floating point math functions mimicking SIMD versions *
469 *****************************************************************************/
472 /*! \brief Composes double value with the magnitude of x and the sign of y.
474 * \param x Value to set sign for
475 * \param y Value used to set sign
476 * \return Magnitude of x, sign of y
478 * \note This function might be superficially meaningless, but it helps us to
479 * write templated SIMD/non-SIMD code. For clarity it should not be used
482 static inline double copysign(double x
, double y
)
484 return std::copysign(x
, y
);
487 // invsqrt(x) is already defined in math/functions.h
489 /*! \brief Calculate 1/sqrt(x) for two doubles.
491 * \param x0 First argument, x0 must be positive - no argument checking.
492 * \param x1 Second argument, x1 must be positive - no argument checking.
493 * \param[out] out0 Result 1/sqrt(x0)
494 * \param[out] out1 Result 1/sqrt(x1)
496 * \note This function might be superficially meaningless, but it helps us to
497 * write templated SIMD/non-SIMD code. For clarity it should not be used
500 static inline void invsqrtPair(double x0
, double x1
, double* out0
, double* out1
)
506 /*! \brief Calculate 1/x for double.
508 * \param x Argument that must be nonzero. This routine does not check arguments.
509 * \return 1/x. Result is undefined if your argument was invalid.
511 * \note This function might be superficially meaningless, but it helps us to
512 * write templated SIMD/non-SIMD code. For clarity it should not be used
515 static inline double inv(double x
)
520 /*! \brief Calculate 1/sqrt(x) for masked entry of double.
522 * This routine only evaluates 1/sqrt(x) if mask is true.
523 * Illegal values for a masked-out double will not lead to
524 * floating-point exceptions.
526 * \param x Argument that must be >0 if masked-in.
528 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
529 * entry was not masked, and 0.0 for masked-out entries.
531 * \note This function might be superficially meaningless, but it helps us to
532 * write templated SIMD/non-SIMD code. For clarity it should not be used
535 static inline double maskzInvsqrt(double x
, bool m
)
537 return m
? invsqrt(x
) : 0.0;
540 /*! \brief Calculate 1/x for masked entry of double.
542 * This routine only evaluates 1/x if mask is true.
543 * Illegal values for a masked-out double will not lead to
544 * floating-point exceptions.
546 * \param x Argument that must be nonzero if masked-in.
548 * \return 1/x. Result is undefined if your argument was invalid or
549 * entry was not masked, and 0.0 for masked-out entries.
551 * \note This function might be superficially meaningless, but it helps us to
552 * write templated SIMD/non-SIMD code. For clarity it should not be used
555 static inline double maskzInv(double x
, bool m
)
557 return m
? inv(x
) : 0.0;
560 /*! \brief Double sqrt(x). This is the square root.
562 * \param x Argument, should be >= 0.
563 * \result The square root of x. Undefined if argument is invalid.
565 * \note This function might be superficially meaningless, but it helps us to
566 * write templated SIMD/non-SIMD code. For clarity it should not be used
569 template<MathOptimization opt
= MathOptimization::Safe
>
570 static inline double sqrt(double x
)
575 /*! \brief Double cbrt(x). This is the cubic root.
577 * \param x Argument, should be >= 0.
578 * \result The cubic root of x. Undefined if argument is invalid.
580 * \note This function might be superficially meaningless, but it helps us to
581 * write templated SIMD/non-SIMD code. For clarity it should not be used
584 template<MathOptimization opt
= MathOptimization::Safe
>
585 static inline double cbrt(double x
)
590 /*! \brief Double log(x). This is the natural logarithm.
592 * \param x Argument, should be >0.
593 * \result The natural logarithm of x. Undefined if argument is invalid.
595 * \note This function might be superficially meaningless, but it helps us to
596 * write templated SIMD/non-SIMD code. For clarity it should not be used
599 static inline double log(double x
)
604 /*! \brief Double 2^x.
607 * \result 2^x. Undefined if input argument caused overflow.
609 * \note This function might be superficially meaningless, but it helps us to
610 * write templated SIMD/non-SIMD code. For clarity it should not be used
613 template<MathOptimization opt
= MathOptimization::Safe
>
614 static inline double exp2(double x
)
619 /*! \brief Double exp(x).
622 * \result exp(x). Undefined if input argument caused overflow.
624 * \note This function might be superficially meaningless, but it helps us to
625 * write templated SIMD/non-SIMD code. For clarity it should not be used
628 template<MathOptimization opt
= MathOptimization::Safe
>
629 static inline double exp(double x
)
634 /*! \brief Double erf(x).
639 * \note This function might be superficially meaningless, but it helps us to
640 * write templated SIMD/non-SIMD code. For clarity it should not be used
643 static inline double erf(double x
)
648 /*! \brief Double erfc(x).
653 * \note This function might be superficially meaningless, but it helps us to
654 * write templated SIMD/non-SIMD code. For clarity it should not be used
657 static inline double erfc(double x
)
663 /*! \brief Double sin \& cos.
665 * \param x The argument to evaluate sin/cos for
666 * \param[out] sinval Sin(x)
667 * \param[out] cosval Cos(x)
669 * \note This function might be superficially meaningless, but it helps us to
670 * write templated SIMD/non-SIMD code. For clarity it should not be used
673 static inline void sincos(double x
, double* sinval
, double* cosval
)
675 *sinval
= std::sin(x
);
676 *cosval
= std::cos(x
);
679 /*! \brief Double sin.
681 * \param x The argument to evaluate sin for
684 * \note This function might be superficially meaningless, but it helps us to
685 * write templated SIMD/non-SIMD code. For clarity it should not be used
688 static inline double sin(double x
)
693 /*! \brief Double cos.
695 * \param x The argument to evaluate cos for
698 * \note This function might be superficially meaningless, but it helps us to
699 * write templated SIMD/non-SIMD code. For clarity it should not be used
702 static inline double cos(double x
)
707 /*! \brief Double tan.
709 * \param x The argument to evaluate tan for
712 * \note This function might be superficially meaningless, but it helps us to
713 * write templated SIMD/non-SIMD code. For clarity it should not be used
716 static inline double tan(double x
)
721 /*! \brief Double asin.
723 * \param x The argument to evaluate asin for
726 * \note This function might be superficially meaningless, but it helps us to
727 * write templated SIMD/non-SIMD code. For clarity it should not be used
730 static inline double asin(double x
)
735 /*! \brief Double acos.
737 * \param x The argument to evaluate acos for
740 * \note This function might be superficially meaningless, but it helps us to
741 * write templated SIMD/non-SIMD code. For clarity it should not be used
744 static inline double acos(double x
)
749 /*! \brief Double atan.
751 * \param x The argument to evaluate atan for
754 * \note This function might be superficially meaningless, but it helps us to
755 * write templated SIMD/non-SIMD code. For clarity it should not be used
758 static inline double atan(double x
)
763 /*! \brief Double atan2(y,x).
765 * \param y Y component of vector, any quartile
766 * \param x X component of vector, any quartile
769 * \note This function might be superficially meaningless, but it helps us to
770 * write templated SIMD/non-SIMD code. For clarity it should not be used
773 static inline double atan2(double y
, double x
)
775 return std::atan2(y
, x
);
778 /*! \brief Calculate the force correction due to PME analytically in double.
780 * See the SIMD version of this function for details.
782 * \param z2 input parameter
783 * \returns Correction to use on force
785 * \note This function might be superficially meaningless, but it helps us to
786 * write templated SIMD/non-SIMD code. For clarity it should not be used
789 static inline double pmeForceCorrection(double z2
)
791 const double FN10(-8.0072854618360083154e-14);
792 const double FN9(1.1859116242260148027e-11);
793 const double FN8(-8.1490406329798423616e-10);
794 const double FN7(3.4404793543907847655e-8);
795 const double FN6(-9.9471420832602741006e-7);
796 const double FN5(0.000020740315999115847456);
797 const double FN4(-0.00031991745139313364005);
798 const double FN3(0.0035074449373659008203);
799 const double FN2(-0.031750380176100813405);
800 const double FN1(0.13884101728898463426);
801 const double FN0(-0.75225277815249618847);
803 const double FD5(0.000016009278224355026701);
804 const double FD4(0.00051055686934806966046);
805 const double FD3(0.0081803507497974289008);
806 const double FD2(0.077181146026670287235);
807 const double FD1(0.41543303143712535988);
808 const double FD0(1.0);
811 double polyFN0
, polyFN1
, polyFD0
, polyFD1
;
815 polyFD1
= fma(FD5
, z4
, FD3
);
816 polyFD1
= fma(polyFD1
, z4
, FD1
);
817 polyFD1
= polyFD1
* z2
;
818 polyFD0
= fma(FD4
, z4
, FD2
);
819 polyFD0
= fma(polyFD0
, z4
, FD0
);
820 polyFD0
= polyFD0
+ polyFD1
;
822 polyFD0
= inv(polyFD0
);
824 polyFN0
= fma(FN10
, z4
, FN8
);
825 polyFN0
= fma(polyFN0
, z4
, FN6
);
826 polyFN0
= fma(polyFN0
, z4
, FN4
);
827 polyFN0
= fma(polyFN0
, z4
, FN2
);
828 polyFN0
= fma(polyFN0
, z4
, FN0
);
829 polyFN1
= fma(FN9
, z4
, FN7
);
830 polyFN1
= fma(polyFN1
, z4
, FN5
);
831 polyFN1
= fma(polyFN1
, z4
, FN3
);
832 polyFN1
= fma(polyFN1
, z4
, FN1
);
833 polyFN0
= fma(polyFN1
, z2
, polyFN0
);
835 return polyFN0
* polyFD0
;
838 /*! \brief Calculate the potential correction due to PME analytically in double.
840 * See the SIMD version of this function for details.
842 * \param z2 input parameter
843 * \returns Correction to use on potential.
845 * \note This function might be superficially meaningless, but it helps us to
846 * write templated SIMD/non-SIMD code. For clarity it should not be used
849 static inline double pmePotentialCorrection(double z2
)
851 const double VN9(-9.3723776169321855475e-13);
852 const double VN8(1.2280156762674215741e-10);
853 const double VN7(-7.3562157912251309487e-9);
854 const double VN6(2.6215886208032517509e-7);
855 const double VN5(-4.9532491651265819499e-6);
856 const double VN4(0.00025907400778966060389);
857 const double VN3(0.0010585044856156469792);
858 const double VN2(0.045247661136833092885);
859 const double VN1(0.11643931522926034421);
860 const double VN0(1.1283791671726767970);
862 const double VD5(0.000021784709867336150342);
863 const double VD4(0.00064293662010911388448);
864 const double VD3(0.0096311444822588683504);
865 const double VD2(0.085608012351550627051);
866 const double VD1(0.43652499166614811084);
867 const double VD0(1.0);
870 double polyVN0
, polyVN1
, polyVD0
, polyVD1
;
874 polyVD1
= fma(VD5
, z4
, VD3
);
875 polyVD0
= fma(VD4
, z4
, VD2
);
876 polyVD1
= fma(polyVD1
, z4
, VD1
);
877 polyVD0
= fma(polyVD0
, z4
, VD0
);
878 polyVD0
= fma(polyVD1
, z2
, polyVD0
);
880 polyVD0
= inv(polyVD0
);
882 polyVN1
= fma(VN9
, z4
, VN7
);
883 polyVN0
= fma(VN8
, z4
, VN6
);
884 polyVN1
= fma(polyVN1
, z4
, VN5
);
885 polyVN0
= fma(polyVN0
, z4
, VN4
);
886 polyVN1
= fma(polyVN1
, z4
, VN3
);
887 polyVN0
= fma(polyVN0
, z4
, VN2
);
888 polyVN1
= fma(polyVN1
, z4
, VN1
);
889 polyVN0
= fma(polyVN0
, z4
, VN0
);
890 polyVN0
= fma(polyVN1
, z2
, polyVN0
);
892 return polyVN0
* polyVD0
;
896 /*****************************************************************************
897 * Floating point math functions mimicking SIMD versions. *
898 * Double precision data, single precision accuracy. *
899 *****************************************************************************/
902 /*! \brief Calculate 1/sqrt(x) for double, but with single accuracy.
904 * \param x Argument that must be >0. This routine does not check arguments.
905 * \return 1/sqrt(x). Result is undefined if your argument was invalid.
907 * \note This function might be superficially meaningless, but it helps us to
908 * write templated SIMD/non-SIMD code. For clarity it should not be used
911 static inline double invsqrtSingleAccuracy(double x
)
913 return invsqrt(static_cast<float>(x
));
916 /*! \brief Calculate 1/sqrt(x) for two doubles, but with single accuracy.
918 * \param x0 First argument, x0 must be positive - no argument checking.
919 * \param x1 Second argument, x1 must be positive - no argument checking.
920 * \param[out] out0 Result 1/sqrt(x0)
921 * \param[out] out1 Result 1/sqrt(x1)
923 * \note This function might be superficially meaningless, but it helps us to
924 * write templated SIMD/non-SIMD code. For clarity it should not be used
927 static inline void invsqrtPairSingleAccuracy(double x0
, double x1
, double* out0
, double* out1
)
929 *out0
= invsqrt(static_cast<float>(x0
));
930 *out1
= invsqrt(static_cast<float>(x1
));
933 /*! \brief Calculate 1/x for double, but with single accuracy.
935 * \param x Argument that must be nonzero. This routine does not check arguments.
936 * \return 1/x. Result is undefined if your argument was invalid.
938 * \note This function might be superficially meaningless, but it helps us to
939 * write templated SIMD/non-SIMD code. For clarity it should not be used
942 static inline double invSingleAccuracy(double x
)
947 /*! \brief Calculate 1/sqrt(x) for masked entry of double, but with single accuracy.
949 * This routine only evaluates 1/sqrt(x) if mask is true.
950 * Illegal values for a masked-out double will not lead to
951 * floating-point exceptions.
953 * \param x Argument that must be >0 if masked-in.
955 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
956 * entry was not masked, and 0.0 for masked-out entries.
958 * \note This function might be superficially meaningless, but it helps us to
959 * write templated SIMD/non-SIMD code. For clarity it should not be used
962 static inline double maskzInvsqrtSingleAccuracy(double x
, bool m
)
964 return m
? invsqrtSingleAccuracy(x
) : 0.0;
967 /*! \brief Calculate 1/x for masked entry of double, but with single accuracy.
969 * This routine only evaluates 1/x if mask is true.
970 * Illegal values for a masked-out double will not lead to
971 * floating-point exceptions.
973 * \param x Argument that must be nonzero if masked-in.
975 * \return 1/x. Result is undefined if your argument was invalid or
976 * entry was not masked, and 0.0 for masked-out entries.
978 * \note This function might be superficially meaningless, but it helps us to
979 * write templated SIMD/non-SIMD code. For clarity it should not be used
982 static inline double maskzInvSingleAccuracy(double x
, bool m
)
984 return m
? invSingleAccuracy(x
) : 0.0;
987 /*! \brief Calculate sqrt(x) for double, but with single accuracy.
989 * \param x Argument that must be >=0.
992 * \note This function might be superficially meaningless, but it helps us to
993 * write templated SIMD/non-SIMD code. For clarity it should not be used
996 static inline double sqrtSingleAccuracy(double x
)
998 return std::sqrt(static_cast<float>(x
));
1001 /*! \brief Double log(x), but with single accuracy. This is the natural logarithm.
1003 * \param x Argument, should be >0.
1004 * \result The natural logarithm of x. Undefined if argument is invalid.
1006 * \note This function might be superficially meaningless, but it helps us to
1007 * write templated SIMD/non-SIMD code. For clarity it should not be used
1008 * outside such code.
1010 static inline double logSingleAccuracy(double x
)
1012 return std::log(static_cast<float>(x
));
1015 /*! \brief Double 2^x, but with single accuracy.
1017 * \param x Argument.
1018 * \result 2^x. Undefined if input argument caused overflow.
1020 * \note This function might be superficially meaningless, but it helps us to
1021 * write templated SIMD/non-SIMD code. For clarity it should not be used
1022 * outside such code.
1024 static inline double exp2SingleAccuracy(double x
)
1026 return std::exp2(static_cast<float>(x
));
1029 /*! \brief Double exp(x), but with single accuracy.
1031 * \param x Argument.
1032 * \result exp(x). Undefined if input argument caused overflow.
1034 * \note This function might be superficially meaningless, but it helps us to
1035 * write templated SIMD/non-SIMD code. For clarity it should not be used
1036 * outside such code.
1038 static inline double expSingleAccuracy(double x
)
1040 return std::exp(static_cast<float>(x
));
1043 /*! \brief Double erf(x), but with single accuracy.
1045 * \param x Argument.
1048 * \note This function might be superficially meaningless, but it helps us to
1049 * write templated SIMD/non-SIMD code. For clarity it should not be used
1050 * outside such code.
1052 static inline double erfSingleAccuracy(double x
)
1054 return std::erf(static_cast<float>(x
));
1057 /*! \brief Double erfc(x), but with single accuracy.
1059 * \param x Argument.
1062 * \note This function might be superficially meaningless, but it helps us to
1063 * write templated SIMD/non-SIMD code. For clarity it should not be used
1064 * outside such code.
1066 static inline double erfcSingleAccuracy(double x
)
1068 return std::erfc(static_cast<float>(x
));
1072 /*! \brief Double sin \& cos, but with single accuracy.
1074 * \param x The argument to evaluate sin/cos for
1075 * \param[out] sinval Sin(x)
1076 * \param[out] cosval Cos(x)
1078 * \note This function might be superficially meaningless, but it helps us to
1079 * write templated SIMD/non-SIMD code. For clarity it should not be used
1080 * outside such code.
1082 static inline void sincosSingleAccuracy(double x
, double* sinval
, double* cosval
)
1084 // There is no single-precision sincos guaranteed in C++11, so use
1085 // separate functions and hope the compiler optimizes it for us.
1086 *sinval
= std::sin(static_cast<float>(x
));
1087 *cosval
= std::cos(static_cast<float>(x
));
1090 /*! \brief Double sin, but with single accuracy.
1092 * \param x The argument to evaluate sin for
1095 * \note This function might be superficially meaningless, but it helps us to
1096 * write templated SIMD/non-SIMD code. For clarity it should not be used
1097 * outside such code.
1099 static inline double sinSingleAccuracy(double x
)
1101 return std::sin(static_cast<float>(x
));
1104 /*! \brief Double cos, but with single accuracy.
1106 * \param x The argument to evaluate cos for
1109 * \note This function might be superficially meaningless, but it helps us to
1110 * write templated SIMD/non-SIMD code. For clarity it should not be used
1111 * outside such code.
1113 static inline double cosSingleAccuracy(double x
)
1115 return std::cos(static_cast<float>(x
));
1118 /*! \brief Double tan, but with single accuracy.
1120 * \param x The argument to evaluate tan for
1123 * \note This function might be superficially meaningless, but it helps us to
1124 * write templated SIMD/non-SIMD code. For clarity it should not be used
1125 * outside such code.
1127 static inline double tanSingleAccuracy(double x
)
1129 return std::tan(static_cast<float>(x
));
1132 /*! \brief Double asin, but with single accuracy.
1134 * \param x The argument to evaluate asin for
1137 * \note This function might be superficially meaningless, but it helps us to
1138 * write templated SIMD/non-SIMD code. For clarity it should not be used
1139 * outside such code.
1141 static inline double asinSingleAccuracy(double x
)
1143 return std::asin(static_cast<float>(x
));
1146 /*! \brief Double acos, but with single accuracy.
1148 * \param x The argument to evaluate acos for
1151 * \note This function might be superficially meaningless, but it helps us to
1152 * write templated SIMD/non-SIMD code. For clarity it should not be used
1153 * outside such code.
1155 static inline double acosSingleAccuracy(double x
)
1157 return std::acos(static_cast<float>(x
));
1160 /*! \brief Double atan, but with single accuracy.
1162 * \param x The argument to evaluate atan for
1165 * \note This function might be superficially meaningless, but it helps us to
1166 * write templated SIMD/non-SIMD code. For clarity it should not be used
1167 * outside such code.
1169 static inline double atanSingleAccuracy(double x
)
1171 return std::atan(static_cast<float>(x
));
1174 /*! \brief Double atan2(y,x), but with single accuracy.
1176 * \param y Y component of vector, any quartile
1177 * \param x X component of vector, any quartile
1180 * \note This function might be superficially meaningless, but it helps us to
1181 * write templated SIMD/non-SIMD code. For clarity it should not be used
1182 * outside such code.
1184 static inline double atan2SingleAccuracy(double y
, double x
)
1186 return std::atan2(static_cast<float>(y
), static_cast<float>(x
));
1189 /*! \brief Force correction due to PME in double, but with single accuracy.
1191 * See the SIMD version of this function for details.
1193 * \param z2 input parameter
1194 * \returns Correction to use on force
1196 * \note This function might be superficially meaningless, but it helps us to
1197 * write templated SIMD/non-SIMD code. For clarity it should not be used
1198 * outside such code.
1200 static inline double pmeForceCorrectionSingleAccuracy(double z2
)
1202 const float FN6(-1.7357322914161492954e-8F
);
1203 const float FN5(1.4703624142580877519e-6F
);
1204 const float FN4(-0.000053401640219807709149F
);
1205 const float FN3(0.0010054721316683106153F
);
1206 const float FN2(-0.019278317264888380590F
);
1207 const float FN1(0.069670166153766424023F
);
1208 const float FN0(-0.75225204789749321333F
);
1210 const float FD4(0.0011193462567257629232F
);
1211 const float FD3(0.014866955030185295499F
);
1212 const float FD2(0.11583842382862377919F
);
1213 const float FD1(0.50736591960530292870F
);
1214 const float FD0(1.0F
);
1217 float polyFN0
, polyFN1
, polyFD0
, polyFD1
;
1223 polyFD0
= fma(FD4
, z4
, FD2
);
1224 polyFD1
= fma(FD3
, z4
, FD1
);
1225 polyFD0
= fma(polyFD0
, z4
, FD0
);
1226 polyFD0
= fma(polyFD1
, z2f
, polyFD0
);
1228 polyFN0
= fma(FN6
, z4
, FN4
);
1229 polyFN1
= fma(FN5
, z4
, FN3
);
1230 polyFN0
= fma(polyFN0
, z4
, FN2
);
1231 polyFN1
= fma(polyFN1
, z4
, FN1
);
1232 polyFN0
= fma(polyFN0
, z4
, FN0
);
1233 polyFN0
= fma(polyFN1
, z2f
, polyFN0
);
1235 return polyFN0
/ polyFD0
;
1238 /*! \brief Potential correction due to PME in double, but with single accuracy.
1240 * See the SIMD version of this function for details.
1242 * \param z2 input parameter
1243 * \returns Correction to use on potential.
1245 * \note This function might be superficially meaningless, but it helps us to
1246 * write templated SIMD/non-SIMD code. For clarity it should not be used
1247 * outside such code.
1249 static inline double pmePotentialCorrectionSingleAccuracy(double z2
)
1251 const float VN6(1.9296833005951166339e-8F
);
1252 const float VN5(-1.4213390571557850962e-6F
);
1253 const float VN4(0.000041603292906656984871F
);
1254 const float VN3(-0.00013134036773265025626F
);
1255 const float VN2(0.038657983986041781264F
);
1256 const float VN1(0.11285044772717598220F
);
1257 const float VN0(1.1283802385263030286F
);
1259 const float VD3(0.0066752224023576045451F
);
1260 const float VD2(0.078647795836373922256F
);
1261 const float VD1(0.43336185284710920150F
);
1262 const float VD0(1.0F
);
1265 float polyVN0
, polyVN1
, polyVD0
, polyVD1
;
1271 polyVD1
= fma(VD3
, z4
, VD1
);
1272 polyVD0
= fma(VD2
, z4
, VD0
);
1273 polyVD0
= fma(polyVD1
, z2f
, polyVD0
);
1275 polyVN0
= fma(VN6
, z4
, VN4
);
1276 polyVN1
= fma(VN5
, z4
, VN3
);
1277 polyVN0
= fma(polyVN0
, z4
, VN2
);
1278 polyVN1
= fma(polyVN1
, z4
, VN1
);
1279 polyVN0
= fma(polyVN0
, z4
, VN0
);
1280 polyVN0
= fma(polyVN1
, z2f
, polyVN0
);
1282 return polyVN0
/ polyVD0
;
1289 #endif // GMX_SIMD_SCALAR_MATH_H