2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #ifndef GMX_SIMD_SCALAR_MATH_H
36 #define GMX_SIMD_SCALAR_MATH_H
40 #include "gromacs/math/functions.h"
41 #include "gromacs/math/utilities.h"
42 #include "gromacs/simd/scalar/scalar.h"
44 /*! \libinternal \file
46 * \brief Scalar math functions mimicking GROMACS SIMD math functions
48 * These versions make it possible to write functions that are templated with
49 * either a SIMD or scalar type. While some of these functions might not appear
50 * SIMD-specific, we have placed them here because the only reason to use these
51 * instead of generic function is in templated combined SIMD/non-SIMD code.
52 * It is important that these functions match the SIMD versions exactly in their
53 * arguments and template arguments so that overload resolution works correctly.
55 * \author Erik Lindahl <erik.lindahl@gmail.com>
58 * \ingroup module_simd
64 /*****************************************************************************
65 * Single-precision floating point math functions mimicking SIMD versions *
66 *****************************************************************************/
69 /*! \brief Composes single value with the magnitude of x and the sign of y.
71 * \param x Value to set sign for
72 * \param y Value used to set sign
73 * \return Magnitude of x, sign of y
75 * \note This function might be superficially meaningless, but it helps us to
76 * write templated SIMD/non-SIMD code. For clarity it should not be used
80 copysign(float x
, float y
)
82 return std::copysign(x
, y
);
85 // invsqrt(x) is already defined in math/functions.h
87 /*! \brief Calculate 1/sqrt(x) for two floats.
89 * \param x0 First argument, x0 must be positive - no argument checking.
90 * \param x1 Second argument, x1 must be positive - no argument checking.
91 * \param[out] out0 Result 1/sqrt(x0)
92 * \param[out] out1 Result 1/sqrt(x1)
94 * \note This function might be superficially meaningless, but it helps us to
95 * write templated SIMD/non-SIMD code. For clarity it should not be used
99 invsqrtPair(float x0
, float x1
,
100 float *out0
, float *out1
)
106 /*! \brief Calculate 1/x for float.
108 * \param x Argument that must be nonzero. This routine does not check arguments.
109 * \return 1/x. Result is undefined if your argument was invalid.
111 * \note This function might be superficially meaningless, but it helps us to
112 * write templated SIMD/non-SIMD code. For clarity it should not be used
121 /*! \brief Calculate 1/sqrt(x) for masked entry of float.
123 * This routine only evaluates 1/sqrt(x) if mask is true.
124 * Illegal values for a masked-out float will not lead to
125 * floating-point exceptions.
127 * \param x Argument that must be >0 if masked-in.
129 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
130 * entry was not masked, and 0.0 for masked-out entries.
132 * \note This function might be superficially meaningless, but it helps us to
133 * write templated SIMD/non-SIMD code. For clarity it should not be used
137 maskzInvsqrt(float x
, bool m
)
139 return m
? invsqrt(x
) : 0.0f
;
142 /*! \brief Calculate 1/x for masked entry of float.
144 * This routine only evaluates 1/x if mask is true.
145 * Illegal values for a masked-out float will not lead to
146 * floating-point exceptions.
148 * \param x Argument that must be nonzero if masked-in.
150 * \return 1/x. Result is undefined if your argument was invalid or
151 * entry was not masked, and 0.0 for masked-out entries.
153 * \note This function might be superficially meaningless, but it helps us to
154 * write templated SIMD/non-SIMD code. For clarity it should not be used
158 maskzInv(float x
, bool m
)
160 return m
? inv(x
) : 0.0f
;
163 /*! \brief Float sqrt(x). This is the square root.
165 * \param x Argument, should be >= 0.
166 * \result The square root of x. Undefined if argument is invalid.
168 * \note This function might be superficially meaningless, but it helps us to
169 * write templated SIMD/non-SIMD code. For clarity it should not be used
172 template <MathOptimization opt
= MathOptimization::Safe
>
179 /*! \brief Float log(x). This is the natural logarithm.
181 * \param x Argument, should be >0.
182 * \result The natural logarithm of x. Undefined if argument is invalid.
184 * \note This function might be superficially meaningless, but it helps us to
185 * write templated SIMD/non-SIMD code. For clarity it should not be used
194 /*! \brief Float 2^x.
197 * \result 2^x. Undefined if input argument caused overflow.
199 * \note This function might be superficially meaningless, but it helps us to
200 * write templated SIMD/non-SIMD code. For clarity it should not be used
203 template <MathOptimization opt
= MathOptimization::Safe
>
210 /*! \brief Float exp(x).
213 * \result exp(x). Undefined if input argument caused overflow.
215 * \note This function might be superficially meaningless, but it helps us to
216 * write templated SIMD/non-SIMD code. For clarity it should not be used
219 template <MathOptimization opt
= MathOptimization::Safe
>
226 /*! \brief Float erf(x).
231 * \note This function might be superficially meaningless, but it helps us to
232 * write templated SIMD/non-SIMD code. For clarity it should not be used
241 /*! \brief Float erfc(x).
246 * \note This function might be superficially meaningless, but it helps us to
247 * write templated SIMD/non-SIMD code. For clarity it should not be used
257 /*! \brief Float sin \& cos.
259 * \param x The argument to evaluate sin/cos for
260 * \param[out] sinval Sin(x)
261 * \param[out] cosval Cos(x)
263 * \note This function might be superficially meaningless, but it helps us to
264 * write templated SIMD/non-SIMD code. For clarity it should not be used
268 sincos(float x
, float *sinval
, float *cosval
)
270 *sinval
= std::sin(x
);
271 *cosval
= std::cos(x
);
274 /*! \brief Float sin.
276 * \param x The argument to evaluate sin for
279 * \note This function might be superficially meaningless, but it helps us to
280 * write templated SIMD/non-SIMD code. For clarity it should not be used
289 /*! \brief Float cos.
291 * \param x The argument to evaluate cos for
294 * \note This function might be superficially meaningless, but it helps us to
295 * write templated SIMD/non-SIMD code. For clarity it should not be used
304 /*! \brief Float tan.
306 * \param x The argument to evaluate tan for
309 * \note This function might be superficially meaningless, but it helps us to
310 * write templated SIMD/non-SIMD code. For clarity it should not be used
319 /*! \brief float asin.
321 * \param x The argument to evaluate asin for
324 * \note This function might be superficially meaningless, but it helps us to
325 * write templated SIMD/non-SIMD code. For clarity it should not be used
334 /*! \brief Float acos.
336 * \param x The argument to evaluate acos for
339 * \note This function might be superficially meaningless, but it helps us to
340 * write templated SIMD/non-SIMD code. For clarity it should not be used
349 /*! \brief Float atan.
351 * \param x The argument to evaluate atan for
354 * \note This function might be superficially meaningless, but it helps us to
355 * write templated SIMD/non-SIMD code. For clarity it should not be used
364 /*! \brief Float atan2(y,x).
366 * \param y Y component of vector, any quartile
367 * \param x X component of vector, any quartile
370 * \note This function might be superficially meaningless, but it helps us to
371 * write templated SIMD/non-SIMD code. For clarity it should not be used
375 atan2(float y
, float x
)
377 return std::atan2(y
, x
);
380 /*! \brief Calculate the force correction due to PME analytically in float.
382 * See the SIMD version of this function for details.
384 * \param z2 input parameter
385 * \returns Correction to use on force
387 * \note This function might be superficially meaningless, but it helps us to
388 * write templated SIMD/non-SIMD code. For clarity it should not be used
392 pmeForceCorrection(float z2
)
394 const float FN6(-1.7357322914161492954e-8f
);
395 const float FN5(1.4703624142580877519e-6f
);
396 const float FN4(-0.000053401640219807709149f
);
397 const float FN3(0.0010054721316683106153f
);
398 const float FN2(-0.019278317264888380590f
);
399 const float FN1(0.069670166153766424023f
);
400 const float FN0(-0.75225204789749321333f
);
402 const float FD4(0.0011193462567257629232f
);
403 const float FD3(0.014866955030185295499f
);
404 const float FD2(0.11583842382862377919f
);
405 const float FD1(0.50736591960530292870f
);
406 const float FD0(1.0f
);
409 float polyFN0
, polyFN1
, polyFD0
, polyFD1
;
413 polyFD0
= fma(FD4
, z4
, FD2
);
414 polyFD1
= fma(FD3
, z4
, FD1
);
415 polyFD0
= fma(polyFD0
, z4
, FD0
);
416 polyFD0
= fma(polyFD1
, z2
, polyFD0
);
418 polyFN0
= fma(FN6
, z4
, FN4
);
419 polyFN1
= fma(FN5
, z4
, FN3
);
420 polyFN0
= fma(polyFN0
, z4
, FN2
);
421 polyFN1
= fma(polyFN1
, z4
, FN1
);
422 polyFN0
= fma(polyFN0
, z4
, FN0
);
423 polyFN0
= fma(polyFN1
, z2
, polyFN0
);
425 return polyFN0
/ polyFD0
;
428 /*! \brief Calculate the potential correction due to PME analytically in float.
430 * See the SIMD version of this function for details.
432 * \param z2 input parameter
433 * \returns Correction to use on potential.
435 * \note This function might be superficially meaningless, but it helps us to
436 * write templated SIMD/non-SIMD code. For clarity it should not be used
440 pmePotentialCorrection(float z2
)
442 const float VN6(1.9296833005951166339e-8f
);
443 const float VN5(-1.4213390571557850962e-6f
);
444 const float VN4(0.000041603292906656984871f
);
445 const float VN3(-0.00013134036773265025626f
);
446 const float VN2(0.038657983986041781264f
);
447 const float VN1(0.11285044772717598220f
);
448 const float VN0(1.1283802385263030286f
);
450 const float VD3(0.0066752224023576045451f
);
451 const float VD2(0.078647795836373922256f
);
452 const float VD1(0.43336185284710920150f
);
453 const float VD0(1.0f
);
456 float polyVN0
, polyVN1
, polyVD0
, polyVD1
;
460 polyVD1
= fma(VD3
, z4
, VD1
);
461 polyVD0
= fma(VD2
, z4
, VD0
);
462 polyVD0
= fma(polyVD1
, z2
, polyVD0
);
464 polyVN0
= fma(VN6
, z4
, VN4
);
465 polyVN1
= fma(VN5
, z4
, VN3
);
466 polyVN0
= fma(polyVN0
, z4
, VN2
);
467 polyVN1
= fma(polyVN1
, z4
, VN1
);
468 polyVN0
= fma(polyVN0
, z4
, VN0
);
469 polyVN0
= fma(polyVN1
, z2
, polyVN0
);
471 return polyVN0
/ polyVD0
;
474 /*****************************************************************************
475 * Double-precision floating point math functions mimicking SIMD versions *
476 *****************************************************************************/
479 /*! \brief Composes double value with the magnitude of x and the sign of y.
481 * \param x Value to set sign for
482 * \param y Value used to set sign
483 * \return Magnitude of x, sign of y
485 * \note This function might be superficially meaningless, but it helps us to
486 * write templated SIMD/non-SIMD code. For clarity it should not be used
490 copysign(double x
, double y
)
492 return std::copysign(x
, y
);
495 // invsqrt(x) is already defined in math/functions.h
497 /*! \brief Calculate 1/sqrt(x) for two doubles.
499 * \param x0 First argument, x0 must be positive - no argument checking.
500 * \param x1 Second argument, x1 must be positive - no argument checking.
501 * \param[out] out0 Result 1/sqrt(x0)
502 * \param[out] out1 Result 1/sqrt(x1)
504 * \note This function might be superficially meaningless, but it helps us to
505 * write templated SIMD/non-SIMD code. For clarity it should not be used
509 invsqrtPair(double x0
, double x1
,
510 double *out0
, double *out1
)
516 /*! \brief Calculate 1/x for double.
518 * \param x Argument that must be nonzero. This routine does not check arguments.
519 * \return 1/x. Result is undefined if your argument was invalid.
521 * \note This function might be superficially meaningless, but it helps us to
522 * write templated SIMD/non-SIMD code. For clarity it should not be used
531 /*! \brief Calculate 1/sqrt(x) for masked entry of double.
533 * This routine only evaluates 1/sqrt(x) if mask is true.
534 * Illegal values for a masked-out double will not lead to
535 * floating-point exceptions.
537 * \param x Argument that must be >0 if masked-in.
539 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
540 * entry was not masked, and 0.0 for masked-out entries.
542 * \note This function might be superficially meaningless, but it helps us to
543 * write templated SIMD/non-SIMD code. For clarity it should not be used
547 maskzInvsqrt(double x
, bool m
)
549 return m
? invsqrt(x
) : 0.0;
552 /*! \brief Calculate 1/x for masked entry of double.
554 * This routine only evaluates 1/x if mask is true.
555 * Illegal values for a masked-out double will not lead to
556 * floating-point exceptions.
558 * \param x Argument that must be nonzero if masked-in.
560 * \return 1/x. Result is undefined if your argument was invalid or
561 * entry was not masked, and 0.0 for masked-out entries.
563 * \note This function might be superficially meaningless, but it helps us to
564 * write templated SIMD/non-SIMD code. For clarity it should not be used
568 maskzInv(double x
, bool m
)
570 return m
? inv(x
) : 0.0;
573 /*! \brief Double sqrt(x). This is the square root.
575 * \param x Argument, should be >= 0.
576 * \result The square root of x. Undefined if argument is invalid.
578 * \note This function might be superficially meaningless, but it helps us to
579 * write templated SIMD/non-SIMD code. For clarity it should not be used
582 template <MathOptimization opt
= MathOptimization::Safe
>
589 /*! \brief Double log(x). This is the natural logarithm.
591 * \param x Argument, should be >0.
592 * \result The natural logarithm of x. Undefined if argument is invalid.
594 * \note This function might be superficially meaningless, but it helps us to
595 * write templated SIMD/non-SIMD code. For clarity it should not be used
604 /*! \brief Double 2^x.
607 * \result 2^x. Undefined if input argument caused overflow.
609 * \note This function might be superficially meaningless, but it helps us to
610 * write templated SIMD/non-SIMD code. For clarity it should not be used
613 template <MathOptimization opt
= MathOptimization::Safe
>
620 /*! \brief Double exp(x).
623 * \result exp(x). Undefined if input argument caused overflow.
625 * \note This function might be superficially meaningless, but it helps us to
626 * write templated SIMD/non-SIMD code. For clarity it should not be used
629 template <MathOptimization opt
= MathOptimization::Safe
>
636 /*! \brief Double erf(x).
641 * \note This function might be superficially meaningless, but it helps us to
642 * write templated SIMD/non-SIMD code. For clarity it should not be used
651 /*! \brief Double erfc(x).
656 * \note This function might be superficially meaningless, but it helps us to
657 * write templated SIMD/non-SIMD code. For clarity it should not be used
667 /*! \brief Double sin \& cos.
669 * \param x The argument to evaluate sin/cos for
670 * \param[out] sinval Sin(x)
671 * \param[out] cosval Cos(x)
673 * \note This function might be superficially meaningless, but it helps us to
674 * write templated SIMD/non-SIMD code. For clarity it should not be used
678 sincos(double x
, double *sinval
, double *cosval
)
680 *sinval
= std::sin(x
);
681 *cosval
= std::cos(x
);
684 /*! \brief Double sin.
686 * \param x The argument to evaluate sin for
689 * \note This function might be superficially meaningless, but it helps us to
690 * write templated SIMD/non-SIMD code. For clarity it should not be used
699 /*! \brief Double cos.
701 * \param x The argument to evaluate cos for
704 * \note This function might be superficially meaningless, but it helps us to
705 * write templated SIMD/non-SIMD code. For clarity it should not be used
714 /*! \brief Double tan.
716 * \param x The argument to evaluate tan for
719 * \note This function might be superficially meaningless, but it helps us to
720 * write templated SIMD/non-SIMD code. For clarity it should not be used
729 /*! \brief Double asin.
731 * \param x The argument to evaluate asin for
734 * \note This function might be superficially meaningless, but it helps us to
735 * write templated SIMD/non-SIMD code. For clarity it should not be used
744 /*! \brief Double acos.
746 * \param x The argument to evaluate acos for
749 * \note This function might be superficially meaningless, but it helps us to
750 * write templated SIMD/non-SIMD code. For clarity it should not be used
759 /*! \brief Double atan.
761 * \param x The argument to evaluate atan for
764 * \note This function might be superficially meaningless, but it helps us to
765 * write templated SIMD/non-SIMD code. For clarity it should not be used
774 /*! \brief Double atan2(y,x).
776 * \param y Y component of vector, any quartile
777 * \param x X component of vector, any quartile
780 * \note This function might be superficially meaningless, but it helps us to
781 * write templated SIMD/non-SIMD code. For clarity it should not be used
785 atan2(double y
, double x
)
787 return std::atan2(y
, x
);
790 /*! \brief Calculate the force correction due to PME analytically in double.
792 * See the SIMD version of this function for details.
794 * \param z2 input parameter
795 * \returns Correction to use on force
797 * \note This function might be superficially meaningless, but it helps us to
798 * write templated SIMD/non-SIMD code. For clarity it should not be used
802 pmeForceCorrection(double z2
)
804 const double FN10(-8.0072854618360083154e-14);
805 const double FN9(1.1859116242260148027e-11);
806 const double FN8(-8.1490406329798423616e-10);
807 const double FN7(3.4404793543907847655e-8);
808 const double FN6(-9.9471420832602741006e-7);
809 const double FN5(0.000020740315999115847456);
810 const double FN4(-0.00031991745139313364005);
811 const double FN3(0.0035074449373659008203);
812 const double FN2(-0.031750380176100813405);
813 const double FN1(0.13884101728898463426);
814 const double FN0(-0.75225277815249618847);
816 const double FD5(0.000016009278224355026701);
817 const double FD4(0.00051055686934806966046);
818 const double FD3(0.0081803507497974289008);
819 const double FD2(0.077181146026670287235);
820 const double FD1(0.41543303143712535988);
821 const double FD0(1.0);
824 double polyFN0
, polyFN1
, polyFD0
, polyFD1
;
828 polyFD1
= fma(FD5
, z4
, FD3
);
829 polyFD1
= fma(polyFD1
, z4
, FD1
);
830 polyFD1
= polyFD1
* z2
;
831 polyFD0
= fma(FD4
, z4
, FD2
);
832 polyFD0
= fma(polyFD0
, z4
, FD0
);
833 polyFD0
= polyFD0
+ polyFD1
;
835 polyFD0
= inv(polyFD0
);
837 polyFN0
= fma(FN10
, z4
, FN8
);
838 polyFN0
= fma(polyFN0
, z4
, FN6
);
839 polyFN0
= fma(polyFN0
, z4
, FN4
);
840 polyFN0
= fma(polyFN0
, z4
, FN2
);
841 polyFN0
= fma(polyFN0
, z4
, FN0
);
842 polyFN1
= fma(FN9
, z4
, FN7
);
843 polyFN1
= fma(polyFN1
, z4
, FN5
);
844 polyFN1
= fma(polyFN1
, z4
, FN3
);
845 polyFN1
= fma(polyFN1
, z4
, FN1
);
846 polyFN0
= fma(polyFN1
, z2
, polyFN0
);
848 return polyFN0
* polyFD0
;
851 /*! \brief Calculate the potential correction due to PME analytically in double.
853 * See the SIMD version of this function for details.
855 * \param z2 input parameter
856 * \returns Correction to use on potential.
858 * \note This function might be superficially meaningless, but it helps us to
859 * write templated SIMD/non-SIMD code. For clarity it should not be used
863 pmePotentialCorrection(double z2
)
865 const double VN9(-9.3723776169321855475e-13);
866 const double VN8(1.2280156762674215741e-10);
867 const double VN7(-7.3562157912251309487e-9);
868 const double VN6(2.6215886208032517509e-7);
869 const double VN5(-4.9532491651265819499e-6);
870 const double VN4(0.00025907400778966060389);
871 const double VN3(0.0010585044856156469792);
872 const double VN2(0.045247661136833092885);
873 const double VN1(0.11643931522926034421);
874 const double VN0(1.1283791671726767970);
876 const double VD5(0.000021784709867336150342);
877 const double VD4(0.00064293662010911388448);
878 const double VD3(0.0096311444822588683504);
879 const double VD2(0.085608012351550627051);
880 const double VD1(0.43652499166614811084);
881 const double VD0(1.0);
884 double polyVN0
, polyVN1
, polyVD0
, polyVD1
;
888 polyVD1
= fma(VD5
, z4
, VD3
);
889 polyVD0
= fma(VD4
, z4
, VD2
);
890 polyVD1
= fma(polyVD1
, z4
, VD1
);
891 polyVD0
= fma(polyVD0
, z4
, VD0
);
892 polyVD0
= fma(polyVD1
, z2
, polyVD0
);
894 polyVD0
= inv(polyVD0
);
896 polyVN1
= fma(VN9
, z4
, VN7
);
897 polyVN0
= fma(VN8
, z4
, VN6
);
898 polyVN1
= fma(polyVN1
, z4
, VN5
);
899 polyVN0
= fma(polyVN0
, z4
, VN4
);
900 polyVN1
= fma(polyVN1
, z4
, VN3
);
901 polyVN0
= fma(polyVN0
, z4
, VN2
);
902 polyVN1
= fma(polyVN1
, z4
, VN1
);
903 polyVN0
= fma(polyVN0
, z4
, VN0
);
904 polyVN0
= fma(polyVN1
, z2
, polyVN0
);
906 return polyVN0
* polyVD0
;
910 /*****************************************************************************
911 * Floating point math functions mimicking SIMD versions. *
912 * Double precision data, single precision accuracy. *
913 *****************************************************************************/
916 /*! \brief Calculate 1/sqrt(x) for double, but with single accuracy.
918 * \param x Argument that must be >0. This routine does not check arguments.
919 * \return 1/sqrt(x). Result is undefined if your argument was invalid.
921 * \note This function might be superficially meaningless, but it helps us to
922 * write templated SIMD/non-SIMD code. For clarity it should not be used
926 invsqrtSingleAccuracy(double x
)
928 return invsqrt(static_cast<float>(x
));
931 /*! \brief Calculate 1/sqrt(x) for two doubles, but with single accuracy.
933 * \param x0 First argument, x0 must be positive - no argument checking.
934 * \param x1 Second argument, x1 must be positive - no argument checking.
935 * \param[out] out0 Result 1/sqrt(x0)
936 * \param[out] out1 Result 1/sqrt(x1)
938 * \note This function might be superficially meaningless, but it helps us to
939 * write templated SIMD/non-SIMD code. For clarity it should not be used
943 invsqrtPairSingleAccuracy(double x0
, double x1
,
944 double *out0
, double *out1
)
946 *out0
= invsqrt(static_cast<float>(x0
));
947 *out1
= invsqrt(static_cast<float>(x1
));
950 /*! \brief Calculate 1/x for double, but with single accuracy.
952 * \param x Argument that must be nonzero. This routine does not check arguments.
953 * \return 1/x. Result is undefined if your argument was invalid.
955 * \note This function might be superficially meaningless, but it helps us to
956 * write templated SIMD/non-SIMD code. For clarity it should not be used
960 invSingleAccuracy(double x
)
965 /*! \brief Calculate 1/sqrt(x) for masked entry of double, but with single accuracy.
967 * This routine only evaluates 1/sqrt(x) if mask is true.
968 * Illegal values for a masked-out double will not lead to
969 * floating-point exceptions.
971 * \param x Argument that must be >0 if masked-in.
973 * \return 1/sqrt(x). Result is undefined if your argument was invalid or
974 * entry was not masked, and 0.0 for masked-out entries.
976 * \note This function might be superficially meaningless, but it helps us to
977 * write templated SIMD/non-SIMD code. For clarity it should not be used
981 maskzInvsqrtSingleAccuracy(double x
, bool m
)
983 return m
? invsqrtSingleAccuracy(x
) : 0.0;
986 /*! \brief Calculate 1/x for masked entry of double, but with single accuracy.
988 * This routine only evaluates 1/x if mask is true.
989 * Illegal values for a masked-out double will not lead to
990 * floating-point exceptions.
992 * \param x Argument that must be nonzero if masked-in.
994 * \return 1/x. Result is undefined if your argument was invalid or
995 * entry was not masked, and 0.0 for masked-out entries.
997 * \note This function might be superficially meaningless, but it helps us to
998 * write templated SIMD/non-SIMD code. For clarity it should not be used
1001 static inline double
1002 maskzInvSingleAccuracy(double x
, bool m
)
1004 return m
? invSingleAccuracy(x
) : 0.0;
1007 /*! \brief Calculate sqrt(x) for double, but with single accuracy.
1009 * \param x Argument that must be >=0.
1012 * \note This function might be superficially meaningless, but it helps us to
1013 * write templated SIMD/non-SIMD code. For clarity it should not be used
1014 * outside such code.
1016 static inline double
1017 sqrtSingleAccuracy(double x
)
1019 return std::sqrt(static_cast<float>(x
));
1022 /*! \brief Double log(x), but with single accuracy. This is the natural logarithm.
1024 * \param x Argument, should be >0.
1025 * \result The natural logarithm of x. Undefined if argument is invalid.
1027 * \note This function might be superficially meaningless, but it helps us to
1028 * write templated SIMD/non-SIMD code. For clarity it should not be used
1029 * outside such code.
1031 static inline double
1032 logSingleAccuracy(double x
)
1034 return std::log(static_cast<float>(x
));
1037 /*! \brief Double 2^x, but with single accuracy.
1039 * \param x Argument.
1040 * \result 2^x. Undefined if input argument caused overflow.
1042 * \note This function might be superficially meaningless, but it helps us to
1043 * write templated SIMD/non-SIMD code. For clarity it should not be used
1044 * outside such code.
1046 static inline double
1047 exp2SingleAccuracy(double x
)
1049 return std::exp2(static_cast<float>(x
));
1052 /*! \brief Double exp(x), but with single accuracy.
1054 * \param x Argument.
1055 * \result exp(x). Undefined if input argument caused overflow.
1057 * \note This function might be superficially meaningless, but it helps us to
1058 * write templated SIMD/non-SIMD code. For clarity it should not be used
1059 * outside such code.
1061 static inline double
1062 expSingleAccuracy(double x
)
1064 return std::exp(static_cast<float>(x
));
1067 /*! \brief Double erf(x), but with single accuracy.
1069 * \param x Argument.
1072 * \note This function might be superficially meaningless, but it helps us to
1073 * write templated SIMD/non-SIMD code. For clarity it should not be used
1074 * outside such code.
1076 static inline double
1077 erfSingleAccuracy(double x
)
1079 return std::erf(static_cast<float>(x
));
1082 /*! \brief Double erfc(x), but with single accuracy.
1084 * \param x Argument.
1087 * \note This function might be superficially meaningless, but it helps us to
1088 * write templated SIMD/non-SIMD code. For clarity it should not be used
1089 * outside such code.
1091 static inline double
1092 erfcSingleAccuracy(double x
)
1094 return std::erfc(static_cast<float>(x
));
1098 /*! \brief Double sin \& cos, but with single accuracy.
1100 * \param x The argument to evaluate sin/cos for
1101 * \param[out] sinval Sin(x)
1102 * \param[out] cosval Cos(x)
1104 * \note This function might be superficially meaningless, but it helps us to
1105 * write templated SIMD/non-SIMD code. For clarity it should not be used
1106 * outside such code.
1109 sincosSingleAccuracy(double x
, double *sinval
, double *cosval
)
1111 // There is no single-precision sincos guaranteed in C++11, so use
1112 // separate functions and hope the compiler optimizes it for us.
1113 *sinval
= std::sin(static_cast<float>(x
));
1114 *cosval
= std::cos(static_cast<float>(x
));
1117 /*! \brief Double sin, but with single accuracy.
1119 * \param x The argument to evaluate sin for
1122 * \note This function might be superficially meaningless, but it helps us to
1123 * write templated SIMD/non-SIMD code. For clarity it should not be used
1124 * outside such code.
1126 static inline double
1127 sinSingleAccuracy(double x
)
1129 return std::sin(static_cast<float>(x
));
1132 /*! \brief Double cos, but with single accuracy.
1134 * \param x The argument to evaluate cos for
1137 * \note This function might be superficially meaningless, but it helps us to
1138 * write templated SIMD/non-SIMD code. For clarity it should not be used
1139 * outside such code.
1141 static inline double
1142 cosSingleAccuracy(double x
)
1144 return std::cos(static_cast<float>(x
));
1147 /*! \brief Double tan, but with single accuracy.
1149 * \param x The argument to evaluate tan for
1152 * \note This function might be superficially meaningless, but it helps us to
1153 * write templated SIMD/non-SIMD code. For clarity it should not be used
1154 * outside such code.
1156 static inline double
1157 tanSingleAccuracy(double x
)
1159 return std::tan(static_cast<float>(x
));
1162 /*! \brief Double asin, but with single accuracy.
1164 * \param x The argument to evaluate asin for
1167 * \note This function might be superficially meaningless, but it helps us to
1168 * write templated SIMD/non-SIMD code. For clarity it should not be used
1169 * outside such code.
1171 static inline double
1172 asinSingleAccuracy(double x
)
1174 return std::asin(static_cast<float>(x
));
1177 /*! \brief Double acos, but with single accuracy.
1179 * \param x The argument to evaluate acos for
1182 * \note This function might be superficially meaningless, but it helps us to
1183 * write templated SIMD/non-SIMD code. For clarity it should not be used
1184 * outside such code.
1186 static inline double
1187 acosSingleAccuracy(double x
)
1189 return std::acos(static_cast<float>(x
));
1192 /*! \brief Double atan, but with single accuracy.
1194 * \param x The argument to evaluate atan for
1197 * \note This function might be superficially meaningless, but it helps us to
1198 * write templated SIMD/non-SIMD code. For clarity it should not be used
1199 * outside such code.
1201 static inline double
1202 atanSingleAccuracy(double x
)
1204 return std::atan(static_cast<float>(x
));
1207 /*! \brief Double atan2(y,x), but with single accuracy.
1209 * \param y Y component of vector, any quartile
1210 * \param x X component of vector, any quartile
1213 * \note This function might be superficially meaningless, but it helps us to
1214 * write templated SIMD/non-SIMD code. For clarity it should not be used
1215 * outside such code.
1217 static inline double
1218 atan2SingleAccuracy(double y
, double x
)
1220 return std::atan2(static_cast<float>(y
), static_cast<float>(x
));
1223 /*! \brief Force correction due to PME in double, but with single accuracy.
1225 * See the SIMD version of this function for details.
1227 * \param z2 input parameter
1228 * \returns Correction to use on force
1230 * \note This function might be superficially meaningless, but it helps us to
1231 * write templated SIMD/non-SIMD code. For clarity it should not be used
1232 * outside such code.
1234 static inline double
1235 pmeForceCorrectionSingleAccuracy(double z2
)
1237 const float FN6(-1.7357322914161492954e-8f
);
1238 const float FN5(1.4703624142580877519e-6f
);
1239 const float FN4(-0.000053401640219807709149f
);
1240 const float FN3(0.0010054721316683106153f
);
1241 const float FN2(-0.019278317264888380590f
);
1242 const float FN1(0.069670166153766424023f
);
1243 const float FN0(-0.75225204789749321333f
);
1245 const float FD4(0.0011193462567257629232f
);
1246 const float FD3(0.014866955030185295499f
);
1247 const float FD2(0.11583842382862377919f
);
1248 const float FD1(0.50736591960530292870f
);
1249 const float FD0(1.0f
);
1252 float polyFN0
, polyFN1
, polyFD0
, polyFD1
;
1258 polyFD0
= fma(FD4
, z4
, FD2
);
1259 polyFD1
= fma(FD3
, z4
, FD1
);
1260 polyFD0
= fma(polyFD0
, z4
, FD0
);
1261 polyFD0
= fma(polyFD1
, z2f
, polyFD0
);
1263 polyFN0
= fma(FN6
, z4
, FN4
);
1264 polyFN1
= fma(FN5
, z4
, FN3
);
1265 polyFN0
= fma(polyFN0
, z4
, FN2
);
1266 polyFN1
= fma(polyFN1
, z4
, FN1
);
1267 polyFN0
= fma(polyFN0
, z4
, FN0
);
1268 polyFN0
= fma(polyFN1
, z2f
, polyFN0
);
1270 return polyFN0
/ polyFD0
;
1273 /*! \brief Potential correction due to PME in double, but with single accuracy.
1275 * See the SIMD version of this function for details.
1277 * \param z2 input parameter
1278 * \returns Correction to use on potential.
1280 * \note This function might be superficially meaningless, but it helps us to
1281 * write templated SIMD/non-SIMD code. For clarity it should not be used
1282 * outside such code.
1284 static inline double
1285 pmePotentialCorrectionSingleAccuracy(double z2
)
1287 const float VN6(1.9296833005951166339e-8f
);
1288 const float VN5(-1.4213390571557850962e-6f
);
1289 const float VN4(0.000041603292906656984871f
);
1290 const float VN3(-0.00013134036773265025626f
);
1291 const float VN2(0.038657983986041781264f
);
1292 const float VN1(0.11285044772717598220f
);
1293 const float VN0(1.1283802385263030286f
);
1295 const float VD3(0.0066752224023576045451f
);
1296 const float VD2(0.078647795836373922256f
);
1297 const float VD1(0.43336185284710920150f
);
1298 const float VD0(1.0f
);
1301 float polyVN0
, polyVN1
, polyVD0
, polyVD1
;
1307 polyVD1
= fma(VD3
, z4
, VD1
);
1308 polyVD0
= fma(VD2
, z4
, VD0
);
1309 polyVD0
= fma(polyVD1
, z2f
, polyVD0
);
1311 polyVN0
= fma(VN6
, z4
, VN4
);
1312 polyVN1
= fma(VN5
, z4
, VN3
);
1313 polyVN0
= fma(polyVN0
, z4
, VN2
);
1314 polyVN1
= fma(polyVN1
, z4
, VN1
);
1315 polyVN0
= fma(polyVN0
, z4
, VN0
);
1316 polyVN0
= fma(polyVN1
, z2f
, polyVN0
);
1318 return polyVN0
/ polyVD0
;
1326 #endif // GMX_SIMD_SCALAR_MATH_H