src/gromacs/simd/scalar/scalar_math.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017,2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #ifndef GMX_SIMD_SCALAR_MATH_H
  36 #define GMX_SIMD_SCALAR_MATH_H
  37
  38 #include <cmath>
  39
  40 #include "gromacs/math/functions.h"
  41 #include "gromacs/math/utilities.h"
  42 #include "gromacs/simd/scalar/scalar.h"
  43
  44 /*! \libinternal \file
  45  *
  46  * \brief Scalar math functions mimicking GROMACS SIMD math functions
  47  *
  48  * These versions make it possible to write functions that are templated with
  49  * either a SIMD or scalar type. While some of these functions might not appear
  50  * SIMD-specific, we have placed them here because the only reason to use these
  51  * instead of generic function is in templated combined SIMD/non-SIMD code.
  52  * It is important that these functions match the SIMD versions exactly in their
  53  * arguments and template arguments so that overload resolution works correctly.
  54  *
  55  * \author Erik Lindahl <erik.lindahl@gmail.com>
  56  *
  57  * \inlibraryapi
  58  * \ingroup module_simd
  59  */
  60
  61 namespace gmx
  62 {
  63
  64 /*****************************************************************************
  65  *   Single-precision floating point math functions mimicking SIMD versions  *
  66  *****************************************************************************/
  67
  68
  69 /*! \brief Composes single value with the magnitude of x and the sign of y.
  70  *
  71  * \param x Value to set sign for
  72  * \param y Value used to set sign
  73  * \return  Magnitude of x, sign of y
  74  *
  75  * \note This function might be superficially meaningless, but it helps us to
  76  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  77  *       outside such code.
  78  */
  79 static inline float copysign(float x, float y)
  80 {
  81     return std::copysign(x, y);
  82 }
  83
  84 // invsqrt(x) is already defined in math/functions.h
  85
  86 /*! \brief Calculate 1/sqrt(x) for two floats.
  87  *
  88  * \param x0  First argument, x0 must be positive - no argument checking.
  89  * \param x1  Second argument, x1 must be positive - no argument checking.
  90  * \param[out] out0  Result 1/sqrt(x0)
  91  * \param[out] out1  Result 1/sqrt(x1)
  92  *
  93  * \note This function might be superficially meaningless, but it helps us to
  94  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  95  *       outside such code.
  96  */
  97 static inline void invsqrtPair(float x0, float x1, float* out0, float* out1)
  98 {
  99     *out0 = invsqrt(x0);
 100     *out1 = invsqrt(x1);
 101 }
 102
 103 /*! \brief Calculate 1/x for float.
 104  *
 105  *  \param x Argument that must be nonzero. This routine does not check arguments.
 106  *  \return 1/x. Result is undefined if your argument was invalid.
 107  *
 108  * \note This function might be superficially meaningless, but it helps us to
 109  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 110  *       outside such code.
 111  */
 112 static inline float inv(float x)
 113 {
 114     return 1.0F / x;
 115 }
 116
 117 /*! \brief Calculate 1/sqrt(x) for masked entry of float.
 118  *
 119  *  This routine only evaluates 1/sqrt(x) if mask is true.
 120  *  Illegal values for a masked-out float will not lead to
 121  *  floating-point exceptions.
 122  *
 123  *  \param x Argument that must be >0 if masked-in.
 124  *  \param m Mask
 125  *  \return 1/sqrt(x). Result is undefined if your argument was invalid or
 126  *          entry was not masked, and 0.0 for masked-out entries.
 127  *
 128  * \note This function might be superficially meaningless, but it helps us to
 129  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 130  *       outside such code.
 131  */
 132 static inline float maskzInvsqrt(float x, bool m)
 133 {
 134     return m ? invsqrt(x) : 0.0F;
 135 }
 136
 137 /*! \brief Calculate 1/x for masked entry of float.
 138  *
 139  *  This routine only evaluates 1/x if mask is true.
 140  *  Illegal values for a masked-out float will not lead to
 141  *  floating-point exceptions.
 142  *
 143  *  \param x Argument that must be nonzero if masked-in.
 144  *  \param m Mask
 145  *  \return 1/x. Result is undefined if your argument was invalid or
 146  *          entry was not masked, and 0.0 for masked-out entries.
 147  *
 148  * \note This function might be superficially meaningless, but it helps us to
 149  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 150  *       outside such code.
 151  */
 152 static inline float maskzInv(float x, bool m)
 153 {
 154     return m ? inv(x) : 0.0F;
 155 }
 156
 157 /*! \brief Float sqrt(x). This is the square root.
 158  *
 159  * \param x Argument, should be >= 0.
 160  * \result The square root of x. Undefined if argument is invalid.
 161  *
 162  * \note This function might be superficially meaningless, but it helps us to
 163  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 164  *       outside such code.
 165  */
 166 template<MathOptimization opt = MathOptimization::Safe>
 167 static inline float sqrt(float x)
 168 {
 169     return std::sqrt(x);
 170 }
 171
 172 /*! \brief Float cbrt(x). This is the cubic root.
 173  *
 174  * \param x Argument, should be >= 0.
 175  * \result The cubic root of x. Undefined if argument is invalid.
 176  *
 177  * \note This function might be superficially meaningless, but it helps us to
 178  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 179  *       outside such code.
 180  */
 181 template<MathOptimization opt = MathOptimization::Safe>
 182 static inline float cbrt(float x)
 183 {
 184     return std::cbrt(x);
 185 }
 186
 187 /*! \brief Float log(x). This is the natural logarithm.
 188  *
 189  * \param x Argument, should be >0.
 190  * \result The natural logarithm of x. Undefined if argument is invalid.
 191  *
 192  * \note This function might be superficially meaningless, but it helps us to
 193  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 194  *       outside such code.
 195  */
 196 static inline float log(float x)
 197 {
 198     return std::log(x);
 199 }
 200
 201 /*! \brief Float 2^x.
 202  *
 203  * \param x Argument.
 204  * \result 2^x. Undefined if input argument caused overflow.
 205  *
 206  * \note This function might be superficially meaningless, but it helps us to
 207  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 208  *       outside such code.
 209  */
 210 template<MathOptimization opt = MathOptimization::Safe>
 211 static inline float exp2(float x)
 212 {
 213     return std::exp2(x);
 214 }
 215
 216 /*! \brief Float exp(x).
 217  *
 218  * \param x Argument.
 219  * \result exp(x). Undefined if input argument caused overflow.
 220  *
 221  * \note This function might be superficially meaningless, but it helps us to
 222  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 223  *       outside such code.
 224  */
 225 template<MathOptimization opt = MathOptimization::Safe>
 226 static inline float exp(float x)
 227 {
 228     return std::exp(x);
 229 }
 230
 231 /*! \brief Float erf(x).
 232  *
 233  * \param x Argument.
 234  * \result erf(x)
 235  *
 236  * \note This function might be superficially meaningless, but it helps us to
 237  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 238  *       outside such code.
 239  */
 240 static inline float erf(float x)
 241 {
 242     return std::erf(x);
 243 }
 244
 245 /*! \brief Float erfc(x).
 246  *
 247  * \param x Argument.
 248  * \result erfc(x)
 249  *
 250  * \note This function might be superficially meaningless, but it helps us to
 251  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 252  *       outside such code.
 253  */
 254 static inline float erfc(float x)
 255 {
 256     return std::erfc(x);
 257 }
 258
 259
 260 /*! \brief Float sin \& cos.
 261  *
 262  * \param x The argument to evaluate sin/cos for
 263  * \param[out] sinval Sin(x)
 264  * \param[out] cosval Cos(x)
 265  *
 266  * \note This function might be superficially meaningless, but it helps us to
 267  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 268  *       outside such code.
 269  */
 270 static inline void sincos(float x, float* sinval, float* cosval)
 271 {
 272     *sinval = std::sin(x);
 273     *cosval = std::cos(x);
 274 }
 275
 276 /*! \brief Float sin.
 277  *
 278  * \param x The argument to evaluate sin for
 279  * \result Sin(x)
 280  *
 281  * \note This function might be superficially meaningless, but it helps us to
 282  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 283  *       outside such code.
 284  */
 285 static inline float sin(float x)
 286 {
 287     return std::sin(x);
 288 }
 289
 290 /*! \brief Float cos.
 291  *
 292  * \param x The argument to evaluate cos for
 293  * \result Cos(x)
 294  *
 295  * \note This function might be superficially meaningless, but it helps us to
 296  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 297  *       outside such code.
 298  */
 299 static inline float cos(float x)
 300 {
 301     return std::cos(x);
 302 }
 303
 304 /*! \brief Float tan.
 305  *
 306  * \param x The argument to evaluate tan for
 307  * \result Tan(x)
 308  *
 309  * \note This function might be superficially meaningless, but it helps us to
 310  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 311  *       outside such code.
 312  */
 313 static inline float tan(float x)
 314 {
 315     return std::tan(x);
 316 }
 317
 318 /*! \brief float asin.
 319  *
 320  * \param x The argument to evaluate asin for
 321  * \result Asin(x)
 322  *
 323  * \note This function might be superficially meaningless, but it helps us to
 324  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 325  *       outside such code.
 326  */
 327 static inline float asin(float x)
 328 {
 329     return std::asin(x);
 330 }
 331
 332 /*! \brief Float acos.
 333  *
 334  * \param x The argument to evaluate acos for
 335  * \result Acos(x)
 336  *
 337  * \note This function might be superficially meaningless, but it helps us to
 338  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 339  *       outside such code.
 340  */
 341 static inline float acos(float x)
 342 {
 343     return std::acos(x);
 344 }
 345
 346 /*! \brief Float atan.
 347  *
 348  * \param x The argument to evaluate atan for
 349  * \result Atan(x)
 350  *
 351  * \note This function might be superficially meaningless, but it helps us to
 352  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 353  *       outside such code.
 354  */
 355 static inline float atan(float x)
 356 {
 357     return std::atan(x);
 358 }
 359
 360 /*! \brief Float atan2(y,x).
 361  *
 362  * \param y Y component of vector, any quartile
 363  * \param x X component of vector, any quartile
 364  * \result Atan(y,x)
 365  *
 366  * \note This function might be superficially meaningless, but it helps us to
 367  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 368  *       outside such code.
 369  */
 370 static inline float atan2(float y, float x)
 371 {
 372     return std::atan2(y, x);
 373 }
 374
 375 /*! \brief Calculate the force correction due to PME analytically in float.
 376  *
 377  * See the SIMD version of this function for details.
 378  *
 379  * \param z2 input parameter
 380  * \returns Correction to use on force
 381  *
 382  * \note This function might be superficially meaningless, but it helps us to
 383  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 384  *       outside such code.
 385  */
 386 static inline float pmeForceCorrection(float z2)
 387 {
 388     const float FN6(-1.7357322914161492954e-8F);
 389     const float FN5(1.4703624142580877519e-6F);
 390     const float FN4(-0.000053401640219807709149F);
 391     const float FN3(0.0010054721316683106153F);
 392     const float FN2(-0.019278317264888380590F);
 393     const float FN1(0.069670166153766424023F);
 394     const float FN0(-0.75225204789749321333F);
 395
 396     const float FD4(0.0011193462567257629232F);
 397     const float FD3(0.014866955030185295499F);
 398     const float FD2(0.11583842382862377919F);
 399     const float FD1(0.50736591960530292870F);
 400     const float FD0(1.0F);
 401
 402     float z4;
 403     float polyFN0, polyFN1, polyFD0, polyFD1;
 404
 405     z4 = z2 * z2;
 406
 407     polyFD0 = fma(FD4, z4, FD2);
 408     polyFD1 = fma(FD3, z4, FD1);
 409     polyFD0 = fma(polyFD0, z4, FD0);
 410     polyFD0 = fma(polyFD1, z2, polyFD0);
 411
 412     polyFN0 = fma(FN6, z4, FN4);
 413     polyFN1 = fma(FN5, z4, FN3);
 414     polyFN0 = fma(polyFN0, z4, FN2);
 415     polyFN1 = fma(polyFN1, z4, FN1);
 416     polyFN0 = fma(polyFN0, z4, FN0);
 417     polyFN0 = fma(polyFN1, z2, polyFN0);
 418
 419     return polyFN0 / polyFD0;
 420 }
 421
 422 /*! \brief Calculate the potential correction due to PME analytically in float.
 423  *
 424  * See the SIMD version of this function for details.
 425  *
 426  * \param z2 input parameter
 427  * \returns Correction to use on potential.
 428  *
 429  * \note This function might be superficially meaningless, but it helps us to
 430  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 431  *       outside such code.
 432  */
 433 static inline float pmePotentialCorrection(float z2)
 434 {
 435     const float VN6(1.9296833005951166339e-8F);
 436     const float VN5(-1.4213390571557850962e-6F);
 437     const float VN4(0.000041603292906656984871F);
 438     const float VN3(-0.00013134036773265025626F);
 439     const float VN2(0.038657983986041781264F);
 440     const float VN1(0.11285044772717598220F);
 441     const float VN0(1.1283802385263030286F);
 442
 443     const float VD3(0.0066752224023576045451F);
 444     const float VD2(0.078647795836373922256F);
 445     const float VD1(0.43336185284710920150F);
 446     const float VD0(1.0F);
 447
 448     float z4;
 449     float polyVN0, polyVN1, polyVD0, polyVD1;
 450
 451     z4 = z2 * z2;
 452
 453     polyVD1 = fma(VD3, z4, VD1);
 454     polyVD0 = fma(VD2, z4, VD0);
 455     polyVD0 = fma(polyVD1, z2, polyVD0);
 456
 457     polyVN0 = fma(VN6, z4, VN4);
 458     polyVN1 = fma(VN5, z4, VN3);
 459     polyVN0 = fma(polyVN0, z4, VN2);
 460     polyVN1 = fma(polyVN1, z4, VN1);
 461     polyVN0 = fma(polyVN0, z4, VN0);
 462     polyVN0 = fma(polyVN1, z2, polyVN0);
 463
 464     return polyVN0 / polyVD0;
 465 }
 466
 467 /*****************************************************************************
 468  *   Double-precision floating point math functions mimicking SIMD versions  *
 469  *****************************************************************************/
 470
 471
 472 /*! \brief Composes double value with the magnitude of x and the sign of y.
 473  *
 474  * \param x Value to set sign for
 475  * \param y Value used to set sign
 476  * \return  Magnitude of x, sign of y
 477  *
 478  * \note This function might be superficially meaningless, but it helps us to
 479  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 480  *       outside such code.
 481  */
 482 static inline double copysign(double x, double y)
 483 {
 484     return std::copysign(x, y);
 485 }
 486
 487 // invsqrt(x) is already defined in math/functions.h
 488
 489 /*! \brief Calculate 1/sqrt(x) for two doubles.
 490  *
 491  * \param x0  First argument, x0 must be positive - no argument checking.
 492  * \param x1  Second argument, x1 must be positive - no argument checking.
 493  * \param[out] out0  Result 1/sqrt(x0)
 494  * \param[out] out1  Result 1/sqrt(x1)
 495  *
 496  * \note This function might be superficially meaningless, but it helps us to
 497  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 498  *       outside such code.
 499  */
 500 static inline void invsqrtPair(double x0, double x1, double* out0, double* out1)
 501 {
 502     *out0 = invsqrt(x0);
 503     *out1 = invsqrt(x1);
 504 }
 505
 506 /*! \brief Calculate 1/x for double.
 507  *
 508  *  \param x Argument that must be nonzero. This routine does not check arguments.
 509  *  \return 1/x. Result is undefined if your argument was invalid.
 510  *
 511  * \note This function might be superficially meaningless, but it helps us to
 512  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 513  *       outside such code.
 514  */
 515 static inline double inv(double x)
 516 {
 517     return 1.0 / x;
 518 }
 519
 520 /*! \brief Calculate 1/sqrt(x) for masked entry of double.
 521  *
 522  *  This routine only evaluates 1/sqrt(x) if mask is true.
 523  *  Illegal values for a masked-out double will not lead to
 524  *  floating-point exceptions.
 525  *
 526  *  \param x Argument that must be >0 if masked-in.
 527  *  \param m Mask
 528  *  \return 1/sqrt(x). Result is undefined if your argument was invalid or
 529  *          entry was not masked, and 0.0 for masked-out entries.
 530  *
 531  * \note This function might be superficially meaningless, but it helps us to
 532  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 533  *       outside such code.
 534  */
 535 static inline double maskzInvsqrt(double x, bool m)
 536 {
 537     return m ? invsqrt(x) : 0.0;
 538 }
 539
 540 /*! \brief Calculate 1/x for masked entry of double.
 541  *
 542  *  This routine only evaluates 1/x if mask is true.
 543  *  Illegal values for a masked-out double will not lead to
 544  *  floating-point exceptions.
 545  *
 546  *  \param x Argument that must be nonzero if masked-in.
 547  *  \param m Mask
 548  *  \return 1/x. Result is undefined if your argument was invalid or
 549  *          entry was not masked, and 0.0 for masked-out entries.
 550  *
 551  * \note This function might be superficially meaningless, but it helps us to
 552  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 553  *       outside such code.
 554  */
 555 static inline double maskzInv(double x, bool m)
 556 {
 557     return m ? inv(x) : 0.0;
 558 }
 559
 560 /*! \brief Double sqrt(x). This is the square root.
 561  *
 562  * \param x Argument, should be >= 0.
 563  * \result The square root of x. Undefined if argument is invalid.
 564  *
 565  * \note This function might be superficially meaningless, but it helps us to
 566  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 567  *       outside such code.
 568  */
 569 template<MathOptimization opt = MathOptimization::Safe>
 570 static inline double sqrt(double x)
 571 {
 572     return std::sqrt(x);
 573 }
 574
 575 /*! \brief Double cbrt(x). This is the cubic root.
 576  *
 577  * \param x Argument, should be >= 0.
 578  * \result The cubic root of x. Undefined if argument is invalid.
 579  *
 580  * \note This function might be superficially meaningless, but it helps us to
 581  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 582  *       outside such code.
 583  */
 584 template<MathOptimization opt = MathOptimization::Safe>
 585 static inline double cbrt(double x)
 586 {
 587     return std::cbrt(x);
 588 }
 589
 590 /*! \brief Double log(x). This is the natural logarithm.
 591  *
 592  * \param x Argument, should be >0.
 593  * \result The natural logarithm of x. Undefined if argument is invalid.
 594  *
 595  * \note This function might be superficially meaningless, but it helps us to
 596  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 597  *       outside such code.
 598  */
 599 static inline double log(double x)
 600 {
 601     return std::log(x);
 602 }
 603
 604 /*! \brief Double 2^x.
 605  *
 606  * \param x Argument.
 607  * \result 2^x. Undefined if input argument caused overflow.
 608  *
 609  * \note This function might be superficially meaningless, but it helps us to
 610  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 611  *       outside such code.
 612  */
 613 template<MathOptimization opt = MathOptimization::Safe>
 614 static inline double exp2(double x)
 615 {
 616     return std::exp2(x);
 617 }
 618
 619 /*! \brief Double exp(x).
 620  *
 621  * \param x Argument.
 622  * \result exp(x). Undefined if input argument caused overflow.
 623  *
 624  * \note This function might be superficially meaningless, but it helps us to
 625  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 626  *       outside such code.
 627  */
 628 template<MathOptimization opt = MathOptimization::Safe>
 629 static inline double exp(double x)
 630 {
 631     return std::exp(x);
 632 }
 633
 634 /*! \brief Double erf(x).
 635  *
 636  * \param x Argument.
 637  * \result erf(x)
 638  *
 639  * \note This function might be superficially meaningless, but it helps us to
 640  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 641  *       outside such code.
 642  */
 643 static inline double erf(double x)
 644 {
 645     return std::erf(x);
 646 }
 647
 648 /*! \brief Double erfc(x).
 649  *
 650  * \param x Argument.
 651  * \result erfc(x)
 652  *
 653  * \note This function might be superficially meaningless, but it helps us to
 654  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 655  *       outside such code.
 656  */
 657 static inline double erfc(double x)
 658 {
 659     return std::erfc(x);
 660 }
 661
 662
 663 /*! \brief Double sin \& cos.
 664  *
 665  * \param x The argument to evaluate sin/cos for
 666  * \param[out] sinval Sin(x)
 667  * \param[out] cosval Cos(x)
 668  *
 669  * \note This function might be superficially meaningless, but it helps us to
 670  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 671  *       outside such code.
 672  */
 673 static inline void sincos(double x, double* sinval, double* cosval)
 674 {
 675     *sinval = std::sin(x);
 676     *cosval = std::cos(x);
 677 }
 678
 679 /*! \brief Double sin.
 680  *
 681  * \param x The argument to evaluate sin for
 682  * \result Sin(x)
 683  *
 684  * \note This function might be superficially meaningless, but it helps us to
 685  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 686  *       outside such code.
 687  */
 688 static inline double sin(double x)
 689 {
 690     return std::sin(x);
 691 }
 692
 693 /*! \brief Double cos.
 694  *
 695  * \param x The argument to evaluate cos for
 696  * \result Cos(x)
 697  *
 698  * \note This function might be superficially meaningless, but it helps us to
 699  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 700  *       outside such code.
 701  */
 702 static inline double cos(double x)
 703 {
 704     return std::cos(x);
 705 }
 706
 707 /*! \brief Double tan.
 708  *
 709  * \param x The argument to evaluate tan for
 710  * \result Tan(x)
 711  *
 712  * \note This function might be superficially meaningless, but it helps us to
 713  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 714  *       outside such code.
 715  */
 716 static inline double tan(double x)
 717 {
 718     return std::tan(x);
 719 }
 720
 721 /*! \brief Double asin.
 722  *
 723  * \param x The argument to evaluate asin for
 724  * \result Asin(x)
 725  *
 726  * \note This function might be superficially meaningless, but it helps us to
 727  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 728  *       outside such code.
 729  */
 730 static inline double asin(double x)
 731 {
 732     return std::asin(x);
 733 }
 734
 735 /*! \brief Double acos.
 736  *
 737  * \param x The argument to evaluate acos for
 738  * \result Acos(x)
 739  *
 740  * \note This function might be superficially meaningless, but it helps us to
 741  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 742  *       outside such code.
 743  */
 744 static inline double acos(double x)
 745 {
 746     return std::acos(x);
 747 }
 748
 749 /*! \brief Double atan.
 750  *
 751  * \param x The argument to evaluate atan for
 752  * \result Atan(x)
 753  *
 754  * \note This function might be superficially meaningless, but it helps us to
 755  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 756  *       outside such code.
 757  */
 758 static inline double atan(double x)
 759 {
 760     return std::atan(x);
 761 }
 762
 763 /*! \brief Double atan2(y,x).
 764  *
 765  * \param y Y component of vector, any quartile
 766  * \param x X component of vector, any quartile
 767  * \result Atan(y,x)
 768  *
 769  * \note This function might be superficially meaningless, but it helps us to
 770  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 771  *       outside such code.
 772  */
 773 static inline double atan2(double y, double x)
 774 {
 775     return std::atan2(y, x);
 776 }
 777
 778 /*! \brief Calculate the force correction due to PME analytically in double.
 779  *
 780  * See the SIMD version of this function for details.
 781  *
 782  * \param z2 input parameter
 783  * \returns Correction to use on force
 784  *
 785  * \note This function might be superficially meaningless, but it helps us to
 786  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 787  *       outside such code.
 788  */
 789 static inline double pmeForceCorrection(double z2)
 790 {
 791     const double FN10(-8.0072854618360083154e-14);
 792     const double FN9(1.1859116242260148027e-11);
 793     const double FN8(-8.1490406329798423616e-10);
 794     const double FN7(3.4404793543907847655e-8);
 795     const double FN6(-9.9471420832602741006e-7);
 796     const double FN5(0.000020740315999115847456);
 797     const double FN4(-0.00031991745139313364005);
 798     const double FN3(0.0035074449373659008203);
 799     const double FN2(-0.031750380176100813405);
 800     const double FN1(0.13884101728898463426);
 801     const double FN0(-0.75225277815249618847);
 802
 803     const double FD5(0.000016009278224355026701);
 804     const double FD4(0.00051055686934806966046);
 805     const double FD3(0.0081803507497974289008);
 806     const double FD2(0.077181146026670287235);
 807     const double FD1(0.41543303143712535988);
 808     const double FD0(1.0);
 809
 810     double z4;
 811     double polyFN0, polyFN1, polyFD0, polyFD1;
 812
 813     z4 = z2 * z2;
 814
 815     polyFD1 = fma(FD5, z4, FD3);
 816     polyFD1 = fma(polyFD1, z4, FD1);
 817     polyFD1 = polyFD1 * z2;
 818     polyFD0 = fma(FD4, z4, FD2);
 819     polyFD0 = fma(polyFD0, z4, FD0);
 820     polyFD0 = polyFD0 + polyFD1;
 821
 822     polyFD0 = inv(polyFD0);
 823
 824     polyFN0 = fma(FN10, z4, FN8);
 825     polyFN0 = fma(polyFN0, z4, FN6);
 826     polyFN0 = fma(polyFN0, z4, FN4);
 827     polyFN0 = fma(polyFN0, z4, FN2);
 828     polyFN0 = fma(polyFN0, z4, FN0);
 829     polyFN1 = fma(FN9, z4, FN7);
 830     polyFN1 = fma(polyFN1, z4, FN5);
 831     polyFN1 = fma(polyFN1, z4, FN3);
 832     polyFN1 = fma(polyFN1, z4, FN1);
 833     polyFN0 = fma(polyFN1, z2, polyFN0);
 834
 835     return polyFN0 * polyFD0;
 836 }
 837
 838 /*! \brief Calculate the potential correction due to PME analytically in double.
 839  *
 840  * See the SIMD version of this function for details.
 841  *
 842  * \param z2 input parameter
 843  * \returns Correction to use on potential.
 844  *
 845  * \note This function might be superficially meaningless, but it helps us to
 846  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 847  *       outside such code.
 848  */
 849 static inline double pmePotentialCorrection(double z2)
 850 {
 851     const double VN9(-9.3723776169321855475e-13);
 852     const double VN8(1.2280156762674215741e-10);
 853     const double VN7(-7.3562157912251309487e-9);
 854     const double VN6(2.6215886208032517509e-7);
 855     const double VN5(-4.9532491651265819499e-6);
 856     const double VN4(0.00025907400778966060389);
 857     const double VN3(0.0010585044856156469792);
 858     const double VN2(0.045247661136833092885);
 859     const double VN1(0.11643931522926034421);
 860     const double VN0(1.1283791671726767970);
 861
 862     const double VD5(0.000021784709867336150342);
 863     const double VD4(0.00064293662010911388448);
 864     const double VD3(0.0096311444822588683504);
 865     const double VD2(0.085608012351550627051);
 866     const double VD1(0.43652499166614811084);
 867     const double VD0(1.0);
 868
 869     double z4;
 870     double polyVN0, polyVN1, polyVD0, polyVD1;
 871
 872     z4 = z2 * z2;
 873
 874     polyVD1 = fma(VD5, z4, VD3);
 875     polyVD0 = fma(VD4, z4, VD2);
 876     polyVD1 = fma(polyVD1, z4, VD1);
 877     polyVD0 = fma(polyVD0, z4, VD0);
 878     polyVD0 = fma(polyVD1, z2, polyVD0);
 879
 880     polyVD0 = inv(polyVD0);
 881
 882     polyVN1 = fma(VN9, z4, VN7);
 883     polyVN0 = fma(VN8, z4, VN6);
 884     polyVN1 = fma(polyVN1, z4, VN5);
 885     polyVN0 = fma(polyVN0, z4, VN4);
 886     polyVN1 = fma(polyVN1, z4, VN3);
 887     polyVN0 = fma(polyVN0, z4, VN2);
 888     polyVN1 = fma(polyVN1, z4, VN1);
 889     polyVN0 = fma(polyVN0, z4, VN0);
 890     polyVN0 = fma(polyVN1, z2, polyVN0);
 891
 892     return polyVN0 * polyVD0;
 893 }
 894
 895
 896 /*****************************************************************************
 897  *           Floating point math functions mimicking SIMD versions.          *
 898  *              Double precision data, single precision accuracy.            *
 899  *****************************************************************************/
 900
 901
 902 /*! \brief Calculate 1/sqrt(x) for double, but with single accuracy.
 903  *
 904  *  \param x Argument that must be >0. This routine does not check arguments.
 905  *  \return 1/sqrt(x). Result is undefined if your argument was invalid.
 906  *
 907  * \note This function might be superficially meaningless, but it helps us to
 908  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 909  *       outside such code.
 910  */
 911 static inline double invsqrtSingleAccuracy(double x)
 912 {
 913     return invsqrt(static_cast<float>(x));
 914 }
 915
 916 /*! \brief Calculate 1/sqrt(x) for two doubles, but with single accuracy.
 917  *
 918  * \param x0  First argument, x0 must be positive - no argument checking.
 919  * \param x1  Second argument, x1 must be positive - no argument checking.
 920  * \param[out] out0  Result 1/sqrt(x0)
 921  * \param[out] out1  Result 1/sqrt(x1)
 922  *
 923  * \note This function might be superficially meaningless, but it helps us to
 924  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 925  *       outside such code.
 926  */
 927 static inline void invsqrtPairSingleAccuracy(double x0, double x1, double* out0, double* out1)
 928 {
 929     *out0 = invsqrt(static_cast<float>(x0));
 930     *out1 = invsqrt(static_cast<float>(x1));
 931 }
 932
 933 /*! \brief Calculate 1/x for double, but with single accuracy.
 934  *
 935  *  \param x Argument that must be nonzero. This routine does not check arguments.
 936  *  \return 1/x. Result is undefined if your argument was invalid.
 937  *
 938  * \note This function might be superficially meaningless, but it helps us to
 939  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 940  *       outside such code.
 941  */
 942 static inline double invSingleAccuracy(double x)
 943 {
 944     return 1.0F / x;
 945 }
 946
 947 /*! \brief Calculate 1/sqrt(x) for masked entry of double, but with single accuracy.
 948  *
 949  *  This routine only evaluates 1/sqrt(x) if mask is true.
 950  *  Illegal values for a masked-out double will not lead to
 951  *  floating-point exceptions.
 952  *
 953  *  \param x Argument that must be >0 if masked-in.
 954  *  \param m Mask
 955  *  \return 1/sqrt(x). Result is undefined if your argument was invalid or
 956  *          entry was not masked, and 0.0 for masked-out entries.
 957  *
 958  * \note This function might be superficially meaningless, but it helps us to
 959  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 960  *       outside such code.
 961  */
 962 static inline double maskzInvsqrtSingleAccuracy(double x, bool m)
 963 {
 964     return m ? invsqrtSingleAccuracy(x) : 0.0;
 965 }
 966
 967 /*! \brief Calculate 1/x for masked entry of double, but with single accuracy.
 968  *
 969  *  This routine only evaluates 1/x if mask is true.
 970  *  Illegal values for a masked-out double will not lead to
 971  *  floating-point exceptions.
 972  *
 973  *  \param x Argument that must be nonzero if masked-in.
 974  *  \param m Mask
 975  *  \return 1/x. Result is undefined if your argument was invalid or
 976  *          entry was not masked, and 0.0 for masked-out entries.
 977  *
 978  * \note This function might be superficially meaningless, but it helps us to
 979  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 980  *       outside such code.
 981  */
 982 static inline double maskzInvSingleAccuracy(double x, bool m)
 983 {
 984     return m ? invSingleAccuracy(x) : 0.0;
 985 }
 986
 987 /*! \brief Calculate sqrt(x) for double, but with single accuracy.
 988  *
 989  *  \param x Argument that must be >=0.
 990  *  \return sqrt(x).
 991  *
 992  * \note This function might be superficially meaningless, but it helps us to
 993  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 994  *       outside such code.
 995  */
 996 static inline double sqrtSingleAccuracy(double x)
 997 {
 998     return std::sqrt(static_cast<float>(x));
 999 }
1000
1001 /*! \brief Double log(x), but with single accuracy. This is the natural logarithm.
1002  *
1003  * \param x Argument, should be >0.
1004  * \result The natural logarithm of x. Undefined if argument is invalid.
1005  *
1006  * \note This function might be superficially meaningless, but it helps us to
1007  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1008  *       outside such code.
1009  */
1010 static inline double logSingleAccuracy(double x)
1011 {
1012     return std::log(static_cast<float>(x));
1013 }
1014
1015 /*! \brief Double 2^x, but with single accuracy.
1016  *
1017  * \param x Argument.
1018  * \result 2^x. Undefined if input argument caused overflow.
1019  *
1020  * \note This function might be superficially meaningless, but it helps us to
1021  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1022  *       outside such code.
1023  */
1024 static inline double exp2SingleAccuracy(double x)
1025 {
1026     return std::exp2(static_cast<float>(x));
1027 }
1028
1029 /*! \brief Double exp(x), but with single accuracy.
1030  *
1031  * \param x Argument.
1032  * \result exp(x). Undefined if input argument caused overflow.
1033  *
1034  * \note This function might be superficially meaningless, but it helps us to
1035  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1036  *       outside such code.
1037  */
1038 static inline double expSingleAccuracy(double x)
1039 {
1040     return std::exp(static_cast<float>(x));
1041 }
1042
1043 /*! \brief Double erf(x), but with single accuracy.
1044  *
1045  * \param x Argument.
1046  * \result erf(x)
1047  *
1048  * \note This function might be superficially meaningless, but it helps us to
1049  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1050  *       outside such code.
1051  */
1052 static inline double erfSingleAccuracy(double x)
1053 {
1054     return std::erf(static_cast<float>(x));
1055 }
1056
1057 /*! \brief Double erfc(x), but with single accuracy.
1058  *
1059  * \param x Argument.
1060  * \result erfc(x)
1061  *
1062  * \note This function might be superficially meaningless, but it helps us to
1063  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1064  *       outside such code.
1065  */
1066 static inline double erfcSingleAccuracy(double x)
1067 {
1068     return std::erfc(static_cast<float>(x));
1069 }
1070
1071
1072 /*! \brief Double sin \& cos, but with single accuracy.
1073  *
1074  * \param x The argument to evaluate sin/cos for
1075  * \param[out] sinval Sin(x)
1076  * \param[out] cosval Cos(x)
1077  *
1078  * \note This function might be superficially meaningless, but it helps us to
1079  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1080  *       outside such code.
1081  */
1082 static inline void sincosSingleAccuracy(double x, double* sinval, double* cosval)
1083 {
1084     // There is no single-precision sincos guaranteed in C++11, so use
1085     // separate functions and hope the compiler optimizes it for us.
1086     *sinval = std::sin(static_cast<float>(x));
1087     *cosval = std::cos(static_cast<float>(x));
1088 }
1089
1090 /*! \brief Double sin, but with single accuracy.
1091  *
1092  * \param x The argument to evaluate sin for
1093  * \result Sin(x)
1094  *
1095  * \note This function might be superficially meaningless, but it helps us to
1096  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1097  *       outside such code.
1098  */
1099 static inline double sinSingleAccuracy(double x)
1100 {
1101     return std::sin(static_cast<float>(x));
1102 }
1103
1104 /*! \brief Double cos, but with single accuracy.
1105  *
1106  * \param x The argument to evaluate cos for
1107  * \result Cos(x)
1108  *
1109  * \note This function might be superficially meaningless, but it helps us to
1110  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1111  *       outside such code.
1112  */
1113 static inline double cosSingleAccuracy(double x)
1114 {
1115     return std::cos(static_cast<float>(x));
1116 }
1117
1118 /*! \brief Double tan, but with single accuracy.
1119  *
1120  * \param x The argument to evaluate tan for
1121  * \result Tan(x)
1122  *
1123  * \note This function might be superficially meaningless, but it helps us to
1124  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1125  *       outside such code.
1126  */
1127 static inline double tanSingleAccuracy(double x)
1128 {
1129     return std::tan(static_cast<float>(x));
1130 }
1131
1132 /*! \brief Double asin, but with single accuracy.
1133  *
1134  * \param x The argument to evaluate asin for
1135  * \result Asin(x)
1136  *
1137  * \note This function might be superficially meaningless, but it helps us to
1138  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1139  *       outside such code.
1140  */
1141 static inline double asinSingleAccuracy(double x)
1142 {
1143     return std::asin(static_cast<float>(x));
1144 }
1145
1146 /*! \brief Double acos, but with single accuracy.
1147  *
1148  * \param x The argument to evaluate acos for
1149  * \result Acos(x)
1150  *
1151  * \note This function might be superficially meaningless, but it helps us to
1152  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1153  *       outside such code.
1154  */
1155 static inline double acosSingleAccuracy(double x)
1156 {
1157     return std::acos(static_cast<float>(x));
1158 }
1159
1160 /*! \brief Double atan, but with single accuracy.
1161  *
1162  * \param x The argument to evaluate atan for
1163  * \result Atan(x)
1164  *
1165  * \note This function might be superficially meaningless, but it helps us to
1166  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1167  *       outside such code.
1168  */
1169 static inline double atanSingleAccuracy(double x)
1170 {
1171     return std::atan(static_cast<float>(x));
1172 }
1173
1174 /*! \brief Double atan2(y,x), but with single accuracy.
1175  *
1176  * \param y Y component of vector, any quartile
1177  * \param x X component of vector, any quartile
1178  * \result Atan(y,x)
1179  *
1180  * \note This function might be superficially meaningless, but it helps us to
1181  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1182  *       outside such code.
1183  */
1184 static inline double atan2SingleAccuracy(double y, double x)
1185 {
1186     return std::atan2(static_cast<float>(y), static_cast<float>(x));
1187 }
1188
1189 /*! \brief Force correction due to PME in double, but with single accuracy.
1190  *
1191  * See the SIMD version of this function for details.
1192  *
1193  * \param z2 input parameter
1194  * \returns Correction to use on force
1195  *
1196  * \note This function might be superficially meaningless, but it helps us to
1197  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1198  *       outside such code.
1199  */
1200 static inline double pmeForceCorrectionSingleAccuracy(double z2)
1201 {
1202     const float FN6(-1.7357322914161492954e-8F);
1203     const float FN5(1.4703624142580877519e-6F);
1204     const float FN4(-0.000053401640219807709149F);
1205     const float FN3(0.0010054721316683106153F);
1206     const float FN2(-0.019278317264888380590F);
1207     const float FN1(0.069670166153766424023F);
1208     const float FN0(-0.75225204789749321333F);
1209
1210     const float FD4(0.0011193462567257629232F);
1211     const float FD3(0.014866955030185295499F);
1212     const float FD2(0.11583842382862377919F);
1213     const float FD1(0.50736591960530292870F);
1214     const float FD0(1.0F);
1215
1216     float z4;
1217     float polyFN0, polyFN1, polyFD0, polyFD1;
1218
1219     float z2f = z2;
1220
1221     z4 = z2f * z2f;
1222
1223     polyFD0 = fma(FD4, z4, FD2);
1224     polyFD1 = fma(FD3, z4, FD1);
1225     polyFD0 = fma(polyFD0, z4, FD0);
1226     polyFD0 = fma(polyFD1, z2f, polyFD0);
1227
1228     polyFN0 = fma(FN6, z4, FN4);
1229     polyFN1 = fma(FN5, z4, FN3);
1230     polyFN0 = fma(polyFN0, z4, FN2);
1231     polyFN1 = fma(polyFN1, z4, FN1);
1232     polyFN0 = fma(polyFN0, z4, FN0);
1233     polyFN0 = fma(polyFN1, z2f, polyFN0);
1234
1235     return polyFN0 / polyFD0;
1236 }
1237
1238 /*! \brief Potential correction due to PME in double, but with single accuracy.
1239  *
1240  * See the SIMD version of this function for details.
1241  *
1242  * \param z2 input parameter
1243  * \returns Correction to use on potential.
1244  *
1245  * \note This function might be superficially meaningless, but it helps us to
1246  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1247  *       outside such code.
1248  */
1249 static inline double pmePotentialCorrectionSingleAccuracy(double z2)
1250 {
1251     const float VN6(1.9296833005951166339e-8F);
1252     const float VN5(-1.4213390571557850962e-6F);
1253     const float VN4(0.000041603292906656984871F);
1254     const float VN3(-0.00013134036773265025626F);
1255     const float VN2(0.038657983986041781264F);
1256     const float VN1(0.11285044772717598220F);
1257     const float VN0(1.1283802385263030286F);
1258
1259     const float VD3(0.0066752224023576045451F);
1260     const float VD2(0.078647795836373922256F);
1261     const float VD1(0.43336185284710920150F);
1262     const float VD0(1.0F);
1263
1264     float z4;
1265     float polyVN0, polyVN1, polyVD0, polyVD1;
1266
1267     float z2f = z2;
1268
1269     z4 = z2f * z2f;
1270
1271     polyVD1 = fma(VD3, z4, VD1);
1272     polyVD0 = fma(VD2, z4, VD0);
1273     polyVD0 = fma(polyVD1, z2f, polyVD0);
1274
1275     polyVN0 = fma(VN6, z4, VN4);
1276     polyVN1 = fma(VN5, z4, VN3);
1277     polyVN0 = fma(polyVN0, z4, VN2);
1278     polyVN1 = fma(polyVN1, z4, VN1);
1279     polyVN0 = fma(polyVN0, z4, VN0);
1280     polyVN0 = fma(polyVN1, z2f, polyVN0);
1281
1282     return polyVN0 / polyVD0;
1283 }
1284
1285
1286 } // namespace gmx
1287
1288
1289 #endif // GMX_SIMD_SCALAR_MATH_H