src/gromacs/simd/scalar/scalar_math.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #ifndef GMX_SIMD_SCALAR_MATH_H
  36 #define GMX_SIMD_SCALAR_MATH_H
  37
  38 #include <cmath>
  39
  40 #include "gromacs/math/functions.h"
  41 #include "gromacs/math/utilities.h"
  42 #include "gromacs/simd/scalar/scalar.h"
  43
  44 /*! \libinternal \file
  45  *
  46  * \brief Scalar math functions mimicking GROMACS SIMD math functions
  47  *
  48  * These versions make it possible to write functions that are templated with
  49  * either a SIMD or scalar type. While some of these functions might not appear
  50  * SIMD-specific, we have placed them here because the only reason to use these
  51  * instead of generic function is in templated combined SIMD/non-SIMD code.
  52  * It is important that these functions match the SIMD versions exactly in their
  53  * arguments and template arguments so that overload resolution works correctly.
  54  *
  55  * \author Erik Lindahl <erik.lindahl@gmail.com>
  56  *
  57  * \inlibraryapi
  58  * \ingroup module_simd
  59  */
  60
  61 namespace gmx
  62 {
  63
  64 /*****************************************************************************
  65  *   Single-precision floating point math functions mimicking SIMD versions  *
  66  *****************************************************************************/
  67
  68
  69 /*! \brief Composes single value with the magnitude of x and the sign of y.
  70  *
  71  * \param x Value to set sign for
  72  * \param y Value used to set sign
  73  * \return  Magnitude of x, sign of y
  74  *
  75  * \note This function might be superficially meaningless, but it helps us to
  76  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  77  *       outside such code.
  78  */
  79 static inline float
  80 copysign(float x, float y)
  81 {
  82     return std::copysign(x, y);
  83 }
  84
  85 // invsqrt(x) is already defined in math/functions.h
  86
  87 /*! \brief Calculate 1/sqrt(x) for two floats.
  88  *
  89  * \param x0  First argument, x0 must be positive - no argument checking.
  90  * \param x1  Second argument, x1 must be positive - no argument checking.
  91  * \param[out] out0  Result 1/sqrt(x0)
  92  * \param[out] out1  Result 1/sqrt(x1)
  93  *
  94  * \note This function might be superficially meaningless, but it helps us to
  95  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  96  *       outside such code.
  97  */
  98 static inline void
  99 invsqrtPair(float x0,    float x1,
 100             float *out0, float *out1)
 101 {
 102     *out0 = invsqrt(x0);
 103     *out1 = invsqrt(x1);
 104 }
 105
 106 /*! \brief Calculate 1/x for float.
 107  *
 108  *  \param x Argument that must be nonzero. This routine does not check arguments.
 109  *  \return 1/x. Result is undefined if your argument was invalid.
 110  *
 111  * \note This function might be superficially meaningless, but it helps us to
 112  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 113  *       outside such code.
 114  */
 115 static inline float
 116 inv(float x)
 117 {
 118     return 1.0f/x;
 119 }
 120
 121 /*! \brief Calculate 1/sqrt(x) for masked entry of float.
 122  *
 123  *  This routine only evaluates 1/sqrt(x) if mask is true.
 124  *  Illegal values for a masked-out float will not lead to
 125  *  floating-point exceptions.
 126  *
 127  *  \param x Argument that must be >0 if masked-in.
 128  *  \param m Mask
 129  *  \return 1/sqrt(x). Result is undefined if your argument was invalid or
 130  *          entry was not masked, and 0.0 for masked-out entries.
 131  *
 132  * \note This function might be superficially meaningless, but it helps us to
 133  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 134  *       outside such code.
 135  */
 136 static inline float
 137 maskzInvsqrt(float x, bool m)
 138 {
 139     return m ? invsqrt(x) : 0.0f;
 140 }
 141
 142 /*! \brief Calculate 1/x for masked entry of float.
 143  *
 144  *  This routine only evaluates 1/x if mask is true.
 145  *  Illegal values for a masked-out float will not lead to
 146  *  floating-point exceptions.
 147  *
 148  *  \param x Argument that must be nonzero if masked-in.
 149  *  \param m Mask
 150  *  \return 1/x. Result is undefined if your argument was invalid or
 151  *          entry was not masked, and 0.0 for masked-out entries.
 152  *
 153  * \note This function might be superficially meaningless, but it helps us to
 154  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 155  *       outside such code.
 156  */
 157 static inline float
 158 maskzInv(float x, bool m)
 159 {
 160     return m ? inv(x) : 0.0f;
 161 }
 162
 163 /*! \brief Float sqrt(x). This is the square root.
 164  *
 165  * \param x Argument, should be >= 0.
 166  * \result The square root of x. Undefined if argument is invalid.
 167  *
 168  * \note This function might be superficially meaningless, but it helps us to
 169  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 170  *       outside such code.
 171  */
 172 template <MathOptimization opt = MathOptimization::Safe>
 173 static inline float
 174 sqrt(float x)
 175 {
 176     return std::sqrt(x);
 177 }
 178
 179 /*! \brief Float log(x). This is the natural logarithm.
 180  *
 181  * \param x Argument, should be >0.
 182  * \result The natural logarithm of x. Undefined if argument is invalid.
 183  *
 184  * \note This function might be superficially meaningless, but it helps us to
 185  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 186  *       outside such code.
 187  */
 188 static inline float
 189 log(float x)
 190 {
 191     return std::log(x);
 192 }
 193
 194 /*! \brief Float 2^x.
 195  *
 196  * \param x Argument.
 197  * \result 2^x. Undefined if input argument caused overflow.
 198  *
 199  * \note This function might be superficially meaningless, but it helps us to
 200  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 201  *       outside such code.
 202  */
 203 template <MathOptimization opt = MathOptimization::Safe>
 204 static inline float
 205 exp2(float x)
 206 {
 207     return std::exp2(x);
 208 }
 209
 210 /*! \brief Float exp(x).
 211  *
 212  * \param x Argument.
 213  * \result exp(x). Undefined if input argument caused overflow.
 214  *
 215  * \note This function might be superficially meaningless, but it helps us to
 216  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 217  *       outside such code.
 218  */
 219 template <MathOptimization opt = MathOptimization::Safe>
 220 static inline float
 221 exp(float x)
 222 {
 223     return std::exp(x);
 224 }
 225
 226 /*! \brief Float erf(x).
 227  *
 228  * \param x Argument.
 229  * \result erf(x)
 230  *
 231  * \note This function might be superficially meaningless, but it helps us to
 232  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 233  *       outside such code.
 234  */
 235 static inline float
 236 erf(float x)
 237 {
 238     return std::erf(x);
 239 }
 240
 241 /*! \brief Float erfc(x).
 242  *
 243  * \param x Argument.
 244  * \result erfc(x)
 245  *
 246  * \note This function might be superficially meaningless, but it helps us to
 247  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 248  *       outside such code.
 249  */
 250 static inline float
 251 erfc(float x)
 252 {
 253     return std::erfc(x);
 254 }
 255
 256
 257 /*! \brief Float sin \& cos.
 258  *
 259  * \param x The argument to evaluate sin/cos for
 260  * \param[out] sinval Sin(x)
 261  * \param[out] cosval Cos(x)
 262  *
 263  * \note This function might be superficially meaningless, but it helps us to
 264  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 265  *       outside such code.
 266  */
 267 static inline void
 268 sincos(float x, float *sinval, float *cosval)
 269 {
 270     *sinval = std::sin(x);
 271     *cosval = std::cos(x);
 272 }
 273
 274 /*! \brief Float sin.
 275  *
 276  * \param x The argument to evaluate sin for
 277  * \result Sin(x)
 278  *
 279  * \note This function might be superficially meaningless, but it helps us to
 280  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 281  *       outside such code.
 282  */
 283 static inline float
 284 sin(float x)
 285 {
 286     return std::sin(x);
 287 }
 288
 289 /*! \brief Float cos.
 290  *
 291  * \param x The argument to evaluate cos for
 292  * \result Cos(x)
 293  *
 294  * \note This function might be superficially meaningless, but it helps us to
 295  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 296  *       outside such code.
 297  */
 298 static inline float
 299 cos(float x)
 300 {
 301     return std::cos(x);
 302 }
 303
 304 /*! \brief Float tan.
 305  *
 306  * \param x The argument to evaluate tan for
 307  * \result Tan(x)
 308  *
 309  * \note This function might be superficially meaningless, but it helps us to
 310  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 311  *       outside such code.
 312  */
 313 static inline float
 314 tan(float x)
 315 {
 316     return std::tan(x);
 317 }
 318
 319 /*! \brief float asin.
 320  *
 321  * \param x The argument to evaluate asin for
 322  * \result Asin(x)
 323  *
 324  * \note This function might be superficially meaningless, but it helps us to
 325  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 326  *       outside such code.
 327  */
 328 static inline float
 329 asin(float x)
 330 {
 331     return std::asin(x);
 332 }
 333
 334 /*! \brief Float acos.
 335  *
 336  * \param x The argument to evaluate acos for
 337  * \result Acos(x)
 338  *
 339  * \note This function might be superficially meaningless, but it helps us to
 340  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 341  *       outside such code.
 342  */
 343 static inline float
 344 acos(float x)
 345 {
 346     return std::acos(x);
 347 }
 348
 349 /*! \brief Float atan.
 350  *
 351  * \param x The argument to evaluate atan for
 352  * \result Atan(x)
 353  *
 354  * \note This function might be superficially meaningless, but it helps us to
 355  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 356  *       outside such code.
 357  */
 358 static inline float
 359 atan(float x)
 360 {
 361     return std::atan(x);
 362 }
 363
 364 /*! \brief Float atan2(y,x).
 365  *
 366  * \param y Y component of vector, any quartile
 367  * \param x X component of vector, any quartile
 368  * \result Atan(y,x)
 369  *
 370  * \note This function might be superficially meaningless, but it helps us to
 371  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 372  *       outside such code.
 373  */
 374 static inline float
 375 atan2(float y, float x)
 376 {
 377     return std::atan2(y, x);
 378 }
 379
 380 /*! \brief Calculate the force correction due to PME analytically in float.
 381  *
 382  * See the SIMD version of this function for details.
 383  *
 384  * \param z2 input parameter
 385  * \returns Correction to use on force
 386  *
 387  * \note This function might be superficially meaningless, but it helps us to
 388  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 389  *       outside such code.
 390  */
 391 static inline float
 392 pmeForceCorrection(float z2)
 393 {
 394     const float  FN6(-1.7357322914161492954e-8f);
 395     const float  FN5(1.4703624142580877519e-6f);
 396     const float  FN4(-0.000053401640219807709149f);
 397     const float  FN3(0.0010054721316683106153f);
 398     const float  FN2(-0.019278317264888380590f);
 399     const float  FN1(0.069670166153766424023f);
 400     const float  FN0(-0.75225204789749321333f);
 401
 402     const float  FD4(0.0011193462567257629232f);
 403     const float  FD3(0.014866955030185295499f);
 404     const float  FD2(0.11583842382862377919f);
 405     const float  FD1(0.50736591960530292870f);
 406     const float  FD0(1.0f);
 407
 408     float        z4;
 409     float        polyFN0, polyFN1, polyFD0, polyFD1;
 410
 411     z4             = z2 * z2;
 412
 413     polyFD0        = fma(FD4, z4, FD2);
 414     polyFD1        = fma(FD3, z4, FD1);
 415     polyFD0        = fma(polyFD0, z4, FD0);
 416     polyFD0        = fma(polyFD1, z2, polyFD0);
 417
 418     polyFN0        = fma(FN6, z4, FN4);
 419     polyFN1        = fma(FN5, z4, FN3);
 420     polyFN0        = fma(polyFN0, z4, FN2);
 421     polyFN1        = fma(polyFN1, z4, FN1);
 422     polyFN0        = fma(polyFN0, z4, FN0);
 423     polyFN0        = fma(polyFN1, z2, polyFN0);
 424
 425     return polyFN0 / polyFD0;
 426 }
 427
 428 /*! \brief Calculate the potential correction due to PME analytically in float.
 429  *
 430  * See the SIMD version of this function for details.
 431  *
 432  * \param z2 input parameter
 433  * \returns Correction to use on potential.
 434  *
 435  * \note This function might be superficially meaningless, but it helps us to
 436  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 437  *       outside such code.
 438  */
 439 static inline float
 440 pmePotentialCorrection(float z2)
 441 {
 442     const float  VN6(1.9296833005951166339e-8f);
 443     const float  VN5(-1.4213390571557850962e-6f);
 444     const float  VN4(0.000041603292906656984871f);
 445     const float  VN3(-0.00013134036773265025626f);
 446     const float  VN2(0.038657983986041781264f);
 447     const float  VN1(0.11285044772717598220f);
 448     const float  VN0(1.1283802385263030286f);
 449
 450     const float  VD3(0.0066752224023576045451f);
 451     const float  VD2(0.078647795836373922256f);
 452     const float  VD1(0.43336185284710920150f);
 453     const float  VD0(1.0f);
 454
 455     float        z4;
 456     float        polyVN0, polyVN1, polyVD0, polyVD1;
 457
 458     z4             = z2 * z2;
 459
 460     polyVD1        = fma(VD3, z4, VD1);
 461     polyVD0        = fma(VD2, z4, VD0);
 462     polyVD0        = fma(polyVD1, z2, polyVD0);
 463
 464     polyVN0        = fma(VN6, z4, VN4);
 465     polyVN1        = fma(VN5, z4, VN3);
 466     polyVN0        = fma(polyVN0, z4, VN2);
 467     polyVN1        = fma(polyVN1, z4, VN1);
 468     polyVN0        = fma(polyVN0, z4, VN0);
 469     polyVN0        = fma(polyVN1, z2, polyVN0);
 470
 471     return polyVN0 / polyVD0;
 472 }
 473
 474 /*****************************************************************************
 475  *   Double-precision floating point math functions mimicking SIMD versions  *
 476  *****************************************************************************/
 477
 478
 479 /*! \brief Composes double value with the magnitude of x and the sign of y.
 480  *
 481  * \param x Value to set sign for
 482  * \param y Value used to set sign
 483  * \return  Magnitude of x, sign of y
 484  *
 485  * \note This function might be superficially meaningless, but it helps us to
 486  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 487  *       outside such code.
 488  */
 489 static inline double
 490 copysign(double x, double y)
 491 {
 492     return std::copysign(x, y);
 493 }
 494
 495 // invsqrt(x) is already defined in math/functions.h
 496
 497 /*! \brief Calculate 1/sqrt(x) for two doubles.
 498  *
 499  * \param x0  First argument, x0 must be positive - no argument checking.
 500  * \param x1  Second argument, x1 must be positive - no argument checking.
 501  * \param[out] out0  Result 1/sqrt(x0)
 502  * \param[out] out1  Result 1/sqrt(x1)
 503  *
 504  * \note This function might be superficially meaningless, but it helps us to
 505  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 506  *       outside such code.
 507  */
 508 static inline void
 509 invsqrtPair(double x0,    double x1,
 510             double *out0, double *out1)
 511 {
 512     *out0 = invsqrt(x0);
 513     *out1 = invsqrt(x1);
 514 }
 515
 516 /*! \brief Calculate 1/x for double.
 517  *
 518  *  \param x Argument that must be nonzero. This routine does not check arguments.
 519  *  \return 1/x. Result is undefined if your argument was invalid.
 520  *
 521  * \note This function might be superficially meaningless, but it helps us to
 522  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 523  *       outside such code.
 524  */
 525 static inline double
 526 inv(double x)
 527 {
 528     return 1.0/x;
 529 }
 530
 531 /*! \brief Calculate 1/sqrt(x) for masked entry of double.
 532  *
 533  *  This routine only evaluates 1/sqrt(x) if mask is true.
 534  *  Illegal values for a masked-out double will not lead to
 535  *  floating-point exceptions.
 536  *
 537  *  \param x Argument that must be >0 if masked-in.
 538  *  \param m Mask
 539  *  \return 1/sqrt(x). Result is undefined if your argument was invalid or
 540  *          entry was not masked, and 0.0 for masked-out entries.
 541  *
 542  * \note This function might be superficially meaningless, but it helps us to
 543  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 544  *       outside such code.
 545  */
 546 static inline double
 547 maskzInvsqrt(double x, bool m)
 548 {
 549     return m ? invsqrt(x) : 0.0;
 550 }
 551
 552 /*! \brief Calculate 1/x for masked entry of double.
 553  *
 554  *  This routine only evaluates 1/x if mask is true.
 555  *  Illegal values for a masked-out double will not lead to
 556  *  floating-point exceptions.
 557  *
 558  *  \param x Argument that must be nonzero if masked-in.
 559  *  \param m Mask
 560  *  \return 1/x. Result is undefined if your argument was invalid or
 561  *          entry was not masked, and 0.0 for masked-out entries.
 562  *
 563  * \note This function might be superficially meaningless, but it helps us to
 564  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 565  *       outside such code.
 566  */
 567 static inline double
 568 maskzInv(double x, bool m)
 569 {
 570     return m ? inv(x) : 0.0;
 571 }
 572
 573 /*! \brief Double sqrt(x). This is the square root.
 574  *
 575  * \param x Argument, should be >= 0.
 576  * \result The square root of x. Undefined if argument is invalid.
 577  *
 578  * \note This function might be superficially meaningless, but it helps us to
 579  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 580  *       outside such code.
 581  */
 582 template <MathOptimization opt = MathOptimization::Safe>
 583 static inline double
 584 sqrt(double x)
 585 {
 586     return std::sqrt(x);
 587 }
 588
 589 /*! \brief Double log(x). This is the natural logarithm.
 590  *
 591  * \param x Argument, should be >0.
 592  * \result The natural logarithm of x. Undefined if argument is invalid.
 593  *
 594  * \note This function might be superficially meaningless, but it helps us to
 595  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 596  *       outside such code.
 597  */
 598 static inline double
 599 log(double x)
 600 {
 601     return std::log(x);
 602 }
 603
 604 /*! \brief Double 2^x.
 605  *
 606  * \param x Argument.
 607  * \result 2^x. Undefined if input argument caused overflow.
 608  *
 609  * \note This function might be superficially meaningless, but it helps us to
 610  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 611  *       outside such code.
 612  */
 613 template <MathOptimization opt = MathOptimization::Safe>
 614 static inline double
 615 exp2(double x)
 616 {
 617     return std::exp2(x);
 618 }
 619
 620 /*! \brief Double exp(x).
 621  *
 622  * \param x Argument.
 623  * \result exp(x). Undefined if input argument caused overflow.
 624  *
 625  * \note This function might be superficially meaningless, but it helps us to
 626  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 627  *       outside such code.
 628  */
 629 template <MathOptimization opt = MathOptimization::Safe>
 630 static inline double
 631 exp(double x)
 632 {
 633     return std::exp(x);
 634 }
 635
 636 /*! \brief Double erf(x).
 637  *
 638  * \param x Argument.
 639  * \result erf(x)
 640  *
 641  * \note This function might be superficially meaningless, but it helps us to
 642  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 643  *       outside such code.
 644  */
 645 static inline double
 646 erf(double x)
 647 {
 648     return std::erf(x);
 649 }
 650
 651 /*! \brief Double erfc(x).
 652  *
 653  * \param x Argument.
 654  * \result erfc(x)
 655  *
 656  * \note This function might be superficially meaningless, but it helps us to
 657  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 658  *       outside such code.
 659  */
 660 static inline double
 661 erfc(double x)
 662 {
 663     return std::erfc(x);
 664 }
 665
 666
 667 /*! \brief Double sin \& cos.
 668  *
 669  * \param x The argument to evaluate sin/cos for
 670  * \param[out] sinval Sin(x)
 671  * \param[out] cosval Cos(x)
 672  *
 673  * \note This function might be superficially meaningless, but it helps us to
 674  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 675  *       outside such code.
 676  */
 677 static inline void
 678 sincos(double x, double *sinval, double *cosval)
 679 {
 680     *sinval = std::sin(x);
 681     *cosval = std::cos(x);
 682 }
 683
 684 /*! \brief Double sin.
 685  *
 686  * \param x The argument to evaluate sin for
 687  * \result Sin(x)
 688  *
 689  * \note This function might be superficially meaningless, but it helps us to
 690  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 691  *       outside such code.
 692  */
 693 static inline double
 694 sin(double x)
 695 {
 696     return std::sin(x);
 697 }
 698
 699 /*! \brief Double cos.
 700  *
 701  * \param x The argument to evaluate cos for
 702  * \result Cos(x)
 703  *
 704  * \note This function might be superficially meaningless, but it helps us to
 705  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 706  *       outside such code.
 707  */
 708 static inline double
 709 cos(double x)
 710 {
 711     return std::cos(x);
 712 }
 713
 714 /*! \brief Double tan.
 715  *
 716  * \param x The argument to evaluate tan for
 717  * \result Tan(x)
 718  *
 719  * \note This function might be superficially meaningless, but it helps us to
 720  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 721  *       outside such code.
 722  */
 723 static inline double
 724 tan(double x)
 725 {
 726     return std::tan(x);
 727 }
 728
 729 /*! \brief Double asin.
 730  *
 731  * \param x The argument to evaluate asin for
 732  * \result Asin(x)
 733  *
 734  * \note This function might be superficially meaningless, but it helps us to
 735  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 736  *       outside such code.
 737  */
 738 static inline double
 739 asin(double x)
 740 {
 741     return std::asin(x);
 742 }
 743
 744 /*! \brief Double acos.
 745  *
 746  * \param x The argument to evaluate acos for
 747  * \result Acos(x)
 748  *
 749  * \note This function might be superficially meaningless, but it helps us to
 750  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 751  *       outside such code.
 752  */
 753 static inline double
 754 acos(double x)
 755 {
 756     return std::acos(x);
 757 }
 758
 759 /*! \brief Double atan.
 760  *
 761  * \param x The argument to evaluate atan for
 762  * \result Atan(x)
 763  *
 764  * \note This function might be superficially meaningless, but it helps us to
 765  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 766  *       outside such code.
 767  */
 768 static inline double
 769 atan(double x)
 770 {
 771     return std::atan(x);
 772 }
 773
 774 /*! \brief Double atan2(y,x).
 775  *
 776  * \param y Y component of vector, any quartile
 777  * \param x X component of vector, any quartile
 778  * \result Atan(y,x)
 779  *
 780  * \note This function might be superficially meaningless, but it helps us to
 781  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 782  *       outside such code.
 783  */
 784 static inline double
 785 atan2(double y, double x)
 786 {
 787     return std::atan2(y, x);
 788 }
 789
 790 /*! \brief Calculate the force correction due to PME analytically in double.
 791  *
 792  * See the SIMD version of this function for details.
 793  *
 794  * \param z2 input parameter
 795  * \returns Correction to use on force
 796  *
 797  * \note This function might be superficially meaningless, but it helps us to
 798  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 799  *       outside such code.
 800  */
 801 static inline double
 802 pmeForceCorrection(double z2)
 803 {
 804     const double  FN10(-8.0072854618360083154e-14);
 805     const double  FN9(1.1859116242260148027e-11);
 806     const double  FN8(-8.1490406329798423616e-10);
 807     const double  FN7(3.4404793543907847655e-8);
 808     const double  FN6(-9.9471420832602741006e-7);
 809     const double  FN5(0.000020740315999115847456);
 810     const double  FN4(-0.00031991745139313364005);
 811     const double  FN3(0.0035074449373659008203);
 812     const double  FN2(-0.031750380176100813405);
 813     const double  FN1(0.13884101728898463426);
 814     const double  FN0(-0.75225277815249618847);
 815
 816     const double  FD5(0.000016009278224355026701);
 817     const double  FD4(0.00051055686934806966046);
 818     const double  FD3(0.0081803507497974289008);
 819     const double  FD2(0.077181146026670287235);
 820     const double  FD1(0.41543303143712535988);
 821     const double  FD0(1.0);
 822
 823     double        z4;
 824     double        polyFN0, polyFN1, polyFD0, polyFD1;
 825
 826     z4             = z2 * z2;
 827
 828     polyFD1        = fma(FD5, z4, FD3);
 829     polyFD1        = fma(polyFD1, z4, FD1);
 830     polyFD1        = polyFD1 * z2;
 831     polyFD0        = fma(FD4, z4, FD2);
 832     polyFD0        = fma(polyFD0, z4, FD0);
 833     polyFD0        = polyFD0 + polyFD1;
 834
 835     polyFD0        = inv(polyFD0);
 836
 837     polyFN0        = fma(FN10, z4, FN8);
 838     polyFN0        = fma(polyFN0, z4, FN6);
 839     polyFN0        = fma(polyFN0, z4, FN4);
 840     polyFN0        = fma(polyFN0, z4, FN2);
 841     polyFN0        = fma(polyFN0, z4, FN0);
 842     polyFN1        = fma(FN9, z4, FN7);
 843     polyFN1        = fma(polyFN1, z4, FN5);
 844     polyFN1        = fma(polyFN1, z4, FN3);
 845     polyFN1        = fma(polyFN1, z4, FN1);
 846     polyFN0        = fma(polyFN1, z2, polyFN0);
 847
 848     return polyFN0 * polyFD0;
 849 }
 850
 851 /*! \brief Calculate the potential correction due to PME analytically in double.
 852  *
 853  * See the SIMD version of this function for details.
 854  *
 855  * \param z2 input parameter
 856  * \returns Correction to use on potential.
 857  *
 858  * \note This function might be superficially meaningless, but it helps us to
 859  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 860  *       outside such code.
 861  */
 862 static inline double
 863 pmePotentialCorrection(double z2)
 864 {
 865     const double  VN9(-9.3723776169321855475e-13);
 866     const double  VN8(1.2280156762674215741e-10);
 867     const double  VN7(-7.3562157912251309487e-9);
 868     const double  VN6(2.6215886208032517509e-7);
 869     const double  VN5(-4.9532491651265819499e-6);
 870     const double  VN4(0.00025907400778966060389);
 871     const double  VN3(0.0010585044856156469792);
 872     const double  VN2(0.045247661136833092885);
 873     const double  VN1(0.11643931522926034421);
 874     const double  VN0(1.1283791671726767970);
 875
 876     const double  VD5(0.000021784709867336150342);
 877     const double  VD4(0.00064293662010911388448);
 878     const double  VD3(0.0096311444822588683504);
 879     const double  VD2(0.085608012351550627051);
 880     const double  VD1(0.43652499166614811084);
 881     const double  VD0(1.0);
 882
 883     double        z4;
 884     double        polyVN0, polyVN1, polyVD0, polyVD1;
 885
 886     z4             = z2 * z2;
 887
 888     polyVD1        = fma(VD5, z4, VD3);
 889     polyVD0        = fma(VD4, z4, VD2);
 890     polyVD1        = fma(polyVD1, z4, VD1);
 891     polyVD0        = fma(polyVD0, z4, VD0);
 892     polyVD0        = fma(polyVD1, z2, polyVD0);
 893
 894     polyVD0        = inv(polyVD0);
 895
 896     polyVN1        = fma(VN9, z4, VN7);
 897     polyVN0        = fma(VN8, z4, VN6);
 898     polyVN1        = fma(polyVN1, z4, VN5);
 899     polyVN0        = fma(polyVN0, z4, VN4);
 900     polyVN1        = fma(polyVN1, z4, VN3);
 901     polyVN0        = fma(polyVN0, z4, VN2);
 902     polyVN1        = fma(polyVN1, z4, VN1);
 903     polyVN0        = fma(polyVN0, z4, VN0);
 904     polyVN0        = fma(polyVN1, z2, polyVN0);
 905
 906     return polyVN0 * polyVD0;
 907 }
 908
 909
 910 /*****************************************************************************
 911  *           Floating point math functions mimicking SIMD versions.          *
 912  *              Double precision data, single precision accuracy.            *
 913  *****************************************************************************/
 914
 915
 916 /*! \brief Calculate 1/sqrt(x) for double, but with single accuracy.
 917  *
 918  *  \param x Argument that must be >0. This routine does not check arguments.
 919  *  \return 1/sqrt(x). Result is undefined if your argument was invalid.
 920  *
 921  * \note This function might be superficially meaningless, but it helps us to
 922  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 923  *       outside such code.
 924  */
 925 static inline double
 926 invsqrtSingleAccuracy(double x)
 927 {
 928     return invsqrt(static_cast<float>(x));
 929 }
 930
 931 /*! \brief Calculate 1/sqrt(x) for two doubles, but with single accuracy.
 932  *
 933  * \param x0  First argument, x0 must be positive - no argument checking.
 934  * \param x1  Second argument, x1 must be positive - no argument checking.
 935  * \param[out] out0  Result 1/sqrt(x0)
 936  * \param[out] out1  Result 1/sqrt(x1)
 937  *
 938  * \note This function might be superficially meaningless, but it helps us to
 939  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 940  *       outside such code.
 941  */
 942 static inline void
 943 invsqrtPairSingleAccuracy(double x0,    double x1,
 944                           double *out0, double *out1)
 945 {
 946     *out0 = invsqrt(static_cast<float>(x0));
 947     *out1 = invsqrt(static_cast<float>(x1));
 948 }
 949
 950 /*! \brief Calculate 1/x for double, but with single accuracy.
 951  *
 952  *  \param x Argument that must be nonzero. This routine does not check arguments.
 953  *  \return 1/x. Result is undefined if your argument was invalid.
 954  *
 955  * \note This function might be superficially meaningless, but it helps us to
 956  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 957  *       outside such code.
 958  */
 959 static inline double
 960 invSingleAccuracy(double x)
 961 {
 962     return 1.0f/x;
 963 }
 964
 965 /*! \brief Calculate 1/sqrt(x) for masked entry of double, but with single accuracy.
 966  *
 967  *  This routine only evaluates 1/sqrt(x) if mask is true.
 968  *  Illegal values for a masked-out double will not lead to
 969  *  floating-point exceptions.
 970  *
 971  *  \param x Argument that must be >0 if masked-in.
 972  *  \param m Mask
 973  *  \return 1/sqrt(x). Result is undefined if your argument was invalid or
 974  *          entry was not masked, and 0.0 for masked-out entries.
 975  *
 976  * \note This function might be superficially meaningless, but it helps us to
 977  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 978  *       outside such code.
 979  */
 980 static inline double
 981 maskzInvsqrtSingleAccuracy(double x, bool m)
 982 {
 983     return m ? invsqrtSingleAccuracy(x) : 0.0;
 984 }
 985
 986 /*! \brief Calculate 1/x for masked entry of double, but with single accuracy.
 987  *
 988  *  This routine only evaluates 1/x if mask is true.
 989  *  Illegal values for a masked-out double will not lead to
 990  *  floating-point exceptions.
 991  *
 992  *  \param x Argument that must be nonzero if masked-in.
 993  *  \param m Mask
 994  *  \return 1/x. Result is undefined if your argument was invalid or
 995  *          entry was not masked, and 0.0 for masked-out entries.
 996  *
 997  * \note This function might be superficially meaningless, but it helps us to
 998  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 999  *       outside such code.
1000  */
1001 static inline double
1002 maskzInvSingleAccuracy(double x, bool m)
1003 {
1004     return m ? invSingleAccuracy(x) : 0.0;
1005 }
1006
1007 /*! \brief Calculate sqrt(x) for double, but with single accuracy.
1008  *
1009  *  \param x Argument that must be >=0.
1010  *  \return sqrt(x).
1011  *
1012  * \note This function might be superficially meaningless, but it helps us to
1013  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1014  *       outside such code.
1015  */
1016 static inline double
1017 sqrtSingleAccuracy(double x)
1018 {
1019     return std::sqrt(static_cast<float>(x));
1020 }
1021
1022 /*! \brief Double log(x), but with single accuracy. This is the natural logarithm.
1023  *
1024  * \param x Argument, should be >0.
1025  * \result The natural logarithm of x. Undefined if argument is invalid.
1026  *
1027  * \note This function might be superficially meaningless, but it helps us to
1028  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1029  *       outside such code.
1030  */
1031 static inline double
1032 logSingleAccuracy(double x)
1033 {
1034     return std::log(static_cast<float>(x));
1035 }
1036
1037 /*! \brief Double 2^x, but with single accuracy.
1038  *
1039  * \param x Argument.
1040  * \result 2^x. Undefined if input argument caused overflow.
1041  *
1042  * \note This function might be superficially meaningless, but it helps us to
1043  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1044  *       outside such code.
1045  */
1046 static inline double
1047 exp2SingleAccuracy(double x)
1048 {
1049     return std::exp2(static_cast<float>(x));
1050 }
1051
1052 /*! \brief Double exp(x), but with single accuracy.
1053  *
1054  * \param x Argument.
1055  * \result exp(x). Undefined if input argument caused overflow.
1056  *
1057  * \note This function might be superficially meaningless, but it helps us to
1058  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1059  *       outside such code.
1060  */
1061 static inline double
1062 expSingleAccuracy(double x)
1063 {
1064     return std::exp(static_cast<float>(x));
1065 }
1066
1067 /*! \brief Double erf(x), but with single accuracy.
1068  *
1069  * \param x Argument.
1070  * \result erf(x)
1071  *
1072  * \note This function might be superficially meaningless, but it helps us to
1073  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1074  *       outside such code.
1075  */
1076 static inline double
1077 erfSingleAccuracy(double x)
1078 {
1079     return std::erf(static_cast<float>(x));
1080 }
1081
1082 /*! \brief Double erfc(x), but with single accuracy.
1083  *
1084  * \param x Argument.
1085  * \result erfc(x)
1086  *
1087  * \note This function might be superficially meaningless, but it helps us to
1088  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1089  *       outside such code.
1090  */
1091 static inline double
1092 erfcSingleAccuracy(double x)
1093 {
1094     return std::erfc(static_cast<float>(x));
1095 }
1096
1097
1098 /*! \brief Double sin \& cos, but with single accuracy.
1099  *
1100  * \param x The argument to evaluate sin/cos for
1101  * \param[out] sinval Sin(x)
1102  * \param[out] cosval Cos(x)
1103  *
1104  * \note This function might be superficially meaningless, but it helps us to
1105  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1106  *       outside such code.
1107  */
1108 static inline void
1109 sincosSingleAccuracy(double x, double *sinval, double *cosval)
1110 {
1111     // There is no single-precision sincos guaranteed in C++11, so use
1112     // separate functions and hope the compiler optimizes it for us.
1113     *sinval = std::sin(static_cast<float>(x));
1114     *cosval = std::cos(static_cast<float>(x));
1115 }
1116
1117 /*! \brief Double sin, but with single accuracy.
1118  *
1119  * \param x The argument to evaluate sin for
1120  * \result Sin(x)
1121  *
1122  * \note This function might be superficially meaningless, but it helps us to
1123  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1124  *       outside such code.
1125  */
1126 static inline double
1127 sinSingleAccuracy(double x)
1128 {
1129     return std::sin(static_cast<float>(x));
1130 }
1131
1132 /*! \brief Double cos, but with single accuracy.
1133  *
1134  * \param x The argument to evaluate cos for
1135  * \result Cos(x)
1136  *
1137  * \note This function might be superficially meaningless, but it helps us to
1138  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1139  *       outside such code.
1140  */
1141 static inline double
1142 cosSingleAccuracy(double x)
1143 {
1144     return std::cos(static_cast<float>(x));
1145 }
1146
1147 /*! \brief Double tan, but with single accuracy.
1148  *
1149  * \param x The argument to evaluate tan for
1150  * \result Tan(x)
1151  *
1152  * \note This function might be superficially meaningless, but it helps us to
1153  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1154  *       outside such code.
1155  */
1156 static inline double
1157 tanSingleAccuracy(double x)
1158 {
1159     return std::tan(static_cast<float>(x));
1160 }
1161
1162 /*! \brief Double asin, but with single accuracy.
1163  *
1164  * \param x The argument to evaluate asin for
1165  * \result Asin(x)
1166  *
1167  * \note This function might be superficially meaningless, but it helps us to
1168  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1169  *       outside such code.
1170  */
1171 static inline double
1172 asinSingleAccuracy(double x)
1173 {
1174     return std::asin(static_cast<float>(x));
1175 }
1176
1177 /*! \brief Double acos, but with single accuracy.
1178  *
1179  * \param x The argument to evaluate acos for
1180  * \result Acos(x)
1181  *
1182  * \note This function might be superficially meaningless, but it helps us to
1183  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1184  *       outside such code.
1185  */
1186 static inline double
1187 acosSingleAccuracy(double x)
1188 {
1189     return std::acos(static_cast<float>(x));
1190 }
1191
1192 /*! \brief Double atan, but with single accuracy.
1193  *
1194  * \param x The argument to evaluate atan for
1195  * \result Atan(x)
1196  *
1197  * \note This function might be superficially meaningless, but it helps us to
1198  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1199  *       outside such code.
1200  */
1201 static inline double
1202 atanSingleAccuracy(double x)
1203 {
1204     return std::atan(static_cast<float>(x));
1205 }
1206
1207 /*! \brief Double atan2(y,x), but with single accuracy.
1208  *
1209  * \param y Y component of vector, any quartile
1210  * \param x X component of vector, any quartile
1211  * \result Atan(y,x)
1212  *
1213  * \note This function might be superficially meaningless, but it helps us to
1214  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1215  *       outside such code.
1216  */
1217 static inline double
1218 atan2SingleAccuracy(double y, double x)
1219 {
1220     return std::atan2(static_cast<float>(y), static_cast<float>(x));
1221 }
1222
1223 /*! \brief Force correction due to PME in double, but with single accuracy.
1224  *
1225  * See the SIMD version of this function for details.
1226  *
1227  * \param z2 input parameter
1228  * \returns Correction to use on force
1229  *
1230  * \note This function might be superficially meaningless, but it helps us to
1231  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1232  *       outside such code.
1233  */
1234 static inline double
1235 pmeForceCorrectionSingleAccuracy(double z2)
1236 {
1237     const float  FN6(-1.7357322914161492954e-8f);
1238     const float  FN5(1.4703624142580877519e-6f);
1239     const float  FN4(-0.000053401640219807709149f);
1240     const float  FN3(0.0010054721316683106153f);
1241     const float  FN2(-0.019278317264888380590f);
1242     const float  FN1(0.069670166153766424023f);
1243     const float  FN0(-0.75225204789749321333f);
1244
1245     const float  FD4(0.0011193462567257629232f);
1246     const float  FD3(0.014866955030185295499f);
1247     const float  FD2(0.11583842382862377919f);
1248     const float  FD1(0.50736591960530292870f);
1249     const float  FD0(1.0f);
1250
1251     float        z4;
1252     float        polyFN0, polyFN1, polyFD0, polyFD1;
1253
1254     float        z2f = z2;
1255
1256     z4             = z2f * z2f;
1257
1258     polyFD0        = fma(FD4, z4, FD2);
1259     polyFD1        = fma(FD3, z4, FD1);
1260     polyFD0        = fma(polyFD0, z4, FD0);
1261     polyFD0        = fma(polyFD1, z2f, polyFD0);
1262
1263     polyFN0        = fma(FN6, z4, FN4);
1264     polyFN1        = fma(FN5, z4, FN3);
1265     polyFN0        = fma(polyFN0, z4, FN2);
1266     polyFN1        = fma(polyFN1, z4, FN1);
1267     polyFN0        = fma(polyFN0, z4, FN0);
1268     polyFN0        = fma(polyFN1, z2f, polyFN0);
1269
1270     return polyFN0 / polyFD0;
1271 }
1272
1273 /*! \brief Potential correction due to PME in double, but with single accuracy.
1274  *
1275  * See the SIMD version of this function for details.
1276  *
1277  * \param z2 input parameter
1278  * \returns Correction to use on potential.
1279  *
1280  * \note This function might be superficially meaningless, but it helps us to
1281  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1282  *       outside such code.
1283  */
1284 static inline double
1285 pmePotentialCorrectionSingleAccuracy(double z2)
1286 {
1287     const float  VN6(1.9296833005951166339e-8f);
1288     const float  VN5(-1.4213390571557850962e-6f);
1289     const float  VN4(0.000041603292906656984871f);
1290     const float  VN3(-0.00013134036773265025626f);
1291     const float  VN2(0.038657983986041781264f);
1292     const float  VN1(0.11285044772717598220f);
1293     const float  VN0(1.1283802385263030286f);
1294
1295     const float  VD3(0.0066752224023576045451f);
1296     const float  VD2(0.078647795836373922256f);
1297     const float  VD1(0.43336185284710920150f);
1298     const float  VD0(1.0f);
1299
1300     float        z4;
1301     float        polyVN0, polyVN1, polyVD0, polyVD1;
1302
1303     float        z2f = z2;
1304
1305     z4             = z2f * z2f;
1306
1307     polyVD1        = fma(VD3, z4, VD1);
1308     polyVD0        = fma(VD2, z4, VD0);
1309     polyVD0        = fma(polyVD1, z2f, polyVD0);
1310
1311     polyVN0        = fma(VN6, z4, VN4);
1312     polyVN1        = fma(VN5, z4, VN3);
1313     polyVN0        = fma(polyVN0, z4, VN2);
1314     polyVN1        = fma(polyVN1, z4, VN1);
1315     polyVN0        = fma(polyVN0, z4, VN0);
1316     polyVN0        = fma(polyVN1, z2f, polyVN0);
1317
1318     return polyVN0 / polyVD0;
1319 }
1320
1321
1322
1323 } // namespace gmx
1324
1325
1326 #endif // GMX_SIMD_SCALAR_MATH_H