src/gromacs/simd/tests/simd_math.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include "gromacs/simd/simd_math.h"
  38
  39 #include "config.h"
  40
  41 #include <cmath>
  42 #include <cstdint>
  43
  44 #include <map>
  45 #include <utility>
  46 #include <vector>
  47
  48 #include "gromacs/math/utilities.h"
  49 #include "gromacs/options/basicoptions.h"
  50 #include "gromacs/simd/simd.h"
  51
  52 #include "testutils/refdata.h"
  53 #include "testutils/testasserts.h"
  54
  55 #include "simd.h"
  56
  57 #if GMX_SIMD
  58
  59 namespace gmx
  60 {
  61 namespace test
  62 {
  63
  64 /*! \cond internal */
  65 /*! \addtogroup module_simd */
  66 /*! \{ */
  67
  68 #if GMX_SIMD_HAVE_REAL
  69
  70 class SimdMathTest : public SimdTest
  71 {
  72     public:
  73
  74         /*! \brief Type for half-open intervals specifying test ranges */
  75         typedef std::pair<real, real> Range;
  76
  77         /*! \brief Control what is considered matching values
  78          *
  79          * Normal simply means that we request the values to be equal
  80          * to within the specified tolerance.
  81          * However, there are also two more cases that are special:
  82          *
  83          * - Even if we only care about normal (i.e., not denormal) values, some math
  84          *   libraries might clamp the value to zero, which means our SIMD output
  85          *   might not match their values. By using MatchRule::Dtz, we will consider
  86          *   all values both from the reference and test functions that are within the
  87          *   requested ulp tolerance of a denormal number to be equivalent to 0.0.
  88          * - For some older architectures without fused multiply-add units (e.g. x86 SSE2),
  89          *   we might end up clamping the results to zero just before reaching
  90          *   denormal output, since the intermediate results e.g. in polynomial
  91          *   approximations can be smaller than the final one. We often simply don't
  92          *   care about those values, and then one can use
  93          *   MatchRule::ReferenceOrZero to allow the test value to either match
  94          *   the reference or be zero.
  95          */
  96         enum class MatchRule
  97         {
  98             Normal,          //!< Match function values
  99             Dtz,             //!< Match function values after setting denormals to zero both in test and reference
 100             ReferenceOrZero, //!< Test values can either match reference or be zero
 101         };
 102
 103         const std::map<MatchRule, std::string> matchRuleNames_ =
 104         {
 105             { MatchRule::Normal,          "Test should match reference." },
 106             { MatchRule::Dtz,             "Test should match reference, with denormals treated as 0.0." },
 107             { MatchRule::ReferenceOrZero, "Test should match reference or 0.0." }
 108         };
 109
 110         /*! \brief Settings used for simd math function comparisons */
 111         struct CompareSettings
 112         {
 113             Range          range;     //!< Range over which to test function
 114             std::int64_t   ulpTol;    //!< Ulp tolerance
 115             real           absTol;    //!< Absolute tolerance
 116             MatchRule      matchRule; //!< Decide what we consider a match
 117         };
 118
 119         ::testing::AssertionResult
 120             compareSimdMathFunction(const char *                refFuncExpr,
 121                                     const char *                simdFuncExpr,
 122                                     const char *                compareSettingsExpr,
 123                                     real                        refFunc(real x),
 124                                     SimdReal      gmx_simdcall  simdFunc(SimdReal x),
 125                                     const CompareSettings      &compareSettings);
 126
 127         /*! \brief Generate test point vector
 128          *
 129          *  \param range  The test interval, half open. Upper limit is not included.
 130          *                Pass by value, since we need to modify in method anyway.
 131          *  \param points Number of points to generate. This might be increased
 132          *                slightly to account both for extra special values like 0.0
 133          *                and the SIMD width.
 134          *
 135          * This routine generates a vector with test points separated by constant
 136          * multiplicative factors, based on the range and number of points in the
 137          * class. If the range includes both negative and positive values, points
 138          * will be generated separately for the negative/positive intervals down
 139          * to the smallest real number that can be represented, and we also include
 140          * 0.0 explicitly.
 141          *
 142          * This is highly useful for large test ranges. For example, with a linear
 143          * 1000-point division of the range (1,1e10) the first three values to test
 144          * would be 1, 10000000.999, and 20000000.998, etc. For large values we would
 145          * commonly hit the point where adding the small delta has no effect due to
 146          * limited numerical precision.
 147          * When we instead use this routine, the values will be 1, 1.0239, 1.0471, etc.
 148          * This will spread the entropy over all bits in the IEEE754 representation,
 149          * and be a much better test of all potential input values.
 150          *
 151          *  \note We do not use the static variable s_nPoints in the parent class
 152          *        to avoid altering any value the user has set on the command line; since
 153          *        it's a static member, changing it would have permanent effect.
 154          */
 155         std::vector<real>
 156         generateTestPoints(Range range, std::size_t points);
 157
 158         /*! \brief Test routine for the test point vector generation
 159          */
 160         void
 161         generateTestPointsTest();
 162
 163 };
 164
 165 /*! \brief Test approximate equality of SIMD vs reference version of a function.
 166  *
 167  * This macro takes vanilla C and SIMD flavors of a function and tests it with
 168  * the number of points, range, and tolerances specified by the test fixture class.
 169  *
 170  * The third option controls the range, tolerances, and match settings.
 171  */
 172 #define GMX_EXPECT_SIMD_FUNC_NEAR(refFunc, tstFunc, compareSettings) \
 173     EXPECT_PRED_FORMAT3(compareSimdMathFunction, refFunc, tstFunc, compareSettings)
 174
 175 std::vector<real>
 176 SimdMathTest::generateTestPoints(Range inputRange, std::size_t inputPoints)
 177 {
 178
 179     std::vector<real> testPoints;
 180     testPoints.reserve(inputPoints);
 181
 182     GMX_RELEASE_ASSERT(inputRange.first < inputRange.second, "The start of the interval must come before the end");
 183
 184     std::vector<Range> testRanges;
 185
 186     if (inputRange.first < 0 && inputRange.second > 0)
 187     {
 188         testRanges.push_back({inputRange.first, -std::numeric_limits<real>::min()});
 189         testRanges.push_back({0.0, inputRange.second});
 190     }
 191     else
 192     {
 193         if (inputRange.second == 0)
 194         {
 195             inputRange.second = -std::numeric_limits<real>::min();
 196             inputRange.first  = std::min(inputRange.first, inputRange.second);
 197         }
 198         testRanges.push_back(inputRange);
 199     }
 200
 201     for (Range &range : testRanges)
 202     {
 203         std::size_t points = inputPoints / testRanges.size();
 204
 205         // The value 0 is special, and can only occur at the start of
 206         // the interval after the corrections outside this loop.
 207         // Add it explicitly, and adjust the interval to continue
 208         // at the first valid non-zero positive number.
 209         if (range.first == 0)
 210         {
 211             testPoints.push_back(0.0);
 212             range.first = std::numeric_limits<real>::min();
 213             points--; // Used one point
 214         }
 215
 216         union
 217         {
 218             real                                                                               r;
 219             std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
 220         }
 221         low, high, x;
 222
 223         low.r  = range.first;
 224         high.r = range.second;
 225
 226         // IEEE754 floating-point numbers have the cool property that for any range of
 227         // constant sign, for all non-zero numbers a constant (i.e., linear) difference
 228         // in the bitwise representation corresponds to a constant multiplicative factor.
 229         //
 230         // Divide the ulp difference evenly
 231         std::int64_t ulpDiff     = high.i-low.i;
 232         // dividend and divisor must both be signed types
 233         std::int64_t ulpDelta    = ulpDiff/static_cast<std::int64_t>(points);
 234         std::int64_t minUlpDelta = (ulpDiff > 0) ? 1 : -1;
 235
 236         if (ulpDelta == 0)
 237         {
 238             // Very short interval or very many points caused round-to-zero.
 239             // Select the smallest possible change, which is one ulp (with correct sign)
 240             ulpDelta = minUlpDelta;
 241             points   = std::abs(ulpDiff);
 242         }
 243
 244         x.r = low.r;
 245         // Use an index-based loop to avoid floating-point comparisons with
 246         // values that might have overflowed. Save one point for the very last
 247         // bitwise value that is part of the interval
 248         for (std::size_t i = 0; i < points-1; i++)
 249         {
 250             testPoints.push_back(x.r);
 251             x.i += ulpDelta;
 252         }
 253
 254         // Make sure we test the very last point that is inside the interval
 255         x.r  = high.r;
 256         x.i -= minUlpDelta;
 257         testPoints.push_back(x.r);
 258     }
 259     return testPoints;
 260 }
 261
 262 /*! \brief Implementation routine to compare SIMD vs reference functions.
 263  *
 264  * \param refFuncExpr         Description of reference function expression
 265  * \param simdFuncExpr        Description of SIMD function expression
 266  * \param compareSettingsExpr Description of compareSettings
 267  * \param refFunc             Reference math function pointer
 268  * \param simdFunc            SIMD math function pointer
 269  * \param compareSettings     Structure with the range, tolerances, and
 270  *                            matching rules to use for the comparison.
 271  *
 272  * \note You should not never call this function directly,  but use the
 273  *       macro GMX_EXPECT_SIMD_FUNC_NEAR(refFunc,tstFunc,matchRule) instead.
 274  */
 275 ::testing::AssertionResult
 276 SimdMathTest::compareSimdMathFunction(const char                  * refFuncExpr,
 277                                       const char                  * simdFuncExpr,
 278                                       const char gmx_unused       * compareSettingsExpr,
 279                                       real                          refFunc(real x),
 280                                       SimdReal        gmx_simdcall  simdFunc(SimdReal x),
 281                                       const CompareSettings        &compareSettings)
 282 {
 283     std::vector<real>                 vx(GMX_SIMD_REAL_WIDTH);
 284     std::vector<real>                 vref(GMX_SIMD_REAL_WIDTH);
 285     std::vector<real>                 vtst(GMX_SIMD_REAL_WIDTH);
 286     real                              absDiff;
 287     std::int64_t                      ulpDiff;
 288     std::int64_t                      maxUlpDiff = 0;
 289     real                              maxUlpDiffPos;
 290     real                              refValMaxUlpDiff, simdValMaxUlpDiff;
 291     const int                         niter = s_nPoints/GMX_SIMD_REAL_WIDTH;
 292
 293     union
 294     {
 295         real                                                                               r;
 296         std::conditional<sizeof(real) == sizeof(double), std::int64_t, std::int32_t>::type i;
 297     } conv0, conv1;
 298
 299     // Allow zero-size intervals - nothing to test means we succeeded at it
 300     if (compareSettings.range.first == compareSettings.range.second)
 301     {
 302         ::testing::AssertionSuccess();
 303     }
 304
 305     // Calculate the tolerance limit to use for denormals - we want
 306     // values that are within the ulp tolerance of denormals to be considered matching
 307     conv0.r                   = std::numeric_limits<real>::min();
 308     conv0.i                  += compareSettings.ulpTol - 1; // min() itself is not denormal, but one ulp larger
 309     const real denormalLimit  = conv0.r;
 310
 311     // We want to test as many diverse bit combinations as possible over the range requested,
 312     // and in particular do it evenly spaced in bit-space.
 313     // Due to the way IEEE754 floating-point is represented, that means we should have a
 314     // constant multiplicative factor between adjacent values. This gets a bit complicated
 315     // when we have both positive and negative values, so we offload the generation of the
 316     // specific testing values to a separate routine
 317     std::vector<real> testPoints = generateTestPoints(compareSettings.range, s_nPoints);
 318
 319     size_t            pointIndex = 0;
 320
 321     for (int iter = 0; iter < niter; iter++)
 322     {
 323         for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
 324         {
 325             vx[i]   = testPoints[pointIndex];
 326             vref[i] = refFunc(vx[i]);
 327             // If we reach the end of the points, stop increasing index so we pad with
 328             // extra copies of the last element up to the SIMD width
 329             if (pointIndex + 1 < testPoints.size() )
 330             {
 331                 pointIndex++;
 332             }
 333         }
 334         vtst  = simdReal2Vector(simdFunc(vector2SimdReal(vx)));
 335
 336         bool absOk = true, signOk = true;
 337         for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
 338         {
 339             if (compareSettings.matchRule == MatchRule::Dtz &&
 340                 std::abs(vref[i]) <= denormalLimit && std::abs(vtst[i]) <= denormalLimit)
 341             {
 342                 continue;
 343             }
 344
 345             if (compareSettings.matchRule == MatchRule::ReferenceOrZero && vtst[i] == 0.0)
 346             {
 347                 // If we accept 0.0 for the test function, we can continue to the next loop iteration.
 348                 continue;
 349             }
 350
 351             absDiff = std::abs(vref[i]-vtst[i]);
 352             absOk   = absOk  && ( absDiff < compareSettings.absTol );
 353             signOk  = signOk && ( (vref[i] >= 0 && vtst[i] >= 0) || (vref[i] <= 0 && vtst[i] <= 0));
 354
 355             if (absDiff >= compareSettings.absTol)
 356             {
 357                 /* We replicate the trivial ulp differences comparison here rather than
 358                  * calling the lower-level routine for comparing them, since this enables
 359                  * us to run through the entire test range and report the largest deviation
 360                  * without lots of extra glue routines.
 361                  */
 362                 conv0.r           = vref[i];
 363                 conv1.r           = vtst[i];
 364                 ulpDiff           = llabs(conv0.i-conv1.i);
 365                 if (ulpDiff > maxUlpDiff)
 366                 {
 367                     maxUlpDiff        = ulpDiff;
 368                     maxUlpDiffPos     = vx[i];
 369                     refValMaxUlpDiff  = vref[i];
 370                     simdValMaxUlpDiff = vtst[i];
 371                 }
 372             }
 373         }
 374         if ( (!absOk) && (!signOk) )
 375         {
 376             return ::testing::AssertionFailure()
 377                    << "Failing SIMD math function comparison due to sign differences." << std::endl
 378                    << "Reference function: " << refFuncExpr << std::endl
 379                    << "Simd function:      " << simdFuncExpr << std::endl
 380                    << "Test range is ( " << compareSettings.range.first << " , " << compareSettings.range.second << " ) " << std::endl
 381                    << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
 382                    << "First sign difference around x=" << std::setprecision(20) << ::testing::PrintToString(vx) << std::endl
 383                    << "Ref values:  " << std::setprecision(20) << ::testing::PrintToString(vref) << std::endl
 384                    << "SIMD values: " << std::setprecision(20) << ::testing::PrintToString(vtst) << std::endl;
 385         }
 386     }
 387
 388     GMX_RELEASE_ASSERT(compareSettings.ulpTol >= 0, "Invalid ulp value.");
 389     if (maxUlpDiff <= compareSettings.ulpTol)
 390     {
 391         return ::testing::AssertionSuccess();
 392     }
 393     else
 394     {
 395         return ::testing::AssertionFailure()
 396                << "Failing SIMD math function ulp comparison between " << refFuncExpr << " and " << simdFuncExpr << std::endl
 397                << "Requested ulp tolerance: " << compareSettings.ulpTol << std::endl
 398                << "Requested abs tolerance: " << compareSettings.absTol << std::endl
 399                << "Match rule: " << matchRuleNames_.at(compareSettings.matchRule) << std::endl
 400                << "Largest Ulp difference occurs for x=" << std::setprecision(20) << maxUlpDiffPos << std::endl
 401                << "Ref  values: " << std::setprecision(20) << refValMaxUlpDiff << std::endl
 402                << "SIMD values: " << std::setprecision(20) << simdValMaxUlpDiff << std::endl
 403                << "Ulp diff.:   " << std::setprecision(20) << maxUlpDiff << std::endl;
 404     }
 405 }
 406
 407 // Actual routine to generate a small set of test points in current precision. This will
 408 // be called by either the double or single precision test fixture, since we need different
 409 // test names to compare to the right reference data.
 410 void
 411 SimdMathTest::generateTestPointsTest()
 412 {
 413     int                                     points(10);
 414     gmx::test::TestReferenceData            data;
 415     gmx::test::TestReferenceChecker         checker(data.rootChecker());
 416
 417     std::vector<real>                       result;
 418
 419     result = generateTestPoints(Range(-1e10, -1), points);
 420     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10,-1[");
 421
 422     result = generateTestPoints(Range(-1e10, -1e-10), points);
 423     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, -1e-10[");
 424
 425     result = generateTestPoints(Range(1, 1e10), points);
 426     checker.checkSequence(result.begin(), result.end(), "Test points for interval [1, 1e10[");
 427
 428     result = generateTestPoints(Range(1e-10, 1e10), points);
 429     checker.checkSequence(result.begin(), result.end(), "Test points for interval [1e-10, 1e10[");
 430
 431     result = generateTestPoints(Range(-1e10, 1e-10), points);
 432     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e-10[");
 433
 434     result = generateTestPoints(Range(-1e-10, 1e-10), points);
 435     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e-10[");
 436
 437     result = generateTestPoints(Range(-1e-10, 1e10), points);
 438     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e-10, 1e10[");
 439
 440     result = generateTestPoints(Range(-1e10, 1e10), points);
 441     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1e10, 1e10[");
 442
 443     result = generateTestPoints(Range(-1000, 0), points);
 444     checker.checkSequence(result.begin(), result.end(), "Test points for interval [-1000, 0[");
 445
 446     result = generateTestPoints(Range(0, 1000), points);
 447     checker.checkSequence(result.begin(), result.end(), "Test points for interval [0, 1000[");
 448 }
 449
 450 /*! \} */
 451 /*! \endcond */
 452
 453
 454 // Actual math function tests below
 455
 456
 457 namespace
 458 {
 459
 460 /*! \cond internal */
 461 /*! \addtogroup module_simd */
 462 /*! \{ */
 463
 464 // Reference data is selected based on test name, so make the test name precision-dependent
 465 #if GMX_DOUBLE
 466 TEST_F(SimdMathTest, generateTestPointsDouble)
 467 {
 468     generateTestPointsTest();
 469 }
 470 #else
 471 TEST_F(SimdMathTest, generateTestPointsFloat)
 472 {
 473     generateTestPointsTest();
 474 }
 475 #endif
 476
 477 TEST_F(SimdMathTest, copysign)
 478 {
 479     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0,  c1, c2), copysign(setSimdRealFrom3R( c0,  c1,  c2), setSimdRealFrom3R(-c3,  c4, 0)));
 480     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-c0,  c1, c2), copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R(-c3,  c4, 0)));
 481     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R( c0, -c1, c2), copysign(setSimdRealFrom3R( c0,  c1,  c2), setSimdRealFrom3R( c3, -c4, 0)));
 482     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R( c0, -c1, c2), copysign(setSimdRealFrom3R(-c0, -c1, -c2), setSimdRealFrom3R( c3, -c4, 0)));
 483 }
 484
 485 /*! \brief Function wrapper to evaluate reference 1/sqrt(x) */
 486 real
 487 refInvsqrt(real x)
 488 {
 489     return 1.0/std::sqrt(x);
 490 }
 491
 492 TEST_F(SimdMathTest, invsqrt)
 493 {
 494     const real      low  = std::numeric_limits<float>::min();
 495     const real      high = std::numeric_limits<float>::max();
 496     CompareSettings settings {
 497         Range(low, high), ulpTol_, absTol_, MatchRule::Normal
 498     };
 499
 500     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrt, settings);
 501 }
 502
 503 TEST_F(SimdMathTest, maskzInvsqrt)
 504 {
 505     SimdReal x   = setSimdRealFrom3R(c1, 0.0, c2);
 506     SimdBool m   = (setZero() < x);
 507     SimdReal ref = setSimdRealFrom3R(1.0/std::sqrt(c1), 0.0, 1.0/std::sqrt(c2));
 508     GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInvsqrt(x, m));
 509 }
 510
 511 /*! \brief Function wrapper to return first result when testing \ref invsqrtPair */
 512 SimdReal gmx_simdcall
 513 tstInvsqrtPair0(SimdReal x)
 514 {
 515     SimdReal r0, r1;
 516     invsqrtPair(x, x, &r0, &r1);
 517     return r0;
 518 }
 519
 520 /*! \brief Function wrapper to return second result when testing \ref invsqrtPair */
 521 SimdReal gmx_simdcall
 522 tstInvsqrtPair1(SimdReal x)
 523 {
 524     SimdReal r0, r1;
 525     invsqrtPair(x, x, &r0, &r1);
 526     return r1;
 527 }
 528
 529 TEST_F(SimdMathTest, invsqrtPair)
 530 {
 531     const real low  = std::numeric_limits<float>::min();
 532     const real high = std::numeric_limits<float>::max();
 533
 534     // Accuracy conversions lose a bit of accuracy compared to all-double,
 535     // so increase the tolerance to 4*ulpTol_
 536     CompareSettings settings {
 537         Range(low, high), 4*ulpTol_, absTol_, MatchRule::Normal
 538     };
 539
 540     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair0, settings);
 541     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tstInvsqrtPair1, settings);
 542 }
 543
 544 /*! \brief Function wrapper to evaluate reference sqrt(x) */
 545 real
 546 refSqrt(real x)
 547 {
 548     return std::sqrt(x);
 549 }
 550
 551 TEST_F(SimdMathTest, sqrt)
 552 {
 553     // Since the first lookup step is sometimes performed in single precision,
 554     // our SIMD sqrt can only handle single-precision input values, even when
 555     // compiled in double precision.
 556
 557     const real      minFloat     = std::numeric_limits<float>::min();
 558     const real      minSafeFloat = minFloat*10;
 559     const real      maxSafeFloat = std::numeric_limits<float>::max()*0.1;
 560     CompareSettings settings;
 561     // The accuracy conversions lose a bit of extra accuracy compared to
 562     // doing the iterations in all-double.
 563     setUlpTol(4*ulpTol_);
 564
 565     // First test that 0.0 and a few other values work
 566     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c1), std::sqrt(c2)), sqrt(setSimdRealFrom3R(0, c1, c2)));
 567
 568 #if GMX_DOUBLE
 569     // As mentioned above, we cannot guarantee that very small double precision
 570     // input values (below std::numeric_limits<float>::min()) are handled correctly,
 571     // so our implementation will clamp it to zero. In this range we allow either
 572     // the correct value or zero, but it's important that it does not result in NaN or Inf values.
 573     //
 574     // This test range must not be called for single precision, since if we try to divide
 575     // the interval (0.0, low( in npoints we will try to multiply by factors so small that
 576     // they end up being flushed to zero, and the loop would never end.
 577     settings = { Range(0.0, minFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 578     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 579 #endif
 580
 581     // Next range: Just about minFloat the lookup should always work, but the results
 582     // might be a bit fragile due to issues with the N-R iterations being flushed to zero
 583     // for denormals. We can probably relax the latter in double precision, but since we
 584     // anyway cannot handle numbers that cannot be represented in single it's not worth
 585     // worrying too much about whether we have zero or an exact values around 10^-38....
 586     settings = { Range(minFloat, minSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 587     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 588
 589     settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
 590     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt, settings);
 591 }
 592
 593 TEST_F(SimdMathTest, sqrtUnsafe)
 594 {
 595     const real minSafeFloat = std::numeric_limits<float>::min()*10;
 596     const real maxSafeFloat = std::numeric_limits<float>::max()*0.1;
 597
 598     // The accuracy conversions lose a bit of extra accuracy compared to
 599     // doing the iterations in all-double, so we use 4*ulpTol_
 600     setUlpTol(4*ulpTol_);
 601
 602     CompareSettings settings {
 603         Range(minSafeFloat, maxSafeFloat), 4*ulpTol_, absTol_, MatchRule::Normal
 604     };
 605     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrt<MathOptimization::Unsafe>, settings);
 606 }
 607
 608 /*! \brief Function wrapper to evaluate reference 1/x */
 609 real refInv(real x)
 610 {
 611     return 1.0/x;
 612 }
 613
 614 TEST_F(SimdMathTest, inv)
 615 {
 616     // Since the first lookup step is sometimes performed in single precision,
 617     // our SIMD 1/x can only handle single-precision input values, even when
 618     // compiled in double precision.
 619
 620     // Relevant threshold points
 621     const real      minSafeFloat = std::numeric_limits<float>::min()*10;  // X value guaranteed not to result in Inf intermediates for 1/x calc.
 622     const real      maxSafeFloat = std::numeric_limits<float>::max()*0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
 623     // Scale highest value by 1-eps, since we will do some arithmetics on this value
 624     const real      maxFloat     = std::numeric_limits<float>::max()*(1.0 - std::numeric_limits<float>::epsilon() );
 625     CompareSettings settings;
 626
 627     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
 628     settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 629     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 630
 631     // Normal checks for x < 0
 632     settings = { Range(-maxSafeFloat, -minSafeFloat),  ulpTol_, absTol_, MatchRule::Normal };
 633     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 634
 635     // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
 636
 637     // Normal checks for x > 0
 638     settings = { Range(minSafeFloat, maxSafeFloat),  ulpTol_, absTol_, MatchRule::Normal };
 639     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 640
 641     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
 642     settings = { Range(maxSafeFloat, maxFloat),  ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 643     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
 644 }
 645
 646 TEST_F(SimdMathTest, maskzInv)
 647 {
 648     SimdReal x   = setSimdRealFrom3R(c1, 0.0, c2);
 649     SimdBool m   = (setZero() < x);
 650     SimdReal ref = setSimdRealFrom3R(1.0/c1, 0.0, 1.0/c2);
 651     GMX_EXPECT_SIMD_REAL_NEAR(ref, maskzInv(x, m));
 652 }
 653
 654 TEST_F(SimdMathTest, log)
 655 {
 656     const real      low  = std::numeric_limits<real>::min();
 657     const real      high = std::numeric_limits<real>::max();
 658
 659     CompareSettings settings {
 660         Range(low, high), ulpTol_, absTol_, MatchRule::Normal
 661     };
 662     GMX_EXPECT_SIMD_FUNC_NEAR(std::log, log, settings);
 663 }
 664
 665 TEST_F(SimdMathTest, exp2)
 666 {
 667     // Relevant threshold points
 668     constexpr real  lowestReal                     = -std::numeric_limits<real>::max();
 669     constexpr real  lowestRealThatProducesNormal   = std::numeric_limits<real>::min_exponent - 1;                      // adding the significant corresponds to one more unit in exponent
 670     constexpr real  lowestRealThatProducesDenormal = lowestRealThatProducesNormal - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
 671     constexpr real  highestRealThatProducesNormal  = std::numeric_limits<real>::max_exponent - 1;                      // adding the significant corresponds to one more unit in exponent
 672     CompareSettings settings;
 673
 674     // Below subnormal range all results should be zero (so, match the reference)
 675     settings = { Range(lowestReal, lowestRealThatProducesDenormal),  ulpTol_, absTol_, MatchRule::Normal };
 676     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 677
 678     // Subnormal range, require matching, but DTZ is fine
 679     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal),  ulpTol_, absTol_, MatchRule::Dtz };
 680     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 681
 682     // Normal range, standard result expected
 683     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal),  ulpTol_, absTol_, MatchRule::Normal };
 684     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
 685 }
 686
 687 TEST_F(SimdMathTest, exp2Unsafe)
 688 {
 689     // The unsafe version is only defined in the normal range
 690     constexpr real  lowestRealThatProducesNormal   = std::numeric_limits<real>::min_exponent - 1; // adding the significant corresponds to one more unit in exponent
 691     constexpr real  highestRealThatProducesNormal  = std::numeric_limits<real>::max_exponent - 1; // adding the significant corresponds to one more unit in exponent
 692
 693     CompareSettings settings {
 694         Range(lowestRealThatProducesNormal, highestRealThatProducesNormal), ulpTol_, absTol_, MatchRule::Normal
 695     };
 696     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2<MathOptimization::Unsafe>, settings);
 697 }
 698
 699 TEST_F(SimdMathTest, exp)
 700 {
 701     // Relevant threshold points. See the exp2 test for more details about the values; these are simply
 702     // scaled by log(2) due to the difference between exp2 and exp.
 703     const real      lowestReal                       = -std::numeric_limits<real>::max();
 704     const real      lowestRealThatProducesNormal     = (std::numeric_limits<real>::min_exponent - 1)*std::log(2.0);
 705     const real      lowestRealThatProducesDenormal   = lowestRealThatProducesNormal - std::numeric_limits<real>::digits*std::log(2.0);
 706     const real      highestRealThatProducesNormal    = (std::numeric_limits<real>::max_exponent - 1)*std::log(2.0);
 707     CompareSettings settings;
 708
 709     // Below subnormal range all results should be zero (so, match the reference)
 710     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal};
 711     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 712
 713     // Subnormal range, require matching, but DTZ is fine
 714     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal), ulpTol_, absTol_, MatchRule::Dtz};
 715     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 716
 717     // Normal range, standard result expected
 718     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal), ulpTol_, absTol_, MatchRule::Normal};
 719     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
 720 }
 721
 722 TEST_F(SimdMathTest, expUnsafe)
 723 {
 724     // See test of exp() for comments about test ranges
 725     const real      lowestRealThatProducesNormal     = (std::numeric_limits<real>::min_exponent - 1)*std::log(2.0);
 726     const real      lowestRealThatProducesCorrectExp = lowestRealThatProducesNormal + GMX_SIMD_HAVE_FMA ? 0.0 : 0.5 * std::numeric_limits<real>::digits * std::log(2.0);
 727     const real      highestRealThatProducesNormal    = (std::numeric_limits<real>::max_exponent - 1)*std::log(2.0);
 728
 729     CompareSettings settings {
 730         Range(lowestRealThatProducesCorrectExp, highestRealThatProducesNormal), ulpTol_, absTol_, MatchRule::Normal
 731     };
 732     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp<MathOptimization::Unsafe>, settings);
 733 }
 734
 735 /*! \brief Function wrapper for erf(x), with argument/return in default Gromacs precision.
 736  *
 737  * \note Single-precision erf() in some libraries can be slightly lower precision
 738  * than the SIMD flavor, so we use a cast to force double precision for reference.
 739  */
 740 real
 741 refErf(real x)
 742 {
 743     return std::erf(static_cast<double>(x));
 744 }
 745
 746 TEST_F(SimdMathTest, erf)
 747 {
 748     CompareSettings settings {
 749         Range(-9, 9), ulpTol_, std::numeric_limits<real>::min(), MatchRule::Normal
 750     };
 751     GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erf, settings);
 752 }
 753
 754 /*! \brief Function wrapper for erfc(x), with argument/return in default Gromacs precision.
 755  *
 756  * \note Single-precision erfc() in some libraries can be slightly lower precision
 757  * than the SIMD flavor, so we use a cast to force double precision for reference.
 758  */
 759 real
 760 refErfc(real x)
 761 {
 762     return std::erfc(static_cast<double>(x));
 763 }
 764
 765 TEST_F(SimdMathTest, erfc)
 766 {
 767     // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit to 4*ulpTol
 768     CompareSettings settings {
 769         Range(-9, 9), 4*ulpTol_, std::numeric_limits<real>::min(), MatchRule::Normal
 770     };
 771     GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfc, settings);
 772 }
 773
 774 TEST_F(SimdMathTest, sin)
 775 {
 776     CompareSettings settings {
 777         Range(-8*M_PI, 8*M_PI), ulpTol_, absTol_, MatchRule::Normal
 778     };
 779     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
 780
 781     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 782     settings = { Range(-10000, 10000), 2*ulpTol_, absTol_, MatchRule::Normal};
 783     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sin, settings);
 784 }
 785
 786 TEST_F(SimdMathTest, cos)
 787 {
 788     CompareSettings settings {
 789         Range(-8*M_PI, 8*M_PI), ulpTol_, absTol_, MatchRule::Normal
 790     };
 791     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
 792
 793     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 794     settings = { Range(-10000, 10000), 2*ulpTol_, absTol_, MatchRule::Normal};
 795     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cos, settings);
 796 }
 797
 798 TEST_F(SimdMathTest, tan)
 799 {
 800     // Tan(x) is a little sensitive due to the division in the algorithm.
 801     // Rather than using lots of extra FP operations, we accept the algorithm
 802     // presently only achieves a ~3 ulp error and use the medium tolerance.
 803     CompareSettings settings {
 804         Range(-8*M_PI, 8*M_PI), ulpTol_, absTol_, MatchRule::Normal
 805     };
 806     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
 807
 808     // Range reduction leads to accuracy loss, so we might want higher tolerance here
 809     settings = { Range(-10000, 10000), 2*ulpTol_, absTol_, MatchRule::Normal};
 810     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tan, settings);
 811 }
 812
 813 TEST_F(SimdMathTest, asin)
 814 {
 815     // Our present asin(x) algorithm achieves 2-3 ulp accuracy
 816     CompareSettings settings {
 817         Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal
 818     };
 819     GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asin, settings);
 820 }
 821
 822 TEST_F(SimdMathTest, acos)
 823 {
 824     // Our present acos(x) algorithm achieves 2-3 ulp accuracy
 825     CompareSettings settings {
 826         Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal
 827     };
 828     GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acos, settings);
 829 }
 830
 831 TEST_F(SimdMathTest, atan)
 832 {
 833     // Our present atan(x) algorithm achieves 1 ulp accuracy
 834     CompareSettings settings {
 835         Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal
 836     };
 837     GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atan, settings);
 838 }
 839
 840 TEST_F(SimdMathTest, atan2)
 841 {
 842     // test each quadrant
 843     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
 844                               atan2(rSimd_c0c1c2, rSimd_c3c4c5));
 845     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
 846                               atan2(rSimd_m0m1m2, rSimd_c3c4c5));
 847     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
 848                               atan2(rSimd_m0m1m2, rSimd_m3m0m4));
 849     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
 850                               atan2(rSimd_c0c1c2, rSimd_m3m0m4));
 851
 852     // cases important for calculating angles
 853     // values on coordinate axes
 854     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
 855                               atan2(setZero(), rSimd_c0c1c2));
 856     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
 857                               atan2(rSimd_c0c1c2, setZero()));
 858     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
 859                               atan2(setZero(), rSimd_m0m1m2));
 860     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
 861                               atan2(rSimd_m0m1m2, setZero()));
 862     // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
 863     // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
 864     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0), atan2(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
 865 }
 866
 867 /*! \brief Evaluate reference version of PME force correction. */
 868 real
 869 refPmeForceCorrection(real x)
 870 {
 871     if (x != 0)
 872     {
 873         real y = std::sqrt(x);
 874         return 2*std::exp(-x)/(std::sqrt(M_PI)*x) - std::erf(static_cast<double>(y))/(x*y);
 875     }
 876     else
 877     {
 878         return -4/(3*std::sqrt(M_PI));
 879     }
 880 }
 881
 882 // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
 883 TEST_F(SimdMathTest, pmeForceCorrection)
 884 {
 885     // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
 886     const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) /GMX_REAL_EPS;
 887
 888     CompareSettings    settings {
 889         Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal
 890     };
 891     GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrection, settings);
 892 }
 893
 894 /*! \brief Evaluate reference version of PME potential correction. */
 895 real
 896 refPmePotentialCorrection(real x)
 897 {
 898     real y = std::sqrt(x);
 899     return std::erf(static_cast<double>(y))/y;
 900 }
 901
 902 // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
 903 TEST_F(SimdMathTest, pmePotentialCorrection)
 904 {
 905     // Pme correction relative accuracy only needs to be ~1e-6 accuracy single, 1e-10 double
 906     const std::int64_t ulpTol = (GMX_DOUBLE ? 5e-10 : 5e-6) /GMX_REAL_EPS;
 907
 908     CompareSettings    settings {
 909         Range(0.15, 4), ulpTol, GMX_REAL_EPS, MatchRule::Normal
 910     };
 911     GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrection, settings);
 912 }
 913
 914 // Functions that only target single accuracy, even for double SIMD data
 915
 916 TEST_F(SimdMathTest, invsqrtSingleAccuracy)
 917 {
 918     // Here we always use float limits, since the lookup is not defined for numbers that
 919     // cannot be represented in single precision.
 920     const real low  = std::numeric_limits<float>::min();
 921     const real high = std::numeric_limits<float>::max();
 922     /* Increase the allowed error by the difference between the actual precision and single */
 923     setUlpTolSingleAccuracy(ulpTol_);
 924
 925     CompareSettings settings {
 926         Range(low, high), ulpTol_, absTol_, MatchRule::Normal
 927     };
 928     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, invsqrtSingleAccuracy, settings);
 929 }
 930
 931 /*! \brief Function wrapper to return first result when testing \ref invsqrtPairSingleAccuracy */
 932 SimdReal gmx_simdcall
 933 tst_invsqrt_SingleAccuracy_pair0(SimdReal x)
 934 {
 935     SimdReal r0, r1;
 936     invsqrtPairSingleAccuracy(x, x, &r0, &r1);
 937     return r0;
 938 }
 939
 940 /*! \brief Function wrapper to return second result when testing \ref invsqrtPairSingleAccuracy */
 941 SimdReal gmx_simdcall
 942 tst_invsqrt_SingleAccuracy_pair1(SimdReal x)
 943 {
 944     SimdReal r0, r1;
 945     invsqrtPairSingleAccuracy(x, x, &r0, &r1);
 946     return r1;
 947 }
 948
 949 TEST_F(SimdMathTest, invsqrtPairSingleAccuracy)
 950 {
 951     // Float limits since lookup is always performed in single
 952     const real low  = std::numeric_limits<float>::min();
 953     const real high = std::numeric_limits<float>::max();
 954     /* Increase the allowed error by the difference between the actual precision and single */
 955     setUlpTolSingleAccuracy(ulpTol_);
 956
 957     CompareSettings settings {
 958         Range(low, high), ulpTol_, absTol_, MatchRule::Normal
 959     };
 960     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair0, settings);
 961     GMX_EXPECT_SIMD_FUNC_NEAR(refInvsqrt, tst_invsqrt_SingleAccuracy_pair1, settings);
 962 }
 963
 964 TEST_F(SimdMathTest, sqrtSingleAccuracy)
 965 {
 966     // Since the first lookup step is sometimes performed in single precision,
 967     // our SIMD sqrt can only handle single-precision input values, even when
 968     // compiled in double precision - thus we use single precision limits here.
 969
 970     // Scale lowest value by 1+eps, since we will do some arithmetics on this value
 971     const real      low  = std::numeric_limits<float>::min()*(1.0 + std::numeric_limits<float>::epsilon() );
 972     const real      high = std::numeric_limits<float>::max();
 973     CompareSettings settings;
 974
 975     // Increase the allowed error by the difference between the actual precision and single
 976     setUlpTolSingleAccuracy(ulpTol_);
 977
 978     // First test that 0.0 and a few other values works
 979     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(0, std::sqrt(c0), std::sqrt(c1)), sqrtSingleAccuracy(setSimdRealFrom3R(0, c0, c1)));
 980
 981 #if GMX_DOUBLE
 982     // As mentioned above, we cannot guarantee that very small double precision
 983     // input values (below std::numeric_limits<float>::min()) are handled correctly,
 984     // so our implementation will clamp it to zero. In this range we allow either
 985     // the correct value or zero, but it's important that it does not result in NaN or Inf values.
 986     //
 987     // This test range must not be called for single precision, since if we try to divide
 988     // the interval (0.0, low( in npoints we will try to multiply by factors so small that
 989     // they end up being flushed to zero, and the loop would never end.
 990     settings = { Range(0.0, low), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
 991     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
 992 #endif
 993
 994     settings = { Range(low, high), ulpTol_, absTol_, MatchRule::Normal };
 995     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy, settings);
 996 }
 997
 998 TEST_F(SimdMathTest, sqrtSingleAccuracyUnsafe)
 999 {
1000     // Test the full range, but stick to float limits since lookup is done in single.
1001     const real low  = std::numeric_limits<float>::min();
1002     const real high = std::numeric_limits<float>::max();
1003
1004     /* Increase the allowed error by the difference between the actual precision and single */
1005     setUlpTolSingleAccuracy(ulpTol_);
1006
1007     CompareSettings settings {
1008         Range(low, high), ulpTol_, absTol_, MatchRule::Normal
1009     };
1010     GMX_EXPECT_SIMD_FUNC_NEAR(refSqrt, sqrtSingleAccuracy<MathOptimization::Unsafe>, settings);
1011 }
1012
1013 TEST_F(SimdMathTest, invSingleAccuracy)
1014 {
1015     // Since the first lookup step is sometimes performed in single precision,
1016     // our SIMD 1/x can only handle single-precision input values, even when
1017     // compiled in double precision.
1018
1019     // Relevant threshold points
1020     const real      minSafeFloat = std::numeric_limits<float>::min()*10;  // X value guaranteed not to result in Inf intermediates for 1/x calc.
1021     const real      maxSafeFloat = std::numeric_limits<float>::max()*0.1; // X value guaranteed not to result in DTZ intermediates for 1/x calc.
1022     // Scale highest value by 1-eps, since we will do some arithmetics on this value
1023     const real      maxFloat     = std::numeric_limits<float>::max()*(1.0 - std::numeric_limits<float>::epsilon() );
1024     CompareSettings settings;
1025
1026     // Increase the allowed error by the difference between the actual precision and single
1027     setUlpTolSingleAccuracy(ulpTol_);
1028
1029     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1030     settings = { Range(-maxFloat, -maxSafeFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1031     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1032
1033     // Normal checks for x < 0
1034     settings = { Range(-maxSafeFloat, -minSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1035     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1036
1037     // We do not care about the small range -minSafeFloat < x < +minSafeFloat where the result can be +/- Inf, since we don't require strict IEEE754.
1038
1039     // Normal checks for x > 0
1040     settings = { Range(minSafeFloat, maxSafeFloat), ulpTol_, absTol_, MatchRule::Normal };
1041     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1042
1043     // Danger zone where intermediates might be flushed to zero and produce 1/x==0.0
1044     settings = { Range(maxSafeFloat, maxFloat), ulpTol_, absTol_, MatchRule::ReferenceOrZero };
1045     GMX_EXPECT_SIMD_FUNC_NEAR(refInv, inv, settings);
1046 }
1047
1048 TEST_F(SimdMathTest, logSingleAccuracy)
1049 {
1050     const real low  = std::numeric_limits<real>::min();
1051     const real high = std::numeric_limits<real>::max();
1052
1053     // Increase the allowed error by the difference between the actual precision and single
1054     setUlpTolSingleAccuracy(ulpTol_);
1055
1056     CompareSettings settings {
1057         Range(low, high), ulpTol_, absTol_, MatchRule::Normal
1058     };
1059     GMX_EXPECT_SIMD_FUNC_NEAR(std::log, logSingleAccuracy, settings);
1060 }
1061
1062 TEST_F(SimdMathTest, exp2SingleAccuracy)
1063 {
1064     // Relevant threshold points - float limits since we only target single accuracy
1065     constexpr real  lowestReal                     = -std::numeric_limits<real>::max();
1066     constexpr real  lowestRealThatProducesNormal   = std::numeric_limits<real>::min_exponent - 1;                      // adding the significant corresponds to one more unit in exponent
1067     constexpr real  lowestRealThatProducesDenormal = lowestRealThatProducesNormal - std::numeric_limits<real>::digits; // digits refer to bits in significand, so 24/53 for float/double
1068     constexpr real  highestRealThatProducesNormal  = std::numeric_limits<real>::max_exponent - 1;                      // adding the significant corresponds to one more unit in exponent
1069     CompareSettings settings;
1070
1071     // Increase the allowed error by the difference between the actual precision and single
1072     setUlpTolSingleAccuracy(ulpTol_);
1073
1074     // Below subnormal range all results should be zero
1075     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1076     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
1077
1078     // Subnormal range, require matching, but DTZ is fine
1079     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal), ulpTol_, absTol_, MatchRule::Dtz };
1080     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
1081
1082     // Normal range, standard result expected
1083     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal), ulpTol_, absTol_, MatchRule::Normal };
1084     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2, settings);
1085 }
1086
1087 TEST_F(SimdMathTest, exp2SingleAccuracyUnsafe)
1088 {
1089     // The unsafe version is only defined in the normal range
1090     constexpr real lowestRealThatProducesNormal   = std::numeric_limits<real>::min_exponent - 1; // adding the significant corresponds to one more unit in exponent
1091     constexpr real highestRealThatProducesNormal  = std::numeric_limits<real>::max_exponent - 1; // adding the significant corresponds to one more unit in exponent
1092
1093     /* Increase the allowed error by the difference between the actual precision and single */
1094     setUlpTolSingleAccuracy(ulpTol_);
1095
1096     CompareSettings settings {
1097         Range(lowestRealThatProducesNormal, highestRealThatProducesNormal), ulpTol_, absTol_, MatchRule::Normal
1098     };
1099     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp2, exp2SingleAccuracy<MathOptimization::Unsafe>, settings);
1100 }
1101
1102 TEST_F(SimdMathTest, expSingleAccuracy)
1103 {
1104     // See threshold point comments in normal exp() test
1105     const real      lowestReal                       = -std::numeric_limits<real>::max();
1106     const real      lowestRealThatProducesNormal     = (std::numeric_limits<real>::min_exponent - 1)*std::log(2.0);
1107     const real      lowestRealThatProducesDenormal   = lowestRealThatProducesNormal - std::numeric_limits<real>::digits*std::log(2.0);
1108     const real      highestRealThatProducesNormal    = (std::numeric_limits<real>::max_exponent - 1)*std::log(2.0);
1109     CompareSettings settings;
1110
1111     // Below subnormal range all results should be zero (so, match the reference)
1112     settings = { Range(lowestReal, lowestRealThatProducesDenormal), ulpTol_, absTol_, MatchRule::Normal };
1113     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
1114
1115     // Subnormal range, require matching, but DTZ is fine
1116     settings = { Range(lowestRealThatProducesDenormal, lowestRealThatProducesNormal), ulpTol_, absTol_, MatchRule::Dtz };
1117     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
1118
1119     // Normal range, standard result expected
1120     settings = { Range(lowestRealThatProducesNormal, highestRealThatProducesNormal), ulpTol_, absTol_, MatchRule::Normal };
1121     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, exp, settings);
1122 }
1123
1124 TEST_F(SimdMathTest, expSingleAccuracyUnsafe)
1125 {
1126     // See test of exp() for comments about test ranges
1127     const real lowestRealThatProducesNormal     = (std::numeric_limits<real>::min_exponent - 1)*std::log(2.0);
1128     const real lowestRealThatProducesCorrectExp = lowestRealThatProducesNormal + GMX_SIMD_HAVE_FMA ? 0.0 : 0.5 * std::numeric_limits<real>::digits * std::log(2.0);
1129     const real highestRealThatProducesNormal    = (std::numeric_limits<real>::max_exponent - 1)*std::log(2.0);
1130
1131     // Increase the allowed error by the difference between the actual precision and single
1132     setUlpTolSingleAccuracy(ulpTol_);
1133
1134     CompareSettings settings {
1135         Range(lowestRealThatProducesCorrectExp, highestRealThatProducesNormal), ulpTol_, absTol_, MatchRule::Normal
1136     };
1137     GMX_EXPECT_SIMD_FUNC_NEAR(std::exp, expSingleAccuracy<MathOptimization::Unsafe>, settings);
1138 }
1139
1140 TEST_F(SimdMathTest, erfSingleAccuracy)
1141 {
1142     // Increase the allowed error by the difference between the actual precision and single
1143     setUlpTolSingleAccuracy(ulpTol_);
1144
1145     CompareSettings settings {
1146         Range(-9, 9), ulpTol_, GMX_REAL_MIN, MatchRule::Normal
1147     };
1148     GMX_EXPECT_SIMD_FUNC_NEAR(refErf, erfSingleAccuracy, settings);
1149 }
1150
1151 TEST_F(SimdMathTest, erfcSingleAccuracy)
1152 {
1153     // Increase the allowed error by the difference between the actual precision and single
1154     setUlpTolSingleAccuracy(ulpTol_);
1155
1156     // Our erfc algorithm has 4 ulp accuracy, so relax tolerance a bit
1157     CompareSettings settings {
1158         Range(-9, 9), 4*ulpTol_, GMX_REAL_MIN, MatchRule::Normal
1159     };
1160     GMX_EXPECT_SIMD_FUNC_NEAR(refErfc, erfcSingleAccuracy, settings);
1161 }
1162
1163
1164 TEST_F(SimdMathTest, sinSingleAccuracy)
1165 {
1166     /* Increase the allowed error by the difference between the actual precision and single */
1167     setUlpTolSingleAccuracy(ulpTol_);
1168
1169     CompareSettings settings {
1170         Range(-8*M_PI, 8*M_PI), ulpTol_, absTol_, MatchRule::Normal
1171     };
1172     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1173
1174     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1175     settings = { Range(-10000, 10000), 2*ulpTol_, absTol_, MatchRule::Normal};
1176     GMX_EXPECT_SIMD_FUNC_NEAR(std::sin, sinSingleAccuracy, settings);
1177 }
1178
1179 TEST_F(SimdMathTest, cosSingleAccuracy)
1180 {
1181     /* Increase the allowed error by the difference between the actual precision and single */
1182     setUlpTolSingleAccuracy(ulpTol_);
1183
1184     CompareSettings settings {
1185         Range(-8*M_PI, 8*M_PI), ulpTol_, absTol_, MatchRule::Normal
1186     };
1187     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1188
1189     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1190     settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal};
1191     GMX_EXPECT_SIMD_FUNC_NEAR(std::cos, cosSingleAccuracy, settings);
1192 }
1193
1194 TEST_F(SimdMathTest, tanSingleAccuracy)
1195 {
1196     /* Increase the allowed error by the difference between the actual precision and single */
1197     setUlpTolSingleAccuracy(ulpTol_);
1198
1199     // Tan(x) is a little sensitive due to the division in the algorithm.
1200     // Rather than using lots of extra FP operations, we accept the algorithm
1201     // presently only achieves a ~3 ulp error and use the medium tolerance.
1202     CompareSettings settings {
1203         Range(-8*M_PI, 8*M_PI), ulpTol_, absTol_, MatchRule::Normal
1204     };
1205     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1206
1207     // Range reduction leads to accuracy loss, so we might want higher tolerance here
1208     settings = { Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal};
1209     GMX_EXPECT_SIMD_FUNC_NEAR(std::tan, tanSingleAccuracy, settings);
1210 }
1211
1212 TEST_F(SimdMathTest, asinSingleAccuracy)
1213 {
1214     /* Increase the allowed error by the difference between the actual precision and single */
1215     setUlpTolSingleAccuracy(ulpTol_);
1216
1217     // Our present asin(x) algorithm achieves 2-3 ulp accuracy
1218     CompareSettings settings {
1219         Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal
1220     };
1221     GMX_EXPECT_SIMD_FUNC_NEAR(std::asin, asinSingleAccuracy, settings);
1222 }
1223
1224 TEST_F(SimdMathTest, acosSingleAccuracy)
1225 {
1226     /* Increase the allowed error by the difference between the actual precision and single */
1227     setUlpTolSingleAccuracy(ulpTol_);
1228
1229     // Our present acos(x) algorithm achieves 2-3 ulp accuracy
1230     CompareSettings settings {
1231         Range(-1, 1), ulpTol_, absTol_, MatchRule::Normal
1232     };
1233     GMX_EXPECT_SIMD_FUNC_NEAR(std::acos, acosSingleAccuracy, settings);
1234 }
1235
1236 TEST_F(SimdMathTest, atanSingleAccuracy)
1237 {
1238     /* Increase the allowed error by the difference between the actual precision and single */
1239     setUlpTolSingleAccuracy(ulpTol_);
1240
1241     // Our present atan(x) algorithm achieves 1 ulp accuracy
1242     CompareSettings settings {
1243         Range(-10000, 10000), ulpTol_, absTol_, MatchRule::Normal
1244     };
1245     GMX_EXPECT_SIMD_FUNC_NEAR(std::atan, atanSingleAccuracy, settings);
1246 }
1247
1248 TEST_F(SimdMathTest, atan2SingleAccuracy)
1249 {
1250     /* Increase the allowed error by the difference between the actual precision and single */
1251     setUlpTolSingleAccuracy(ulpTol_);
1252
1253     // test each quadrant
1254     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, c3), std::atan2(c1, c4), std::atan2(c2, c5)),
1255                               atan2SingleAccuracy(rSimd_c0c1c2, rSimd_c3c4c5));
1256     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(-c0, c3), std::atan2(-c1, c4), std::atan2(-c2, c5)),
1257                               atan2SingleAccuracy(rSimd_m0m1m2, rSimd_c3c4c5));
1258     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(-c0, -c3), std::atan2(-c1, -c0), std::atan2(-c2, -c4)),
1259                               atan2SingleAccuracy(rSimd_m0m1m2, rSimd_m3m0m4));
1260     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, -c3), std::atan2(c1, -c0), std::atan2(c2, -c4)),
1261                               atan2SingleAccuracy(rSimd_c0c1c2, rSimd_m3m0m4));
1262     // cases important for calculating angles
1263     // values on coordinate axes
1264     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, c0), std::atan2(0, c1), std::atan2(0, c2)),
1265                               atan2SingleAccuracy(setZero(), rSimd_c0c1c2));
1266     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(c0, 0), std::atan2(c1, 0), std::atan2(c2, 0)),
1267                               atan2SingleAccuracy(rSimd_c0c1c2, setZero()));
1268     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(0, -c0), std::atan2(0, -c1), std::atan2(0, -c2)),
1269                               atan2SingleAccuracy(setZero(), rSimd_m0m1m2));
1270     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom3R(std::atan2(-c0, 0), std::atan2(-c1, 0), std::atan2(-c2, 0)),
1271                               atan2SingleAccuracy(rSimd_m0m1m2, setZero()));
1272
1273     // degenerate value (origin) should return 0.0. At least IBM xlc 13.1.5 gets the reference
1274     // value wrong (-nan) at -O3 optimization, so we compare to the correct value (0.0) instead.
1275     GMX_EXPECT_SIMD_REAL_NEAR(setSimdRealFrom1R(0.0), atan2SingleAccuracy(setSimdRealFrom3R(0.0, 0.0, 0.0), setZero()));
1276 }
1277
1278 TEST_F(SimdMathTest, pmeForceCorrectionSingleAccuracy)
1279 {
1280     // The PME corrections will be added to ~1/r2, so absolute tolerance of EPS is fine.
1281     // Pme correction only needs to be ~1e-6 accuracy single.
1282     // Then increase the allowed error by the difference between the actual precision and single.
1283     setUlpTolSingleAccuracy(std::int64_t(5e-6/GMX_FLOAT_EPS));
1284
1285     CompareSettings settings {
1286         Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal
1287     };
1288     GMX_EXPECT_SIMD_FUNC_NEAR(refPmeForceCorrection, pmeForceCorrectionSingleAccuracy, settings);
1289 }
1290
1291 TEST_F(SimdMathTest, pmePotentialCorrectionSingleAccuracy)
1292 {
1293     // The PME corrections will be added to ~1/r, so absolute tolerance of EPS is fine.
1294     // Pme correction only needs to be ~1e-6 accuracy single.
1295     // Then increase the allowed error by the difference between the actual precision and single.
1296     setUlpTolSingleAccuracy(std::int64_t(5e-6/GMX_FLOAT_EPS));
1297
1298     CompareSettings settings {
1299         Range(0.15, 4), ulpTol_, GMX_FLOAT_EPS, MatchRule::Normal
1300     };
1301     GMX_EXPECT_SIMD_FUNC_NEAR(refPmePotentialCorrection, pmePotentialCorrectionSingleAccuracy, settings);
1302 }
1303
1304 }      // namespace
1305
1306 #endif // GMX_SIMD_HAVE_REAL
1307
1308 /*! \} */
1309 /*! \endcond */
1310
1311 }      // namespace test
1312 }      // namespace gmx
1313
1314 #endif // GMX_SIMD