src/gromacs/tables/quadraticsplinetable.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \libinternal
  37  * \defgroup module_tables  Classes for table interpolation
  38  * \ingroup group_utilitymodules
  39  *
  40  * \brief Table interpolation from analytical or numerical input
  41  *
  42  * This module provides quadratic spline interpolation tables used
  43  * both for the nonbonded kernels and listed interactions.
  44  *
  45  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  46  */
  47
  48
  49 /*! \libinternal \file
  50  * \brief
  51  * Declares classes for quadratic spline table
  52  *
  53  * \inlibraryapi
  54  * \author Erik Lindahl <erik.lindahl@gmail.com>
  55  * \ingroup module_tables
  56  */
  57 #ifndef GMX_TABLES_QUADRATICSPLINETABLE_H
  58 #define GMX_TABLES_QUADRATICSPLINETABLE_H
  59
  60 #include <functional>
  61 #include <initializer_list>
  62 #include <vector>
  63
  64 #include "gromacs/simd/simd.h"
  65 #include "gromacs/tables/tableinput.h"
  66 #include "gromacs/utility/alignedallocator.h"
  67 #include "gromacs/utility/classhelpers.h"
  68 #include "gromacs/utility/exceptions.h"
  69 #include "gromacs/utility/gmxassert.h"
  70 #include "gromacs/utility/real.h"
  71
  72 namespace gmx
  73 {
  74
  75 /*! \libinternal \brief Quadratic spline interpolation table.
  76  *
  77  * This class interpolates a function specified either as an analytical
  78  * expression or from user-provided table data.
  79  *
  80  * At initialization, you provide the reference function of vectors
  81  * as a list of tuples that contain a brief name, the function, and
  82  * derivative for each function to tabulate. To create a table with
  83  * two functions this initializer list can for instance look like
  84  *
  85  *     { {"LJ6", lj6Func, lj6Der}, {"LJ12", lj12Func, lj12Der} }
  86  *
  87  * The names are only used so exceptions during initialization can
  88  * be traced to a specific table.
  89  *
  90  * When interpolating, there are methods to interpolate either 1, 2, or 3
  91  * functions in one go. By default these interpolation routines will
  92  * operate on tables with the same number of functions as specified in
  93  * the interpolation method (debug builds check that this is consistent with
  94  * the table). However, it is also possible to use optional template
  95  * parameters that specify the total number of functions in a table, and
  96  * what function index to interpolate. For instance, to interpolate the
  97  * derivative of the second function (i.e., index 1) in a
  98  * multi-function-table with three functions in total, you can write
  99  *
 100  *     table.evaluateDerivative<3,1>(x,&der);
 101  *
 102  * Here too, debug builds will check that the template parameters are
 103  * consistent with the table.
 104  *
 105  * The table data is internally adjusted to guarantee that the interpolated
 106  * derivative is the true derivative of the interpolated potential, which is
 107  * important to avoid systematic errors for the common case when the derivative
 108  * is concave/convex in the entire interval.
 109  * We do this by expressing the difference in the function value
 110  * at a small offset h relative to a reference value in position 0 with a forward
 111  * Taylor series expanded around 0, and then doing the opposite of expressing
 112  * difference in the function at position 0 relative to a reference value in
 113  * position h when using a backward Taylor expansion:
 114  *
 115  * \f{eqnarray*}{
 116  *  \Delta V & = & hV'(0) + \frac{1}{2} h^2 V''(0) + \frac{1}{6} h^3 V'''(0) + O(h^4) \\
 117  *  \Delta V & = & hV'(h) - \frac{1}{2} h^2 V''(h) + \frac{1}{6} h^3 V'''(h) + O(h^4)
 118  * \f}
 119  *
 120  * Summing the equations leads to
 121  *
 122  * \f[
 123  *  2 \Delta V = h(V'(0) + V'(h)) + \frac{1}{2} h^2 (V''(0)-V''(h)) + \frac{1}{6}h^3(V'''(0)+V'''(h)) + O(h^4)
 124  * \f]
 125  *
 126  * To make the second term symmetric too, we can replace it with the average of
 127  * the Taylor expansion at 0 and h (i.e., using the third derivative). This gives
 128  *
 129  * \f[
 130  *  2 \Delta V = h(V'(0) + V'(h)) - \frac{1}{12} h^3 (V'''(0)+V'''(h)) + O(h^4)
 131  * \f]
 132  *
 133  * Thus, if we replace the derivative in the internal quadratic table data with
 134  *
 135  * \f[
 136  *  V' - \frac{1}{12}h^2 V'''
 137  * \f]
 138  *
 139  * we will cancel the h^3 term in the error. This will make the integral of the
 140  * forces match the potential much better (The h^4 term actually disappears, so
 141  * when summing over 1/h points the remaining error will be O(h^4).
 142  *
 143  * While it is possible to create tables only from function values
 144  * (i.e., no derivatives), it is recommended to provide derivatives for higher
 145  * accuracy and to avoid issues with numerical differentiation. Note that the
 146  * table input should be smooth, i.e. it should not contain noise e.g. from an
 147  * (iterative) Boltzmann inversion procedure - you have been warned.
 148  *
 149  * \note This class is responsible for fundamental interpolation of any function,
 150  *       which might or might not correspond to a potential. For this reason
 151  *       both input and output derivatives are proper function derivatives, and
 152  *       we do not swap any signs to get forces directly from the table.
 153  *
 154  * \note There will be a small additional accuracy loss from the internal
 155  *       operation where we calculate the epsilon offset from the nearest table
 156  *       point, since the integer part we subtract can get large in those cases.
 157  *       The absolute such error both in the function and derivative value will
 158  *       be roughly f''*x*GMX_REAL_EPS, where x is the argument and f'' the
 159  *       second derivative.
 160  *       While this is technically possible to solve with extended precision
 161  *       arithmetics, that would introduce extra instructions in some highly
 162  *       performance-sensitive code parts. For typical GROMACS interaction
 163  *       functions the derivatives will decay faster than the potential, which
 164  *       means it will never play any role. For other functions it will only
 165  *       cause a small increase in the relative error for arguments where the
 166  *       magnitude of the function or derivative is very small.
 167  *       Since we typically sum several results in GROMACS, this should never
 168  *       show up in any real cases, and for this reason we choose not to do
 169  *       the extended precision arithmetics.
 170  *
 171  * \note These routines are not suitable for table ranges starting far away
 172  *       from zero, since we allocate memory and calculate indices starting from
 173  *       range zero for efficiency reasons.
 174  */
 175 class QuadraticSplineTable
 176 {
 177     private:
 178         /*! \brief Change that function value falls inside range when debugging
 179          *
 180          *  \tparam T   Lookup argument floating-point type, typically SimdReal or real.
 181          *  \param  r   Lookup argument to test
 182          *
 183          *  \throws Debug builds will throw gmx::RangeError for values that are
 184          *          larger than the upper limit of the range, or smaller than 0.
 185          *          We allow the table to be called with arguments between 0 and
 186          *          the lower limit of the range, since this might in theory occur
 187          *          once-in-a-blue-moon with some algorithms.
 188          */
 189         template <typename T>
 190         void
 191         rangeCheck(T gmx_unused r) const
 192         {
 193 #ifndef NDEBUG
 194             // Check that all values fall in range when debugging
 195             if (anyTrue( r < T(0.0) || T(range_.second) <= r ) )
 196             {
 197                 GMX_THROW(RangeError("Interpolation input value falls outside table definition range"));
 198             }
 199 #endif
 200         }
 201
 202     public:
 203
 204         /*! \brief Default tolerance for tables is 10*GMX_FLOAT_EPS
 205          *
 206          *  \note Even for double precision builds we set the tolerance to
 207          *        one order of magnitude above the single precision epsilon.
 208          */
 209         static const real defaultTolerance;
 210
 211         /*! \brief Initialize table data from function
 212          *
 213          * \param analyticalInputList Initializer list with one or more functions to tabulate,
 214          *                            specified as pairs containing analytical
 215          *                            functions and their derivatives. The function will also
 216          *                            be called for values smaller than the lower limit of the
 217          *                            range, but we avoid calling it for 0.0 if that value
 218          *                            is not included in the range.
 219          * \param range               Range over which the function will be tabulated.
 220          *                            Constructor will throw gmx::APIError for negative values.
 221          *                            Due to the way the numerical derivative evaluation depends
 222          *                            on machine precision internally, this range must be
 223          *                            at least 0.001, or the constructor throws gmx::APIError.
 224          * \param tolerance           Requested accuracy of the table. This will be used to
 225          *                            calculate the required internal spacing. If this cannot
 226          *                            be achieved (for instance because the table would require
 227          *                            too much memory) the constructor will throw gmx::ToleranceError.
 228          *
 229          * \note The functions are always defined in double precision to avoid
 230          *       losing accuracy when constructing tables.
 231          *
 232          * \note Since we fill the table for values below range.first, you can achieve
 233          *       a smaller table by using a smaller range where the tolerance has to be
 234          *       met, and accept that a few function calls below range.first do not
 235          *       quite reach the tolerance.
 236          *
 237          * \warning For efficiency reasons (since this code is used in some inner
 238          *       (kernels), we always allocate memory and calculate table indices
 239          *       for the complete interval [0,range.second], although the data will
 240          *       not be valid outside the definition range to avoid calling the
 241          *       function there. This means you should \a not use this class
 242          *       to tabulate functions for small ranges very far away from zero,
 243          *       since you would both waste a huge amount of memory and incur
 244          *       truncation errors when calculating the index.
 245          *
 246          * \throws gmx::ToleranceError if the requested tolerance cannot be achieved,
 247          *         and gmx::APIError for other incorrect input.
 248          */
 249         QuadraticSplineTable(std::initializer_list<AnalyticalSplineTableInput>   analyticalInputList,
 250                              const std::pair<real, real>                        &range,
 251                              real                                                tolerance = defaultTolerance);
 252
 253         /*! \brief Initialize table data from tabulated values and derivatives
 254          *
 255          * \param numericalInputList  Initializer list with one or more functions to tabulate,
 256          *                            specified as pairs containing containing vectors for the
 257          *                            function values and their derivatives. Data points are
 258          *                            separated by the spacing parameter, starting from 0.
 259          *                            Values below the lower limit of the range will be used to
 260          *                            attempt defining the table, but we avoid using index 0
 261          *                            unless 0.0 is included in the range. Some extra points beyond
 262          *                            range.second are required to re-interpolate values, so add
 263          *                            some margin. The constructor will throw gmx::APIError if the
 264          *                            input vectors are too short to cover the requested range
 265          *                            (and they must always be at least five points).
 266          * \param range               Range over which the function will be tabulated.
 267          *                            Constructor will throw gmx::APIError for negative values,
 268          *                            or if the value/derivative vector does not cover the
 269          *                            range.
 270          * \param tolerance           Requested accuracy of the table in the range. This will be
 271          *                            used to calculate the required internal spacing and possibly
 272          *                            re-interpolate. The constructor will throw
 273          *                            gmx::ToleranceError if the input spacing is too coarse
 274          *                            to achieve this accuracy.
 275          *
 276          * \note The input data vectors are always double precision to avoid
 277          *       losing accuracy when constructing tables.
 278          *
 279          * \note Since we fill the table for values below range.first, you can achieve
 280          *       a smaller table by using a smaller range where the tolerance has to be
 281          *       met, and accept that a few function calls below range.first do not
 282          *       quite reach the tolerance.
 283          *
 284          * \warning For efficiency reasons (since this code is used in some inner
 285          *       (kernels), we always allocate memory and calculate table indices
 286          *       for the complete interval [0,range.second], although the data will
 287          *       not be valid outside the definition range to avoid calling the
 288          *       function there. This means you should \a not use this class
 289          *       to tabulate functions for small ranges very far away from zero,
 290          *       since you would both waste a huge amount of memory and incur
 291          *       truncation errors when calculating the index.
 292          */
 293         QuadraticSplineTable(std::initializer_list<NumericalSplineTableInput>    numericalInputList,
 294                              const std::pair<real, real>                        &range,
 295                              real                                                tolerance = defaultTolerance);
 296
 297
 298         /************************************************************
 299          *           Evaluation methods for single functions        *
 300          ************************************************************/
 301
 302         /*! \brief Evaluate both function and derivative, single table function
 303          *
 304          *  This is a templated method where the template can be either real or SimdReal.
 305          *
 306          *  \tparam     numFuncInTable  Number of separate functions in table, default is 1
 307          *  \tparam     funcIndex       Index of function to evaluate in table, default is 0
 308          *  \tparam     T               Type (SimdReal or real) of lookup and result
 309          *  \param      r               Points for which to evaluate function and derivative
 310          *  \param[out] functionValue   Function value
 311          *  \param[out] derivativeValue Function derivative
 312          *
 313          *  For debug builds we assert that the input values fall in the range
 314          *  specified when constructing the table.
 315          */
 316         template <int numFuncInTable = 1, int funcIndex = 0, typename T>
 317         void
 318         evaluateFunctionAndDerivative(T     r,
 319                                       T *   functionValue,
 320                                       T *   derivativeValue) const
 321         {
 322             rangeCheck(r);
 323             GMX_ASSERT(numFuncInTable == numFuncInTable_, "Evaluation method not matching number of functions in table");
 324             GMX_ASSERT(funcIndex < numFuncInTable, "Function index not in range of the number of tables");
 325
 326             T     rTable   = r * T(tableScale_);
 327             auto  tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 328             T     eps      = rTable - trunc(rTable);
 329             T     t0;
 330             T     t1;
 331             T     t2;
 332             T     t3 gmx_unused;
 333
 334             // Load Derivative, Delta, Function, and Zero values for each table point.
 335             // The 4 refers to these four values - not any SIMD width.
 336             gatherLoadBySimdIntTranspose<4*numFuncInTable>(ddfzMultiTableData_.data() + 4 * funcIndex, tabIndex, &t0, &t1, &t2, &t3);
 337
 338             t1               = t0 + eps * t1;
 339             *functionValue   = fma( eps * T(halfSpacing_),  t0 + t1, t2);
 340             *derivativeValue = t1;
 341         }
 342
 343         /*! \brief Evaluate function value only, single table function
 344          *
 345          *  This is a templated method where the template can be either real or SimdReal.
 346          *
 347          *  \tparam     numFuncInTable  Number of separate functions in table, default is 1
 348          *  \tparam     funcIndex       Index of function to evaluate in table, default is 0
 349          *  \tparam     T               Type (SimdReal or real) of lookup and result
 350          *  \param      r               Points for which to evaluate function value
 351          *  \param[out] functionValue   Function value
 352          *
 353          *  For debug builds we assert that the input values fall in the range
 354          *  specified when constructing the table.
 355          */
 356         template <int numFuncInTable = 1, int funcIndex = 0, typename T>
 357         void
 358         evaluateFunction(T     r,
 359                          T *   functionValue) const
 360         {
 361             T     der gmx_unused;
 362
 363             evaluateFunctionAndDerivative<numFuncInTable, funcIndex>(r, functionValue, &der);
 364         }
 365
 366         /*! \brief Evaluate function derivative only, single table function
 367          *
 368          *  This is a templated method where the template can be either real or SimdReal.
 369          *
 370          *  \tparam     numFuncInTable  Number of separate functions in table, default is 1
 371          *  \tparam     funcIndex       Index of function to evaluate in table, default is 0
 372          *  \tparam     T               Type (SimdReal or real) of lookup and result
 373          *  \param      r               Points for which to evaluate function derivative
 374          *  \param[out] derivativeValue Function derivative
 375          *
 376          *  For debug builds we assert that the input values fall in the range
 377          *  specified when constructing the table.
 378          */
 379         template <int numFuncInTable = 1, int funcIndex = 0, typename T>
 380         void
 381         evaluateDerivative(T     r,
 382                            T *   derivativeValue) const
 383         {
 384             rangeCheck(r);
 385             GMX_ASSERT(numFuncInTable == numFuncInTable_, "Evaluation method not matching number of functions in table");
 386             GMX_ASSERT(funcIndex < numFuncInTable, "Function index not in range of the number of tables");
 387
 388             T     rTable   = r * T(tableScale_);
 389             auto  tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 390             T     eps      = rTable - trunc(rTable);
 391             T     t0;
 392             T     t1;
 393             T     t2 gmx_unused;
 394
 395             if (numFuncInTable == 1)
 396             {
 397                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex, tabIndex, &t0, &t1); // works for scalar T too
 398             }
 399             else
 400             {
 401                 // This is not ideal, but we need a version of gatherLoadUBySimdIntTranspose that
 402                 // only loads a single value from memory to implement it better (will be written)
 403                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex, tabIndex, &t0, &t2);        // works for scalar T too
 404                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex, tabIndex + T(1), &t1, &t2); // works for scalar T too
 405             }
 406
 407             // (1-eps)*t0 + eps*t1
 408             *derivativeValue = fma(t1-t0, eps, t0);
 409         }
 410
 411         /************************************************************
 412          *             Evaluation methods for two functions         *
 413          ************************************************************/
 414
 415         /*! \brief Evaluate both function and derivative, two table functions
 416          *
 417          *  This is a templated method where the template can be either real or SimdReal.
 418          *
 419          *  \tparam     numFuncInTable  Number of separate functions in table, default is 2
 420          *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 421          *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 422          *  \tparam     T                Type (SimdReal or real) of lookup and result
 423          *  \param      r                Points for which to evaluate function and derivative
 424          *  \param[out] functionValue1   Interpolated value for first function
 425          *  \param[out] derivativeValue1 Interpolated derivative for first function
 426          *  \param[out] functionValue2   Interpolated value for second function
 427          *  \param[out] derivativeValue2 Interpolated derivative for second function
 428          *
 429          *  For debug builds we assert that the input values fall in the range
 430          *  specified when constructing the table.
 431          */
 432         template <int numFuncInTable = 2, int funcIndex0 = 0, int funcIndex1 = 1, typename T>
 433         void
 434         evaluateFunctionAndDerivative(T     r,
 435                                       T *   functionValue1,
 436                                       T *   derivativeValue1,
 437                                       T *   functionValue2,
 438                                       T *   derivativeValue2) const
 439         {
 440             rangeCheck(r);
 441             GMX_ASSERT(numFuncInTable == numFuncInTable_, "Evaluation method not matching number of functions in table");
 442             GMX_ASSERT(funcIndex0 < numFuncInTable && funcIndex1 < numFuncInTable, "Function index not in range of the number of tables");
 443
 444             T     rTable   = r * T(tableScale_);
 445             auto  tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 446             T     eps      = rTable - trunc(rTable);
 447             T     t0;
 448             T     t1;
 449             T     t2;
 450             T     t3 gmx_unused;
 451
 452             // Load Derivative, Delta, Function, and Zero values for each table point.
 453             // The 4 refers to these four values - not any SIMD width.
 454             gatherLoadBySimdIntTranspose<4*numFuncInTable>(ddfzMultiTableData_.data() + 4*funcIndex0, tabIndex, &t0, &t1, &t2, &t3);
 455             t1                = t0 + eps * t1;
 456             *functionValue1   = fma( eps * T(halfSpacing_),  t0 + t1, t2);
 457             *derivativeValue1 = t1;
 458
 459             gatherLoadBySimdIntTranspose<4*numFuncInTable>(ddfzMultiTableData_.data() + 4*funcIndex1, tabIndex, &t0, &t1, &t2, &t3);
 460             t1                = t0 + eps * t1;
 461             *functionValue2   = fma( eps * T(halfSpacing_),  t0 + t1, t2);
 462             *derivativeValue2 = t1;
 463         }
 464
 465         /*! \brief Evaluate function value only, two table functions
 466          *
 467          *  This is a templated method where the template can be either real or SimdReal.
 468          *
 469          *  \tparam     numFuncInTable   Number of separate functions in table, default is 2
 470          *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 471          *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 472          *  \tparam     T                Type (SimdReal or real) of lookup and result
 473          *  \param      r                Points for which to evaluate function value
 474          *  \param[out] functionValue1   Interpolated value for first function
 475          *  \param[out] functionValue2   Interpolated value for second function
 476          *
 477          *  For debug builds we assert that the input values fall in the range
 478          *  specified when constructing the table.
 479          */
 480         template <int numFuncInTable = 2, int funcIndex0 = 0, int funcIndex1 = 1, typename T>
 481         void
 482         evaluateFunction(T     r,
 483                          T *   functionValue1,
 484                          T *   functionValue2) const
 485         {
 486             T     der1 gmx_unused;
 487             T     der2 gmx_unused;
 488
 489             evaluateFunctionAndDerivative<numFuncInTable, funcIndex0, funcIndex1>(r, functionValue1, &der1, functionValue2, &der2);
 490         }
 491
 492         /*! \brief Evaluate function derivative only, two table functions
 493          *
 494          *  This is a templated method where the template can be either real or SimdReal.
 495          *
 496          *  \tparam     numFuncInTable   Number of separate functions in table, default is 2
 497          *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 498          *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 499          *  \tparam     T                Type (SimdReal or real) of lookup and result
 500          *  \param      r                Points for which to evaluate function derivative
 501          *  \param[out] derivativeValue1 Interpolated derivative for first function
 502          *  \param[out] derivativeValue2 Interpolated derivative for second function
 503          *
 504          *  For debug builds we assert that the input values fall in the range
 505          *  specified when constructing the table.
 506          */
 507         template <int numFuncInTable = 2, int funcIndex0 = 0, int funcIndex1 = 1, typename T>
 508         void
 509         evaluateDerivative(T     r,
 510                            T *   derivativeValue1,
 511                            T *   derivativeValue2) const
 512         {
 513             rangeCheck(r);
 514             GMX_ASSERT(numFuncInTable == numFuncInTable_, "Evaluation method not matching number of functions in table");
 515             GMX_ASSERT(funcIndex0 < numFuncInTable && funcIndex1 < numFuncInTable, "Function index not in range of the number of tables");
 516
 517             T     rTable   = r * T(tableScale_);
 518             auto  tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 519             T     eps      = rTable - trunc(rTable);
 520
 521             if (numFuncInTable == 2 && funcIndex0 == 0 && funcIndex1 == 1)
 522             {
 523                 T t0A, t0B, t1A, t1B;
 524                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data(), tabIndex, &t0A, &t0B);     // works for scalar T too
 525                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + 2, tabIndex, &t1A, &t1B); // works for scalar T too
 526                 *derivativeValue1 = fma(t1A-t0A, eps, t0A);
 527                 *derivativeValue2 = fma(t1B-t0B, eps, t0B);
 528             }
 529             else
 530             {
 531                 T t0, t1, t2;
 532                 // This is not ideal, but we need a version of gatherLoadUBySimdIntTranspose that
 533                 // only loads a single value from memory to implement it better (will be written)
 534                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex0, tabIndex, &t0, &t2);        // works for scalar T too
 535                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex0, tabIndex + T(1), &t1, &t2); // works for scalar T too
 536                 *derivativeValue1 = fma(t1-t0, eps, t0);
 537
 538                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex1, tabIndex, &t0, &t2);        // works for scalar T too
 539                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex1, tabIndex + T(1), &t1, &t2); // works for scalar T too
 540                 *derivativeValue2 = fma(t1-t0, eps, t0);
 541             }
 542         }
 543
 544         /************************************************************
 545          *            Evaluation methods for three functions        *
 546          ************************************************************/
 547
 548
 549         /*! \brief Evaluate both function and derivative, three table functions
 550          *
 551          *  This is a templated method where the template can be either real or SimdReal.
 552          *
 553          *  \tparam     numFuncInTable   Number of separate functions in table, default is 3
 554          *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 555          *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 556          *  \tparam     funcIndex2       Index of 3rd function to evaluate in table, default is 2
 557          *  \tparam     T                Type (SimdReal or real) of lookup and result
 558          *  \param      r                Points for which to evaluate function and derivative
 559          *  \param[out] functionValue1   Interpolated value for first function
 560          *  \param[out] derivativeValue1 Interpolated derivative for first function
 561          *  \param[out] functionValue2   Interpolated value for second function
 562          *  \param[out] derivativeValue2 Interpolated derivative for second function
 563          *  \param[out] functionValue3   Interpolated value for third function
 564          *  \param[out] derivativeValue3 Interpolated derivative for third function
 565          *
 566          *  For debug builds we assert that the input values fall in the range
 567          *  specified when constructing the table.
 568          */
 569         template <int numFuncInTable = 3, int funcIndex0 = 0, int funcIndex1 = 1, int funcIndex2 = 2, typename T>
 570         void
 571         evaluateFunctionAndDerivative(T     r,
 572                                       T *   functionValue1,
 573                                       T *   derivativeValue1,
 574                                       T *   functionValue2,
 575                                       T *   derivativeValue2,
 576                                       T *   functionValue3,
 577                                       T *   derivativeValue3) const
 578         {
 579             rangeCheck(r);
 580             GMX_ASSERT(numFuncInTable == numFuncInTable, "Evaluation method not matching number of functions in table");
 581             GMX_ASSERT(funcIndex0 < numFuncInTable  && funcIndex1 < numFuncInTable  && funcIndex2 < numFuncInTable,
 582                        "Function index not in range of the number of tables");
 583
 584             T     rTable   = r * T(tableScale_);
 585             auto  tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 586             T     eps      = rTable - trunc(rTable);
 587             T     t0;
 588             T     t1;
 589             T     t2;
 590             T     t3 gmx_unused;
 591
 592             gatherLoadBySimdIntTranspose<4*numFuncInTable>(ddfzMultiTableData_.data() + 4*funcIndex0, tabIndex, &t0, &t1, &t2, &t3);
 593             t1                = t0 + eps * t1;
 594             *functionValue1   = fma( eps * T(halfSpacing_),  t0 + t1, t2);
 595             *derivativeValue1 = t1;
 596
 597             gatherLoadBySimdIntTranspose<4*numFuncInTable>(ddfzMultiTableData_.data() + 4*funcIndex1, tabIndex, &t0, &t1, &t2, &t3);
 598             t1                = t0 + eps * t1;
 599             *functionValue2   = fma( eps * T(halfSpacing_),  t0 + t1, t2);
 600             *derivativeValue2 = t1;
 601
 602             gatherLoadBySimdIntTranspose<4*numFuncInTable>(ddfzMultiTableData_.data() + 4*funcIndex2, tabIndex, &t0, &t1, &t2, &t3);
 603             t1                = t0 + eps * t1;
 604             *functionValue3   = fma( eps * T(halfSpacing_),  t0 + t1, t2);
 605             *derivativeValue3 = t1;
 606         }
 607
 608         /*! \brief Evaluate function value only, three table functions
 609          *
 610          *  This is a templated method where the template can be either real or SimdReal.
 611          *
 612          *  \tparam     numFuncInTable   Number of separate functions in table, default is 3
 613          *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 614          *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 615          *  \tparam     funcIndex2       Index of 3rd function to evaluate in table, default is 2
 616          *  \tparam     T                Type (SimdReal or real) of lookup and result
 617          *  \param      r                Points for which to evaluate function value
 618          *  \param[out] functionValue1   Interpolated value for first function
 619          *  \param[out] functionValue2   Interpolated value for second function
 620          *  \param[out] functionValue3   Interpolated value for third function
 621          *
 622          *  For debug builds we assert that the input values fall in the range
 623          *  specified when constructing the table.
 624          */
 625         template <int numFuncInTable = 3, int funcIndex0 = 0, int funcIndex1 = 1, int funcIndex2 = 2, typename T>
 626         void
 627         evaluateFunction(T     r,
 628                          T *   functionValue1,
 629                          T *   functionValue2,
 630                          T *   functionValue3) const
 631         {
 632             T     der1 gmx_unused;
 633             T     der2 gmx_unused;
 634             T     der3 gmx_unused;
 635
 636             evaluateFunctionAndDerivative<numFuncInTable, funcIndex0, funcIndex1, funcIndex2>(r, functionValue1, &der1, functionValue2, &der2, functionValue3, &der3);
 637         }
 638
 639         /*! \brief Evaluate function derivative only, three table functions
 640          *
 641          *  This is a templated method where the template can be either real or SimdReal.
 642          *
 643          *  \tparam     numFuncInTable   Number of separate functions in table, default is 3
 644          *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 645          *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 646          *  \tparam     funcIndex2       Index of 3rd function to evaluate in table, default is 2
 647          *  \tparam     T                Type (SimdReal or real) of lookup and result
 648          *  \param      r                Points for which to evaluate function derivative
 649          *  \param[out] derivativeValue1 Interpolated derivative for first function
 650          *  \param[out] derivativeValue2 Interpolated derivative for second function
 651          *  \param[out] derivativeValue3 Interpolated derivative for third function
 652          *
 653          *  For debug builds we assert that the input values fall in the range
 654          *  specified when constructing the table.
 655          */
 656         template <int numFuncInTable = 3, int funcIndex0 = 0, int funcIndex1 = 1, int funcIndex2 = 2, typename T>
 657         void
 658         evaluateDerivative(T     r,
 659                            T *   derivativeValue1,
 660                            T *   derivativeValue2,
 661                            T *   derivativeValue3) const
 662         {
 663             rangeCheck(r);
 664             GMX_ASSERT(numFuncInTable == numFuncInTable_, "Evaluation method not matching number of functions in table");
 665             GMX_ASSERT(funcIndex0 < numFuncInTable  && funcIndex1 < numFuncInTable  && funcIndex2 < numFuncInTable,
 666                        "Function index not in range of the number of tables");
 667
 668             T     rTable   = r * T(tableScale_);
 669             auto  tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 670             T     eps      = rTable - trunc(rTable);
 671
 672             if (numFuncInTable == 3 && funcIndex0 == 0 && funcIndex1 == 1 && funcIndex2 == 2)
 673             {
 674                 T t0A, t0B, t0C, t1A, t1B, t1C;
 675                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data(), tabIndex, &t0A, &t0B);
 676                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + 2, tabIndex, &t0C, &t1A);
 677                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + 4, tabIndex, &t1B, &t1C);
 678                 *derivativeValue1 = fma(t1A-t0A, eps, t0A);
 679                 *derivativeValue2 = fma(t1B-t0B, eps, t0B);
 680                 *derivativeValue3 = fma(t1C-t0C, eps, t0C);
 681             }
 682             else
 683             {
 684                 T t0, t1, t2;
 685                 // This is not ideal, but we need a version of gatherLoadUBySimdIntTranspose that
 686                 // only loads a single value from memory to implement it better (will be written)
 687                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex0, tabIndex, &t0, &t2);        // works for scalar T too
 688                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex0, tabIndex + T(1), &t1, &t2); // works for scalar T too
 689                 *derivativeValue1 = fma(t1-t0, eps, t0);
 690
 691                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex1, tabIndex, &t0, &t2);        // works for scalar T too
 692                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex1, tabIndex + T(1), &t1, &t2); // works for scalar T too
 693                 *derivativeValue2 = fma(t1-t0, eps, t0);
 694
 695                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex2, tabIndex, &t0, &t2);        // works for scalar T too
 696                 gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex2, tabIndex + T(1), &t1, &t2); // works for scalar T too
 697                 *derivativeValue3 = fma(t1-t0, eps, t0);
 698             }
 699         }
 700
 701         /*! \brief Return the table spacing (distance between points)
 702          *
 703          *  You should never have to use this for normal code, but due to the
 704          *  way tables are constructed internally we need this in the unit tests
 705          *  to check relative tolerances over each interval.
 706          *
 707          *  \return table spacing.
 708          */
 709         real
 710         tableSpacing() const { return 1.0 / tableScale_; }
 711
 712     private:
 713
 714         std::size_t             numFuncInTable_; //!< Number of separate tabluated functions
 715         std::pair<real, real>   range_;          //!< Range for which table evaluation is allowed
 716         real                    tableScale_;     //!< Table scale (inverse of spacing between points)
 717         real                    halfSpacing_;    //!< 0.5*spacing (used for DDFZ table data)
 718
 719         //!< Derivative values only, with the third-derivative subtraction described in the class documentation.
 720         std::vector<real>      derivativeMultiTableData_;
 721
 722         /*! \brief Combined derivative, difference to next derivative, value, and zero.
 723          *
 724          *  For table point i, this vector contains the four values:
 725          *  - derivative[i]
 726          *  - (derivative[i+1]-derivative[i])
 727          *  - value[i]
 728          *  - 0.0
 729          *
 730          *  For the derivative terms we have subtracted the third-derivative term described
 731          *  in the main class documentation.
 732          *
 733          *  This is typically more efficient than the individual tables, in particular
 734          *  when using SIMD. The result should be identical outside the class, so this
 735          *  is merely an internal implementation optimization. However, to allow
 736          *  aligned SIMD loads we need to use an aligned allocator for this container.
 737          *  We occasionally abbreviate this data as DDFZ.
 738          */
 739         std::vector<real, AlignedAllocator<real> >  ddfzMultiTableData_;
 740
 741         // There should never be any reason to copy the table since it is read-only
 742         GMX_DISALLOW_COPY_AND_ASSIGN(QuadraticSplineTable);
 743 };
 744
 745
 746 }      // namespace gmx
 747
 748 #endif // GMX_TABLES_QUADRATICSPLINETABLE_H