src/gromacs/simd/impl_reference/impl_reference_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
  37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
  38
  39 /*! \libinternal \file
  40  *
  41  * \brief Reference implementation, SIMD double precision.
  42  *
  43  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  44  *
  45  * \ingroup module_simd
  46  */
  47
#include "config.h"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>

#include <algorithm>
#include <array>

#include "gromacs/utility/fatalerror.h"

#include "impl_reference_definitions.h"
#include "impl_reference_simd_float.h"
  62
  63 namespace gmx
  64 {
  65
  66 /*! \cond libapi */
  67 /*! \addtogroup module_simd */
  68 /*! \{ */
  69
  70 /* \name SIMD implementation data types
  71  * \{
  72  */
  73
  74 /*! \libinternal \brief Double SIMD variable. Available if GMX_SIMD_HAVE_DOUBLE is 1.
  75  *
  76  * \note This variable cannot be placed inside other structures or classes, since
  77  *       some compilers (including at least clang-3.7) appear to lose the
  78  *       alignment. This is likely particularly severe when allocating such
  79  *       memory on the heap, but it occurs for stack structures too.
  80  */
  81 class SimdDouble
  82 {
  83     public:
  84         SimdDouble() {}
  85
  86         //! \brief Construct from scalar
  87         SimdDouble(double d) { simdInternal_.fill(d); }
  88
  89         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
  90          *
  91          * This has to be public to enable usage in combination with static inline
  92          * functions, but it should never, EVER, be accessed by any code outside
  93          * the corresponding implementation directory since the type will depend
  94          * on the architecture.
  95          */
  96         std::array<double, GMX_SIMD_DOUBLE_WIDTH>  simdInternal_;
  97 };
  98
  99 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from double.
 100  *
 101  * Available if GMX_SIMD_HAVE_DOUBLE is 1.
 102  *
 103  * \note The integer SIMD type will always be available, but on architectures
 104  * that do not have any real integer SIMD support it might be defined as the
 105  * floating-point type. This will work fine, since there are separate defines
 106  * for whether the implementation can actually do any operations on integer
 107  * SIMD types.
 108  *
 109  * \note This variable cannot be placed inside other structures or classes, since
 110  *       some compilers (including at least clang-3.7) appear to lose the
 111  *       alignment. This is likely particularly severe when allocating such
 112  *       memory on the heap, but it occurs for stack structures too.
 113  */
 114 class SimdDInt32
 115 {
 116     public:
 117         SimdDInt32() {}
 118
 119         //! \brief Construct from scalar
 120         SimdDInt32(std::int32_t i) { simdInternal_.fill(i); }
 121
 122         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 123          *
 124          * This has to be public to enable usage in combination with static inline
 125          * functions, but it should never, EVER, be accessed by any code outside
 126          * the corresponding implementation directory since the type will depend
 127          * on the architecture.
 128          */
 129         std::array<std::int32_t, GMX_SIMD_DINT32_WIDTH>  simdInternal_;
 130 };
 131
 132 /*! \libinternal \brief Boolean type for double SIMD data.
 133  *
 134  *  Available if GMX_SIMD_HAVE_DOUBLE is 1.
 135  *
 136  * \note This variable cannot be placed inside other structures or classes, since
 137  *       some compilers (including at least clang-3.7) appear to lose the
 138  *       alignment. This is likely particularly severe when allocating such
 139  *       memory on the heap, but it occurs for stack structures too.
 140  */
 141 class SimdDBool
 142 {
 143     public:
 144         SimdDBool() {}
 145
 146         //! \brief Construct from scalar bool
 147         SimdDBool(bool b) { simdInternal_.fill(b); }
 148
 149         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 150          *
 151          * This has to be public to enable usage in combination with static inline
 152          * functions, but it should never, EVER, be accessed by any code outside
 153          * the corresponding implementation directory since the type will depend
 154          * on the architecture.
 155          */
 156         std::array<bool, GMX_SIMD_DOUBLE_WIDTH>  simdInternal_;
 157 };
 158
 159 /*! \libinternal \brief Boolean type for integer datatypes corresponding to double SIMD.
 160  *
 161  * Available if GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
 162  *
 163  * \note This variable cannot be placed inside other structures or classes, since
 164  *       some compilers (including at least clang-3.7) appear to lose the
 165  *       alignment. This is likely particularly severe when allocating such
 166  *       memory on the heap, but it occurs for stack structures too.
 167  */
 168 class SimdDIBool
 169 {
 170     public:
 171         SimdDIBool() {}
 172
 173         //! \brief Construct from scalar
 174         SimdDIBool(bool b) { simdInternal_.fill(b); }
 175
 176         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 177          *
 178          * This has to be public to enable usage in combination with static inline
 179          * functions, but it should never, EVER, be accessed by any code outside
 180          * the corresponding implementation directory since the type will depend
 181          * on the architecture.
 182          */
 183         std::array<bool, GMX_SIMD_DINT32_WIDTH>  simdInternal_;
 184 };
 185
 186 /*! \}
 187  *
 188  * \name SIMD implementation load/store operations for double precision floating point
 189  * \{
 190  */
 191
 192 /*! \brief Load \ref GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.
 193  *
 194  * \param m Pointer to memory aligned to the SIMD width.
 195  * \return SIMD variable with data loaded.
 196  */
 197 static inline SimdDouble gmx_simdcall
 198 simdLoad(const double *m)
 199 {
 200     SimdDouble a;
 201
 202     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
 203
 204     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 205     return a;
 206 }
 207
 208 /*! \brief Store the contents of SIMD double variable to aligned memory m.
 209  *
 210  * \param[out] m Pointer to memory, aligned to SIMD width.
 211  * \param a SIMD variable to store
 212  */
 213 static inline void gmx_simdcall
 214 store(double *m, SimdDouble a)
 215 {
 216     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
 217
 218     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 219 }
 220
 221 /*! \brief Load SIMD double from unaligned memory.
 222  *
 223  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 224  *
 225  * \param m Pointer to memory, no alignment requirement.
 226  * \return SIMD variable with data loaded.
 227  */
 228 static inline SimdDouble gmx_simdcall
 229 simdLoadU(const double *m)
 230 {
 231     SimdDouble a;
 232     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 233     return a;
 234 }
 235
 236 /*! \brief Store SIMD double to unaligned memory.
 237  *
 238  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 239  *
 240  * \param[out] m Pointer to memory, no alignment requirement.
 241  * \param a SIMD variable to store.
 242  */
 243 static inline void gmx_simdcall
 244 storeU(double *m, SimdDouble a)
 245 {
 246     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 247 }
 248
 249 /*! \brief Set all SIMD double variable elements to 0.0.
 250  *
 251  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 252  * internally to handle all types rather than adding the suffix used here.
 253  *
 254  * \return SIMD 0.0
 255  */
 256 static inline SimdDouble gmx_simdcall
 257 setZeroD()
 258 {
 259     return SimdDouble(0.0);
 260 }
 261
 262 /*! \}
 263  *
 264  * \name SIMD implementation load/store operations for integers (corresponding to double)
 265  * \{
 266  */
 267
 268 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 269  *
 270  * You should typically just call \ref gmx::load(), which uses proxy objects
 271  * internally to handle all types rather than adding the suffix used here.
 272  *
 273  * \param m Pointer to memory, aligned to (double) integer SIMD width.
 274  * \return SIMD integer variable.
 275  */
 276 static inline SimdDInt32 gmx_simdcall
 277 simdLoadDI(const std::int32_t * m)
 278 {
 279     SimdDInt32 a;
 280
 281     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
 282
 283     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 284     return a;
 285 };
 286
 287 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 288  *
 289  * \param m Memory aligned to (double) integer SIMD width.
 290  * \param a SIMD (double) integer variable to store.
 291  */
 292 static inline void gmx_simdcall
 293 store(std::int32_t * m, SimdDInt32 a)
 294 {
 295     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
 296
 297     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 298 };
 299
 300 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdDouble.
 301  *
 302  * You should typically just call \ref gmx::loadU(), which uses proxy objects
 303  * internally to handle all types rather than adding the suffix used here.
 304  *
 305  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 306  *
 307  * \param m Pointer to memory, no alignment requirements.
 308  * \return SIMD integer variable.
 309  */
 310 static inline SimdDInt32 gmx_simdcall
 311 simdLoadUDI(const std::int32_t *m)
 312 {
 313     SimdDInt32 a;
 314     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 315     return a;
 316 }
 317
 318 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
 319  *
 320  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 321  *
 322  * \param m Memory pointer, no alignment requirements.
 323  * \param a SIMD (double) integer variable to store.
 324  */
 325 static inline void gmx_simdcall
 326 storeU(std::int32_t * m, SimdDInt32 a)
 327 {
 328     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 329 }
 330
 331 /*! \brief Set all SIMD (double) integer variable elements to 0.
 332  *
 333  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 334  * internally to handle all types rather than adding the suffix used here.
 335  *
 336  * \return SIMD 0
 337  */
 338 static inline SimdDInt32 gmx_simdcall
 339 setZeroDI()
 340 {
 341     return SimdDInt32(0);
 342 }
 343
 344 /*! \brief Extract element with index i from \ref gmx::SimdDInt32.
 345  *
 346  * Available if \ref GMX_SIMD_HAVE_DINT32_EXTRACT is 1.
 347  *
 348  * \tparam index Compile-time constant, position to extract (first position is 0)
 349  * \param  a     SIMD variable from which to extract value.
 350  * \return Single integer from position index in SIMD variable.
 351  */
 352 template<int index>
 353 static inline std::int32_t gmx_simdcall
 354 extract(SimdDInt32 a)
 355 {
 356     return a.simdInternal_[index];
 357 }
 358
 359 /*! \}
 360  *
 361  * \name SIMD implementation double precision floating-point bitwise logical operations
 362  * \{
 363  */
 364
 365 /*! \brief Bitwise and for two SIMD double variables.
 366  *
 367  * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 368  *
 369  * \param a data1
 370  * \param b data2
 371  * \return data1 & data2
 372  */
 373 static inline SimdDouble gmx_simdcall
 374 operator&(SimdDouble a, SimdDouble b)
 375 {
 376     SimdDouble         res;
 377
 378     union
 379     {
 380         double        r;
 381         std::int64_t  i;
 382     }
 383     conv1, conv2;
 384
 385     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 386     {
 387         conv1.r              = a.simdInternal_[i];
 388         conv2.r              = b.simdInternal_[i];
 389         conv1.i              = conv1.i & conv2.i;
 390         res.simdInternal_[i] = conv1.r;
 391     }
 392     return res;
 393 }
 394
 395 /*! \brief Bitwise andnot for SIMD double.
 396  *
 397  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 398  *
 399  * \param a data1
 400  * \param b data2
 401  * \return (~data1) & data2
 402  */
 403 static inline SimdDouble gmx_simdcall
 404 andNot(SimdDouble a, SimdDouble b)
 405 {
 406     SimdDouble         res;
 407
 408     union
 409     {
 410         double        r;
 411         std::int64_t  i;
 412     }
 413     conv1, conv2;
 414
 415     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 416     {
 417         conv1.r              = a.simdInternal_[i];
 418         conv2.r              = b.simdInternal_[i];
 419         conv1.i              = ~conv1.i & conv2.i;
 420         res.simdInternal_[i] = conv1.r;
 421     }
 422     return res;
 423 }
 424
 425 /*! \brief Bitwise or for SIMD double.
 426  *
 427  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 428  *
 429  * \param a data1
 430  * \param b data2
 431  * \return data1 | data2
 432  */
 433 static inline SimdDouble gmx_simdcall
 434 operator|(SimdDouble a, SimdDouble b)
 435 {
 436     SimdDouble         res;
 437
 438     union
 439     {
 440         double        r;
 441         std::int64_t  i;
 442     }
 443     conv1, conv2;
 444
 445     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 446     {
 447         conv1.r              = a.simdInternal_[i];
 448         conv2.r              = b.simdInternal_[i];
 449         conv1.i              = conv1.i | conv2.i;
 450         res.simdInternal_[i] = conv1.r;
 451     }
 452     return res;
 453 }
 454
 455 /*! \brief Bitwise xor for SIMD double.
 456  *
 457  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 458  *
 459  * \param a data1
 460  * \param b data2
 461  * \return data1 ^ data2
 462  */
 463 static inline SimdDouble gmx_simdcall
 464 operator^(SimdDouble a, SimdDouble b)
 465 {
 466     SimdDouble         res;
 467
 468     union
 469     {
 470         double        r;
 471         std::int64_t  i;
 472     }
 473     conv1, conv2;
 474
 475     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 476     {
 477         conv1.r              = a.simdInternal_[i];
 478         conv2.r              = b.simdInternal_[i];
 479         conv1.i              = conv1.i ^ conv2.i;
 480         res.simdInternal_[i] = conv1.r;
 481     }
 482     return res;
 483 }
 484
 485 /*! \}
 486  *
 487  * \name SIMD implementation double precision floating-point arithmetics
 488  * \{
 489  */
 490
 491 /*! \brief Add two double SIMD variables.
 492  *
 493  * \param a term1
 494  * \param b term2
 495  * \return a+b
 496  */
 497 static inline SimdDouble gmx_simdcall
 498 operator+(SimdDouble a, SimdDouble b)
 499 {
 500     SimdDouble         res;
 501
 502     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 503     {
 504         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
 505     }
 506     return res;
 507 }
 508
 509 /*! \brief Subtract two double SIMD variables.
 510  *
 511  * \param a term1
 512  * \param b term2
 513  * \return a-b
 514  */
 515 static inline SimdDouble gmx_simdcall
 516 operator-(SimdDouble a, SimdDouble b)
 517 {
 518     SimdDouble         res;
 519
 520     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 521     {
 522         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
 523     }
 524     return res;
 525 }
 526
 527 /*! \brief SIMD double precision negate.
 528  *
 529  * \param a SIMD double precision value
 530  * \return -a
 531  */
 532 static inline SimdDouble gmx_simdcall
 533 operator-(SimdDouble a)
 534 {
 535     SimdDouble         res;
 536
 537     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 538     {
 539         res.simdInternal_[i] = -a.simdInternal_[i];
 540     }
 541     return res;
 542 }
 543
 544 /*! \brief Multiply two double SIMD variables.
 545  *
 546  * \param a factor1
 547  * \param b factor2
 548  * \return a*b.
 549  */
 550 static inline SimdDouble gmx_simdcall
 551 operator*(SimdDouble a, SimdDouble b)
 552 {
 553     SimdDouble         res;
 554
 555     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 556     {
 557         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
 558     }
 559     return res;
 560 }
 561
 562 /*! \brief SIMD double Fused-multiply-add. Result is a*b+c.
 563  *
 564  * \param a factor1
 565  * \param b factor2
 566  * \param c term
 567  * \return a*b+c
 568  */
 569 static inline SimdDouble gmx_simdcall
 570 fma(SimdDouble a, SimdDouble b, SimdDouble c)
 571 {
 572     return a*b+c;
 573 }
 574
 575 /*! \brief SIMD double Fused-multiply-subtract. Result is a*b-c.
 576  *
 577  * \param a factor1
 578  * \param b factor2
 579  * \param c term
 580  * \return a*b-c
 581  */
 582 static inline SimdDouble gmx_simdcall
 583 fms(SimdDouble a, SimdDouble b, SimdDouble c)
 584 {
 585     return a*b-c;
 586 }
 587
 588 /*! \brief SIMD double Fused-negated-multiply-add. Result is -a*b+c.
 589  *
 590  * \param a factor1
 591  * \param b factor2
 592  * \param c term
 593  * \return -a*b+c
 594  */
 595 static inline SimdDouble gmx_simdcall
 596 fnma(SimdDouble a, SimdDouble b, SimdDouble c)
 597 {
 598     return c-a*b;
 599 }
 600
 601 /*! \brief SIMD double Fused-negated-multiply-subtract. Result is -a*b-c.
 602  *
 603  * \param a factor1
 604  * \param b factor2
 605  * \param c term
 606  * \return -a*b-c
 607  */
 608 static inline SimdDouble gmx_simdcall
 609 fnms(SimdDouble a, SimdDouble b, SimdDouble c)
 610 {
 611     return -a*b-c;
 612 }
 613
 614 /*! \brief double SIMD 1.0/sqrt(x) lookup.
 615  *
 616  * This is a low-level instruction that should only be called from routines
 617  * implementing the inverse square root in simd_math.h.
 618  *
 619  * \param x Argument, x>0
 620  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 621  */
 622 static inline SimdDouble gmx_simdcall
 623 rsqrt(SimdDouble x)
 624 {
 625     SimdDouble         res;
 626
 627     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 628     {
 629         // sic - we only use single precision for the lookup
 630         res.simdInternal_[i] = 1.0f / std::sqrt(static_cast<float>(x.simdInternal_[i]));
 631     }
 632     return res;
 633 };
 634
 635 /*! \brief SIMD double 1.0/x lookup.
 636  *
 637  * This is a low-level instruction that should only be called from routines
 638  * implementing the reciprocal in simd_math.h.
 639  *
 640  * \param x Argument, x!=0
 641  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 642  */
 643 static inline SimdDouble gmx_simdcall
 644 rcp(SimdDouble x)
 645 {
 646     SimdDouble         res;
 647
 648     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 649     {
 650         // sic - we only use single precision for the lookup
 651         res.simdInternal_[i] = 1.0f / static_cast<float>(x.simdInternal_[i]);
 652     }
 653     return res;
 654 };
 655
 656 /*! \brief Add two double SIMD variables, masked version.
 657  *
 658  * \param a term1
 659  * \param b term2
 660  * \param m mask
 * \return a+b where mask is true, a otherwise (b is treated as 0.0).
 662  */
 663 static inline SimdDouble gmx_simdcall
 664 maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
 665 {
 666     SimdDouble         res;
 667
 668     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 669     {
 670         res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0);
 671     }
 672     return res;
 673 }
 674
 675 /*! \brief Multiply two double SIMD variables, masked version.
 676  *
 677  * \param a factor1
 678  * \param b factor2
 679  * \param m mask
 680  * \return a*b where mask is true, 0.0 otherwise.
 681  */
 682 static inline SimdDouble gmx_simdcall
 683 maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
 684 {
 685     SimdDouble         res;
 686
 687     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 688     {
 689         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0;
 690     }
 691     return res;
 692 }
 693
 694 /*! \brief SIMD double fused multiply-add, masked version.
 695  *
 696  * \param a factor1
 697  * \param b factor2
 698  * \param c term
 699  * \param m mask
 700  * \return a*b+c where mask is true, 0.0 otherwise.
 701  */
 702 static inline SimdDouble gmx_simdcall
 703 maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
 704 {
 705     SimdDouble         res;
 706
 707     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 708     {
 709         res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0;
 710     }
 711     return res;
 712 }
 713
 714 /*! \brief SIMD double 1.0/sqrt(x) lookup, masked version.
 715  *
 716  * This is a low-level instruction that should only be called from routines
 717  * implementing the inverse square root in simd_math.h.
 718  *
 719  * \param x Argument, x>0 for entries where mask is true.
 720  * \param m Mask
 721  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 722  *         The result for masked-out entries will be 0.0.
 723  */
 724 static inline SimdDouble gmx_simdcall
 725 maskzRsqrt(SimdDouble x, SimdDBool m)
 726 {
 727     SimdDouble         res;
 728
 729     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 730     {
 731         // sic - we only use single precision for the lookup
 732         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0f / std::sqrt(static_cast<float>(x.simdInternal_[i])) : 0.0;
 733     }
 734     return res;
 735 }
 736
 737 /*! \brief SIMD double 1.0/x lookup, masked version.
 738  *
 739  * This is a low-level instruction that should only be called from routines
 740  * implementing the reciprocal in simd_math.h.
 741  *
 * \param x Argument, x!=0 for entries where mask is true.
 743  * \param m Mask
 744  * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
 745  *         The result for masked-out entries will be 0.0.
 746  */
 747 static inline SimdDouble gmx_simdcall
 748 maskzRcp(SimdDouble x, SimdDBool m)
 749 {
 750     SimdDouble         res;
 751
 752     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 753     {
 754         res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0f / static_cast<float>(x.simdInternal_[i]) : 0.0;
 755     }
 756     return res;
 757 }
 758
 759 /*! \brief SIMD double floating-point fabs().
 760  *
 761  * \param a any floating point values
 762  * \return fabs(a) for each element.
 763  */
 764 static inline SimdDouble gmx_simdcall
 765 abs(SimdDouble a)
 766 {
 767     SimdDouble         res;
 768
 769     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 770     {
 771         res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
 772     }
 773     return res;
 774 }
 775
 776 /*! \brief Set each SIMD double element to the largest from two variables.
 777  *
 778  * \param a Any floating-point value
 779  * \param b Any floating-point value
 780  * \return max(a,b) for each element.
 781  */
 782 static inline SimdDouble gmx_simdcall
 783 max(SimdDouble a, SimdDouble b)
 784 {
 785     SimdDouble         res;
 786
 787     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 788     {
 789         res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
 790     }
 791     return res;
 792 }
 793
 794 /*! \brief Set each SIMD double element to the smallest from two variables.
 795  *
 796  * \param a Any floating-point value
 797  * \param b Any floating-point value
 798  * \return min(a,b) for each element.
 799  */
 800 static inline SimdDouble gmx_simdcall
 801 min(SimdDouble a, SimdDouble b)
 802 {
 803     SimdDouble         res;
 804
 805     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 806     {
 807         res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
 808     }
 809     return res;
 810 }
 811
 812 /*! \brief SIMD double round to nearest integer value (in floating-point format).
 813  *
 814  * \param a Any floating-point value
 815  * \return The nearest integer, represented in floating-point format.
 816  */
 817 static inline SimdDouble gmx_simdcall
 818 round(SimdDouble a)
 819 {
 820     SimdDouble         res;
 821
 822     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 823     {
 824         res.simdInternal_[i] = std::round(a.simdInternal_[i]);
 825     }
 826     return res;
 827 }
 828
 829 /*! \brief Truncate SIMD double, i.e. round towards zero - common hardware instruction.
 830  *
 831  * \param a Any floating-point value
 832  * \return Integer rounded towards zero, represented in floating-point format.
 833  *
 834  * \note This is truncation towards zero, not floor(). The reason for this
 835  * is that truncation is virtually always present as a dedicated hardware
 836  * instruction, but floor() frequently isn't.
 837  */
 838 static inline SimdDouble gmx_simdcall
 839 trunc(SimdDouble a)
 840 {
 841     SimdDouble         res;
 842
 843     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 844     {
 845         res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
 846     }
 847     return res;
 848 }
 849
 850 /*! \brief Extract (integer) exponent and fraction from double precision SIMD.
 851  *
 852  * \param       value     Floating-point value to extract from
 853  * \param[out]  exponent  Returned exponent of value, integer SIMD format.
 854  * \return      Fraction of value, floating-point SIMD format.
 855  */
 856 static inline SimdDouble gmx_simdcall
 857 frexp(SimdDouble value, SimdDInt32 * exponent)
 858 {
 859     SimdDouble fraction;
 860
 861     for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
 862     {
 863         fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
 864     }
 865     return fraction;
 866 }
 867
 868 /*! \brief Multiply a SIMD double value by the number 2 raised to an exp power.
 869  *
 870  * \param value Floating-point number to multiply with new exponent
 871  * \param exponent Integer that will not overflow as 2^exponent.
 872  * \return value*2^exponent
 873  */
 874 static inline SimdDouble gmx_simdcall
 875 ldexp(SimdDouble value, SimdDInt32 exponent)
 876 {
 877     SimdDouble           res;
 878
 879     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 880     {
 881         res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
 882     }
 883     return res;
 884 }
 885
 886 /*! \brief Return sum of all elements in SIMD double variable.
 887  *
 888  * \param a SIMD variable to reduce/sum.
 889  * \return The sum of all elements in the argument variable.
 890  *
 891  */
 892 static inline double gmx_simdcall
 893 reduce(SimdDouble a)
 894 {
 895     double sum = 0.0;
 896
 897     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 898     {
 899         sum += a.simdInternal_[i];
 900     }
 901     return sum;
 902 }
 903
 904 /*! \}
 905  *
 906  * \name SIMD implementation double precision floating-point comparison, boolean, selection.
 907  * \{
 908  */
 909
 910 /*! \brief SIMD a==b for double SIMD.
 911  *
 912  * \param a value1
 913  * \param b value2
 914  * \return Each element of the boolean will be set to true if a==b.
 915  *
 916  * Beware that exact floating-point comparisons are difficult.
 917  */
 918 static inline SimdDBool gmx_simdcall
 919 operator==(SimdDouble a, SimdDouble b)
 920 {
 921     SimdDBool         res;
 922
 923     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 924     {
 925         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
 926     }
 927     return res;
 928 }
 929
 930 /*! \brief SIMD a!=b for double SIMD.
 931  *
 932  * \param a value1
 933  * \param b value2
 934  * \return Each element of the boolean will be set to true if a!=b.
 935  *
 936  * Beware that exact floating-point comparisons are difficult.
 937  */
 938 static inline SimdDBool gmx_simdcall
 939 operator!=(SimdDouble a, SimdDouble b)
 940 {
 941     SimdDBool         res;
 942
 943     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 944     {
 945         res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
 946     }
 947     return res;
 948 }
 949
 950 /*! \brief SIMD a<b for double SIMD.
 951  *
 952  * \param a value1
 953  * \param b value2
 954  * \return Each element of the boolean will be set to true if a<b.
 955  */
 956 static inline SimdDBool gmx_simdcall
 957 operator<(SimdDouble a, SimdDouble b)
 958 {
 959     SimdDBool          res;
 960
 961     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 962     {
 963         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
 964     }
 965     return res;
 966 }
 967
 968 /*! \brief SIMD a<=b for double SIMD.
 969  *
 970  * \param a value1
 971  * \param b value2
 972  * \return Each element of the boolean will be set to true if a<=b.
 973  */
 974 static inline SimdDBool gmx_simdcall
 975 operator<=(SimdDouble a, SimdDouble b)
 976 {
 977     SimdDBool          res;
 978
 979     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 980     {
 981         res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
 982     }
 983     return res;
 984 }
 985
/*! \brief Return true if any bits are set in the double precision SIMD.
 987  *
 988  * This function is used to handle bitmasks, mainly for exclusions in the
 989  * inner kernels. Note that it will return true even for -0.0 (sign bit set),
 990  * so it is not identical to not-equal.
 991  *
 992  * \param a value
 993  * \return Each element of the boolean will be true if any bit in a is nonzero.
 994  */
 995 static inline SimdDBool gmx_simdcall
 996 testBits(SimdDouble a)
 997 {
 998     SimdDBool         res;
 999
1000     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1001     {
1002         union
1003         {
1004             std::uint64_t i;
1005             double        d;
1006         } conv;
1007
1008         conv.d               = a.simdInternal_[i];
1009         res.simdInternal_[i] = (conv.i != 0);
1010     }
1011     return res;
1012 }
1013
1014 /*! \brief Logical \a and on double precision SIMD booleans.
1015  *
1016  * \param a logical vars 1
1017  * \param b logical vars 2
1018  * \return For each element, the result boolean is true if a \& b are true.
1019  *
1020  * \note This is not necessarily a bitwise operation - the storage format
1021  * of booleans is implementation-dependent.
1022  */
1023 static inline SimdDBool gmx_simdcall
1024 operator&&(SimdDBool a, SimdDBool b)
1025 {
1026     SimdDBool         res;
1027
1028     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1029     {
1030         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1031     }
1032     return res;
1033 }
1034
1035 /*! \brief Logical \a or on double precision SIMD booleans.
1036  *
1037  * \param a logical vars 1
1038  * \param b logical vars 2
1039  * \return For each element, the result boolean is true if a or b is true.
1040  *
1041  * Note that this is not necessarily a bitwise operation - the storage format
1042  * of booleans is implementation-dependent.
1043  *
1044  \ */
1045 static inline SimdDBool gmx_simdcall
1046 operator||(SimdDBool a, SimdDBool b)
1047 {
1048     SimdDBool         res;
1049
1050     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1051     {
1052         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1053     }
1054     return res;
1055 }
1056
1057 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1058  *
1059  * \param a Logical variable.
1060  * \return true if any element in a is true, otherwise false.
1061  *
1062  * The actual return value for truth will depend on the architecture,
1063  * so any non-zero value is considered truth.
1064  */
1065 static inline bool gmx_simdcall
1066 anyTrue(SimdDBool a)
1067 {
1068     bool res = false;
1069
1070     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1071     {
1072         res = res || a.simdInternal_[i];
1073     }
1074     return res;
1075 }
1076
1077 /*! \brief Select from double precision SIMD variable where boolean is true.
1078  *
1079  * \param a Floating-point variable to select from
1080  * \param mask Boolean selector
1081  * \return  For each element, a is selected for true, 0 for false.
1082  */
1083 static inline SimdDouble gmx_simdcall
1084 selectByMask(SimdDouble a, SimdDBool mask)
1085 {
1086     SimdDouble          res;
1087
1088     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1089     {
1090         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
1091     }
1092     return res;
1093 }
1094
1095 /*! \brief Select from double precision SIMD variable where boolean is false.
1096  *
1097  * \param a Floating-point variable to select from
1098  * \param mask Boolean selector
1099  * \return  For each element, a is selected for false, 0 for true (sic).
1100  */
1101 static inline SimdDouble gmx_simdcall
1102 selectByNotMask(SimdDouble a, SimdDBool mask)
1103 {
1104     SimdDouble          res;
1105
1106     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1107     {
1108         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
1109     }
1110     return res;
1111 }
1112
1113 /*! \brief Vector-blend SIMD double selection.
1114  *
1115  * \param a First source
1116  * \param b Second source
1117  * \param sel Boolean selector
1118  * \return For each element, select b if sel is true, a otherwise.
1119  */
1120 static inline SimdDouble gmx_simdcall
1121 blend(SimdDouble a, SimdDouble b, SimdDBool sel)
1122 {
1123     SimdDouble         res;
1124
1125     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1126     {
1127         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1128     }
1129     return res;
1130 }
1131
1132 /*! \}
1133  *
1134  * \name SIMD implementation integer (corresponding to double) bitwise logical operations
1135  * \{
1136  */
1137
1138 /*! \brief SIMD integer shift left logical, based on immediate value.
1139  *
1140  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1141  *
1142  *  Logical shift. Each element is shifted (independently) up to 32 positions
1143  *  left, while zeros are shifted in from the right.
1144  *
1145  * \param a integer data to shift
1146  * \param n number of positions to shift left. n<=32.
1147  * \return shifted values
1148  */
1149 static inline SimdDInt32 gmx_simdcall
1150 operator<<(SimdDInt32 a, int n)
1151 {
1152     SimdDInt32         res;
1153
1154     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1155     {
1156         res.simdInternal_[i] = a.simdInternal_[i] << n;
1157     }
1158     return res;
1159 }
1160
1161 /*! \brief SIMD integer shift right logical, based on immediate value.
1162  *
1163  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1164  *
1165  *  Logical shift. Each element is shifted (independently) up to 32 positions
1166  *  right, while zeros are shifted in from the left.
1167  *
1168  * \param a integer data to shift
1169  * \param n number of positions to shift right. n<=32.
1170  * \return shifted values
1171  */
1172 static inline SimdDInt32 gmx_simdcall
1173 operator>>(SimdDInt32 a, int n)
1174 {
1175     SimdDInt32         res;
1176
1177     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1178     {
1179         res.simdInternal_[i] = a.simdInternal_[i] >> n;
1180     }
1181     return res;
1182 }
1183
1184 /*! \brief Integer SIMD bitwise and.
1185  *
1186  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1187  *
1188  * \note You can \a not use this operation directly to select based on a boolean
1189  * SIMD variable, since booleans are separate from integer SIMD. If that
1190  * is what you need, have a look at \ref gmx::selectByMask instead.
1191  *
1192  * \param a first integer SIMD
1193  * \param b second integer SIMD
1194  * \return a \& b (bitwise and)
1195  */
1196 static inline SimdDInt32 gmx_simdcall
1197 operator&(SimdDInt32 a, SimdDInt32 b)
1198 {
1199     SimdDInt32         res;
1200
1201     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1202     {
1203         res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1204     }
1205     return res;
1206 }
1207
1208 /*! \brief Integer SIMD bitwise not/complement.
1209  *
1210  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1211  *
1212  * \note You can \a not use this operation directly to select based on a boolean
1213  * SIMD variable, since booleans are separate from integer SIMD. If that
1214  * is what you need, have a look at \ref gmx::selectByMask instead.
1215  *
1216  * \param a integer SIMD
1217  * \param b integer SIMD
1218  * \return (~a) & b
1219  */
1220 static inline SimdDInt32 gmx_simdcall
1221 andNot(SimdDInt32 a, SimdDInt32 b)
1222 {
1223     SimdDInt32         res;
1224
1225     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1226     {
1227         res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1228     }
1229     return res;
1230 }
1231
1232 /*! \brief Integer SIMD bitwise or.
1233  *
1234  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1235  *
1236  * \param a first integer SIMD
1237  * \param b second integer SIMD
1238  * \return a \| b (bitwise or)
1239  */
1240 static inline SimdDInt32 gmx_simdcall
1241 operator|(SimdDInt32 a, SimdDInt32 b)
1242 {
1243     SimdDInt32         res;
1244
1245     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1246     {
1247         res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1248     }
1249     return res;
1250 }
1251
1252 /*! \brief Integer SIMD bitwise xor.
1253  *
1254  * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1255  *
1256  * \param a first integer SIMD
1257  * \param b second integer SIMD
1258  * \return a ^ b (bitwise xor)
1259  */
1260 static inline SimdDInt32 gmx_simdcall
1261 operator^(SimdDInt32 a, SimdDInt32 b)
1262 {
1263     SimdDInt32         res;
1264
1265     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1266     {
1267         res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1268     }
1269     return res;
1270 }
1271
1272 /*! \}
1273  *
1274  * \name SIMD implementation integer (corresponding to double) arithmetics
1275  * \{
1276  */
1277
1278 /*! \brief Add SIMD integers.
1279  *
1280  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1281  *
1282  * \param a term1
1283  * \param b term2
1284  * \return a+b
1285  */
1286 static inline SimdDInt32 gmx_simdcall
1287 operator+(SimdDInt32 a, SimdDInt32 b)
1288 {
1289     SimdDInt32         res;
1290
1291     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1292     {
1293         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1294     }
1295     return res;
1296 }
1297
1298 /*! \brief Subtract SIMD integers.
1299  *
1300  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1301  *
1302  * \param a term1
1303  * \param b term2
1304  * \return a-b
1305  */
1306 static inline SimdDInt32 gmx_simdcall
1307 operator-(SimdDInt32 a, SimdDInt32 b)
1308 {
1309     SimdDInt32         res;
1310
1311     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1312     {
1313         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1314     }
1315     return res;
1316 }
1317
1318 /*! \brief Multiply SIMD integers.
1319  *
1320  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1321  *
1322  * \param a factor1
1323  * \param b factor2
1324  * \return a*b.
1325  *
1326  * \note Only the low 32 bits are retained, so this can overflow.
1327  */
1328 static inline SimdDInt32 gmx_simdcall
1329 operator*(SimdDInt32 a, SimdDInt32 b)
1330 {
1331     SimdDInt32         res;
1332
1333     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1334     {
1335         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1336     }
1337     return res;
1338 }
1339
1340 /*! \}
1341  *
1342  * \name SIMD implementation integer (corresponding to double) comparisons, boolean selection
1343  * \{
1344  */
1345
1346 /*! \brief Equality comparison of two integers corresponding to double values.
1347  *
1348  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1349  *
1350  * \param a SIMD integer1
1351  * \param b SIMD integer2
1352  * \return SIMD integer boolean with true for elements where a==b
1353  */
1354 static inline SimdDIBool gmx_simdcall
1355 operator==(SimdDInt32 a, SimdDInt32 b)
1356 {
1357     SimdDIBool         res;
1358
1359     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1360     {
1361         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1362     }
1363     return res;
1364 }
1365
1366 /*! \brief Less-than comparison of two SIMD integers corresponding to double values.
1367  *
1368  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1369  *
1370  * \param a SIMD integer1
1371  * \param b SIMD integer2
1372  * \return SIMD integer boolean with true for elements where a<b
1373  */
1374 static inline SimdDIBool gmx_simdcall
1375 operator<(SimdDInt32 a, SimdDInt32 b)
1376 {
1377     SimdDIBool         res;
1378
1379     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1380     {
1381         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1382     }
1383     return res;
1384 }
1385
1386 /*! \brief Check if any bit is set in each element
1387  *
1388  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1389  *
1390  * \param a SIMD integer
1391  * \return SIMD integer boolean with true for elements where any bit is set
1392  */
1393 static inline SimdDIBool gmx_simdcall
1394 testBits(SimdDInt32 a)
1395 {
1396     SimdDIBool         res;
1397
1398     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1399     {
1400         res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1401     }
1402     return res;
1403 }
1404
1405 /*! \brief Logical AND on SimdDIBool.
1406  *
1407  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1408  *
1409  * \param a SIMD boolean 1
1410  * \param b SIMD boolean 2
1411  * \return True for elements where both a and b are true.
1412  */
1413 static inline SimdDIBool gmx_simdcall
1414 operator&&(SimdDIBool a, SimdDIBool b)
1415 {
1416     SimdDIBool        res;
1417
1418     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1419     {
1420         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1421     }
1422     return res;
1423 }
1424
1425 /*! \brief Logical OR on SimdDIBool.
1426  *
1427  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1428  *
1429  * \param a SIMD boolean 1
1430  * \param b SIMD boolean 2
1431  * \return True for elements where both a and b are true.
1432  */
1433 static inline SimdDIBool gmx_simdcall
1434 operator||(SimdDIBool a, SimdDIBool b)
1435 {
1436     SimdDIBool         res;
1437
1438     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1439     {
1440         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1441     }
1442     return res;
1443 }
1444
1445 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1446  *
1447  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1448  *
1449  * The actual return value for "any true" will depend on the architecture.
1450  * Any non-zero value should be considered truth.
1451  *
1452  * \param a SIMD boolean
1453  * \return True if any of the elements in a is true, otherwise 0.
1454  */
1455 static inline bool gmx_simdcall
1456 anyTrue(SimdDIBool a)
1457 {
1458     bool res = false;
1459
1460     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1461     {
1462         res = res || a.simdInternal_[i];
1463     }
1464     return res;
1465 }
1466
1467 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is true.
1468  *
1469  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1470  *
1471  * \param a SIMD integer to select from
1472  * \param mask Boolean selector
1473  * \return Elements from a where sel is true, 0 otherwise.
1474  */
1475 static inline SimdDInt32 gmx_simdcall
1476 selectByMask(SimdDInt32 a, SimdDIBool mask)
1477 {
1478     SimdDInt32         res;
1479
1480     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1481     {
1482         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0;
1483     }
1484     return res;
1485 }
1486
1487 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is false.
1488  *
1489  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1490  *
1491  * \param a SIMD integer to select from
1492  * \param mask Boolean selector
1493  * \return Elements from a where sel is false, 0 otherwise (sic).
1494  */
1495 static inline SimdDInt32 gmx_simdcall
1496 selectByNotMask(SimdDInt32 a, SimdDIBool mask)
1497 {
1498     SimdDInt32         res;
1499
1500     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1501     {
1502         res.simdInternal_[i] = mask.simdInternal_[i] ? 0 : a.simdInternal_[i];
1503     }
1504     return res;
1505 }
1506
1507 /*! \brief Vector-blend SIMD integer selection.
1508  *
1509  * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1510  *
1511  * \param a First source
1512  * \param b Second source
1513  * \param sel Boolean selector
1514  * \return For each element, select b if sel is true, a otherwise.
1515  */
1516 static inline SimdDInt32 gmx_simdcall
1517 blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
1518 {
1519     SimdDInt32        res;
1520
1521     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1522     {
1523         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1524     }
1525     return res;
1526 }
1527
1528 /*! \}
1529  *
1530  * \name SIMD implementation conversion operations
1531  * \{
1532  */
1533
1534 /*! \brief Round double precision floating point to integer.
1535  *
1536  * \param a SIMD floating-point
1537  * \return SIMD integer, rounded to nearest integer.
1538  */
1539 static inline SimdDInt32 gmx_simdcall
1540 cvtR2I(SimdDouble a)
1541 {
1542     SimdDInt32         b;
1543
1544     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1545     {
1546         b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1547     }
1548     return b;
1549 };
1550
1551 /*! \brief Truncate double precision floating point to integer.
1552  *
1553  * \param a SIMD floating-point
1554  * \return SIMD integer, truncated to nearest integer.
1555  */
1556 static inline SimdDInt32 gmx_simdcall
1557 cvttR2I(SimdDouble a)
1558 {
1559     SimdDInt32         b;
1560
1561     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1562     {
1563         b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1564     }
1565     return b;
1566 };
1567
1568 /*! \brief Convert integer to double precision floating point.
1569  *
1570  * \param a SIMD integer
1571  * \return SIMD floating-point
1572  */
1573 static inline SimdDouble gmx_simdcall
1574 cvtI2R(SimdDInt32 a)
1575 {
1576     SimdDouble         b;
1577
1578     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1579     {
1580         b.simdInternal_[i] = a.simdInternal_[i];
1581     }
1582     return b;
1583 };
1584
1585 /*! \brief Convert from double precision boolean to corresponding integer boolean
1586  *
1587  * \param a SIMD floating-point boolean
1588  * \return SIMD integer boolean
1589  */
1590 static inline SimdDIBool gmx_simdcall
1591 cvtB2IB(SimdDBool a)
1592 {
1593     SimdDIBool         b;
1594
1595     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1596     {
1597         b.simdInternal_[i] = a.simdInternal_[i];
1598     }
1599     return b;
1600 };
1601
1602 /*! \brief Convert from integer boolean to corresponding double precision boolean
1603  *
1604  * \param a SIMD integer boolean
1605  * \return SIMD floating-point boolean
1606  */
1607 static inline SimdDBool gmx_simdcall
1608 cvtIB2B(SimdDIBool a)
1609 {
1610     SimdDBool         b;
1611
1612     for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1613     {
1614         b.simdInternal_[i] = a.simdInternal_[i];
1615     }
1616     return b;
1617 };
1618
1619 /*! \brief Convert SIMD float to double.
1620  *
1621  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1622  * \ref GMX_SIMD_DOUBLE_WIDTH.
1623  *
1624  * Float/double conversions are complex since the SIMD width could either
1625  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1626  * need to check for the width in the code, and have different code paths.
1627  *
1628  * \param f Single-precision SIMD variable
1629  * \return Double-precision SIMD variable of the same width
1630  */
1631 static inline SimdDouble gmx_simdcall
1632 cvtF2D(SimdFloat gmx_unused f)
1633 {
1634 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1635     SimdDouble        d;
1636     for (std::size_t i = 0; i < d.simdInternal_.size(); i++)
1637     {
1638         d.simdInternal_[i] = f.simdInternal_[i];
1639     }
1640     return d;
1641 #else
1642     gmx_fatal(FARGS, "cvtF2D() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1643 #endif
1644 }
1645
1646 /*! \brief Convert SIMD double to float.
1647  *
1648  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1649  * \ref GMX_SIMD_DOUBLE_WIDTH.
1650  *
1651  * Float/double conversions are complex since the SIMD width could either
1652  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1653  * need to check for the width in the code, and have different code paths.
1654  *
1655  * \param d Double-precision SIMD variable
1656  * \return Single-precision SIMD variable of the same width
1657  */
1658 static inline SimdFloat gmx_simdcall
1659 cvtD2F(SimdDouble gmx_unused d)
1660 {
1661 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1662     SimdFloat        f;
1663     for (std::size_t i = 0; i < f.simdInternal_.size(); i++)
1664     {
1665         f.simdInternal_[i] = d.simdInternal_[i];
1666     }
1667     return f;
1668 #else
1669     gmx_fatal(FARGS, "cvtD2F() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1670 #endif
1671 }
1672
1673 /*! \brief Convert SIMD float to double.
1674  *
1675  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1676  * as \ref GMX_SIMD_DOUBLE_WIDTH.
1677  *
1678  * Float/double conversions are complex since the SIMD width could either
1679  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1680  * need to check for the width in the code, and have different code paths.
1681  *
1682  * \param f Single-precision SIMD variable
1683  * \param[out] d0 Double-precision SIMD variable, first half of values from f.
1684  * \param[out] d1 Double-precision SIMD variable, second half of values from f.
1685  */
1686 static inline void gmx_simdcall
1687 cvtF2DD(SimdFloat gmx_unused f, SimdDouble gmx_unused * d0, SimdDouble gmx_unused * d1)
1688 {
1689 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1690     for (std::size_t i = 0; i < d0->simdInternal_.size(); i++)
1691     {
1692         d0->simdInternal_[i] = f.simdInternal_[i];
1693         d1->simdInternal_[i] = f.simdInternal_[f.simdInternal_.size()/2 + i];
1694     }
1695 #else
1696     gmx_fatal(FARGS, "simdCvtF2DD() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1697 #endif
1698 }
1699
1700 /*! \brief Convert SIMD double to float.
1701  *
1702  * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1703  * as \ref GMX_SIMD_DOUBLE_WIDTH.
1704  *
1705  * Float/double conversions are complex since the SIMD width could either
1706  * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1707  * need to check for the width in the code, and have different code paths.
1708  *
1709  * \param d0 Double-precision SIMD variable, first half of values to put in f.
1710  * \param d1 Double-precision SIMD variable, second half of values to put in f.
1711  * \return Single-precision SIMD variable with all values.
1712  */
1713 static inline SimdFloat gmx_simdcall
1714 cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
1715 {
1716 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1717     SimdFloat        f;
1718     for (std::size_t i = 0; i < d0.simdInternal_.size(); i++)
1719     {
1720         f.simdInternal_[i]                            = d0.simdInternal_[i];
1721         f.simdInternal_[f.simdInternal_.size()/2 + i] = d1.simdInternal_[i];
1722     }
1723     return f;
1724 #else
1725     gmx_fatal(FARGS, "simdCvtDD2F() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1726 #endif
1727 }
1728
1729 /*! \} */
1730
1731 /*! \} */
1732 /*! \endcond */
1733
1734 }      // namespace gmx
1735
1736 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H