src/gromacs/simd/impl_reference/impl_reference_simd4_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H
  37 #define GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H
  38
  39 /*! \libinternal \file
  40  *
 * \brief Reference implementation, SIMD4 double precision.
  42  *
  43  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  44  *
  45  * \ingroup module_simd
  46  */
  47
#include "config.h"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>

#include <algorithm>
#include <array>

#include "impl_reference_definitions.h"
  59
  60 namespace gmx
  61 {
  62
  63 /*! \cond libapi */
  64 /*! \addtogroup module_simd */
  65 /*! \{ */
  66
  67 /*! \name Constant width-4 double precision SIMD types and instructions
  68  * \{
  69  */
  70
  71 /*! \libinternal \brief SIMD4 double type.
  72  *
  73  * Available if \ref GMX_SIMD4_HAVE_DOUBLE is 1.
  74  *
  75  * \note This variable cannot be placed inside other structures or classes, since
  76  *       some compilers (including at least clang-3.7) appear to lose the
  77  *       alignment. This is likely particularly severe when allocating such
  78  *       memory on the heap, but it occurs for stack structures too.
  79  */
  80 class Simd4Double
  81 {
  82 public:
  83     Simd4Double() {}
  84
  85     //! \brief Construct from scalar
  86     Simd4Double(double d) { simdInternal_.fill(d); }
  87
  88     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
  89      *
  90      * This has to be public to enable usage in combination with static inline
  91      * functions, but it should never, EVER, be accessed by any code outside
  92      * the corresponding implementation directory since the type will depend
  93      * on the architecture.
  94      */
  95     std::array<double, GMX_SIMD4_WIDTH> simdInternal_;
  96 };
  97
  98 /*! \libinternal  \brief SIMD4 variable type to use for logical comparisons on doubles.
  99  *
 100  * Available if \ref GMX_SIMD4_HAVE_DOUBLE is 1.
 101  *
 102  * \note This variable cannot be placed inside other structures or classes, since
 103  *       some compilers (including at least clang-3.7) appear to lose the
 104  *       alignment. This is likely particularly severe when allocating such
 105  *       memory on the heap, but it occurs for stack structures too.
 106  */
 107 class Simd4DBool
 108 {
 109 public:
 110     Simd4DBool() {}
 111
 112     //! \brief Construct from scalar
 113     Simd4DBool(bool b) { simdInternal_.fill(b); }
 114
 115     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 116      *
 117      * This has to be public to enable usage in combination with static inline
 118      * functions, but it should never, EVER, be accessed by any code outside
 119      * the corresponding implementation directory since the type will depend
 120      * on the architecture.
 121      */
 122     std::array<bool, GMX_SIMD4_WIDTH> simdInternal_;
 123 };
 124
 125 /*! \brief Load 4 double values from aligned memory into SIMD4 variable.
 126  *
 127  * \param m Pointer to memory aligned to 4 elements.
 128  * \return SIMD4 variable with data loaded.
 129  */
 130 static inline Simd4Double gmx_simdcall load4(const double* m)
 131 {
 132     Simd4Double a;
 133
 134     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
 135
 136     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 137     return a;
 138 }
 139
 140 /*! \brief Store the contents of SIMD4 double to aligned memory m.
 141  *
 142  * \param[out] m Pointer to memory, aligned to 4 elements.
 143  * \param a SIMD4 variable to store
 144  */
 145 static inline void gmx_simdcall store4(double* m, Simd4Double a)
 146 {
 147     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
 148
 149     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 150 }
 151
 152 /*! \brief Load SIMD4 double from unaligned memory.
 153  *
 154  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 155  *
 156  * \param m Pointer to memory, no alignment requirement.
 157  * \return SIMD4 variable with data loaded.
 158  */
 159 static inline Simd4Double gmx_simdcall load4U(const double* m)
 160 {
 161     Simd4Double a;
 162     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 163     return a;
 164 }
 165
 166 /*! \brief Store SIMD4 double to unaligned memory.
 167  *
 168  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 169  *
 170  * \param[out] m Pointer to memory, no alignment requirement.
 171  * \param a SIMD4 variable to store.
 172  */
 173 static inline void gmx_simdcall store4U(double* m, Simd4Double a)
 174 {
 175     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 176 }
 177
 178 /*! \brief Set all SIMD4 double elements to 0.
 179  *
 180  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 181  * internally to handle all types rather than adding the suffix used here.
 182  *
 183  * \return SIMD4 0.0
 184  */
 185 static inline Simd4Double gmx_simdcall simd4SetZeroD()
 186 {
 187     return Simd4Double(0.0);
 188 }
 189
 190
 191 /*! \brief Bitwise and for two SIMD4 double variables.
 192  *
 193  * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 194  *
 195  * \param a data1
 196  * \param b data2
 197  * \return data1 & data2
 198  */
 199 static inline Simd4Double gmx_simdcall operator&(Simd4Double a, Simd4Double b)
 200 {
 201     Simd4Double res;
 202
 203     union {
 204         double       r;
 205         std::int64_t i;
 206     } conv1, conv2;
 207
 208     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 209     {
 210         conv1.r              = a.simdInternal_[i];
 211         conv2.r              = b.simdInternal_[i];
 212         conv1.i              = conv1.i & conv2.i;
 213         res.simdInternal_[i] = conv1.r;
 214     }
 215     return res;
 216 }
 217
 218
 219 /*! \brief Bitwise andnot for two SIMD4 double variables. c=(~a) & b.
 220  *
 221  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 222  *
 223  * \param a data1
 224  * \param b data2
 225  * \return (~data1) & data2
 226  */
 227 static inline Simd4Double gmx_simdcall andNot(Simd4Double a, Simd4Double b)
 228 {
 229     Simd4Double res;
 230
 231     union {
 232         double       r;
 233         std::int64_t i;
 234     } conv1, conv2;
 235
 236     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 237     {
 238         conv1.r              = a.simdInternal_[i];
 239         conv2.r              = b.simdInternal_[i];
 240         conv1.i              = ~conv1.i & conv2.i;
 241         res.simdInternal_[i] = conv1.r;
 242     }
 243     return res;
 244 }
 245
 246
 247 /*! \brief Bitwise or for two SIMD4 doubles.
 248  *
 249  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 250  *
 251  * \param a data1
 252  * \param b data2
 253  * \return data1 | data2
 254  */
 255 static inline Simd4Double gmx_simdcall operator|(Simd4Double a, Simd4Double b)
 256 {
 257     Simd4Double res;
 258
 259     union {
 260         double       r;
 261         std::int64_t i;
 262     } conv1, conv2;
 263
 264     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 265     {
 266         conv1.r              = a.simdInternal_[i];
 267         conv2.r              = b.simdInternal_[i];
 268         conv1.i              = conv1.i | conv2.i;
 269         res.simdInternal_[i] = conv1.r;
 270     }
 271     return res;
 272 }
 273
 274 /*! \brief Bitwise xor for two SIMD4 double variables.
 275  *
 276  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 277  *
 278  * \param a data1
 279  * \param b data2
 280  * \return data1 ^ data2
 281  */
 282 static inline Simd4Double gmx_simdcall operator^(Simd4Double a, Simd4Double b)
 283 {
 284     Simd4Double res;
 285
 286     union {
 287         double       r;
 288         std::int64_t i;
 289     } conv1, conv2;
 290
 291     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 292     {
 293         conv1.r              = a.simdInternal_[i];
 294         conv2.r              = b.simdInternal_[i];
 295         conv1.i              = conv1.i ^ conv2.i;
 296         res.simdInternal_[i] = conv1.r;
 297     }
 298     return res;
 299 }
 300
 301 /*! \brief Add two double SIMD4 variables.
 302  *
 303  * \param a term1
 304  * \param b term2
 305  * \return a+b
 306  */
 307 static inline Simd4Double gmx_simdcall operator+(Simd4Double a, Simd4Double b)
 308 {
 309     Simd4Double res;
 310
 311     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 312     {
 313         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
 314     }
 315     return res;
 316 }
 317
 318 /*! \brief Subtract two SIMD4 variables.
 319  *
 320  * \param a term1
 321  * \param b term2
 322  * \return a-b
 323  */
 324 static inline Simd4Double gmx_simdcall operator-(Simd4Double a, Simd4Double b)
 325 {
 326     Simd4Double res;
 327
 328     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 329     {
 330         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
 331     }
 332     return res;
 333 }
 334
 335 /*! \brief SIMD4 floating-point negate.
 336  *
 337  * \param a SIMD4 floating-point value
 338  * \return -a
 339  */
 340 static inline Simd4Double gmx_simdcall operator-(Simd4Double a)
 341 {
 342     Simd4Double res;
 343
 344     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 345     {
 346         res.simdInternal_[i] = -a.simdInternal_[i];
 347     }
 348     return res;
 349 }
 350
 351 /*! \brief Multiply two SIMD4 variables.
 352  *
 353  * \param a factor1
 354  * \param b factor2
 355  * \return a*b.
 356  */
 357 static inline Simd4Double gmx_simdcall operator*(Simd4Double a, Simd4Double b)
 358 {
 359     Simd4Double res;
 360
 361     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 362     {
 363         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
 364     }
 365     return res;
 366 }
 367
 368 /*! \brief SIMD4 Fused-multiply-add. Result is a*b+c.
 369  *
 370  * \param a factor1
 371  * \param b factor2
 372  * \param c term
 373  * \return a*b+c
 374  */
 375 static inline Simd4Double gmx_simdcall fma(Simd4Double a, Simd4Double b, Simd4Double c)
 376 {
 377     return a * b + c;
 378 }
 379
 380 /*! \brief SIMD4 Fused-multiply-subtract. Result is a*b-c.
 381  *
 382  * \param a factor1
 383  * \param b factor2
 384  * \param c term
 385  * \return a*b-c
 386  */
 387 static inline Simd4Double gmx_simdcall fms(Simd4Double a, Simd4Double b, Simd4Double c)
 388 {
 389     return a * b - c;
 390 }
 391
 392 /*! \brief SIMD4 Fused-negated-multiply-add. Result is -a*b+c.
 393  *
 394  * \param a factor1
 395  * \param b factor2
 396  * \param c term
 397  * \return -a*b+c
 398  */
 399 static inline Simd4Double gmx_simdcall fnma(Simd4Double a, Simd4Double b, Simd4Double c)
 400 {
 401     return c - a * b;
 402 }
 403
 404 /*! \brief SIMD4 Fused-negated-multiply-subtract. Result is -a*b-c.
 405  *
 406  * \param a factor1
 407  * \param b factor2
 408  * \param c term
 409  * \return -a*b-c
 410  */
 411 static inline Simd4Double gmx_simdcall fnms(Simd4Double a, Simd4Double b, Simd4Double c)
 412 {
 413     return -a * b - c;
 414 }
 415
 416 /*! \brief SIMD4 1.0/sqrt(x) lookup.
 417  *
 418  * This is a low-level instruction that should only be called from routines
 419  * implementing the inverse square root in simd_math.h.
 420  *
 421  * \param x Argument, x>0
 422  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 423  */
 424 static inline Simd4Double gmx_simdcall rsqrt(Simd4Double x)
 425 {
 426     Simd4Double res;
 427
 428     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 429     {
 430         // sic - we only use single precision for the lookup
 431         res.simdInternal_[i] = 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]));
 432     }
 433     return res;
 434 };
 435
 436
 437 /*! \brief SIMD4 Floating-point abs().
 438  *
 439  * \param a any floating point values
 440  * \return fabs(a) for each element.
 441  */
 442 static inline Simd4Double gmx_simdcall abs(Simd4Double a)
 443 {
 444     Simd4Double res;
 445
 446     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 447     {
 448         res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
 449     }
 450     return res;
 451 }
 452
 453 /*! \brief Set each SIMD4 element to the largest from two variables.
 454  *
 455  * \param a Any floating-point value
 456  * \param b Any floating-point value
 457  * \return max(a,b) for each element.
 458  */
 459 static inline Simd4Double gmx_simdcall max(Simd4Double a, Simd4Double b)
 460 {
 461     Simd4Double res;
 462
 463     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 464     {
 465         res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
 466     }
 467     return res;
 468 }
 469
 470
 471 /*! \brief Set each SIMD4 element to the largest from two variables.
 472  *
 473  * \param a Any floating-point value
 474  * \param b Any floating-point value
 475  * \return max(a,b) for each element.
 476  */
 477 static inline Simd4Double gmx_simdcall min(Simd4Double a, Simd4Double b)
 478 {
 479     Simd4Double res;
 480
 481     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 482     {
 483         res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
 484     }
 485     return res;
 486 }
 487
 488
 489 /*! \brief SIMD4 Round to nearest integer value (in floating-point format).
 490  *
 491  * \param a Any floating-point value
 492  * \return The nearest integer, represented in floating-point format.
 493  */
 494 static inline Simd4Double gmx_simdcall round(Simd4Double a)
 495 {
 496     Simd4Double res;
 497
 498     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 499     {
 500         res.simdInternal_[i] = std::round(a.simdInternal_[i]);
 501     }
 502     return res;
 503 }
 504
 505
 506 /*! \brief Truncate SIMD4, i.e. round towards zero - common hardware instruction.
 507  *
 508  * \param a Any floating-point value
 509  * \return Integer rounded towards zero, represented in floating-point format.
 510  *
 511  * \note This is truncation towards zero, not floor(). The reason for this
 512  * is that truncation is virtually always present as a dedicated hardware
 513  * instruction, but floor() frequently isn't.
 514  */
 515 static inline Simd4Double gmx_simdcall trunc(Simd4Double a)
 516 {
 517     Simd4Double res;
 518
 519     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 520     {
 521         res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
 522     }
 523     return res;
 524 }
 525
 526 /*! \brief Return dot product of two double precision SIMD4 variables.
 527  *
 528  * The dot product is calculated between the first three elements in the two
 529  * vectors, while the fourth is ignored. The result is returned as a scalar.
 530  *
 531  * \param a vector1
 532  * \param b vector2
 533  * \result a[0]*b[0]+a[1]*b[1]+a[2]*b[2], returned as scalar. Last element is ignored.
 534  */
 535 static inline double gmx_simdcall dotProduct(Simd4Double a, Simd4Double b)
 536 {
 537     return (a.simdInternal_[0] * b.simdInternal_[0] + a.simdInternal_[1] * b.simdInternal_[1]
 538             + a.simdInternal_[2] * b.simdInternal_[2]);
 539 }
 540
 541 /*! \brief SIMD4 double transpose
 542  *
 543  * \param[in,out] v0  Row 0 on input, column 0 on output
 544  * \param[in,out] v1  Row 1 on input, column 1 on output
 545  * \param[in,out] v2  Row 2 on input, column 2 on output
 546  * \param[in,out] v3  Row 3 on input, column 3 on output
 547  */
 548 static inline void gmx_simdcall transpose(Simd4Double* v0, Simd4Double* v1, Simd4Double* v2, Simd4Double* v3)
 549 {
 550     Simd4Double t0       = *v0;
 551     Simd4Double t1       = *v1;
 552     Simd4Double t2       = *v2;
 553     Simd4Double t3       = *v3;
 554     v0->simdInternal_[0] = t0.simdInternal_[0];
 555     v0->simdInternal_[1] = t1.simdInternal_[0];
 556     v0->simdInternal_[2] = t2.simdInternal_[0];
 557     v0->simdInternal_[3] = t3.simdInternal_[0];
 558     v1->simdInternal_[0] = t0.simdInternal_[1];
 559     v1->simdInternal_[1] = t1.simdInternal_[1];
 560     v1->simdInternal_[2] = t2.simdInternal_[1];
 561     v1->simdInternal_[3] = t3.simdInternal_[1];
 562     v2->simdInternal_[0] = t0.simdInternal_[2];
 563     v2->simdInternal_[1] = t1.simdInternal_[2];
 564     v2->simdInternal_[2] = t2.simdInternal_[2];
 565     v2->simdInternal_[3] = t3.simdInternal_[2];
 566     v3->simdInternal_[0] = t0.simdInternal_[3];
 567     v3->simdInternal_[1] = t1.simdInternal_[3];
 568     v3->simdInternal_[2] = t2.simdInternal_[3];
 569     v3->simdInternal_[3] = t3.simdInternal_[3];
 570 }
 571
 572 /*! \brief a==b for SIMD4 double
 573  *
 574  * \param a value1
 575  * \param b value2
 576  * \return Each element of the boolean will be set to true if a==b.
 577  */
 578 static inline Simd4DBool gmx_simdcall operator==(Simd4Double a, Simd4Double b)
 579 {
 580     Simd4DBool res;
 581
 582     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 583     {
 584         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
 585     }
 586     return res;
 587 }
 588
 589 /*! \brief a!=b for SIMD4 double
 590  *
 591  * \param a value1
 592  * \param b value2
 593  * \return Each element of the boolean will be set to true if a!=b.
 594  */
 595 static inline Simd4DBool gmx_simdcall operator!=(Simd4Double a, Simd4Double b)
 596 {
 597     Simd4DBool res;
 598
 599     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 600     {
 601         res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
 602     }
 603     return res;
 604 }
 605
 606 /*! \brief a<b for SIMD4 double
 607  *
 608  * \param a value1
 609  * \param b value2
 610  * \return Each element of the boolean will be set to true if a<b.
 611  */
 612 static inline Simd4DBool gmx_simdcall operator<(Simd4Double a, Simd4Double b)
 613 {
 614     Simd4DBool res;
 615
 616     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 617     {
 618         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
 619     }
 620     return res;
 621 }
 622
 623
 624 /*! \brief a<=b for SIMD4 double.
 625  *
 626  * \param a value1
 627  * \param b value2
 628  * \return Each element of the boolean will be set to true if a<=b.
 629  */
 630 static inline Simd4DBool gmx_simdcall operator<=(Simd4Double a, Simd4Double b)
 631 {
 632     Simd4DBool res;
 633
 634     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 635     {
 636         res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
 637     }
 638     return res;
 639 }
 640
 641 /*! \brief Logical \a and on single precision SIMD4 booleans.
 642  *
 643  * \param a logical vars 1
 644  * \param b logical vars 2
 645  * \return For each element, the result boolean is true if a \& b are true.
 646  *
 647  * \note This is not necessarily a bitwise operation - the storage format
 648  * of booleans is implementation-dependent.
 649  */
 650 static inline Simd4DBool gmx_simdcall operator&&(Simd4DBool a, Simd4DBool b)
 651 {
 652     Simd4DBool res;
 653
 654     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 655     {
 656         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
 657     }
 658     return res;
 659 }
 660
 661 /*! \brief Logical \a or on single precision SIMD4 booleans.
 662  *
 663  * \param a logical vars 1
 664  * \param b logical vars 2
 665  * \return For each element, the result boolean is true if a or b is true.
 666  *
 667  * Note that this is not necessarily a bitwise operation - the storage format
 668  * of booleans is implementation-dependent.
 669  */
 670 static inline Simd4DBool gmx_simdcall operator||(Simd4DBool a, Simd4DBool b)
 671 {
 672     Simd4DBool res;
 673
 674     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 675     {
 676         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
 677     }
 678     return res;
 679 }
 680
 681 /*! \brief Returns non-zero if any of the boolean in SIMD4 a is True, otherwise 0.
 682  *
 683  * \param a Logical variable.
 684  * \return true if any element in a is true, otherwise false.
 685  *
 686  * The actual return value for truth will depend on the architecture,
 687  * so any non-zero value is considered truth.
 688  */
 689 static inline bool gmx_simdcall anyTrue(Simd4DBool a)
 690 {
 691     bool res = false;
 692
 693     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 694     {
 695         res = res || a.simdInternal_[i];
 696     }
 697     return res;
 698 }
 699
 700 /*! \brief Select from single precision SIMD4 variable where boolean is true.
 701  *
 702  * \param a Floating-point variable to select from
 703  * \param mask Boolean selector
 704  * \return  For each element, a is selected for true, 0 for false.
 705  */
 706 static inline Simd4Double gmx_simdcall selectByMask(Simd4Double a, Simd4DBool mask)
 707 {
 708     Simd4Double res;
 709
 710     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 711     {
 712         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
 713     }
 714     return res;
 715 }
 716
 717 /*! \brief Select from single precision SIMD4 variable where boolean is false.
 718  *
 719  * \param a Floating-point variable to select from
 720  * \param mask Boolean selector
 721  * \return  For each element, a is selected for false, 0 for true (sic).
 722  */
 723 static inline Simd4Double gmx_simdcall selectByNotMask(Simd4Double a, Simd4DBool mask)
 724 {
 725     Simd4Double res;
 726
 727     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 728     {
 729         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
 730     }
 731     return res;
 732 }
 733
 734
 735 /*! \brief Vector-blend SIMD4 selection.
 736  *
 737  * \param a First source
 738  * \param b Second source
 739  * \param sel Boolean selector
 740  * \return For each element, select b if sel is true, a otherwise.
 741  */
 742 static inline Simd4Double gmx_simdcall blend(Simd4Double a, Simd4Double b, Simd4DBool sel)
 743 {
 744     Simd4Double res;
 745
 746     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 747     {
 748         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
 749     }
 750     return res;
 751 }
 752
 753
 754 /*! \brief Return sum of all elements in SIMD4 double variable.
 755  *
 756  * \param a SIMD4 variable to reduce/sum.
 757  * \return The sum of all elements in the argument variable.
 758  *
 759  */
 760 static inline double gmx_simdcall reduce(Simd4Double a)
 761 {
 762     double sum = 0.0;
 763
 764     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 765     {
 766         sum += a.simdInternal_[i];
 767     }
 768     return sum;
 769 }
 770
 771 //! \}
 772
 773 //! \}
 774
 775 //! \endcond
 776
 777 } // namespace gmx
 778
 779 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H