2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H
37 #define GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H
39 /*! \libinternal \file
41 * \brief Reference implementation, SIMD4 double precision.
43 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
45 * \ingroup module_simd
58 #include "impl_reference_definitions.h"
64 /*! \addtogroup module_simd */
67 /*! \name Constant width-4 double precision SIMD types and instructions
71 /*! \libinternal \brief SIMD4 double type.
73 * Available if \ref GMX_SIMD4_HAVE_DOUBLE is 1.
75 * \note This variable cannot be placed inside other structures or classes, since
76 * some compilers (including at least clang-3.7) appear to lose the
77 * alignment. This is likely particularly severe when allocating such
78 * memory on the heap, but it occurs for stack structures too.
85 //! \brief Construct from scalar
86 Simd4Double(double d
) { simdInternal_
.fill(d
); }
88 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
90 * This has to be public to enable usage in combination with static inline
91 * functions, but it should never, EVER, be accessed by any code outside
92 * the corresponding implementation directory since the type will depend
93 * on the architecture.
95 std::array
<double, GMX_SIMD4_WIDTH
> simdInternal_
;
98 /*! \libinternal \brief SIMD4 variable type to use for logical comparisons on doubles.
100 * Available if \ref GMX_SIMD4_HAVE_DOUBLE is 1.
102 * \note This variable cannot be placed inside other structures or classes, since
103 * some compilers (including at least clang-3.7) appear to lose the
104 * alignment. This is likely particularly severe when allocating such
105 * memory on the heap, but it occurs for stack structures too.
112 //! \brief Construct from scalar
113 Simd4DBool(bool b
) { simdInternal_
.fill(b
); }
115 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
117 * This has to be public to enable usage in combination with static inline
118 * functions, but it should never, EVER, be accessed by any code outside
119 * the corresponding implementation directory since the type will depend
120 * on the architecture.
122 std::array
<bool, GMX_SIMD4_WIDTH
> simdInternal_
;
125 /*! \brief Load 4 double values from aligned memory into SIMD4 variable.
127 * \param m Pointer to memory aligned to 4 elements.
128 * \return SIMD4 variable with data loaded.
130 static inline Simd4Double gmx_simdcall
131 load4(const double *m
)
135 assert(std::size_t(m
) % (a
.simdInternal_
.size()*sizeof(double)) == 0);
137 std::copy(m
, m
+a
.simdInternal_
.size(), a
.simdInternal_
.begin());
141 /*! \brief Store the contents of SIMD4 double to aligned memory m.
143 * \param[out] m Pointer to memory, aligned to 4 elements.
144 * \param a SIMD4 variable to store
146 static inline void gmx_simdcall
147 store4(double *m
, Simd4Double a
)
149 assert(std::size_t(m
) % (a
.simdInternal_
.size()*sizeof(double)) == 0);
151 std::copy(a
.simdInternal_
.begin(), a
.simdInternal_
.end(), m
);
154 /*! \brief Load SIMD4 double from unaligned memory.
156 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
158 * \param m Pointer to memory, no alignment requirement.
159 * \return SIMD4 variable with data loaded.
161 static inline Simd4Double gmx_simdcall
162 load4U(const double *m
)
165 std::copy(m
, m
+a
.simdInternal_
.size(), a
.simdInternal_
.begin());
169 /*! \brief Store SIMD4 double to unaligned memory.
171 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
173 * \param[out] m Pointer to memory, no alignment requirement.
174 * \param a SIMD4 variable to store.
176 static inline void gmx_simdcall
177 store4U(double *m
, Simd4Double a
)
179 std::copy(a
.simdInternal_
.begin(), a
.simdInternal_
.end(), m
);
182 /*! \brief Set all SIMD4 double elements to 0.
184 * You should typically just call \ref gmx::setZero(), which uses proxy objects
185 * internally to handle all types rather than adding the suffix used here.
189 static inline Simd4Double gmx_simdcall
192 return Simd4Double(0.0);
196 /*! \brief Bitwise and for two SIMD4 double variables.
198 * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
202 * \return data1 & data2
204 static inline Simd4Double gmx_simdcall
205 operator&(Simd4Double a
, Simd4Double b
)
216 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
218 conv1
.r
= a
.simdInternal_
[i
];
219 conv2
.r
= b
.simdInternal_
[i
];
220 conv1
.i
= conv1
.i
& conv2
.i
;
221 res
.simdInternal_
[i
] = conv1
.r
;
227 /*! \brief Bitwise andnot for two SIMD4 double variables. c=(~a) & b.
229 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
233 * \return (~data1) & data2
235 static inline Simd4Double gmx_simdcall
236 andNot(Simd4Double a
, Simd4Double b
)
247 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
249 conv1
.r
= a
.simdInternal_
[i
];
250 conv2
.r
= b
.simdInternal_
[i
];
251 conv1
.i
= ~conv1
.i
& conv2
.i
;
252 res
.simdInternal_
[i
] = conv1
.r
;
258 /*! \brief Bitwise or for two SIMD4 doubles.
260 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
264 * \return data1 | data2
266 static inline Simd4Double gmx_simdcall
267 operator|(Simd4Double a
, Simd4Double b
)
278 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
280 conv1
.r
= a
.simdInternal_
[i
];
281 conv2
.r
= b
.simdInternal_
[i
];
282 conv1
.i
= conv1
.i
| conv2
.i
;
283 res
.simdInternal_
[i
] = conv1
.r
;
288 /*! \brief Bitwise xor for two SIMD4 double variables.
290 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
294 * \return data1 ^ data2
296 static inline Simd4Double gmx_simdcall
297 operator^(Simd4Double a
, Simd4Double b
)
308 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
310 conv1
.r
= a
.simdInternal_
[i
];
311 conv2
.r
= b
.simdInternal_
[i
];
312 conv1
.i
= conv1
.i
^ conv2
.i
;
313 res
.simdInternal_
[i
] = conv1
.r
;
318 /*! \brief Add two double SIMD4 variables.
324 static inline Simd4Double gmx_simdcall
325 operator+(Simd4Double a
, Simd4Double b
)
329 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
331 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] + b
.simdInternal_
[i
];
336 /*! \brief Subtract two SIMD4 variables.
342 static inline Simd4Double gmx_simdcall
343 operator-(Simd4Double a
, Simd4Double b
)
347 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
349 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] - b
.simdInternal_
[i
];
354 /*! \brief SIMD4 floating-point negate.
356 * \param a SIMD4 floating-point value
359 static inline Simd4Double gmx_simdcall
360 operator-(Simd4Double a
)
364 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
366 res
.simdInternal_
[i
] = -a
.simdInternal_
[i
];
371 /*! \brief Multiply two SIMD4 variables.
377 static inline Simd4Double gmx_simdcall
378 operator*(Simd4Double a
, Simd4Double b
)
382 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
384 res
.simdInternal_
[i
] = a
.simdInternal_
[i
] * b
.simdInternal_
[i
];
389 /*! \brief SIMD4 Fused-multiply-add. Result is a*b+c.
396 static inline Simd4Double gmx_simdcall
397 fma(Simd4Double a
, Simd4Double b
, Simd4Double c
)
402 /*! \brief SIMD4 Fused-multiply-subtract. Result is a*b-c.
409 static inline Simd4Double gmx_simdcall
410 fms(Simd4Double a
, Simd4Double b
, Simd4Double c
)
415 /*! \brief SIMD4 Fused-negated-multiply-add. Result is -a*b+c.
422 static inline Simd4Double gmx_simdcall
423 fnma(Simd4Double a
, Simd4Double b
, Simd4Double c
)
428 /*! \brief SIMD4 Fused-negated-multiply-subtract. Result is -a*b-c.
435 static inline Simd4Double gmx_simdcall
436 fnms(Simd4Double a
, Simd4Double b
, Simd4Double c
)
441 /*! \brief SIMD4 1.0/sqrt(x) lookup.
443 * This is a low-level instruction that should only be called from routines
444 * implementing the inverse square root in simd_math.h.
446 * \param x Argument, x>0
447 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
449 static inline Simd4Double gmx_simdcall
454 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
456 // sic - we only use single precision for the lookup
457 res
.simdInternal_
[i
] = 1.0f
/ std::sqrt(static_cast<float>(x
.simdInternal_
[i
]));
463 /*! \brief SIMD4 Floating-point abs().
465 * \param a any floating point values
466 * \return fabs(a) for each element.
468 static inline Simd4Double gmx_simdcall
473 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
475 res
.simdInternal_
[i
] = std::abs(a
.simdInternal_
[i
]);
480 /*! \brief Set each SIMD4 element to the largest from two variables.
482 * \param a Any floating-point value
483 * \param b Any floating-point value
484 * \return max(a,b) for each element.
486 static inline Simd4Double gmx_simdcall
487 max(Simd4Double a
, Simd4Double b
)
491 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
493 res
.simdInternal_
[i
] = std::max(a
.simdInternal_
[i
], b
.simdInternal_
[i
]);
499 /*! \brief Set each SIMD4 element to the smallest from two variables.
501 * \param a Any floating-point value
502 * \param b Any floating-point value
503 * \return min(a,b) for each element.
505 static inline Simd4Double gmx_simdcall
506 min(Simd4Double a
, Simd4Double b
)
510 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
512 res
.simdInternal_
[i
] = std::min(a
.simdInternal_
[i
], b
.simdInternal_
[i
]);
518 /*! \brief SIMD4 Round to nearest integer value (in floating-point format).
520 * \param a Any floating-point value
521 * \return The nearest integer, represented in floating-point format.
523 static inline Simd4Double gmx_simdcall
528 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
530 res
.simdInternal_
[i
] = std::round(a
.simdInternal_
[i
]);
536 /*! \brief Truncate SIMD4, i.e. round towards zero - common hardware instruction.
538 * \param a Any floating-point value
539 * \return Integer rounded towards zero, represented in floating-point format.
541 * \note This is truncation towards zero, not floor(). The reason for this
542 * is that truncation is virtually always present as a dedicated hardware
543 * instruction, but floor() frequently isn't.
545 static inline Simd4Double gmx_simdcall
550 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
552 res
.simdInternal_
[i
] = std::trunc(a
.simdInternal_
[i
]);
557 /*! \brief Return dot product of two double precision SIMD4 variables.
559 * The dot product is calculated between the first three elements in the two
560 * vectors, while the fourth is ignored. The result is returned as a scalar.
564 * \result a[0]*b[0]+a[1]*b[1]+a[2]*b[2], returned as scalar. Last element is ignored.
566 static inline double gmx_simdcall
567 dotProduct(Simd4Double a
, Simd4Double b
)
570 (a
.simdInternal_
[0] * b
.simdInternal_
[0] +
571 a
.simdInternal_
[1] * b
.simdInternal_
[1] +
572 a
.simdInternal_
[2] * b
.simdInternal_
[2]);
575 /*! \brief SIMD4 double transpose
577 * \param[in,out] v0 Row 0 on input, column 0 on output
578 * \param[in,out] v1 Row 1 on input, column 1 on output
579 * \param[in,out] v2 Row 2 on input, column 2 on output
580 * \param[in,out] v3 Row 3 on input, column 3 on output
582 static inline void gmx_simdcall
583 transpose(Simd4Double
* v0
, Simd4Double
* v1
,
584 Simd4Double
* v2
, Simd4Double
* v3
)
586 Simd4Double t0
= *v0
;
587 Simd4Double t1
= *v1
;
588 Simd4Double t2
= *v2
;
589 Simd4Double t3
= *v3
;
590 v0
->simdInternal_
[0] = t0
.simdInternal_
[0];
591 v0
->simdInternal_
[1] = t1
.simdInternal_
[0];
592 v0
->simdInternal_
[2] = t2
.simdInternal_
[0];
593 v0
->simdInternal_
[3] = t3
.simdInternal_
[0];
594 v1
->simdInternal_
[0] = t0
.simdInternal_
[1];
595 v1
->simdInternal_
[1] = t1
.simdInternal_
[1];
596 v1
->simdInternal_
[2] = t2
.simdInternal_
[1];
597 v1
->simdInternal_
[3] = t3
.simdInternal_
[1];
598 v2
->simdInternal_
[0] = t0
.simdInternal_
[2];
599 v2
->simdInternal_
[1] = t1
.simdInternal_
[2];
600 v2
->simdInternal_
[2] = t2
.simdInternal_
[2];
601 v2
->simdInternal_
[3] = t3
.simdInternal_
[2];
602 v3
->simdInternal_
[0] = t0
.simdInternal_
[3];
603 v3
->simdInternal_
[1] = t1
.simdInternal_
[3];
604 v3
->simdInternal_
[2] = t2
.simdInternal_
[3];
605 v3
->simdInternal_
[3] = t3
.simdInternal_
[3];
608 /*! \brief a==b for SIMD4 double
612 * \return Each element of the boolean will be set to true if a==b.
614 static inline Simd4DBool gmx_simdcall
615 operator==(Simd4Double a
, Simd4Double b
)
619 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
621 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] == b
.simdInternal_
[i
]);
626 /*! \brief a!=b for SIMD4 double
630 * \return Each element of the boolean will be set to true if a!=b.
632 static inline Simd4DBool gmx_simdcall
633 operator!=(Simd4Double a
, Simd4Double b
)
637 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
639 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] != b
.simdInternal_
[i
]);
644 /*! \brief a<b for SIMD4 double
648 * \return Each element of the boolean will be set to true if a<b.
650 static inline Simd4DBool gmx_simdcall
651 operator<(Simd4Double a
, Simd4Double b
)
655 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
657 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] < b
.simdInternal_
[i
]);
663 /*! \brief a<=b for SIMD4 double.
667 * \return Each element of the boolean will be set to true if a<=b.
669 static inline Simd4DBool gmx_simdcall
670 operator<=(Simd4Double a
, Simd4Double b
)
674 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
676 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] <= b
.simdInternal_
[i
]);
681 /*! \brief Logical \a and on double precision SIMD4 booleans.
683 * \param a logical vars 1
684 * \param b logical vars 2
685 * \return For each element, the result boolean is true if a \& b are true.
687 * \note This is not necessarily a bitwise operation - the storage format
688 * of booleans is implementation-dependent.
690 static inline Simd4DBool gmx_simdcall
691 operator&&(Simd4DBool a
, Simd4DBool b
)
695 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
697 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] && b
.simdInternal_
[i
]);
702 /*! \brief Logical \a or on double precision SIMD4 booleans.
704 * \param a logical vars 1
705 * \param b logical vars 2
706 * \return For each element, the result boolean is true if a or b is true.
708 * \note This is not necessarily a bitwise operation - the storage format
709 * of booleans is implementation-dependent.
711 static inline Simd4DBool gmx_simdcall
712 operator||(Simd4DBool a
, Simd4DBool b
)
716 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
718 res
.simdInternal_
[i
] = (a
.simdInternal_
[i
] || b
.simdInternal_
[i
]);
723 /*! \brief Returns true if any of the booleans in SIMD4 a is true, otherwise false.
725 * \param a Logical variable.
726 * \return true if any element in a is true, otherwise false.
728 * The actual return value for truth will depend on the architecture,
729 * so any non-zero value is considered truth.
731 static inline bool gmx_simdcall
732 anyTrue(Simd4DBool a
)
736 for (std::size_t i
= 0; i
< a
.simdInternal_
.size(); i
++)
738 res
= res
|| a
.simdInternal_
[i
];
743 /*! \brief Select from double precision SIMD4 variable where boolean is true.
745 * \param a Floating-point variable to select from
746 * \param mask Boolean selector
747 * \return For each element, a is selected for true, 0 for false.
749 static inline Simd4Double gmx_simdcall
750 selectByMask(Simd4Double a
, Simd4DBool mask
)
754 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
756 res
.simdInternal_
[i
] = mask
.simdInternal_
[i
] ? a
.simdInternal_
[i
] : 0.0;
761 /*! \brief Select from double precision SIMD4 variable where boolean is false.
763 * \param a Floating-point variable to select from
764 * \param mask Boolean selector
765 * \return For each element, a is selected for false, 0 for true (sic).
767 static inline Simd4Double gmx_simdcall
768 selectByNotMask(Simd4Double a
, Simd4DBool mask
)
772 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
774 res
.simdInternal_
[i
] = mask
.simdInternal_
[i
] ? 0.0 : a
.simdInternal_
[i
];
780 /*! \brief Vector-blend SIMD4 selection.
782 * \param a First source
783 * \param b Second source
784 * \param sel Boolean selector
785 * \return For each element, select b if sel is true, a otherwise.
787 static inline Simd4Double gmx_simdcall
788 blend(Simd4Double a
, Simd4Double b
, Simd4DBool sel
)
792 for (std::size_t i
= 0; i
< res
.simdInternal_
.size(); i
++)
794 res
.simdInternal_
[i
] = sel
.simdInternal_
[i
] ? b
.simdInternal_
[i
] : a
.simdInternal_
[i
];
800 /*! \brief Return sum of all elements in SIMD4 double variable.
802 * \param a SIMD4 variable to reduce/sum.
803 * \return The sum of all elements in the argument variable.
806 static inline double gmx_simdcall
807 reduce(Simd4Double a
)
811 for (std::size_t i
= 0; i
< a
.simdInternal_
.size(); i
++)
813 sum
+= a
.simdInternal_
[i
];
826 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H