Update instructions in containers.rst
[gromacs.git] / src / gromacs / simd / impl_reference / impl_reference_simd4_double.h
blob6511c96b5dc06253a070ad03d206fbf665a3a117
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H
37 #define GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H
39 /*! \libinternal \file
41 * \brief Reference implementation, SIMD4 double precision.
43 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
45 * \ingroup module_simd
#include "config.h"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>

#include <algorithm>
#include <array>

#include "impl_reference_definitions.h"
60 namespace gmx
63 /*! \cond libapi */
64 /*! \addtogroup module_simd */
65 /*! \{ */
67 /*! \name Constant width-4 double precision SIMD types and instructions
68 * \{
71 /*! \libinternal \brief SIMD4 double type.
73 * Available if \ref GMX_SIMD4_HAVE_DOUBLE is 1.
75 * \note This variable cannot be placed inside other structures or classes, since
76 * some compilers (including at least clang-3.7) appear to lose the
77 * alignment. This is likely particularly severe when allocating such
78 * memory on the heap, but it occurs for stack structures too.
80 class Simd4Double
82 public:
83 Simd4Double() {}
85 //! \brief Construct from scalar
86 Simd4Double(double d) { simdInternal_.fill(d); }
88 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
90 * This has to be public to enable usage in combination with static inline
91 * functions, but it should never, EVER, be accessed by any code outside
92 * the corresponding implementation directory since the type will depend
93 * on the architecture.
95 std::array<double, GMX_SIMD4_WIDTH> simdInternal_;
98 /*! \libinternal \brief SIMD4 variable type to use for logical comparisons on doubles.
100 * Available if \ref GMX_SIMD4_HAVE_DOUBLE is 1.
102 * \note This variable cannot be placed inside other structures or classes, since
103 * some compilers (including at least clang-3.7) appear to lose the
104 * alignment. This is likely particularly severe when allocating such
105 * memory on the heap, but it occurs for stack structures too.
107 class Simd4DBool
109 public:
110 Simd4DBool() {}
112 //! \brief Construct from scalar
113 Simd4DBool(bool b) { simdInternal_.fill(b); }
115 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
117 * This has to be public to enable usage in combination with static inline
118 * functions, but it should never, EVER, be accessed by any code outside
119 * the corresponding implementation directory since the type will depend
120 * on the architecture.
122 std::array<bool, GMX_SIMD4_WIDTH> simdInternal_;
125 /*! \brief Load 4 double values from aligned memory into SIMD4 variable.
127 * \param m Pointer to memory aligned to 4 elements.
128 * \return SIMD4 variable with data loaded.
130 static inline Simd4Double gmx_simdcall load4(const double* m)
132 Simd4Double a;
134 assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
136 std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
137 return a;
140 /*! \brief Store the contents of SIMD4 double to aligned memory m.
142 * \param[out] m Pointer to memory, aligned to 4 elements.
143 * \param a SIMD4 variable to store
145 static inline void gmx_simdcall store4(double* m, Simd4Double a)
147 assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
149 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
152 /*! \brief Load SIMD4 double from unaligned memory.
154 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
156 * \param m Pointer to memory, no alignment requirement.
157 * \return SIMD4 variable with data loaded.
159 static inline Simd4Double gmx_simdcall load4U(const double* m)
161 Simd4Double a;
162 std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
163 return a;
166 /*! \brief Store SIMD4 double to unaligned memory.
168 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
170 * \param[out] m Pointer to memory, no alignment requirement.
171 * \param a SIMD4 variable to store.
173 static inline void gmx_simdcall store4U(double* m, Simd4Double a)
175 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
178 /*! \brief Set all SIMD4 double elements to 0.
180 * You should typically just call \ref gmx::setZero(), which uses proxy objects
181 * internally to handle all types rather than adding the suffix used here.
183 * \return SIMD4 0.0
185 static inline Simd4Double gmx_simdcall simd4SetZeroD()
187 return Simd4Double(0.0);
191 /*! \brief Bitwise and for two SIMD4 double variables.
193 * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
195 * \param a data1
196 * \param b data2
197 * \return data1 & data2
199 static inline Simd4Double gmx_simdcall operator&(Simd4Double a, Simd4Double b)
201 Simd4Double res;
203 union {
204 double r;
205 std::int64_t i;
206 } conv1, conv2;
208 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
210 conv1.r = a.simdInternal_[i];
211 conv2.r = b.simdInternal_[i];
212 conv1.i = conv1.i & conv2.i;
213 res.simdInternal_[i] = conv1.r;
215 return res;
219 /*! \brief Bitwise andnot for two SIMD4 double variables. c=(~a) & b.
221 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
223 * \param a data1
224 * \param b data2
225 * \return (~data1) & data2
227 static inline Simd4Double gmx_simdcall andNot(Simd4Double a, Simd4Double b)
229 Simd4Double res;
231 union {
232 double r;
233 std::int64_t i;
234 } conv1, conv2;
236 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
238 conv1.r = a.simdInternal_[i];
239 conv2.r = b.simdInternal_[i];
240 conv1.i = ~conv1.i & conv2.i;
241 res.simdInternal_[i] = conv1.r;
243 return res;
247 /*! \brief Bitwise or for two SIMD4 doubles.
249 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
251 * \param a data1
252 * \param b data2
253 * \return data1 | data2
255 static inline Simd4Double gmx_simdcall operator|(Simd4Double a, Simd4Double b)
257 Simd4Double res;
259 union {
260 double r;
261 std::int64_t i;
262 } conv1, conv2;
264 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
266 conv1.r = a.simdInternal_[i];
267 conv2.r = b.simdInternal_[i];
268 conv1.i = conv1.i | conv2.i;
269 res.simdInternal_[i] = conv1.r;
271 return res;
274 /*! \brief Bitwise xor for two SIMD4 double variables.
276 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
278 * \param a data1
279 * \param b data2
280 * \return data1 ^ data2
282 static inline Simd4Double gmx_simdcall operator^(Simd4Double a, Simd4Double b)
284 Simd4Double res;
286 union {
287 double r;
288 std::int64_t i;
289 } conv1, conv2;
291 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
293 conv1.r = a.simdInternal_[i];
294 conv2.r = b.simdInternal_[i];
295 conv1.i = conv1.i ^ conv2.i;
296 res.simdInternal_[i] = conv1.r;
298 return res;
301 /*! \brief Add two double SIMD4 variables.
303 * \param a term1
304 * \param b term2
305 * \return a+b
307 static inline Simd4Double gmx_simdcall operator+(Simd4Double a, Simd4Double b)
309 Simd4Double res;
311 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
313 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
315 return res;
318 /*! \brief Subtract two SIMD4 variables.
320 * \param a term1
321 * \param b term2
322 * \return a-b
324 static inline Simd4Double gmx_simdcall operator-(Simd4Double a, Simd4Double b)
326 Simd4Double res;
328 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
330 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
332 return res;
335 /*! \brief SIMD4 floating-point negate.
337 * \param a SIMD4 floating-point value
338 * \return -a
340 static inline Simd4Double gmx_simdcall operator-(Simd4Double a)
342 Simd4Double res;
344 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
346 res.simdInternal_[i] = -a.simdInternal_[i];
348 return res;
351 /*! \brief Multiply two SIMD4 variables.
353 * \param a factor1
354 * \param b factor2
355 * \return a*b.
357 static inline Simd4Double gmx_simdcall operator*(Simd4Double a, Simd4Double b)
359 Simd4Double res;
361 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
363 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
365 return res;
368 /*! \brief SIMD4 Fused-multiply-add. Result is a*b+c.
370 * \param a factor1
371 * \param b factor2
372 * \param c term
373 * \return a*b+c
375 static inline Simd4Double gmx_simdcall fma(Simd4Double a, Simd4Double b, Simd4Double c)
377 return a * b + c;
380 /*! \brief SIMD4 Fused-multiply-subtract. Result is a*b-c.
382 * \param a factor1
383 * \param b factor2
384 * \param c term
385 * \return a*b-c
387 static inline Simd4Double gmx_simdcall fms(Simd4Double a, Simd4Double b, Simd4Double c)
389 return a * b - c;
392 /*! \brief SIMD4 Fused-negated-multiply-add. Result is -a*b+c.
394 * \param a factor1
395 * \param b factor2
396 * \param c term
397 * \return -a*b+c
399 static inline Simd4Double gmx_simdcall fnma(Simd4Double a, Simd4Double b, Simd4Double c)
401 return c - a * b;
404 /*! \brief SIMD4 Fused-negated-multiply-subtract. Result is -a*b-c.
406 * \param a factor1
407 * \param b factor2
408 * \param c term
409 * \return -a*b-c
411 static inline Simd4Double gmx_simdcall fnms(Simd4Double a, Simd4Double b, Simd4Double c)
413 return -a * b - c;
416 /*! \brief SIMD4 1.0/sqrt(x) lookup.
418 * This is a low-level instruction that should only be called from routines
419 * implementing the inverse square root in simd_math.h.
421 * \param x Argument, x>0
422 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
424 static inline Simd4Double gmx_simdcall rsqrt(Simd4Double x)
426 Simd4Double res;
428 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
430 // sic - we only use single precision for the lookup
431 res.simdInternal_[i] = 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]));
433 return res;
437 /*! \brief SIMD4 Floating-point abs().
439 * \param a any floating point values
440 * \return fabs(a) for each element.
442 static inline Simd4Double gmx_simdcall abs(Simd4Double a)
444 Simd4Double res;
446 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
448 res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
450 return res;
453 /*! \brief Set each SIMD4 element to the largest from two variables.
455 * \param a Any floating-point value
456 * \param b Any floating-point value
457 * \return max(a,b) for each element.
459 static inline Simd4Double gmx_simdcall max(Simd4Double a, Simd4Double b)
461 Simd4Double res;
463 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
465 res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
467 return res;
471 /*! \brief Set each SIMD4 element to the largest from two variables.
473 * \param a Any floating-point value
474 * \param b Any floating-point value
475 * \return max(a,b) for each element.
477 static inline Simd4Double gmx_simdcall min(Simd4Double a, Simd4Double b)
479 Simd4Double res;
481 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
483 res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
485 return res;
489 /*! \brief SIMD4 Round to nearest integer value (in floating-point format).
491 * \param a Any floating-point value
492 * \return The nearest integer, represented in floating-point format.
494 static inline Simd4Double gmx_simdcall round(Simd4Double a)
496 Simd4Double res;
498 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
500 res.simdInternal_[i] = std::round(a.simdInternal_[i]);
502 return res;
506 /*! \brief Truncate SIMD4, i.e. round towards zero - common hardware instruction.
508 * \param a Any floating-point value
509 * \return Integer rounded towards zero, represented in floating-point format.
511 * \note This is truncation towards zero, not floor(). The reason for this
512 * is that truncation is virtually always present as a dedicated hardware
513 * instruction, but floor() frequently isn't.
515 static inline Simd4Double gmx_simdcall trunc(Simd4Double a)
517 Simd4Double res;
519 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
521 res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
523 return res;
526 /*! \brief Return dot product of two double precision SIMD4 variables.
528 * The dot product is calculated between the first three elements in the two
529 * vectors, while the fourth is ignored. The result is returned as a scalar.
531 * \param a vector1
532 * \param b vector2
533 * \result a[0]*b[0]+a[1]*b[1]+a[2]*b[2], returned as scalar. Last element is ignored.
535 static inline double gmx_simdcall dotProduct(Simd4Double a, Simd4Double b)
537 return (a.simdInternal_[0] * b.simdInternal_[0] + a.simdInternal_[1] * b.simdInternal_[1]
538 + a.simdInternal_[2] * b.simdInternal_[2]);
541 /*! \brief SIMD4 double transpose
543 * \param[in,out] v0 Row 0 on input, column 0 on output
544 * \param[in,out] v1 Row 1 on input, column 1 on output
545 * \param[in,out] v2 Row 2 on input, column 2 on output
546 * \param[in,out] v3 Row 3 on input, column 3 on output
548 static inline void gmx_simdcall transpose(Simd4Double* v0, Simd4Double* v1, Simd4Double* v2, Simd4Double* v3)
550 Simd4Double t0 = *v0;
551 Simd4Double t1 = *v1;
552 Simd4Double t2 = *v2;
553 Simd4Double t3 = *v3;
554 v0->simdInternal_[0] = t0.simdInternal_[0];
555 v0->simdInternal_[1] = t1.simdInternal_[0];
556 v0->simdInternal_[2] = t2.simdInternal_[0];
557 v0->simdInternal_[3] = t3.simdInternal_[0];
558 v1->simdInternal_[0] = t0.simdInternal_[1];
559 v1->simdInternal_[1] = t1.simdInternal_[1];
560 v1->simdInternal_[2] = t2.simdInternal_[1];
561 v1->simdInternal_[3] = t3.simdInternal_[1];
562 v2->simdInternal_[0] = t0.simdInternal_[2];
563 v2->simdInternal_[1] = t1.simdInternal_[2];
564 v2->simdInternal_[2] = t2.simdInternal_[2];
565 v2->simdInternal_[3] = t3.simdInternal_[2];
566 v3->simdInternal_[0] = t0.simdInternal_[3];
567 v3->simdInternal_[1] = t1.simdInternal_[3];
568 v3->simdInternal_[2] = t2.simdInternal_[3];
569 v3->simdInternal_[3] = t3.simdInternal_[3];
572 /*! \brief a==b for SIMD4 double
574 * \param a value1
575 * \param b value2
576 * \return Each element of the boolean will be set to true if a==b.
578 static inline Simd4DBool gmx_simdcall operator==(Simd4Double a, Simd4Double b)
580 Simd4DBool res;
582 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
584 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
586 return res;
589 /*! \brief a!=b for SIMD4 double
591 * \param a value1
592 * \param b value2
593 * \return Each element of the boolean will be set to true if a!=b.
595 static inline Simd4DBool gmx_simdcall operator!=(Simd4Double a, Simd4Double b)
597 Simd4DBool res;
599 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
601 res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
603 return res;
606 /*! \brief a<b for SIMD4 double
608 * \param a value1
609 * \param b value2
610 * \return Each element of the boolean will be set to true if a<b.
612 static inline Simd4DBool gmx_simdcall operator<(Simd4Double a, Simd4Double b)
614 Simd4DBool res;
616 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
618 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
620 return res;
624 /*! \brief a<=b for SIMD4 double.
626 * \param a value1
627 * \param b value2
628 * \return Each element of the boolean will be set to true if a<=b.
630 static inline Simd4DBool gmx_simdcall operator<=(Simd4Double a, Simd4Double b)
632 Simd4DBool res;
634 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
636 res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
638 return res;
641 /*! \brief Logical \a and on single precision SIMD4 booleans.
643 * \param a logical vars 1
644 * \param b logical vars 2
645 * \return For each element, the result boolean is true if a \& b are true.
647 * \note This is not necessarily a bitwise operation - the storage format
648 * of booleans is implementation-dependent.
650 static inline Simd4DBool gmx_simdcall operator&&(Simd4DBool a, Simd4DBool b)
652 Simd4DBool res;
654 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
656 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
658 return res;
661 /*! \brief Logical \a or on single precision SIMD4 booleans.
663 * \param a logical vars 1
664 * \param b logical vars 2
665 * \return For each element, the result boolean is true if a or b is true.
667 * Note that this is not necessarily a bitwise operation - the storage format
668 * of booleans is implementation-dependent.
670 static inline Simd4DBool gmx_simdcall operator||(Simd4DBool a, Simd4DBool b)
672 Simd4DBool res;
674 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
676 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
678 return res;
681 /*! \brief Returns non-zero if any of the boolean in SIMD4 a is True, otherwise 0.
683 * \param a Logical variable.
684 * \return true if any element in a is true, otherwise false.
686 * The actual return value for truth will depend on the architecture,
687 * so any non-zero value is considered truth.
689 static inline bool gmx_simdcall anyTrue(Simd4DBool a)
691 bool res = false;
693 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
695 res = res || a.simdInternal_[i];
697 return res;
700 /*! \brief Select from single precision SIMD4 variable where boolean is true.
702 * \param a Floating-point variable to select from
703 * \param mask Boolean selector
704 * \return For each element, a is selected for true, 0 for false.
706 static inline Simd4Double gmx_simdcall selectByMask(Simd4Double a, Simd4DBool mask)
708 Simd4Double res;
710 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
712 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
714 return res;
717 /*! \brief Select from single precision SIMD4 variable where boolean is false.
719 * \param a Floating-point variable to select from
720 * \param mask Boolean selector
721 * \return For each element, a is selected for false, 0 for true (sic).
723 static inline Simd4Double gmx_simdcall selectByNotMask(Simd4Double a, Simd4DBool mask)
725 Simd4Double res;
727 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
729 res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
731 return res;
735 /*! \brief Vector-blend SIMD4 selection.
737 * \param a First source
738 * \param b Second source
739 * \param sel Boolean selector
740 * \return For each element, select b if sel is true, a otherwise.
742 static inline Simd4Double gmx_simdcall blend(Simd4Double a, Simd4Double b, Simd4DBool sel)
744 Simd4Double res;
746 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
748 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
750 return res;
754 /*! \brief Return sum of all elements in SIMD4 double variable.
756 * \param a SIMD4 variable to reduce/sum.
757 * \return The sum of all elements in the argument variable.
760 static inline double gmx_simdcall reduce(Simd4Double a)
762 double sum = 0.0;
764 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
766 sum += a.simdInternal_[i];
768 return sum;
771 //! \}
773 //! \}
775 //! \endcond
777 } // namespace gmx
779 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H