128-bit AVX2 SIMD for AMD Ryzen
[gromacs.git] / src / gromacs / simd / impl_reference / impl_reference_simd_double.h
blob07a9cec67259a6331c36b3b11ec42fcc27bf1de4
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H
39 /*! \libinternal \file
41 * \brief Reference implementation, SIMD double precision.
43 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
45 * \ingroup module_simd
48 #include "config.h"
50 #include <cassert>
51 #include <cmath>
52 #include <cstddef>
53 #include <cstdint>
55 #include <algorithm>
56 #include <array>
58 #include "gromacs/utility/fatalerror.h"
60 #include "impl_reference_definitions.h"
61 #include "impl_reference_simd_float.h"
63 namespace gmx
/*! \cond libapi */
/*! \addtogroup module_simd */
/*! \{ */

/* \name SIMD implementation data types
 * \{
 */
74 /*! \libinternal \brief Double SIMD variable. Available if GMX_SIMD_HAVE_DOUBLE is 1.
76 * \note This variable cannot be placed inside other structures or classes, since
77 * some compilers (including at least clang-3.7) appear to lose the
78 * alignment. This is likely particularly severe when allocating such
79 * memory on the heap, but it occurs for stack structures too.
81 class SimdDouble
83 public:
84 SimdDouble() {}
86 //! \brief Construct from scalar
87 SimdDouble(double d) { simdInternal_.fill(d); }
89 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
91 * This has to be public to enable usage in combination with static inline
92 * functions, but it should never, EVER, be accessed by any code outside
93 * the corresponding implementation directory since the type will depend
94 * on the architecture.
96 std::array<double, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
99 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from double.
101 * Available if GMX_SIMD_HAVE_DOUBLE is 1.
103 * \note The integer SIMD type will always be available, but on architectures
104 * that do not have any real integer SIMD support it might be defined as the
105 * floating-point type. This will work fine, since there are separate defines
106 * for whether the implementation can actually do any operations on integer
107 * SIMD types.
109 * \note This variable cannot be placed inside other structures or classes, since
110 * some compilers (including at least clang-3.7) appear to lose the
111 * alignment. This is likely particularly severe when allocating such
112 * memory on the heap, but it occurs for stack structures too.
114 class SimdDInt32
116 public:
117 SimdDInt32() {}
119 //! \brief Construct from scalar
120 SimdDInt32(std::int32_t i) { simdInternal_.fill(i); }
122 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
124 * This has to be public to enable usage in combination with static inline
125 * functions, but it should never, EVER, be accessed by any code outside
126 * the corresponding implementation directory since the type will depend
127 * on the architecture.
129 std::array<std::int32_t, GMX_SIMD_DINT32_WIDTH> simdInternal_;
132 /*! \libinternal \brief Boolean type for double SIMD data.
134 * Available if GMX_SIMD_HAVE_DOUBLE is 1.
136 * \note This variable cannot be placed inside other structures or classes, since
137 * some compilers (including at least clang-3.7) appear to lose the
138 * alignment. This is likely particularly severe when allocating such
139 * memory on the heap, but it occurs for stack structures too.
141 class SimdDBool
143 public:
144 SimdDBool() {}
146 //! \brief Construct from scalar bool
147 SimdDBool(bool b) { simdInternal_.fill(b); }
149 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
151 * This has to be public to enable usage in combination with static inline
152 * functions, but it should never, EVER, be accessed by any code outside
153 * the corresponding implementation directory since the type will depend
154 * on the architecture.
156 std::array<bool, GMX_SIMD_DOUBLE_WIDTH> simdInternal_;
159 /*! \libinternal \brief Boolean type for integer datatypes corresponding to double SIMD.
161 * Available if GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
163 * \note This variable cannot be placed inside other structures or classes, since
164 * some compilers (including at least clang-3.7) appear to lose the
165 * alignment. This is likely particularly severe when allocating such
166 * memory on the heap, but it occurs for stack structures too.
168 class SimdDIBool
170 public:
171 SimdDIBool() {}
173 //! \brief Construct from scalar
174 SimdDIBool(bool b) { simdInternal_.fill(b); }
176 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
178 * This has to be public to enable usage in combination with static inline
179 * functions, but it should never, EVER, be accessed by any code outside
180 * the corresponding implementation directory since the type will depend
181 * on the architecture.
183 std::array<bool, GMX_SIMD_DINT32_WIDTH> simdInternal_;
/*! \}
 *
 * \name SIMD implementation load/store operations for double precision floating point
 * \{
 */
192 /*! \brief Load \ref GMX_SIMD_DOUBLE_WIDTH numbers from aligned memory.
194 * \param m Pointer to memory aligned to the SIMD width.
195 * \return SIMD variable with data loaded.
197 static inline SimdDouble gmx_simdcall
198 simdLoad(const double *m)
200 SimdDouble a;
202 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
204 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
205 return a;
208 /*! \brief Store the contents of SIMD double variable to aligned memory m.
210 * \param[out] m Pointer to memory, aligned to SIMD width.
211 * \param a SIMD variable to store
213 static inline void gmx_simdcall
214 store(double *m, SimdDouble a)
216 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
218 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
221 /*! \brief Load SIMD double from unaligned memory.
223 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
225 * \param m Pointer to memory, no alignment requirement.
226 * \return SIMD variable with data loaded.
228 static inline SimdDouble gmx_simdcall
229 simdLoadU(const double *m)
231 SimdDouble a;
232 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
233 return a;
236 /*! \brief Store SIMD double to unaligned memory.
238 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
240 * \param[out] m Pointer to memory, no alignment requirement.
241 * \param a SIMD variable to store.
243 static inline void gmx_simdcall
244 storeU(double *m, SimdDouble a)
246 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
249 /*! \brief Set all SIMD double variable elements to 0.0.
251 * You should typically just call \ref gmx::setZero(), which uses proxy objects
252 * internally to handle all types rather than adding the suffix used here.
254 * \return SIMD 0.0
256 static inline SimdDouble gmx_simdcall
257 setZeroD()
259 return SimdDouble(0.0);
/*! \}
 *
 * \name SIMD implementation load/store operations for integers (corresponding to double)
 * \{
 */
268 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
270 * You should typically just call \ref gmx::load(), which uses proxy objects
271 * internally to handle all types rather than adding the suffix used here.
273 * \param m Pointer to memory, aligned to (double) integer SIMD width.
274 * \return SIMD integer variable.
276 static inline SimdDInt32 gmx_simdcall
277 simdLoadDI(const std::int32_t * m)
279 SimdDInt32 a;
281 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
283 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
284 return a;
287 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
289 * \param m Memory aligned to (double) integer SIMD width.
290 * \param a SIMD (double) integer variable to store.
292 static inline void gmx_simdcall
293 store(std::int32_t * m, SimdDInt32 a)
295 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
297 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
300 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdDouble.
302 * You should typically just call \ref gmx::loadU(), which uses proxy objects
303 * internally to handle all types rather than adding the suffix used here.
305 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
307 * \param m Pointer to memory, no alignment requirements.
308 * \return SIMD integer variable.
310 static inline SimdDInt32 gmx_simdcall
311 simdLoadUDI(const std::int32_t *m)
313 SimdDInt32 a;
314 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
315 return a;
318 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdDouble.
320 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
322 * \param m Memory pointer, no alignment requirements.
323 * \param a SIMD (double) integer variable to store.
325 static inline void gmx_simdcall
326 storeU(std::int32_t * m, SimdDInt32 a)
328 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
331 /*! \brief Set all SIMD (double) integer variable elements to 0.
333 * You should typically just call \ref gmx::setZero(), which uses proxy objects
334 * internally to handle all types rather than adding the suffix used here.
336 * \return SIMD 0
338 static inline SimdDInt32 gmx_simdcall
339 setZeroDI()
341 return SimdDInt32(0);
344 /*! \brief Extract element with index i from \ref gmx::SimdDInt32.
346 * Available if \ref GMX_SIMD_HAVE_DINT32_EXTRACT is 1.
348 * \tparam index Compile-time constant, position to extract (first position is 0)
349 * \param a SIMD variable from which to extract value.
350 * \return Single integer from position index in SIMD variable.
352 template<int index>
353 static inline std::int32_t gmx_simdcall
354 extract(SimdDInt32 a)
356 return a.simdInternal_[index];
/*! \}
 *
 * \name SIMD implementation double precision floating-point bitwise logical operations
 * \{
 */
365 /*! \brief Bitwise and for two SIMD double variables.
367 * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
369 * \param a data1
370 * \param b data2
371 * \return data1 & data2
373 static inline SimdDouble gmx_simdcall
374 operator&(SimdDouble a, SimdDouble b)
376 SimdDouble res;
378 union
380 double r;
381 std::int64_t i;
383 conv1, conv2;
385 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
387 conv1.r = a.simdInternal_[i];
388 conv2.r = b.simdInternal_[i];
389 conv1.i = conv1.i & conv2.i;
390 res.simdInternal_[i] = conv1.r;
392 return res;
395 /*! \brief Bitwise andnot for SIMD double.
397 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
399 * \param a data1
400 * \param b data2
401 * \return (~data1) & data2
403 static inline SimdDouble gmx_simdcall
404 andNot(SimdDouble a, SimdDouble b)
406 SimdDouble res;
408 union
410 double r;
411 std::int64_t i;
413 conv1, conv2;
415 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
417 conv1.r = a.simdInternal_[i];
418 conv2.r = b.simdInternal_[i];
419 conv1.i = ~conv1.i & conv2.i;
420 res.simdInternal_[i] = conv1.r;
422 return res;
425 /*! \brief Bitwise or for SIMD double.
427 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
429 * \param a data1
430 * \param b data2
431 * \return data1 | data2
433 static inline SimdDouble gmx_simdcall
434 operator|(SimdDouble a, SimdDouble b)
436 SimdDouble res;
438 union
440 double r;
441 std::int64_t i;
443 conv1, conv2;
445 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
447 conv1.r = a.simdInternal_[i];
448 conv2.r = b.simdInternal_[i];
449 conv1.i = conv1.i | conv2.i;
450 res.simdInternal_[i] = conv1.r;
452 return res;
455 /*! \brief Bitwise xor for SIMD double.
457 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
459 * \param a data1
460 * \param b data2
461 * \return data1 ^ data2
463 static inline SimdDouble gmx_simdcall
464 operator^(SimdDouble a, SimdDouble b)
466 SimdDouble res;
468 union
470 double r;
471 std::int64_t i;
473 conv1, conv2;
475 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
477 conv1.r = a.simdInternal_[i];
478 conv2.r = b.simdInternal_[i];
479 conv1.i = conv1.i ^ conv2.i;
480 res.simdInternal_[i] = conv1.r;
482 return res;
/*! \}
 *
 * \name SIMD implementation double precision floating-point arithmetics
 * \{
 */
491 /*! \brief Add two double SIMD variables.
493 * \param a term1
494 * \param b term2
495 * \return a+b
497 static inline SimdDouble gmx_simdcall
498 operator+(SimdDouble a, SimdDouble b)
500 SimdDouble res;
502 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
504 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
506 return res;
509 /*! \brief Subtract two double SIMD variables.
511 * \param a term1
512 * \param b term2
513 * \return a-b
515 static inline SimdDouble gmx_simdcall
516 operator-(SimdDouble a, SimdDouble b)
518 SimdDouble res;
520 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
522 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
524 return res;
527 /*! \brief SIMD double precision negate.
529 * \param a SIMD double precision value
530 * \return -a
532 static inline SimdDouble gmx_simdcall
533 operator-(SimdDouble a)
535 SimdDouble res;
537 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
539 res.simdInternal_[i] = -a.simdInternal_[i];
541 return res;
544 /*! \brief Multiply two double SIMD variables.
546 * \param a factor1
547 * \param b factor2
548 * \return a*b.
550 static inline SimdDouble gmx_simdcall
551 operator*(SimdDouble a, SimdDouble b)
553 SimdDouble res;
555 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
557 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
559 return res;
562 /*! \brief SIMD double Fused-multiply-add. Result is a*b+c.
564 * \param a factor1
565 * \param b factor2
566 * \param c term
567 * \return a*b+c
569 static inline SimdDouble gmx_simdcall
570 fma(SimdDouble a, SimdDouble b, SimdDouble c)
572 return a*b+c;
575 /*! \brief SIMD double Fused-multiply-subtract. Result is a*b-c.
577 * \param a factor1
578 * \param b factor2
579 * \param c term
580 * \return a*b-c
582 static inline SimdDouble gmx_simdcall
583 fms(SimdDouble a, SimdDouble b, SimdDouble c)
585 return a*b-c;
588 /*! \brief SIMD double Fused-negated-multiply-add. Result is -a*b+c.
590 * \param a factor1
591 * \param b factor2
592 * \param c term
593 * \return -a*b+c
595 static inline SimdDouble gmx_simdcall
596 fnma(SimdDouble a, SimdDouble b, SimdDouble c)
598 return c-a*b;
601 /*! \brief SIMD double Fused-negated-multiply-subtract. Result is -a*b-c.
603 * \param a factor1
604 * \param b factor2
605 * \param c term
606 * \return -a*b-c
608 static inline SimdDouble gmx_simdcall
609 fnms(SimdDouble a, SimdDouble b, SimdDouble c)
611 return -a*b-c;
614 /*! \brief double SIMD 1.0/sqrt(x) lookup.
616 * This is a low-level instruction that should only be called from routines
617 * implementing the inverse square root in simd_math.h.
619 * \param x Argument, x>0
620 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
622 static inline SimdDouble gmx_simdcall
623 rsqrt(SimdDouble x)
625 SimdDouble res;
627 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
629 // sic - we only use single precision for the lookup
630 res.simdInternal_[i] = 1.0f / std::sqrt(static_cast<float>(x.simdInternal_[i]));
632 return res;
635 /*! \brief SIMD double 1.0/x lookup.
637 * This is a low-level instruction that should only be called from routines
638 * implementing the reciprocal in simd_math.h.
640 * \param x Argument, x!=0
641 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
643 static inline SimdDouble gmx_simdcall
644 rcp(SimdDouble x)
646 SimdDouble res;
648 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
650 // sic - we only use single precision for the lookup
651 res.simdInternal_[i] = 1.0f / static_cast<float>(x.simdInternal_[i]);
653 return res;
656 /*! \brief Add two double SIMD variables, masked version.
658 * \param a term1
659 * \param b term2
660 * \param m mask
661 * \return a+b where mask is true, 0.0 otherwise.
663 static inline SimdDouble gmx_simdcall
664 maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
666 SimdDouble res;
668 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
670 res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0);
672 return res;
675 /*! \brief Multiply two double SIMD variables, masked version.
677 * \param a factor1
678 * \param b factor2
679 * \param m mask
680 * \return a*b where mask is true, 0.0 otherwise.
682 static inline SimdDouble gmx_simdcall
683 maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
685 SimdDouble res;
687 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
689 res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0;
691 return res;
694 /*! \brief SIMD double fused multiply-add, masked version.
696 * \param a factor1
697 * \param b factor2
698 * \param c term
699 * \param m mask
700 * \return a*b+c where mask is true, 0.0 otherwise.
702 static inline SimdDouble gmx_simdcall
703 maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
705 SimdDouble res;
707 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
709 res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0;
711 return res;
714 /*! \brief SIMD double 1.0/sqrt(x) lookup, masked version.
716 * This is a low-level instruction that should only be called from routines
717 * implementing the inverse square root in simd_math.h.
719 * \param x Argument, x>0 for entries where mask is true.
720 * \param m Mask
721 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
722 * The result for masked-out entries will be 0.0.
724 static inline SimdDouble gmx_simdcall
725 maskzRsqrt(SimdDouble x, SimdDBool m)
727 SimdDouble res;
729 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
731 // sic - we only use single precision for the lookup
732 res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0f / std::sqrt(static_cast<float>(x.simdInternal_[i])) : 0.0;
734 return res;
737 /*! \brief SIMD double 1.0/x lookup, masked version.
739 * This is a low-level instruction that should only be called from routines
740 * implementing the reciprocal in simd_math.h.
742 * \param x Argument, x>0 for entries where mask is true.
743 * \param m Mask
744 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
745 * The result for masked-out entries will be 0.0.
747 static inline SimdDouble gmx_simdcall
748 maskzRcp(SimdDouble x, SimdDBool m)
750 SimdDouble res;
752 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
754 res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0f / static_cast<float>(x.simdInternal_[i]) : 0.0;
756 return res;
759 /*! \brief SIMD double floating-point fabs().
761 * \param a any floating point values
762 * \return fabs(a) for each element.
764 static inline SimdDouble gmx_simdcall
765 abs(SimdDouble a)
767 SimdDouble res;
769 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
771 res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
773 return res;
776 /*! \brief Set each SIMD double element to the largest from two variables.
778 * \param a Any floating-point value
779 * \param b Any floating-point value
780 * \return max(a,b) for each element.
782 static inline SimdDouble gmx_simdcall
783 max(SimdDouble a, SimdDouble b)
785 SimdDouble res;
787 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
789 res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
791 return res;
794 /*! \brief Set each SIMD double element to the smallest from two variables.
796 * \param a Any floating-point value
797 * \param b Any floating-point value
798 * \return min(a,b) for each element.
800 static inline SimdDouble gmx_simdcall
801 min(SimdDouble a, SimdDouble b)
803 SimdDouble res;
805 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
807 res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
809 return res;
812 /*! \brief SIMD double round to nearest integer value (in floating-point format).
814 * \param a Any floating-point value
815 * \return The nearest integer, represented in floating-point format.
817 static inline SimdDouble gmx_simdcall
818 round(SimdDouble a)
820 SimdDouble res;
822 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
824 res.simdInternal_[i] = std::round(a.simdInternal_[i]);
826 return res;
829 /*! \brief Truncate SIMD double, i.e. round towards zero - common hardware instruction.
831 * \param a Any floating-point value
832 * \return Integer rounded towards zero, represented in floating-point format.
834 * \note This is truncation towards zero, not floor(). The reason for this
835 * is that truncation is virtually always present as a dedicated hardware
836 * instruction, but floor() frequently isn't.
838 static inline SimdDouble gmx_simdcall
839 trunc(SimdDouble a)
841 SimdDouble res;
843 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
845 res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
847 return res;
850 /*! \brief Extract (integer) exponent and fraction from double precision SIMD.
852 * \param value Floating-point value to extract from
853 * \param[out] exponent Returned exponent of value, integer SIMD format.
854 * \return Fraction of value, floating-point SIMD format.
856 static inline SimdDouble gmx_simdcall
857 frexp(SimdDouble value, SimdDInt32 * exponent)
859 SimdDouble fraction;
861 for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
863 fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
865 return fraction;
868 /*! \brief Multiply a SIMD double value by the number 2 raised to an exp power.
870 * \param value Floating-point number to multiply with new exponent
871 * \param exponent Integer that will not overflow as 2^exponent.
872 * \return value*2^exponent
874 static inline SimdDouble gmx_simdcall
875 ldexp(SimdDouble value, SimdDInt32 exponent)
877 SimdDouble res;
879 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
881 res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
883 return res;
886 /*! \brief Return sum of all elements in SIMD double variable.
888 * \param a SIMD variable to reduce/sum.
889 * \return The sum of all elements in the argument variable.
892 static inline double gmx_simdcall
893 reduce(SimdDouble a)
895 double sum = 0.0;
897 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
899 sum += a.simdInternal_[i];
901 return sum;
/*! \}
 *
 * \name SIMD implementation double precision floating-point comparison, boolean, selection.
 * \{
 */
910 /*! \brief SIMD a==b for double SIMD.
912 * \param a value1
913 * \param b value2
914 * \return Each element of the boolean will be set to true if a==b.
916 * Beware that exact floating-point comparisons are difficult.
918 static inline SimdDBool gmx_simdcall
919 operator==(SimdDouble a, SimdDouble b)
921 SimdDBool res;
923 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
925 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
927 return res;
930 /*! \brief SIMD a!=b for double SIMD.
932 * \param a value1
933 * \param b value2
934 * \return Each element of the boolean will be set to true if a!=b.
936 * Beware that exact floating-point comparisons are difficult.
938 static inline SimdDBool gmx_simdcall
939 operator!=(SimdDouble a, SimdDouble b)
941 SimdDBool res;
943 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
945 res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
947 return res;
950 /*! \brief SIMD a<b for double SIMD.
952 * \param a value1
953 * \param b value2
954 * \return Each element of the boolean will be set to true if a<b.
956 static inline SimdDBool gmx_simdcall
957 operator<(SimdDouble a, SimdDouble b)
959 SimdDBool res;
961 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
963 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
965 return res;
968 /*! \brief SIMD a<=b for double SIMD.
970 * \param a value1
971 * \param b value2
972 * \return Each element of the boolean will be set to true if a<=b.
974 static inline SimdDBool gmx_simdcall
975 operator<=(SimdDouble a, SimdDouble b)
977 SimdDBool res;
979 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
981 res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
983 return res;
986 /*! \brief Return true if any bits are set in the single precision SIMD.
988 * This function is used to handle bitmasks, mainly for exclusions in the
989 * inner kernels. Note that it will return true even for -0.0 (sign bit set),
990 * so it is not identical to not-equal.
992 * \param a value
993 * \return Each element of the boolean will be true if any bit in a is nonzero.
995 static inline SimdDBool gmx_simdcall
996 testBits(SimdDouble a)
998 SimdDBool res;
1000 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1002 union
1004 std::uint64_t i;
1005 double d;
1006 } conv;
1008 conv.d = a.simdInternal_[i];
1009 res.simdInternal_[i] = (conv.i != 0);
1011 return res;
1014 /*! \brief Logical \a and on double precision SIMD booleans.
1016 * \param a logical vars 1
1017 * \param b logical vars 2
1018 * \return For each element, the result boolean is true if a \& b are true.
1020 * \note This is not necessarily a bitwise operation - the storage format
1021 * of booleans is implementation-dependent.
1023 static inline SimdDBool gmx_simdcall
1024 operator&&(SimdDBool a, SimdDBool b)
1026 SimdDBool res;
1028 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1030 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1032 return res;
1035 /*! \brief Logical \a or on double precision SIMD booleans.
1037 * \param a logical vars 1
1038 * \param b logical vars 2
1039 * \return For each element, the result boolean is true if a or b is true.
1041 * Note that this is not necessarily a bitwise operation - the storage format
1042 * of booleans is implementation-dependent.
1044 \ */
1045 static inline SimdDBool gmx_simdcall
1046 operator||(SimdDBool a, SimdDBool b)
1048 SimdDBool res;
1050 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1052 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1054 return res;
1057 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1059 * \param a Logical variable.
1060 * \return true if any element in a is true, otherwise false.
1062 * The actual return value for truth will depend on the architecture,
1063 * so any non-zero value is considered truth.
1065 static inline bool gmx_simdcall
1066 anyTrue(SimdDBool a)
1068 bool res = false;
1070 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1072 res = res || a.simdInternal_[i];
1074 return res;
1077 /*! \brief Select from double precision SIMD variable where boolean is true.
1079 * \param a Floating-point variable to select from
1080 * \param mask Boolean selector
1081 * \return For each element, a is selected for true, 0 for false.
1083 static inline SimdDouble gmx_simdcall
1084 selectByMask(SimdDouble a, SimdDBool mask)
1086 SimdDouble res;
1088 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1090 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
1092 return res;
1095 /*! \brief Select from double precision SIMD variable where boolean is false.
1097 * \param a Floating-point variable to select from
1098 * \param mask Boolean selector
1099 * \return For each element, a is selected for false, 0 for true (sic).
1101 static inline SimdDouble gmx_simdcall
1102 selectByNotMask(SimdDouble a, SimdDBool mask)
1104 SimdDouble res;
1106 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1108 res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
1110 return res;
1113 /*! \brief Vector-blend SIMD double selection.
1115 * \param a First source
1116 * \param b Second source
1117 * \param sel Boolean selector
1118 * \return For each element, select b if sel is true, a otherwise.
1120 static inline SimdDouble gmx_simdcall
1121 blend(SimdDouble a, SimdDouble b, SimdDBool sel)
1123 SimdDouble res;
1125 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1127 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1129 return res;
/*! \}
 *
 * \name SIMD implementation integer (corresponding to double) bitwise logical operations
 * \{
 */
1138 /*! \brief SIMD integer shift left logical, based on immediate value.
1140 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1142 * Logical shift. Each element is shifted (independently) up to 32 positions
1143 * left, while zeros are shifted in from the right.
1145 * \param a integer data to shift
1146 * \param n number of positions to shift left. n<=32.
1147 * \return shifted values
1149 static inline SimdDInt32 gmx_simdcall
1150 operator<<(SimdDInt32 a, int n)
1152 SimdDInt32 res;
1154 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1156 res.simdInternal_[i] = a.simdInternal_[i] << n;
1158 return res;
1161 /*! \brief SIMD integer shift right logical, based on immediate value.
1163 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1165 * Logical shift. Each element is shifted (independently) up to 32 positions
1166 * right, while zeros are shifted in from the left.
1168 * \param a integer data to shift
1169 * \param n number of positions to shift right. n<=32.
1170 * \return shifted values
1172 static inline SimdDInt32 gmx_simdcall
1173 operator>>(SimdDInt32 a, int n)
1175 SimdDInt32 res;
1177 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1179 res.simdInternal_[i] = a.simdInternal_[i] >> n;
1181 return res;
1184 /*! \brief Integer SIMD bitwise and.
1186 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1188 * \note You can \a not use this operation directly to select based on a boolean
1189 * SIMD variable, since booleans are separate from integer SIMD. If that
1190 * is what you need, have a look at \ref gmx::selectByMask instead.
1192 * \param a first integer SIMD
1193 * \param b second integer SIMD
1194 * \return a \& b (bitwise and)
1196 static inline SimdDInt32 gmx_simdcall
1197 operator&(SimdDInt32 a, SimdDInt32 b)
1199 SimdDInt32 res;
1201 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1203 res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1205 return res;
1208 /*! \brief Integer SIMD bitwise not/complement.
1210 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1212 * \note You can \a not use this operation directly to select based on a boolean
1213 * SIMD variable, since booleans are separate from integer SIMD. If that
1214 * is what you need, have a look at \ref gmx::selectByMask instead.
1216 * \param a integer SIMD
1217 * \param b integer SIMD
1218 * \return (~a) & b
1220 static inline SimdDInt32 gmx_simdcall
1221 andNot(SimdDInt32 a, SimdDInt32 b)
1223 SimdDInt32 res;
1225 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1227 res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1229 return res;
1232 /*! \brief Integer SIMD bitwise or.
1234 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1236 * \param a first integer SIMD
1237 * \param b second integer SIMD
1238 * \return a \| b (bitwise or)
1240 static inline SimdDInt32 gmx_simdcall
1241 operator|(SimdDInt32 a, SimdDInt32 b)
1243 SimdDInt32 res;
1245 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1247 res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1249 return res;
1252 /*! \brief Integer SIMD bitwise xor.
1254 * Available if \ref GMX_SIMD_HAVE_DINT32_LOGICAL is 1.
1256 * \param a first integer SIMD
1257 * \param b second integer SIMD
1258 * \return a ^ b (bitwise xor)
1260 static inline SimdDInt32 gmx_simdcall
1261 operator^(SimdDInt32 a, SimdDInt32 b)
1263 SimdDInt32 res;
1265 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1267 res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1269 return res;
/*! \}
 *
 * \name SIMD implementation integer (corresponding to double) arithmetics
 * \{
 */
1278 /*! \brief Add SIMD integers.
1280 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1282 * \param a term1
1283 * \param b term2
1284 * \return a+b
1286 static inline SimdDInt32 gmx_simdcall
1287 operator+(SimdDInt32 a, SimdDInt32 b)
1289 SimdDInt32 res;
1291 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1293 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1295 return res;
1298 /*! \brief Subtract SIMD integers.
1300 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1302 * \param a term1
1303 * \param b term2
1304 * \return a-b
1306 static inline SimdDInt32 gmx_simdcall
1307 operator-(SimdDInt32 a, SimdDInt32 b)
1309 SimdDInt32 res;
1311 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1313 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1315 return res;
1318 /*! \brief Multiply SIMD integers.
1320 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1322 * \param a factor1
1323 * \param b factor2
1324 * \return a*b.
1326 * \note Only the low 32 bits are retained, so this can overflow.
1328 static inline SimdDInt32 gmx_simdcall
1329 operator*(SimdDInt32 a, SimdDInt32 b)
1331 SimdDInt32 res;
1333 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1335 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1337 return res;
/*! \}
 *
 * \name SIMD implementation integer (corresponding to double) comparisons, boolean selection
 * \{
 */
1346 /*! \brief Equality comparison of two integers corresponding to double values.
1348 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1350 * \param a SIMD integer1
1351 * \param b SIMD integer2
1352 * \return SIMD integer boolean with true for elements where a==b
1354 static inline SimdDIBool gmx_simdcall
1355 operator==(SimdDInt32 a, SimdDInt32 b)
1357 SimdDIBool res;
1359 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1361 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1363 return res;
1366 /*! \brief Less-than comparison of two SIMD integers corresponding to double values.
1368 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1370 * \param a SIMD integer1
1371 * \param b SIMD integer2
1372 * \return SIMD integer boolean with true for elements where a<b
1374 static inline SimdDIBool gmx_simdcall
1375 operator<(SimdDInt32 a, SimdDInt32 b)
1377 SimdDIBool res;
1379 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1381 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1383 return res;
1386 /*! \brief Check if any bit is set in each element
1388 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1390 * \param a SIMD integer
1391 * \return SIMD integer boolean with true for elements where any bit is set
1393 static inline SimdDIBool gmx_simdcall
1394 testBits(SimdDInt32 a)
1396 SimdDIBool res;
1398 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1400 res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1402 return res;
1405 /*! \brief Logical AND on SimdDIBool.
1407 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1409 * \param a SIMD boolean 1
1410 * \param b SIMD boolean 2
1411 * \return True for elements where both a and b are true.
1413 static inline SimdDIBool gmx_simdcall
1414 operator&&(SimdDIBool a, SimdDIBool b)
1416 SimdDIBool res;
1418 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1420 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1422 return res;
1425 /*! \brief Logical OR on SimdDIBool.
1427 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1429 * \param a SIMD boolean 1
1430 * \param b SIMD boolean 2
1431 * \return True for elements where both a and b are true.
1433 static inline SimdDIBool gmx_simdcall
1434 operator||(SimdDIBool a, SimdDIBool b)
1436 SimdDIBool res;
1438 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1440 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1442 return res;
1445 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1447 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1449 * The actual return value for "any true" will depend on the architecture.
1450 * Any non-zero value should be considered truth.
1452 * \param a SIMD boolean
1453 * \return True if any of the elements in a is true, otherwise 0.
1455 static inline bool gmx_simdcall
1456 anyTrue(SimdDIBool a)
1458 bool res = false;
1460 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1462 res = res || a.simdInternal_[i];
1464 return res;
1467 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is true.
1469 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1471 * \param a SIMD integer to select from
1472 * \param mask Boolean selector
1473 * \return Elements from a where sel is true, 0 otherwise.
1475 static inline SimdDInt32 gmx_simdcall
1476 selectByMask(SimdDInt32 a, SimdDIBool mask)
1478 SimdDInt32 res;
1480 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1482 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0;
1484 return res;
1487 /*! \brief Select from \ref gmx::SimdDInt32 variable where boolean is false.
1489 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1491 * \param a SIMD integer to select from
1492 * \param mask Boolean selector
1493 * \return Elements from a where sel is false, 0 otherwise (sic).
1495 static inline SimdDInt32 gmx_simdcall
1496 selectByNotMask(SimdDInt32 a, SimdDIBool mask)
1498 SimdDInt32 res;
1500 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1502 res.simdInternal_[i] = mask.simdInternal_[i] ? 0 : a.simdInternal_[i];
1504 return res;
1507 /*! \brief Vector-blend SIMD integer selection.
1509 * Available if \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS is 1.
1511 * \param a First source
1512 * \param b Second source
1513 * \param sel Boolean selector
1514 * \return For each element, select b if sel is true, a otherwise.
1516 static inline SimdDInt32 gmx_simdcall
1517 blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
1519 SimdDInt32 res;
1521 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1523 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1525 return res;
/*! \}
 *
 * \name SIMD implementation conversion operations
 * \{
 */
1534 /*! \brief Round double precision floating point to integer.
1536 * \param a SIMD floating-point
1537 * \return SIMD integer, rounded to nearest integer.
1539 static inline SimdDInt32 gmx_simdcall
1540 cvtR2I(SimdDouble a)
1542 SimdDInt32 b;
1544 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1546 b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1548 return b;
1551 /*! \brief Truncate double precision floating point to integer.
1553 * \param a SIMD floating-point
1554 * \return SIMD integer, truncated to nearest integer.
1556 static inline SimdDInt32 gmx_simdcall
1557 cvttR2I(SimdDouble a)
1559 SimdDInt32 b;
1561 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1563 b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1565 return b;
1568 /*! \brief Convert integer to double precision floating point.
1570 * \param a SIMD integer
1571 * \return SIMD floating-point
1573 static inline SimdDouble gmx_simdcall
1574 cvtI2R(SimdDInt32 a)
1576 SimdDouble b;
1578 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1580 b.simdInternal_[i] = a.simdInternal_[i];
1582 return b;
1585 /*! \brief Convert from double precision boolean to corresponding integer boolean
1587 * \param a SIMD floating-point boolean
1588 * \return SIMD integer boolean
1590 static inline SimdDIBool gmx_simdcall
1591 cvtB2IB(SimdDBool a)
1593 SimdDIBool b;
1595 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1597 b.simdInternal_[i] = a.simdInternal_[i];
1599 return b;
1602 /*! \brief Convert from integer boolean to corresponding double precision boolean
1604 * \param a SIMD integer boolean
1605 * \return SIMD floating-point boolean
1607 static inline SimdDBool gmx_simdcall
1608 cvtIB2B(SimdDIBool a)
1610 SimdDBool b;
1612 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1614 b.simdInternal_[i] = a.simdInternal_[i];
1616 return b;
1619 /*! \brief Convert SIMD float to double.
1621 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1622 * \ref GMX_SIMD_DOUBLE_WIDTH.
1624 * Float/double conversions are complex since the SIMD width could either
1625 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1626 * need to check for the width in the code, and have different code paths.
1628 * \param f Single-precision SIMD variable
1629 * \return Double-precision SIMD variable of the same width
1631 static inline SimdDouble gmx_simdcall
1632 cvtF2D(SimdFloat gmx_unused f)
1634 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1635 SimdDouble d;
1636 for (std::size_t i = 0; i < d.simdInternal_.size(); i++)
1638 d.simdInternal_[i] = f.simdInternal_[i];
1640 return d;
1641 #else
1642 gmx_fatal(FARGS, "cvtF2D() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1643 #endif
1646 /*! \brief Convert SIMD double to float.
1648 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is identical to
1649 * \ref GMX_SIMD_DOUBLE_WIDTH.
1651 * Float/double conversions are complex since the SIMD width could either
1652 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1653 * need to check for the width in the code, and have different code paths.
1655 * \param d Double-precision SIMD variable
1656 * \return Single-precision SIMD variable of the same width
1658 static inline SimdFloat gmx_simdcall
1659 cvtD2F(SimdDouble gmx_unused d)
1661 #if (GMX_SIMD_FLOAT_WIDTH == GMX_SIMD_DOUBLE_WIDTH)
1662 SimdFloat f;
1663 for (std::size_t i = 0; i < f.simdInternal_.size(); i++)
1665 f.simdInternal_[i] = d.simdInternal_[i];
1667 return f;
1668 #else
1669 gmx_fatal(FARGS, "cvtD2F() requires GMX_SIMD_FLOAT_WIDTH==GMX_SIMD_DOUBLE_WIDTH");
1670 #endif
1673 /*! \brief Convert SIMD float to double.
1675 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1676 * as \ref GMX_SIMD_DOUBLE_WIDTH.
1678 * Float/double conversions are complex since the SIMD width could either
1679 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1680 * need to check for the width in the code, and have different code paths.
1682 * \param f Single-precision SIMD variable
1683 * \param[out] d0 Double-precision SIMD variable, first half of values from f.
1684 * \param[out] d1 Double-precision SIMD variable, second half of values from f.
1686 static inline void gmx_simdcall
1687 cvtF2DD(SimdFloat gmx_unused f, SimdDouble gmx_unused * d0, SimdDouble gmx_unused * d1)
1689 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1690 for (std::size_t i = 0; i < d0->simdInternal_.size(); i++)
1692 d0->simdInternal_[i] = f.simdInternal_[i];
1693 d1->simdInternal_[i] = f.simdInternal_[f.simdInternal_.size()/2 + i];
1695 #else
1696 gmx_fatal(FARGS, "simdCvtF2DD() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1697 #endif
1700 /*! \brief Convert SIMD double to float.
1702 * This version is available if \ref GMX_SIMD_FLOAT_WIDTH is twice as large
1703 * as \ref GMX_SIMD_DOUBLE_WIDTH.
1705 * Float/double conversions are complex since the SIMD width could either
1706 * be different (e.g. on x86) or identical (e.g. IBM QPX). This means you will
1707 * need to check for the width in the code, and have different code paths.
1709 * \param d0 Double-precision SIMD variable, first half of values to put in f.
1710 * \param d1 Double-precision SIMD variable, second half of values to put in f.
1711 * \return Single-precision SIMD variable with all values.
1713 static inline SimdFloat gmx_simdcall
1714 cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
1716 #if (GMX_SIMD_FLOAT_WIDTH == 2*GMX_SIMD_DOUBLE_WIDTH)
1717 SimdFloat f;
1718 for (std::size_t i = 0; i < d0.simdInternal_.size(); i++)
1720 f.simdInternal_[i] = d0.simdInternal_[i];
1721 f.simdInternal_[f.simdInternal_.size()/2 + i] = d1.simdInternal_[i];
1723 return f;
1724 #else
1725 gmx_fatal(FARGS, "simdCvtDD2F() requires GMX_SIMD_FLOAT_WIDTH==2*GMX_SIMD_DOUBLE_WIDTH");
1726 #endif
1729 /*! \} */
1731 /*! \} */
1732 /*! \endcond */
1734 } // namespace gmx
1736 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_DOUBLE_H