Introduce SimulatorBuilder
[gromacs.git] / src / gromacs / simd / impl_reference / impl_reference_simd_float.h
blobc8e4c13d255a63d92a28deab087f7fa815be5c1c
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
37 #define GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H
39 /*! \libinternal \file
41 * \brief Reference implementation, SIMD single precision.
43 * \author Erik Lindahl <erik.lindahl@scilifelab.se>
45 * \ingroup module_simd
#include "config.h"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>

#include <algorithm>
#include <array>

#include "gromacs/math/utilities.h"

#include "impl_reference_definitions.h"
62 namespace gmx
65 /*! \cond libapi */
66 /*! \addtogroup module_simd */
67 /*! \{ */
69 /* \name SIMD implementation data types and built-in conversions between types
70 * \{
73 /*! \libinternal \brief Float SIMD variable. Available if GMX_SIMD_HAVE_FLOAT is 1.
75 * \note This variable cannot be placed inside other structures or classes, since
76 * some compilers (including at least clang-3.7) appear to lose the
77 * alignment. This is likely particularly severe when allocating such
78 * memory on the heap, but it occurs for stack structures too.
80 class SimdFloat
82 public:
83 SimdFloat() {}
85 //! \brief Construct from scalar
86 SimdFloat(float f) { simdInternal_.fill(f); }
88 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
90 * This has to be public to enable usage in combination with static inline
91 * functions, but it should never, EVER, be accessed by any code outside
92 * the corresponding implementation directory since the type will depend
93 * on the architecture.
95 std::array<float, GMX_SIMD_FLOAT_WIDTH> simdInternal_;
98 /*! \libinternal \brief Integer SIMD variable type to use for conversions to/from float.
100 * This is also the widest integer SIMD type. Available if GMX_SIMD_HAVE_FLOAT is 1.
102 * \note The integer SIMD type will always be available, but on architectures
103 * that do not have any real integer SIMD support it might be defined as the
104 * floating-point type. This will work fine, since there are separate defines
105 * for whether the implementation can actually do any operations on integer
106 * SIMD types.
107 * \note This variable cannot be placed inside other structures or classes, since
108 * some compilers (including at least clang-3.7) appear to lose the
109 * alignment. This is likely particularly severe when allocating such
110 * memory on the heap, but it occurs for stack structures too.
112 class SimdFInt32
114 public:
115 SimdFInt32() {}
117 //! \brief Construct from scalar
118 SimdFInt32(std::int32_t i) { simdInternal_.fill(i); }
120 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
122 * This has to be public to enable usage in combination with static inline
123 * functions, but it should never, EVER, be accessed by any code outside
124 * the corresponding implementation directory since the type will depend
125 * on the architecture.
127 std::array<std::int32_t, GMX_SIMD_FINT32_WIDTH> simdInternal_;
130 /*! \libinternal \brief Boolean type for float SIMD data.
132 * Available if GMX_SIMD_HAVE_FLOAT is 1.
134 * \note This variable cannot be placed inside other structures or classes, since
135 * some compilers (including at least clang-3.7) appear to lose the
136 * alignment. This is likely particularly severe when allocating such
137 * memory on the heap, but it occurs for stack structures too.
139 class SimdFBool
141 public:
142 SimdFBool() {}
144 //! \brief Construct from scalar
145 SimdFBool(bool b) { simdInternal_.fill(b); }
147 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
149 * This has to be public to enable usage in combination with static inline
150 * functions, but it should never, EVER, be accessed by any code outside
151 * the corresponding implementation directory since the type will depend
152 * on the architecture.
154 std::array<bool, GMX_SIMD_FLOAT_WIDTH> simdInternal_;
157 /*! \libinternal \brief Boolean type for integer datatypes corresponding to float SIMD.
159 * Available if GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
161 * \note This variable cannot be placed inside other structures or classes, since
162 * some compilers (including at least clang-3.7) appear to lose the
163 * alignment. This is likely particularly severe when allocating such
164 * memory on the heap, but it occurs for stack structures too.
166 class SimdFIBool
168 public:
169 SimdFIBool() {}
171 //! \brief Construct from scalar
172 SimdFIBool(bool b) { simdInternal_.fill(b); }
174 /*! \brief Internal SIMD data. Implementation dependent, don't touch.
176 * This has to be public to enable usage in combination with static inline
177 * functions, but it should never, EVER, be accessed by any code outside
178 * the corresponding implementation directory since the type will depend
179 * on the architecture.
181 std::array<bool, GMX_SIMD_FINT32_WIDTH> simdInternal_;
184 /*! \}
186 * \name SIMD implementation load/store operations for single precision floating point
187 * \{
190 /*! \brief Load \ref GMX_SIMD_FLOAT_WIDTH float numbers from aligned memory.
192 * \param m Pointer to memory aligned to the SIMD width.
193 * \return SIMD variable with data loaded.
195 static inline SimdFloat gmx_simdcall
196 simdLoad(const float *m, SimdFloatTag = {})
198 SimdFloat a;
200 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(float)) == 0);
202 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
203 return a;
206 /*! \brief Store the contents of SIMD float variable to aligned memory m.
208 * \param[out] m Pointer to memory, aligned to SIMD width.
209 * \param a SIMD variable to store
211 static inline void gmx_simdcall
212 store(float *m, SimdFloat a)
214 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(float)) == 0);
216 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
219 /*! \brief Load SIMD float from unaligned memory.
221 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
223 * \param m Pointer to memory, no alignment requirement.
224 * \return SIMD variable with data loaded.
226 static inline SimdFloat gmx_simdcall
227 simdLoadU(const float *m, SimdFloatTag = {})
229 SimdFloat a;
230 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
231 return a;
234 /*! \brief Store SIMD float to unaligned memory.
236 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
238 * \param[out] m Pointer to memory, no alignment requirement.
239 * \param a SIMD variable to store.
241 static inline void gmx_simdcall
242 storeU(float *m, SimdFloat a)
244 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
247 /*! \brief Set all SIMD float variable elements to 0.0.
249 * You should typically just call \ref gmx::setZero(), which uses proxy objects
250 * internally to handle all types rather than adding the suffix used here.
252 * \return SIMD 0.0f
254 static inline SimdFloat gmx_simdcall
255 setZeroF()
257 return SimdFloat(0.0f);
260 /*! \} */
264 * \name SIMD implementation load/store operations for integers (corresponding to float)
265 * \{
268 /*! \brief Load aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
270 * You should typically just call \ref gmx::load(), which uses proxy objects
271 * internally to handle all types rather than adding the suffix used here.
273 * \param m Pointer to memory, aligned to (float) integer SIMD width.
274 * \return SIMD integer variable.
276 static inline SimdFInt32 gmx_simdcall
277 simdLoad(const std::int32_t * m, SimdFInt32Tag)
279 SimdFInt32 a;
281 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
283 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
284 return a;
287 /*! \brief Store aligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
289 * \param m Memory aligned to (float) integer SIMD width.
290 * \param a SIMD variable to store.
292 static inline void gmx_simdcall
293 store(std::int32_t * m, SimdFInt32 a)
295 assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(std::int32_t)) == 0);
297 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
300 /*! \brief Load unaligned integer SIMD data, width corresponds to \ref gmx::SimdFloat.
302 * You should typically just call \ref gmx::loadU(), which uses proxy objects
303 * internally to handle all types rather than adding the suffix used here.
305 * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
307 * \param m Pointer to memory, no alignment requirements.
308 * \return SIMD integer variable.
310 static inline SimdFInt32 gmx_simdcall
311 simdLoadU(const std::int32_t *m, SimdFInt32Tag)
313 SimdFInt32 a;
314 std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
315 return a;
318 /*! \brief Store unaligned SIMD integer data, width corresponds to \ref gmx::SimdFloat.
320 * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
322 * \param m Memory pointer, no alignment requirements.
323 * \param a SIMD variable to store.
325 static inline void gmx_simdcall
326 storeU(std::int32_t * m, SimdFInt32 a)
328 std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
331 /*! \brief Set all SIMD (float) integer variable elements to 0.
333 * You should typically just call \ref gmx::setZero(), which uses proxy objects
334 * internally to handle all types rather than adding the suffix used here.
336 * \return SIMD 0
338 static inline SimdFInt32 gmx_simdcall
339 setZeroFI()
341 return SimdFInt32(0);
344 /*! \brief Extract element with index i from \ref gmx::SimdFInt32.
346 * Available if \ref GMX_SIMD_HAVE_FINT32_EXTRACT is 1.
348 * \tparam index Compile-time constant, position to extract (first position is 0)
349 * \param a SIMD variable from which to extract value.
350 * \return Single integer from position index in SIMD variable.
352 template<int index>
353 static inline std::int32_t gmx_simdcall
354 extract(SimdFInt32 a)
356 return a.simdInternal_[index];
359 /*! \}
361 * \name SIMD implementation single precision floating-point bitwise logical operations
362 * \{
365 /*! \brief Bitwise and for two SIMD float variables.
367 * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
369 * \param a data1
370 * \param b data2
371 * \return data1 & data2
373 static inline SimdFloat gmx_simdcall
374 operator&(SimdFloat a, SimdFloat b)
376 SimdFloat res;
378 union
380 float r;
381 std::int32_t i;
383 conv1, conv2;
385 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
387 conv1.r = a.simdInternal_[i];
388 conv2.r = b.simdInternal_[i];
389 conv1.i = conv1.i & conv2.i;
390 res.simdInternal_[i] = conv1.r;
392 return res;
395 /*! \brief Bitwise andnot for SIMD float.
397 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
399 * \param a data1
400 * \param b data2
401 * \return (~data1) & data2
403 static inline SimdFloat gmx_simdcall
404 andNot(SimdFloat a, SimdFloat b)
406 SimdFloat res;
408 union
410 float r;
411 std::int32_t i;
413 conv1, conv2;
415 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
417 conv1.r = a.simdInternal_[i];
418 conv2.r = b.simdInternal_[i];
419 conv1.i = ~conv1.i & conv2.i;
420 res.simdInternal_[i] = conv1.r;
422 return res;
425 /*! \brief Bitwise or for SIMD float.
427 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
429 * \param a data1
430 * \param b data2
431 * \return data1 | data2
433 static inline SimdFloat gmx_simdcall
434 operator|(SimdFloat a, SimdFloat b)
436 SimdFloat res;
438 union
440 float r;
441 std::int32_t i;
443 conv1, conv2;
445 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
447 conv1.r = a.simdInternal_[i];
448 conv2.r = b.simdInternal_[i];
449 conv1.i = conv1.i | conv2.i;
450 res.simdInternal_[i] = conv1.r;
452 return res;
455 /*! \brief Bitwise xor for SIMD float.
457 * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
459 * \param a data1
460 * \param b data2
461 * \return data1 ^ data2
463 static inline SimdFloat gmx_simdcall
464 operator^(SimdFloat a, SimdFloat b)
466 SimdFloat res;
468 union
470 float r;
471 std::int32_t i;
473 conv1, conv2;
475 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
477 conv1.r = a.simdInternal_[i];
478 conv2.r = b.simdInternal_[i];
479 conv1.i = conv1.i ^ conv2.i;
480 res.simdInternal_[i] = conv1.r;
482 return res;
485 /*! \}
487 * \name SIMD implementation single precision floating-point arithmetics
488 * \{
491 /*! \brief Add two float SIMD variables.
493 * \param a term1
494 * \param b term2
495 * \return a+b
497 static inline SimdFloat gmx_simdcall
498 operator+(SimdFloat a, SimdFloat b)
500 SimdFloat res;
502 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
504 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
506 return res;
509 /*! \brief Subtract two float SIMD variables.
511 * \param a term1
512 * \param b term2
513 * \return a-b
515 static inline SimdFloat gmx_simdcall
516 operator-(SimdFloat a, SimdFloat b)
518 SimdFloat res;
520 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
522 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
524 return res;
527 /*! \brief SIMD single precision negate.
529 * \param a SIMD double precision value
530 * \return -a
532 static inline SimdFloat gmx_simdcall
533 operator-(SimdFloat a)
535 SimdFloat res;
537 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
539 res.simdInternal_[i] = -a.simdInternal_[i];
541 return res;
544 /*! \brief Multiply two float SIMD variables.
546 * \param a factor1
547 * \param b factor2
548 * \return a*b.
550 static inline SimdFloat gmx_simdcall
551 operator*(SimdFloat a, SimdFloat b)
553 SimdFloat res;
555 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
557 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
559 return res;
562 /*! \brief SIMD float Fused-multiply-add. Result is a*b+c.
564 * \param a factor1
565 * \param b factor2
566 * \param c term
567 * \return a*b+c
569 static inline SimdFloat gmx_simdcall
570 fma(SimdFloat a, SimdFloat b, SimdFloat c)
572 return a*b+c;
575 /*! \brief SIMD float Fused-multiply-subtract. Result is a*b-c.
577 * \param a factor1
578 * \param b factor2
579 * \param c term
580 * \return a*b-c
582 static inline SimdFloat gmx_simdcall
583 fms(SimdFloat a, SimdFloat b, SimdFloat c)
585 return a*b-c;
588 /*! \brief SIMD float Fused-negated-multiply-add. Result is -a*b+c.
590 * \param a factor1
591 * \param b factor2
592 * \param c term
593 * \return -a*b+c
595 static inline SimdFloat gmx_simdcall
596 fnma(SimdFloat a, SimdFloat b, SimdFloat c)
598 return c-a*b;
601 /*! \brief SIMD float Fused-negated-multiply-subtract. Result is -a*b-c.
603 * \param a factor1
604 * \param b factor2
605 * \param c term
606 * \return -a*b-c
608 static inline SimdFloat gmx_simdcall
609 fnms(SimdFloat a, SimdFloat b, SimdFloat c)
611 return -a*b-c;
614 /*! \brief SIMD float 1.0/sqrt(x) lookup.
616 * This is a low-level instruction that should only be called from routines
617 * implementing the inverse square root in simd_math.h.
619 * \param x Argument, x>0
620 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
622 static inline SimdFloat gmx_simdcall
623 rsqrt(SimdFloat x)
625 SimdFloat res;
627 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
629 res.simdInternal_[i] = 1.0f / std::sqrt(x.simdInternal_[i]);
631 return res;
634 /*! \brief SIMD float 1.0/x lookup.
636 * This is a low-level instruction that should only be called from routines
637 * implementing the reciprocal in simd_math.h.
639 * \param x Argument, x!=0
640 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
642 static inline SimdFloat gmx_simdcall
643 rcp(SimdFloat x)
645 SimdFloat res;
647 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
649 res.simdInternal_[i] = 1.0f / x.simdInternal_[i];
651 return res;
654 /*! \brief Add two float SIMD variables, masked version.
656 * \param a term1
657 * \param b term2
658 * \param m mask
659 * \return a+b where mask is true, a otherwise.
661 static inline SimdFloat gmx_simdcall
662 maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
664 SimdFloat res;
666 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
668 res.simdInternal_[i] = a.simdInternal_[i] + (m.simdInternal_[i] ? b.simdInternal_[i] : 0.0f);
670 return res;
673 /*! \brief Multiply two float SIMD variables, masked version.
675 * \param a factor1
676 * \param b factor2
677 * \param m mask
678 * \return a*b where mask is true, 0.0 otherwise.
680 static inline SimdFloat gmx_simdcall
681 maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
683 SimdFloat res;
685 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
687 res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i]) : 0.0f;
689 return res;
692 /*! \brief SIMD float fused multiply-add, masked version.
694 * \param a factor1
695 * \param b factor2
696 * \param c term
697 * \param m mask
698 * \return a*b+c where mask is true, 0.0 otherwise.
700 static inline SimdFloat gmx_simdcall
701 maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
703 SimdFloat res;
705 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
707 res.simdInternal_[i] = m.simdInternal_[i] ? (a.simdInternal_[i] * b.simdInternal_[i] + c.simdInternal_[i]) : 0.0f;
709 return res;
712 /*! \brief SIMD float 1.0/sqrt(x) lookup, masked version.
714 * This is a low-level instruction that should only be called from routines
715 * implementing the inverse square root in simd_math.h.
717 * \param x Argument, x>0 for entries where mask is true.
718 * \param m Mask
719 * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
720 * The result for masked-out entries will be 0.0.
722 static inline SimdFloat gmx_simdcall
723 maskzRsqrt(SimdFloat x, SimdFBool m)
725 SimdFloat res;
727 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
729 res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0f / std::sqrt(x.simdInternal_[i]) : 0.0f;
731 return res;
734 /*! \brief SIMD float 1.0/x lookup, masked version.
736 * This is a low-level instruction that should only be called from routines
737 * implementing the reciprocal in simd_math.h.
739 * \param x Argument, x>0 for entries where mask is true.
740 * \param m Mask
741 * \return Approximation of 1/x, accuracy is \ref GMX_SIMD_RCP_BITS.
742 * The result for masked-out entries will be 0.0.
744 static inline SimdFloat gmx_simdcall
745 maskzRcp(SimdFloat x, SimdFBool m)
747 SimdFloat res;
749 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
751 res.simdInternal_[i] = (m.simdInternal_[i] != 0) ? 1.0f / x.simdInternal_[i] : 0.0f;
753 return res;
756 /*! \brief SIMD float Floating-point abs().
758 * \param a any floating point values
759 * \return abs(a) for each element.
761 static inline SimdFloat gmx_simdcall
762 abs(SimdFloat a)
764 SimdFloat res;
766 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
768 res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
770 return res;
773 /*! \brief Set each SIMD float element to the largest from two variables.
775 * \param a Any floating-point value
776 * \param b Any floating-point value
777 * \return max(a,b) for each element.
779 static inline SimdFloat gmx_simdcall
780 max(SimdFloat a, SimdFloat b)
782 SimdFloat res;
784 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
786 res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
788 return res;
791 /*! \brief Set each SIMD float element to the smallest from two variables.
793 * \param a Any floating-point value
794 * \param b Any floating-point value
795 * \return min(a,b) for each element.
797 static inline SimdFloat gmx_simdcall
798 min(SimdFloat a, SimdFloat b)
800 SimdFloat res;
802 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
804 res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
806 return res;
809 /*! \brief SIMD float round to nearest integer value (in floating-point format).
811 * \param a Any floating-point value
812 * \return The nearest integer, represented in floating-point format.
814 * \note Round mode is implementation defined. The only guarantee is that it
815 * is consistent between rounding functions (round, cvtR2I).
817 static inline SimdFloat gmx_simdcall
818 round(SimdFloat a)
820 SimdFloat res;
822 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
824 res.simdInternal_[i] = std::round(a.simdInternal_[i]);
826 return res;
829 /*! \brief Truncate SIMD float, i.e. round towards zero - common hardware instruction.
831 * \param a Any floating-point value
832 * \return Integer rounded towards zero, represented in floating-point format.
834 * \note This is truncation towards zero, not floor(). The reason for this
835 * is that truncation is virtually always present as a dedicated hardware
836 * instruction, but floor() frequently isn't.
838 static inline SimdFloat gmx_simdcall
839 trunc(SimdFloat a)
841 SimdFloat res;
843 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
845 res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
847 return res;
850 /*! \brief Extract (integer) exponent and fraction from single precision SIMD.
852 * \param value Floating-point value to extract from
853 * \param[out] exponent Returned exponent of value, integer SIMD format.
854 * \return Fraction of value, floating-point SIMD format.
856 static inline SimdFloat gmx_simdcall
857 frexp(SimdFloat value, SimdFInt32 * exponent)
859 SimdFloat fraction;
861 for (std::size_t i = 0; i < fraction.simdInternal_.size(); i++)
863 fraction.simdInternal_[i] = std::frexp(value.simdInternal_[i], &exponent->simdInternal_[i]);
865 return fraction;
868 /*! \brief Multiply a SIMD float value by the number 2 raised to an exp power.
870 * \tparam opt By default, this routine will return zero for input arguments
871 * that are so small they cannot be reproduced in the current
872 * precision. If the unsafe math optimization template parameter
873 * setting is used, these tests are skipped, and the result will
874 * be undefined (possible even NaN). This might happen below -127
875 * in single precision or -1023 in double, although some
876 * might use denormal support to extend the range.
878 * \param value Floating-point number to multiply with new exponent
879 * \param exponent Integer that will not overflow as 2^exponent.
880 * \return value*2^exponent
882 template <MathOptimization opt = MathOptimization::Safe>
883 static inline SimdFloat gmx_simdcall
884 ldexp(SimdFloat value, SimdFInt32 exponent)
886 SimdFloat res;
888 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
890 // std::ldexp already takes care of clamping arguments, so we do not
891 // need to do anything in the reference implementation
892 res.simdInternal_[i] = std::ldexp(value.simdInternal_[i], exponent.simdInternal_[i]);
894 return res;
897 /*! \brief Return sum of all elements in SIMD float variable.
899 * \param a SIMD variable to reduce/sum.
900 * \return The sum of all elements in the argument variable.
903 static inline float gmx_simdcall
904 reduce(SimdFloat a)
906 float sum = 0.0f;
908 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
910 sum += a.simdInternal_[i];
912 return sum;
915 /*! \}
917 * \name SIMD implementation single precision floating-point comparisons, boolean, selection.
918 * \{
921 /*! \brief SIMD a==b for single SIMD.
923 * \param a value1
924 * \param b value2
925 * \return Each element of the boolean will be set to true if a==b.
927 * Beware that exact floating-point comparisons are difficult.
929 static inline SimdFBool gmx_simdcall
930 operator==(SimdFloat a, SimdFloat b)
932 SimdFBool res;
934 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
936 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
938 return res;
941 /*! \brief SIMD a!=b for single SIMD.
943 * \param a value1
944 * \param b value2
945 * \return Each element of the boolean will be set to true if a!=b.
947 * Beware that exact floating-point comparisons are difficult.
949 static inline SimdFBool gmx_simdcall
950 operator!=(SimdFloat a, SimdFloat b)
952 SimdFBool res;
954 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
956 res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
958 return res;
961 /*! \brief SIMD a<b for single SIMD.
963 * \param a value1
964 * \param b value2
965 * \return Each element of the boolean will be set to true if a<b.
967 static inline SimdFBool gmx_simdcall
968 operator<(SimdFloat a, SimdFloat b)
970 SimdFBool res;
972 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
974 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
976 return res;
979 /*! \brief SIMD a<=b for single SIMD.
981 * \param a value1
982 * \param b value2
983 * \return Each element of the boolean will be set to true if a<=b.
985 static inline SimdFBool gmx_simdcall
986 operator<=(SimdFloat a, SimdFloat b)
988 SimdFBool res;
990 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
992 res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
994 return res;
997 /*! \brief Return true if any bits are set in the single precision SIMD.
999 * This function is used to handle bitmasks, mainly for exclusions in the
1000 * inner kernels. Note that it will return true even for -0.0f (sign bit set),
1001 * so it is not identical to not-equal.
1003 * \param a value
1004 * \return Each element of the boolean will be true if any bit in a is nonzero.
1006 static inline SimdFBool gmx_simdcall
1007 testBits(SimdFloat a)
1009 SimdFBool res;
1011 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1013 union
1015 std::uint32_t i;
1016 float f;
1017 } conv;
1019 conv.f = a.simdInternal_[i];
1020 res.simdInternal_[i] = (conv.i != 0);
1022 return res;
1025 /*! \brief Logical \a and on single precision SIMD booleans.
1027 * \param a logical vars 1
1028 * \param b logical vars 2
1029 * \return For each element, the result boolean is true if a \& b are true.
1031 * \note This is not necessarily a bitwise operation - the storage format
1032 * of booleans is implementation-dependent.
1034 static inline SimdFBool gmx_simdcall
1035 operator&&(SimdFBool a, SimdFBool b)
1037 SimdFBool res;
1039 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1041 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1043 return res;
1046 /*! \brief Logical \a or on single precision SIMD booleans.
1048 * \param a logical vars 1
1049 * \param b logical vars 2
1050 * \return For each element, the result boolean is true if a or b is true.
1052 * Note that this is not necessarily a bitwise operation - the storage format
1053 * of booleans is implementation-dependent.
1055 \ */
1056 static inline SimdFBool gmx_simdcall
1057 operator||(SimdFBool a, SimdFBool b)
1059 SimdFBool res;
1061 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1063 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1065 return res;
1068 /*! \brief Returns non-zero if any of the boolean in SIMD a is True, otherwise 0.
1070 * \param a Logical variable.
1071 * \return true if any element in a is true, otherwise false.
1073 * The actual return value for truth will depend on the architecture,
1074 * so any non-zero value is considered truth.
1076 static inline bool gmx_simdcall
1077 anyTrue(SimdFBool a)
1079 bool res = false;
1081 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1083 res = res || a.simdInternal_[i];
1085 return res;
1088 /*! \brief Select from single precision SIMD variable where boolean is true.
1090 * \param a Floating-point variable to select from
1091 * \param mask Boolean selector
1092 * \return For each element, a is selected for true, 0 for false.
1094 static inline SimdFloat gmx_simdcall
1095 selectByMask(SimdFloat a, SimdFBool mask)
1097 SimdFloat res;
1099 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1101 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0f;
1103 return res;
1106 /*! \brief Select from single precision SIMD variable where boolean is false.
1108 * \param a Floating-point variable to select from
1109 * \param mask Boolean selector
1110 * \return For each element, a is selected for false, 0 for true (sic).
1112 static inline SimdFloat gmx_simdcall
1113 selectByNotMask(SimdFloat a, SimdFBool mask)
1115 SimdFloat res;
1117 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1119 res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0f : a.simdInternal_[i];
1121 return res;
1124 /*! \brief Vector-blend SIMD float selection.
1126 * \param a First source
1127 * \param b Second source
1128 * \param sel Boolean selector
1129 * \return For each element, select b if sel is true, a otherwise.
1131 static inline SimdFloat gmx_simdcall
1132 blend(SimdFloat a, SimdFloat b, SimdFBool sel)
1134 SimdFloat res;
1136 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1138 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1140 return res;
1143 /*! \}
1145 * \name SIMD implementation integer (corresponding to float) bitwise logical operations
1146 * \{
1149 /*! \brief Integer SIMD bitwise and.
1151 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1153 * \note You can \a not use this operation directly to select based on a boolean
1154 * SIMD variable, since booleans are separate from integer SIMD. If that
1155 * is what you need, have a look at \ref gmx::selectByMask instead.
1157 * \param a first integer SIMD
1158 * \param b second integer SIMD
1159 * \return a \& b (bitwise and)
1161 static inline SimdFInt32 gmx_simdcall
1162 operator&(SimdFInt32 a, SimdFInt32 b)
1164 SimdFInt32 res;
1166 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1168 res.simdInternal_[i] = a.simdInternal_[i] & b.simdInternal_[i];
1170 return res;
1173 /*! \brief Integer SIMD bitwise not/complement.
1175 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1177 * \note You can \a not use this operation directly to select based on a boolean
1178 * SIMD variable, since booleans are separate from integer SIMD. If that
1179 * is what you need, have a look at \ref gmx::selectByMask instead.
1181 * \param a integer SIMD
1182 * \param b integer SIMD
1183 * \return (~a) & b
1185 static inline SimdFInt32 gmx_simdcall
1186 andNot(SimdFInt32 a, SimdFInt32 b)
1188 SimdFInt32 res;
1190 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1192 res.simdInternal_[i] = ~a.simdInternal_[i] & b.simdInternal_[i];
1194 return res;
1197 /*! \brief Integer SIMD bitwise or.
1199 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1201 * \param a first integer SIMD
1202 * \param b second integer SIMD
1203 * \return a \| b (bitwise or)
1205 static inline SimdFInt32 gmx_simdcall
1206 operator|(SimdFInt32 a, SimdFInt32 b)
1208 SimdFInt32 res;
1210 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1212 res.simdInternal_[i] = a.simdInternal_[i] | b.simdInternal_[i];
1214 return res;
1217 /*! \brief Integer SIMD bitwise xor.
1219 * Available if \ref GMX_SIMD_HAVE_FINT32_LOGICAL is 1.
1221 * \param a first integer SIMD
1222 * \param b second integer SIMD
1223 * \return a ^ b (bitwise xor)
1225 static inline SimdFInt32 gmx_simdcall
1226 operator^(SimdFInt32 a, SimdFInt32 b)
1228 SimdFInt32 res;
1230 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1232 res.simdInternal_[i] = a.simdInternal_[i] ^ b.simdInternal_[i];
1234 return res;
1237 /*! \}
1239 * \name SIMD implementation integer (corresponding to float) arithmetics
1240 * \{
1243 /*! \brief Add SIMD integers.
1245 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1246 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1248 * \param a term1
1249 * \param b term2
1250 * \return a+b
1252 static inline SimdFInt32 gmx_simdcall
1253 operator+(SimdFInt32 a, SimdFInt32 b)
1255 SimdFInt32 res;
1257 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1259 res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
1261 return res;
1264 /*! \brief Subtract SIMD integers.
1266 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1267 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1269 * \param a term1
1270 * \param b term2
1271 * \return a-b
1273 static inline SimdFInt32 gmx_simdcall
1274 operator-(SimdFInt32 a, SimdFInt32 b)
1276 SimdFInt32 res;
1278 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1280 res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
1282 return res;
1285 /*! \brief Multiply SIMD integers.
1287 * This routine is only available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS (single)
1288 * or \ref GMX_SIMD_HAVE_DINT32_ARITHMETICS (double) is 1.
1290 * \param a factor1
1291 * \param b factor2
1292 * \return a*b.
1294 * \note Only the low 32 bits are retained, so this can overflow.
1296 static inline SimdFInt32 gmx_simdcall
1297 operator*(SimdFInt32 a, SimdFInt32 b)
1299 SimdFInt32 res;
1301 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1303 res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
1305 return res;
1308 /*! \}
1310 * \name SIMD implementation integer (corresponding to float) comparisons, boolean, selection
1311 * \{
1314 /*! \brief Equality comparison of two integers corresponding to float values.
1316 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1318 * \param a SIMD integer1
1319 * \param b SIMD integer2
1320 * \return SIMD integer boolean with true for elements where a==b
1322 static inline SimdFIBool gmx_simdcall
1323 operator==(SimdFInt32 a, SimdFInt32 b)
1325 SimdFIBool res;
1327 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1329 res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
1331 return res;
1334 /*! \brief Less-than comparison of two SIMD integers corresponding to float values.
1336 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1338 * \param a SIMD integer1
1339 * \param b SIMD integer2
1340 * \return SIMD integer boolean with true for elements where a<b
1342 static inline SimdFIBool gmx_simdcall
1343 operator<(SimdFInt32 a, SimdFInt32 b)
1345 SimdFIBool res;
1347 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1349 res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
1351 return res;
1354 /*! \brief Check if any bit is set in each element
1356 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1358 * \param a SIMD integer
1359 * \return SIMD integer boolean with true for elements where any bit is set
1361 static inline SimdFIBool gmx_simdcall
1362 testBits(SimdFInt32 a)
1364 SimdFIBool res;
1366 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1368 res.simdInternal_[i] = (a.simdInternal_[i] != 0);
1370 return res;
1373 /*! \brief Logical AND on SimdFIBool.
1375 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1377 * \param a SIMD boolean 1
1378 * \param b SIMD boolean 2
1379 * \return True for elements where both a and b are true.
1381 static inline SimdFIBool gmx_simdcall
1382 operator&&(SimdFIBool a, SimdFIBool b)
1384 SimdFIBool res;
1386 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1388 res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
1390 return res;
1393 /*! \brief Logical OR on SimdFIBool.
1395 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1397 * \param a SIMD boolean 1
1398 * \param b SIMD boolean 2
1399 * \return True for elements where both a and b are true.
1401 static inline SimdFIBool gmx_simdcall
1402 operator||(SimdFIBool a, SimdFIBool b)
1404 SimdFIBool res;
1406 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1408 res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
1410 return res;
1413 /*! \brief Returns true if any of the boolean in x is True, otherwise 0.
1415 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1417 * The actual return value for "any true" will depend on the architecture.
1418 * Any non-zero value should be considered truth.
1420 * \param a SIMD boolean
1421 * \return True if any of the elements in a is true, otherwise 0.
1423 static inline bool gmx_simdcall
1424 anyTrue(SimdFIBool a)
1426 bool res = false;
1428 for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
1430 res = res || a.simdInternal_[i];
1432 return res;
1435 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is true.
1437 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1439 * \param a SIMD integer to select from
1440 * \param mask Boolean selector
1441 * \return Elements from a where sel is true, 0 otherwise.
1443 static inline SimdFInt32 gmx_simdcall
1444 selectByMask(SimdFInt32 a, SimdFIBool mask)
1446 SimdFInt32 res;
1448 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1450 res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0f;
1452 return res;
1455 /*! \brief Select from \ref gmx::SimdFInt32 variable where boolean is false.
1457 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1459 * \param a SIMD integer to select from
1460 * \param mask Boolean selector
1461 * \return Elements from a where sel is false, 0 otherwise (sic).
1463 static inline SimdFInt32 gmx_simdcall
1464 selectByNotMask(SimdFInt32 a, SimdFIBool mask)
1466 SimdFInt32 res;
1468 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1470 res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0f : a.simdInternal_[i];
1472 return res;
1475 /*! \brief Vector-blend SIMD integer selection.
1477 * Available if \ref GMX_SIMD_HAVE_FINT32_ARITHMETICS is 1.
1479 * \param a First source
1480 * \param b Second source
1481 * \param sel Boolean selector
1482 * \return For each element, select b if sel is true, a otherwise.
1484 static inline SimdFInt32 gmx_simdcall
1485 blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
1487 SimdFInt32 res;
1489 for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
1491 res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
1493 return res;
1496 /*! \}
1498 * \name SIMD implementation conversion operations
1499 * \{
1502 /*! \brief Round single precision floating point to integer.
1504 * \param a SIMD floating-point
1505 * \return SIMD integer, rounded to nearest integer.
1507 * \note Round mode is implementation defined. The only guarantee is that it
1508 * is consistent between rounding functions (round, cvtR2I).
1510 static inline SimdFInt32 gmx_simdcall
1511 cvtR2I(SimdFloat a)
1513 SimdFInt32 b;
1515 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1517 b.simdInternal_[i] = std::round(a.simdInternal_[i]);
1519 return b;
1522 /*! \brief Truncate single precision floating point to integer.
1524 * \param a SIMD floating-point
1525 * \return SIMD integer, truncated to nearest integer.
1527 static inline SimdFInt32 gmx_simdcall
1528 cvttR2I(SimdFloat a)
1530 SimdFInt32 b;
1532 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1534 b.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
1536 return b;
1539 /*! \brief Convert integer to single precision floating point.
1541 * \param a SIMD integer
1542 * \return SIMD floating-point
1544 static inline SimdFloat gmx_simdcall
1545 cvtI2R(SimdFInt32 a)
1547 SimdFloat b;
1549 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1551 b.simdInternal_[i] = a.simdInternal_[i];
1553 return b;
1556 /*! \brief Convert from single precision boolean to corresponding integer boolean
1558 * \param a SIMD floating-point boolean
1559 * \return SIMD integer boolean
1561 static inline SimdFIBool gmx_simdcall
1562 cvtB2IB(SimdFBool a)
1564 SimdFIBool b;
1566 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1568 b.simdInternal_[i] = a.simdInternal_[i];
1570 return b;
1573 /*! \brief Convert from integer boolean to corresponding single precision boolean
1575 * \param a SIMD integer boolean
1576 * \return SIMD floating-point boolean
1578 static inline SimdFBool gmx_simdcall
1579 cvtIB2B(SimdFIBool a)
1581 SimdFBool b;
1583 for (std::size_t i = 0; i < b.simdInternal_.size(); i++)
1585 b.simdInternal_[i] = a.simdInternal_[i];
1587 return b;
1590 /*! \} */
1592 /*! \} */
1593 /*! \endcond */
1595 } // namespace gmx
1597 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD_FLOAT_H